FM-1976 committed
Commit 0a03401 · 1 Parent(s): 63cc4c8

Create app.py

Files changed (1)
  1. app.py +135 -0
app.py ADDED
# First Commit inspiration:
# https://huggingface.co/spaces/lambeth-dai/Light-PDF-Web-QA-Chatbot/blob/main/app.py
# ---------------------
# model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral',
#                                              model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config))
# ---------------------
import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig, Config
import datetime

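# ctransformers runs quantized GGUF models through its GGML bindings; no GPU
# configuration is set below, so inference happens on CPU.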
i_temperature = 0.30
i_max_new_tokens = 1100
repo = 'TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF'
model_file = "tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
i_repetitionpenalty = 1.2
i_contextlength = 12048
logfile = 'TinyLlamaOpenOrca1.1B-stream.txt'

print("loading model...")
stt = datetime.datetime.now()
conf = AutoConfig(Config(temperature=i_temperature, repetition_penalty=i_repetitionpenalty,
                         batch_size=64, max_new_tokens=i_max_new_tokens,
                         context_length=i_contextlength))
llm = AutoModelForCausalLM.from_pretrained(repo, model_file=model_file,
                                           model_type="llama", config=conf)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")

# MODEL SETTINGS also for DISPLAY
im_user = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/456322.webp'
im_bot = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/TinyLlama_logo.png'

def writehistory(text):
    # Append each chat event to the log file; the with-block closes the file.
    with open(logfile, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')

with gr.Blocks(theme='ParityError/Interstellar') as demo:
    # TITLE SECTION
    with gr.Row():
        with gr.Column(scale=12):
            gr.HTML("<center>"
                    + "<h1>🦙 TinyLlama 1.1B 🐋 OpenOrca 4K context window</h1></center>")
            gr.Markdown("""
**Currently Running**: [tinyllama-1.1b-1t-openorca.Q4_K_M.gguf](https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; **Chat History Log File**: *TinyLlamaOpenOrca1.1B-stream.txt*

- **Base Model**: PY007/TinyLlama-1.1B-intermediate-step-480k-1T, fine-tuned on the OpenOrca GPT-4 subset for 1 epoch, using the ChatML format.
- **License**: Apache 2.0, following the TinyLlama base model. The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk.
""")
        gr.Image(value='https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/TinyLlama_logo.png', width=70)
    # chat and parameters settings
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=350, show_copy_button=True,
                                 avatar_images=[im_user, im_bot])
            with gr.Row():
                with gr.Column(scale=14):
                    msg = gr.Textbox(show_label=False,
                                     placeholder="Enter text",
                                     lines=2)
                submitBtn = gr.Button("\n💬 Send\n", size="lg", variant="primary", min_width=180)

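        # Yielding the partially-built history from bot() below is what makes
        # the Chatbot component render the reply as a live stream.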
        with gr.Column(min_width=50, scale=1):
            with gr.Tab(label="Parameter Setting"):
                gr.Markdown("# Parameters")
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.30,
                    step=0.01,
                    interactive=True,
                    label="Temperature",
                )
                max_length_tokens = gr.Slider(
                    minimum=0,
                    maximum=4096,
                    value=1060,
                    step=4,
                    interactive=True,
                    label="Max Generation Tokens",
                )
                rep_pen = gr.Slider(
                    minimum=0,
                    maximum=5,
                    value=1.2,
                    step=0.05,
                    interactive=True,
                    label="Repetition Penalty",
                )

                clear = gr.Button("🗑️ Clear All Messages", variant='secondary')
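                # Slider values are read at click time and passed into bot(),
                # so sampling parameters can be changed between turns.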

    def user(user_message, history):
        writehistory(f"USER: {user_message}")
        return "", history + [[user_message, None]]

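    # user() appends [message, None] to the history; bot() then fills the None
    # slot, yielding the updated history after each new token.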
    def bot(history, t, p, m, r):
        SYSTEM_PROMPT = """<|im_start|>system
You are a helpful bot. Your answers are clear and concise.
<|im_end|>
"""
        prompt = f"{SYSTEM_PROMPT}<|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n"
        print(f"history length: {len(history)}")
        if len(history) == 1:
            print("this is the first round")
        else:
            print("here we should pass more conversations")
        history[-1][1] = ""
        for character in llm(prompt,
                             temperature=t,
                             top_p=p,
                             repetition_penalty=r,
                             max_new_tokens=m,
                             stop=['<|im_end|>'],
                             stream=True):
            history[-1][1] += character
            yield history
        writehistory(f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n")
        # Log the messages in the terminal
        print(f"USER: {history[-1][0]}\n---\ntemperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n")
    # Clicking the submitBtn calls the generation with the parameters from the sliders
    submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, top_p, max_length_tokens, rep_pen], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()  # required to yield the streams from the text generation
demo.launch(inbrowser=True)
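
# To run locally (assuming the gradio and ctransformers packages are installed):
#   pip install gradio ctransformers
#   python app.py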