Satyam-Singh committed on
Commit 56c2aa5 · verified · 1 Parent(s): a8214c1

Update app.py

Files changed (1)
  1. app.py +73 -71
app.py CHANGED
@@ -7,13 +7,71 @@ client = InferenceClient("Satyam-Singh/LLaVa-Large-Language-Virtual-Assistant")
 
 TITLE = """<h1 align="center">LLaVa Large Language Virtual Assistant</h1>"""
 
-# Set up the model
-generation_config = {
-    "temperature": temperature_component,  # 0.9
-    "top_p": top_p_component,  # 1
-    "top_k": top_k_component,  # 1
-    "max_output_tokens": max_output_tokens_component,  # 4096
-}
+temperature_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.4,
+    step=0.05,
+    label="Temperature",
+    info=(
+        "Temperature controls the degree of randomness in token selection. Lower "
+        "temperatures are good for prompts that expect a true or correct response, "
+        "while higher temperatures can lead to more diverse or unexpected results."
+    ))
+max_output_tokens_component = gr.Slider(
+    minimum=1,
+    maximum=2048,
+    value=1024,
+    step=1,
+    label="Token limit",
+    info=(
+        "Token limit determines the maximum amount of text output from one prompt. A "
+        "token is approximately four characters. The default value is 2048."
+    ))
+stop_sequences_component = gr.Textbox(
+    label="Add stop sequence",
+    value="",
+    type="text",
+    placeholder="STOP, END",
+    info=(
+        "A stop sequence is a series of characters (including spaces) that stops "
+        "response generation if the model encounters it. The sequence is not included "
+        "as part of the response. You can add up to five stop sequences."
+    ))
+top_k_component = gr.Slider(
+    minimum=1,
+    maximum=40,
+    value=32,
+    step=1,
+    label="Top-K",
+    info=(
+        "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
+        "selected token is the most probable among all tokens in the model’s "
+        "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
+        "next token is selected from among the 3 most probable tokens (using "
+        "temperature)."
+    ))
+top_p_component = gr.Slider(
+    minimum=0,
+    maximum=1,
+    value=1,
+    step=0.01,
+    label="Top-P",
+    info=(
+        "Top-p changes how the model selects tokens for output. Tokens are selected "
+        "from most probable to least until the sum of their probabilities equals the "
+        "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
+        "and .1 and the top-p value is .5, then the model will select either A or B as "
+        "the next token (using temperature)."
+    ))
+
+additional_inputs = [
+    temperature_component,
+    max_output_tokens_component,
+    stop_sequences_component,
+    top_k_component,
+    top_p_component,
+]
 
 safety_settings = [
     {
@@ -36,6 +94,14 @@ safety_settings = [
 
 genai.configure(api_key=os.getenv("GOOGLE_PALM_KEY"))
 
+# Set up the model
+generation_config = {
+    "temperature": temperature_component,  # 0.9
+    "top_p": top_p_component,  # 1
+    "top_k": top_k_component,  # 1
+    "max_output_tokens": max_output_tokens_component,  # 4096
+}
+
 model = genai.GenerativeModel(model_name="gemini-pro",
                               generation_config=generation_config,
                               safety_settings=safety_settings)
@@ -99,71 +165,7 @@ convo = model.start_chat(history=[
     },
 ])
 
-temperature_component = gr.Slider(
-    minimum=0,
-    maximum=1.0,
-    value=0.4,
-    step=0.05,
-    label="Temperature",
-    info=(
-        "Temperature controls the degree of randomness in token selection. Lower "
-        "temperatures are good for prompts that expect a true or correct response, "
-        "while higher temperatures can lead to more diverse or unexpected results."
-    ))
-max_output_tokens_component = gr.Slider(
-    minimum=1,
-    maximum=2048,
-    value=1024,
-    step=1,
-    label="Token limit",
-    info=(
-        "Token limit determines the maximum amount of text output from one prompt. A "
-        "token is approximately four characters. The default value is 2048."
-    ))
-stop_sequences_component = gr.Textbox(
-    label="Add stop sequence",
-    value="",
-    type="text",
-    placeholder="STOP, END",
-    info=(
-        "A stop sequence is a series of characters (including spaces) that stops "
-        "response generation if the model encounters it. The sequence is not included "
-        "as part of the response. You can add up to five stop sequences."
-    ))
-top_k_component = gr.Slider(
-    minimum=1,
-    maximum=40,
-    value=32,
-    step=1,
-    label="Top-K",
-    info=(
-        "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
-        "selected token is the most probable among all tokens in the model’s "
-        "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
-        "next token is selected from among the 3 most probable tokens (using "
-        "temperature)."
-    ))
-top_p_component = gr.Slider(
-    minimum=0,
-    maximum=1,
-    value=1,
-    step=0.01,
-    label="Top-P",
-    info=(
-        "Top-p changes how the model selects tokens for output. Tokens are selected "
-        "from most probable to least until the sum of their probabilities equals the "
-        "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
-        "and .1 and the top-p value is .5, then the model will select either A or B as "
-        "the next token (using temperature)."
-    ))
 
-additional_inputs = [
-    temperature_component,
-    max_output_tokens_component,
-    stop_sequences_component,
-    top_k_component,
-    top_p_component,
-]
 
 def gemini_chat(message, history):
     response = convo.send_message(message)
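Note: the Top-K and Top-P help text in the added sliders describes how candidate tokens are filtered before one is sampled. A minimal, self-contained sketch of that selection logic, for illustration only (it is not part of app.py; the probabilities reuse the A/B/C example from the Top-P description):

import random

def sample_next_token(probs, top_k=3, top_p=0.5, temperature=1.0):
    """Illustrative top-k / top-p (nucleus) filtering over a {token: probability} dict."""
    # Top-k: keep only the k most probable tokens (k=1 is greedy decoding).
    candidates = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)[:top_k]

    # Top-p: keep tokens from most to least probable until their cumulative
    # probability reaches the top-p value.
    nucleus, cumulative = [], 0.0
    for token, p in candidates:
        nucleus.append((token, p))
        cumulative += p
        if cumulative >= top_p:
            break

    # Sample one surviving token, sharpening or flattening the weights with temperature.
    weights = [p ** (1.0 / temperature) for _, p in nucleus]
    return random.choices([t for t, _ in nucleus], weights=weights, k=1)[0]

# The example from the Top-P help text: with probabilities .3, .2, .1 and top_p=0.5,
# only A or B can be selected as the next token.
print(sample_next_token({"A": 0.3, "B": 0.2, "C": 0.1}))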
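Note: the diff defines additional_inputs and a gemini_chat handler but does not show how they are connected to the UI. A hedged sketch of one possible wiring, assuming a gr.ChatInterface and reusing the convo and additional_inputs objects defined above in app.py; the extra handler parameters and the per-call generation_config dict are illustrative assumptions, not code from this commit, showing how the slider values (rather than the Gradio component objects) could reach the model at call time:

import gradio as gr

# Hypothetical wiring (not part of this commit): a handler that receives the
# slider/textbox values through ChatInterface's additional_inputs, in the same
# order as the additional_inputs list defined above, and forwards them to Gemini
# on every call instead of relying on the module-level generation_config.
def gemini_chat(message, history, temperature, max_output_tokens, stop_sequences, top_k, top_p):
    stops = [s.strip() for s in stop_sequences.split(",") if s.strip()]
    response = convo.send_message(
        message,
        generation_config={
            "temperature": temperature,
            "max_output_tokens": int(max_output_tokens),  # sliders may return floats
            "stop_sequences": stops,
            "top_k": int(top_k),
            "top_p": top_p,
        },
    )
    return response.text

demo = gr.ChatInterface(
    fn=gemini_chat,
    additional_inputs=additional_inputs,  # components defined at the top of app.py
)
demo.launch()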