FractalAIR committed
Commit d50edab · verified · 1 Parent(s): e54749a

Update app.py

Files changed (1)
  1. app.py +11 -52
app.py CHANGED
@@ -24,57 +24,6 @@ def generate_conversation_id():
     return str(uuid.uuid4())[:8]
 
 
-'''def generate_response(user_message, max_tokens, temperature, top_p, history_state):
-    if not user_message.strip():
-        return history_state, history_state
-
-    start_tag = "<|im_start|>"
-    sep_tag = "<|im_sep|>"
-    end_tag = "<|im_end|>"
-
-    system_message = "Your role as an assistant..."
-    messages = [{"role": "system", "content": system_message}]
-    for message in history_state:
-        messages.append({"role": message["role"], "content": message["content"]})
-    messages.append({"role": "user", "content": user_message})
-
-    try:
-        response = client.chat.completions.create(
-            model="tgi",
-            messages=messages,
-            max_tokens=int(max_tokens),
-            temperature=temperature,
-            top_p=top_p,
-            stream=True,
-            extra_body={"max_new_tokens": int(max_tokens)}
-        )
-    except Exception as e:
-        print(f"[ERROR] OpenAI API call failed: {e}")
-        yield history_state + [{"role": "user", "content": user_message},
-                              {"role": "assistant", "content": "⚠️ Generation failed."}], history_state
-        return
-
-
-    assistant_response = ""
-    new_history = history_state + [
-        {"role": "user", "content": user_message},
-        {"role": "assistant", "content": ""}
-    ]
-
-    try:
-        for chunk in response:
-            if not chunk.choices or not chunk.choices[0].delta or not chunk.choices[0].delta.content:
-                continue
-            token = chunk.choices[0].delta.content
-            assistant_response += token
-            new_history[-1]["content"] = assistant_response.strip()
-            yield new_history, new_history
-    except Exception:
-        pass
-
-    yield new_history, new_history'''
-
-
 import tiktoken
 enc = tiktoken.encoding_for_model("gpt-3.5-turbo")  # any OpenAI encoding works
 
@@ -272,4 +221,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     example3_button.click(fn=lambda: gr.update(value=example_messages["JEE Main 2025 Probability & Statistics"]), inputs=None, outputs=user_input)
     example4_button.click(fn=lambda: gr.update(value=example_messages["JEE Main 2025 Laws of Motion"]), inputs=None, outputs=user_input)
 
-demo.launch(share=True, ssr_mode=False)
+#demo.launch(share=True, ssr_mode=False)
+
+if __name__ == "__main__":
+    demo.queue(                       # turn the queue on
+        concurrency_count=8,          # how many jobs run simultaneously
+        max_size=40,                  # up to 40 requests can wait in line
+        api_open=False                # no public /queue/status endpoint
+    ).launch(
+        share=True,
+        ssr_mode=False
+    )
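
Note: queue(concurrency_count=...) is the Gradio 3.x signature; Gradio 4.x removed that argument in favor of default_concurrency_limit, while launch(ssr_mode=...) only exists in newer releases, so the two may not work together on a single Gradio version. Below is a minimal self-contained sketch of the newer form, assuming Gradio 5.x; the gr.Markdown placeholder is hypothetical and stands in for this Space's actual Blocks UI:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")     # stand-in for the Space's real interface

if __name__ == "__main__":
    demo.queue(
        default_concurrency_limit=8,  # Gradio 4.x+ replacement for concurrency_count
        max_size=40,                  # up to 40 requests can wait in the queue
        api_open=False                # keep the queue's API endpoints closed
    ).launch(share=True, ssr_mode=False)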