phanerozoic committed on
Commit 0ea4bc5 · verified · 1 Parent(s): 999c346

Update app.py

Files changed (1)
  1. app.py +95 -75
app.py CHANGED
@@ -1,56 +1,65 @@
-# app.py – SchoolSpirit AI Space (streaming + rate-limit)
-import os, re, time, datetime, traceback, threading, torch
-import gradio as gr
+"""
+SchoolSpirit AI chatbot Space – Granite-3.3-2B
+• Streams tokens to Gradio UI
+• 5-per-minute rate-limit per IP
+• Founder + email hand-off in system prompt
+"""
+
+import os, re, time, datetime, threading, traceback, torch, gradio as gr
 from transformers import (
     AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 )
 from transformers.utils import logging as hf_logging

-# --------------------------------------------------------------------
-# 0. Helpers + paths
-# --------------------------------------------------------------------
+# ───────────────────────────────────────────────────────────────────────────────
+# 0. Logging helper
+# ───────────────────────────────────────────────────────────────────────────────
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
-def log(msg:str):
+def log(msg: str):
     ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
     line = f"[{ts}] {msg}"
     print(line, flush=True)
-    try: open(LOG_FILE,"a").write(line+"\n")
-    except FileNotFoundError: pass
+    try:
+        with open(LOG_FILE, "a") as f:
+            f.write(line + "\n")
+    except FileNotFoundError:
+        pass

-# --------------------------------------------------------------------
-# 1. Config
-# --------------------------------------------------------------------
+# ───────────────────────────────────────────────────────────────────────────────
+# 1. Config
+# ───────────────────────────────────────────────────────────────────────────────
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
 CTX_TOKENS = 1800
 MAX_NEW_TOKENS = 64
 TEMP = 0.6
 MAX_INPUT_CH = 300
-RATE_N, RATE_SEC = 5, 60  # 5 msgs / 60 s per IP
+RATE_N, RATE_SEC = 5, 60  # 5 messages / 60 s

 SYSTEM_MSG = (
-    "You are **SchoolSpirit AI**, the friendly digital mascot of "
-    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. "
-    "The company installs on-prem AI chat mascots, fine-tunes language models, "
-    "and ships turnkey GPU servers to K-12 schools.\n\n"
-    "RULES:\n"
-    "• Reply in ≤ 4 sentences unless asked for detail.\n"
-    "• No personal-data collection; no medical/legal/financial advice.\n"
-    "• If uncertain, say so and suggest contacting a human.\n"
-    "• If you can’t answer, politely direct the user to admin@schoolspiritai.com.\n"
-    "• Keep language age-appropriate; avoid profanity, politics, mature themes."
+    "You are **SchoolSpirit AI**, the friendly digital mascot of "
+    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. The company "
+    "installs on-prem AI chat mascots, fine-tunes language models, and ships "
+    "turnkey GPU servers to K-12 schools.\n\n"
+    "RULES:\n"
+    "• Reply in ≤ 4 sentences unless asked for detail.\n"
+    "• No personal-data collection; no medical/legal/financial advice.\n"
+    "• If uncertain, say so and suggest contacting a human.\n"
+    "• If you can’t answer, politely direct the user to admin@schoolspiritai.com.\n"
+    "• Keep language age-appropriate; avoid profanity, politics, mature themes."
 )
 WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"

 strip = lambda s: re.sub(r"\s+", " ", s.strip())

-# --------------------------------------------------------------------
-# 2. Load model + tokenizer
-# --------------------------------------------------------------------
+# ───────────────────────────────────────────────────────────────────────────────
+# 2. Load model
+# ───────────────────────────────────────────────────────────────────────────────
 hf_logging.set_verbosity_error()
 try:
-    log("Loading model …")
+    log("Loading tokenizer / model …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
+
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         device_map="auto" if torch.cuda.is_available() else "cpu",
@@ -63,45 +72,49 @@ except Exception as exc:
     MODEL_ERR = f"Model load error: {exc}"
     log(MODEL_ERR)

-# --------------------------------------------------------------------
-# 3. In-memory rate-limit (IP → timestamps)
-# --------------------------------------------------------------------
-VISITS: dict[str,list[float]] = {}
-def allowed(ip:str)->bool:
+# ───────────────────────────────────────────────────────────────────────────────
+# 3. Simple in-memory rate-limiter {ip: [timestamps]}
+# ───────────────────────────────────────────────────────────────────────────────
+VISITS: dict[str, list[float]] = {}
+def allowed(ip: str) -> bool:
     now = time.time()
-    times = [t for t in VISITS.get(ip,[]) if now-t < RATE_SEC]
+    times = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
     VISITS[ip] = times
-    if len(times) >= RATE_N: return False
+    if len(times) >= RATE_N:
+        return False
     VISITS[ip].append(now)
     return True

-# --------------------------------------------------------------------
-# 4. Prompt builder with token budget
-# --------------------------------------------------------------------
-def build_prompt(raw:list[dict]) -> str:
+# ───────────────────────────────────────────────────────────────────────────────
+# 4. Prompt builder (trims old turns to fit context)
+# ───────────────────────────────────────────────────────────────────────────────
+def build_prompt(raw: list[dict]) -> str:
     def render(m):
         role = m["role"]
-        if role == "system": return m["content"]
-        prefix = "User:" if role=="user" else "AI:"
+        if role == "system":
+            return m["content"]
+        prefix = "User:" if role == "user" else "AI:"
         return f"{prefix} {m['content']}"
+
     system, convo = raw[0], raw[1:]
     while True:
-        prompt = "\n".join([system["content"]]+[render(m) for m in convo]+["AI:"])
-        if len(tok.encode(prompt, add_special_tokens=False)) <= CTX_TOKENS or len(convo)<=2:
-            return prompt
-        convo = convo[2:]
-
-# --------------------------------------------------------------------
-# 5. Stream-enabled chat callback
-# --------------------------------------------------------------------
-def chat_fn(user_msg, chat_history, state, request:gr.Request):
+        parts = [system["content"]] + [render(m) for m in convo] + ["AI:"]
+        if len(tok.encode("\n".join(parts), add_special_tokens=False)) <= CTX_TOKENS or len(convo) <= 2:
+            return "\n".join(parts)
+        convo = convo[2:]  # drop oldest user+assistant pair
+
+# ───────────────────────────────────────────────────────────────────────────────
+# 5. Streaming chat callback
+# ───────────────────────────────────────────────────────────────────────────────
+def chat_fn(user_msg, chat_history, state, request: gr.Request):
     ip = request.client.host if request else "anon"
     if not allowed(ip):
-        chat_history.append((user_msg, "Rate limit: wait a minute and try again."))
+        chat_history.append((user_msg, "Rate limit exceeded — please wait a minute."))
         return chat_history, state

     user_msg = strip(user_msg or "")
-    if not user_msg: return chat_history, state
+    if not user_msg:
+        return chat_history, state
     if len(user_msg) > MAX_INPUT_CH:
         chat_history.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
         return chat_history, state
@@ -109,42 +122,49 @@ def chat_fn(user_msg, chat_history, state, request:gr.Request):
         chat_history.append((user_msg, MODEL_ERR))
         return chat_history, state

-    state["raw"].append({"role":"user","content":user_msg})
+    # Append user line and placeholder for assistant
+    chat_history.append((user_msg, ""))
+    state["raw"].append({"role": "user", "content": user_msg})
+
     prompt = build_prompt(state["raw"])
+    input_ids = tok(prompt, return_tensors="pt").to(model.device).input_ids

-    # ---- Streaming generate ----
     streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
-    kwargs = dict(
-        input_ids=tok(prompt, return_tensors="pt").to(model.device).input_ids,
-        max_new_tokens=MAX_NEW_TOKENS,
-        temperature=TEMP,
-        streamer=streamer,
-    )
-    thread = threading.Thread(target=model.generate, kwargs=kwargs)
-    thread.start()
+    threading.Thread(
+        target=model.generate,
+        kwargs=dict(
+            input_ids=input_ids,
+            max_new_tokens=MAX_NEW_TOKENS,
+            temperature=TEMP,
+            streamer=streamer,
+        ),
+    ).start()

     partial = ""
     for token in streamer:
         partial += token
-        yield chat_history + [(user_msg, partial)], state
+        chat_history[-1] = (user_msg, partial)  # update last message
+        yield chat_history, state               # stream to UI

-    reply = strip(partial)
-    state["raw"].append({"role":"assistant","content":reply})
-    chat_history.append((user_msg, reply))
-    yield chat_history, state  # final
+    state["raw"].append({"role": "assistant", "content": strip(partial)})
+    yield chat_history, state  # final update

-# --------------------------------------------------------------------
-# 6. Launch Gradio Blocks (stream=True via generator)
-# --------------------------------------------------------------------
+# ───────────────────────────────────────────────────────────────────────────────
+# 6. Gradio UI
+# ───────────────────────────────────────────────────────────────────────────────
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("### SchoolSpirit AI Chat")
-    bot = gr.Chatbot(value=[("", WELCOME)], height=480)
-    st = gr.State({"raw":[{"role":"system","content":SYSTEM_MSG},
-                          {"role":"assistant","content":WELCOME}]})
+    bot = gr.Chatbot(value=[("", WELCOME)], height=480, label="SchoolSpirit AI")
+    st = gr.State({
+        "raw": [
+            {"role": "system", "content": SYSTEM_MSG},
+            {"role": "assistant", "content": WELCOME},
+        ]
+    })
     with gr.Row():
-        txt = gr.Textbox(placeholder="Type here…", show_label=False, scale=4)
+        txt = gr.Textbox(placeholder="Type your question here…", show_label=False, lines=1, scale=4)
         btn = gr.Button("Send", variant="primary")
-    btn.click(chat_fn, inputs=[txt,bot,st], outputs=[bot,st])
-    txt.submit(chat_fn, inputs=[txt,bot,st], outputs=[bot,st])
+    btn.click(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])
+    txt.submit(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])

 demo.launch()
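
For reference, the rate-limiter this commit introduces in section 3 is a plain sliding window over per-IP timestamps. A minimal standalone sketch of the same logic is below; the __main__ harness and the example IP are illustrative additions, not part of the Space:

# Sliding-window rate-limit sketch, mirroring allowed() from this commit.
import time

RATE_N, RATE_SEC = 5, 60                  # 5 messages per 60 s per IP
VISITS: dict[str, list[float]] = {}

def allowed(ip: str) -> bool:
    now = time.time()
    # Keep only the timestamps that fall inside the current window.
    times = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
    VISITS[ip] = times
    if len(times) >= RATE_N:
        return False
    VISITS[ip].append(now)
    return True

if __name__ == "__main__":
    # Hypothetical IP: the first five calls pass, the sixth is blocked.
    print([allowed("203.0.113.7") for _ in range(6)])

Note that an idle IP's entry is only pruned when that IP sends another request, so VISITS grows with the number of distinct clients; the commit accepts that trade-off for simplicity.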