Spaces:

phanerozoic
/

SchoolSpiritAI

Paused

App Files Community

phanerozoic committed on Apr 21

Commit

0ea4bc5

verified ·

1 Parent(s): 999c346

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -75

app.py CHANGED Viewed

@@ -1,56 +1,65 @@
-# app.py  –  SchoolSpirit AI Space  (streaming + rate‑limit)
-import os, re, time, datetime, traceback, threading, torch
-import gradio as gr
 from transformers import (
     AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 )
 from transformers.utils import logging as hf_logging
-# --------------------------------------------------------------------
-# 0. Helpers + paths
-# --------------------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
-def log(msg:str):
     ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
     line = f"[{ts}] {msg}"
     print(line, flush=True)
-    try: open(LOG_FILE,"a").write(line+"\n")
-    except FileNotFoundError: pass
-# --------------------------------------------------------------------
-# 1. Config
-# --------------------------------------------------------------------
 MODEL_ID          = "ibm-granite/granite-3.3-2b-instruct"
 CTX_TOKENS        = 1800
 MAX_NEW_TOKENS    = 64
 TEMP              = 0.6
 MAX_INPUT_CH      = 300
-RATE_N, RATE_SEC  = 5, 60          # 5 msgs / 60 s per IP
 SYSTEM_MSG = (
- "You are **SchoolSpirit AI**, the friendly digital mascot of "
- "SchoolSpirit AI LLC, founded by Charles Norton in 2025.  "
- "The company installs on‑prem AI chat mascots, fine‑tunes language models, "
- "and ships turnkey GPU servers to K‑12 schools.\n\n"
- "RULES:\n"
- "• Reply in ≤ 4 sentences unless asked for detail.\n"
- "• No personal‑data collection; no medical/legal/financial advice.\n"
- "• If uncertain, say so and suggest contacting a human.\n"
- "• If you can’t answer, politely direct the user to admin@schoolspiritai.com.\n"
- "• Keep language age‑appropriate; avoid profanity, politics, mature themes."
 )
 WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"
 strip = lambda s: re.sub(r"\s+", " ", s.strip())
-# --------------------------------------------------------------------
-# 2. Load model + tokenizer
-# --------------------------------------------------------------------
 hf_logging.set_verbosity_error()
 try:
-    log("Loading model …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         device_map="auto" if torch.cuda.is_available() else "cpu",
@@ -63,45 +72,49 @@ except Exception as exc:
     MODEL_ERR = f"Model load error: {exc}"
     log(MODEL_ERR)
-# --------------------------------------------------------------------
-# 3. In‑memory rate‑limit  (IP → timestamps)
-# --------------------------------------------------------------------
-VISITS: dict[str,list[float]] = {}
-def allowed(ip:str)->bool:
     now = time.time()
-    times = [t for t in VISITS.get(ip,[]) if now-t < RATE_SEC]
     VISITS[ip] = times
-    if len(times) >= RATE_N: return False
     VISITS[ip].append(now)
     return True
-# --------------------------------------------------------------------
-# 4. Prompt builder with token budget
-# --------------------------------------------------------------------
-def build_prompt(raw:list[dict]) -> str:
     def render(m):
         role = m["role"]
-        if role == "system": return m["content"]
-        prefix = "User:" if role=="user" else "AI:"
         return f"{prefix} {m['content']}"
     system, convo = raw[0], raw[1:]
     while True:
-        prompt = "\n".join([system["content"]]+[render(m) for m in convo]+["AI:"])
-        if len(tok.encode(prompt, add_special_tokens=False)) <= CTX_TOKENS or len(convo)<=2:
-            return prompt
-        convo = convo[2:]
-# --------------------------------------------------------------------
-# 5. Stream‑enabled chat callback
-# --------------------------------------------------------------------
-def chat_fn(user_msg, chat_history, state, request:gr.Request):
     ip = request.client.host if request else "anon"
     if not allowed(ip):
-        chat_history.append((user_msg, "Rate limit: wait a minute and try again."))
         return chat_history, state
     user_msg = strip(user_msg or "")
-    if not user_msg: return chat_history, state
     if len(user_msg) > MAX_INPUT_CH:
         chat_history.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
         return chat_history, state
@@ -109,42 +122,49 @@ def chat_fn(user_msg, chat_history, state, request:gr.Request):
         chat_history.append((user_msg, MODEL_ERR))
         return chat_history, state
-    state["raw"].append({"role":"user","content":user_msg})
     prompt = build_prompt(state["raw"])
-    # ---- Streaming generate ----
     streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
-    kwargs = dict(
-        input_ids=tok(prompt, return_tensors="pt").to(model.device).input_ids,
-        max_new_tokens=MAX_NEW_TOKENS,
-        temperature=TEMP,
-        streamer=streamer,
-    )
-    thread = threading.Thread(target=model.generate, kwargs=kwargs)
-    thread.start()
     partial = ""
     for token in streamer:
         partial += token
-        yield chat_history + [(user_msg, partial)], state
-    reply = strip(partial)
-    state["raw"].append({"role":"assistant","content":reply})
-    chat_history.append((user_msg, reply))
-    yield chat_history, state  # final
-# --------------------------------------------------------------------
-# 6. Launch Gradio Blocks (stream=True via generator)
-# --------------------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("### SchoolSpirit AI Chat")
-    bot = gr.Chatbot(value=[("", WELCOME)], height=480)
-    st  = gr.State({"raw":[{"role":"system","content":SYSTEM_MSG},
-                           {"role":"assistant","content":WELCOME}]})
     with gr.Row():
-        txt = gr.Textbox(placeholder="Type here…", show_label=False, scale=4)
         btn = gr.Button("Send", variant="primary")
-    btn.click(chat_fn, inputs=[txt,bot,st], outputs=[bot,st])
-    txt.submit(chat_fn, inputs=[txt,bot,st], outputs=[bot,st])
 demo.launch()

+"""
+SchoolSpirit AI chatbot Space  –  Granite‑3.3‑2B
+• Streams tokens to Gradio UI
+• 5‑per‑minute rate‑limit per IP
+• Founder + email hand‑off in system prompt
+"""
+import os, re, time, datetime, threading, traceback, torch, gradio as gr
 from transformers import (
     AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 )
 from transformers.utils import logging as hf_logging
+# ───────────────────────────────────────────────────────────────────────────────
+# 0.  Logging helper
+# ───────────────────────────────────────────────────────────────────────────────
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
+def log(msg: str):
     ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
     line = f"[{ts}] {msg}"
     print(line, flush=True)
+    try:
+        with open(LOG_FILE, "a") as f:
+            f.write(line + "\n")
+    except FileNotFoundError:
+        pass
+# ───────────────────────────────────────────────────────────────────────────────
+# 1.  Config
+# ───────────────────────────────────────────────────────────────────────────────
 MODEL_ID          = "ibm-granite/granite-3.3-2b-instruct"
 CTX_TOKENS        = 1800
 MAX_NEW_TOKENS    = 64
 TEMP              = 0.6
 MAX_INPUT_CH      = 300
+RATE_N, RATE_SEC  = 5, 60        # 5 messages / 60 s
 SYSTEM_MSG = (
+    "You are **SchoolSpirit AI**, the friendly digital mascot of "
+    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. The company "
+    "installs on‑prem AI chat mascots, fine‑tunes language models, and ships "
+    "turnkey GPU servers to K‑12 schools.\n\n"
+    "RULES:\n"
+    "• Reply in ≤ 4 sentences unless asked for detail.\n"
+    "• No personal‑data collection; no medical/legal/financial advice.\n"
+    "• If uncertain, say so and suggest contacting a human.\n"
+    "• If you can’t answer, politely direct the user to admin@schoolspiritai.com.\n"
+    "• Keep language age‑appropriate; avoid profanity, politics, mature themes."
 )
 WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"
 strip = lambda s: re.sub(r"\s+", " ", s.strip())
+# ───────────────────────────────────────────────────────────────────────────────
+# 2.  Load model
+# ───────────────────────────────────────────────────────────────────────────────
 hf_logging.set_verbosity_error()
 try:
+    log("Loading tokenizer / model …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         device_map="auto" if torch.cuda.is_available() else "cpu",
     MODEL_ERR = f"Model load error: {exc}"
     log(MODEL_ERR)
+# ───────────────────────────────────────────────────────────────────────────────
+# 3.  Simple in‑memory rate‑limiter  {ip: [timestamps]}
+# ───────────────────────────────────────────────────────────────────────────────
+VISITS: dict[str, list[float]] = {}
+def allowed(ip: str) -> bool:
     now = time.time()
+    times = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
     VISITS[ip] = times
+    if len(times) >= RATE_N:
+        return False
     VISITS[ip].append(now)
     return True
+# ───────────────────────────────────────────────────────────────────────────────
+# 4.  Prompt builder (trims old turns to fit context)
+# ───────────────────────────────────────────────────────────────────────────────
+def build_prompt(raw: list[dict]) -> str:
     def render(m):
         role = m["role"]
+        if role == "system":
+            return m["content"]
+        prefix = "User:" if role == "user" else "AI:"
         return f"{prefix} {m['content']}"
     system, convo = raw[0], raw[1:]
     while True:
+        parts = [system["content"]] + [render(m) for m in convo] + ["AI:"]
+        if len(tok.encode("\n".join(parts), add_special_tokens=False)) <= CTX_TOKENS or len(convo) <= 2:
+            return "\n".join(parts)
+        convo = convo[2:]  # drop oldest user+assistant pair
+# ───────────────────────────────────────────────────────────────────────────────
+# 5.  Streaming chat callback
+# ───────────────────────────────────────────────────────────────────────────────
+def chat_fn(user_msg, chat_history, state, request: gr.Request):
     ip = request.client.host if request else "anon"
     if not allowed(ip):
+        chat_history.append((user_msg, "Rate limit exceeded — please wait a minute."))
         return chat_history, state
     user_msg = strip(user_msg or "")
+    if not user_msg:
+        return chat_history, state
     if len(user_msg) > MAX_INPUT_CH:
         chat_history.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
         return chat_history, state
         chat_history.append((user_msg, MODEL_ERR))
         return chat_history, state
+    # Append user line and placeholder for assistant
+    chat_history.append((user_msg, ""))
+    state["raw"].append({"role": "user", "content": user_msg})
     prompt = build_prompt(state["raw"])
+    input_ids = tok(prompt, return_tensors="pt").to(model.device).input_ids
     streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
+    threading.Thread(
+        target=model.generate,
+        kwargs=dict(
+            input_ids=input_ids,
+            max_new_tokens=MAX_NEW_TOKENS,
+            temperature=TEMP,
+            streamer=streamer,
+        ),
+    ).start()
     partial = ""
     for token in streamer:
         partial += token
+        chat_history[-1] = (user_msg, partial)   # update last message
+        yield chat_history, state                # stream to UI
+    state["raw"].append({"role": "assistant", "content": strip(partial)})
+    yield chat_history, state  # final update
+# ───────────────────────────────────────────────────────────────────────────────
+# 6.  Gradio UI
+# ───────────────────────────────────────────────────────────────────────────────
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("### SchoolSpirit AI Chat")
+    bot = gr.Chatbot(value=[("", WELCOME)], height=480, label="SchoolSpirit AI")
+    st  = gr.State({
+        "raw": [
+            {"role": "system", "content": SYSTEM_MSG},
+            {"role": "assistant", "content": WELCOME},
+        ]
+    })
     with gr.Row():
+        txt = gr.Textbox(placeholder="Type your question here…", show_label=False, lines=1, scale=4)
         btn = gr.Button("Send", variant="primary")
+    btn.click(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])
+    txt.submit(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])
 demo.launch()