Spaces:

phanerozoic
/

SchoolSpiritAI

Paused

App Files Files Community

phanerozoic committed on Apr 21

Commit

d672735

verified ·

1 Parent(s): dd7db97

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -43

app.py CHANGED Viewed

@@ -3,9 +3,7 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
-# ---------------------------------------------------------------------------
-# Logging helpers
-# ---------------------------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
@@ -21,48 +19,39 @@ def log(msg: str):
         pass
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
 MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 4, 64, 300
 SYSTEM_MSG = (
-    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
     "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
     "mascots, offers custom fine‑tuning of language models, and ships turnkey "
-    "PC's with preinstalled language models to K‑12 schools.\n\n"
     "GUIDELINES:\n"
-    "• Use a warm, encouraging tone fit for students, parents, and staff.\n"
-    "• Keep replies short—no more than four sentences unless asked.\n"
-    "• If you’re unsure or out of scope, say so and suggest human follow‑up.\n"
-    "• Never collect personal data or provide medical, legal, or financial advice.\n"
     "• No profanity, politics, or mature themes."
 )
 WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
-# ---------------------------------------------------------------------------
-# Load model (GPU FP‑16 if available → CPU fallback)
-# ---------------------------------------------------------------------------
 hf_logging.set_verbosity_error()
 try:
     log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     if torch.cuda.is_available():
-        log("GPU detected → loading model in FP‑16")
         model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="auto",          # put layers on available GPU(s)
-            torch_dtype=torch.float16,
         )
     else:
-        log("No GPU → loading model on CPU (FP‑32)")
         model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="cpu",
-            torch_dtype="auto",
-            low_cpu_mem_usage=True,
         )
     gen = pipeline(
@@ -80,17 +69,25 @@ except Exception as exc:  # noqa: BLE001
     log(MODEL_ERR)
 clean = lambda t: re.sub(r"\s+", " ", t.strip()) or "…"
-trim = lambda m: m if len(m) <= 1 + MAX_TURNS * 2 else [m[0]] + m[-MAX_TURNS * 2 :]
-# ---------------------------------------------------------------------------
-# Chat logic
-# ---------------------------------------------------------------------------
 def chat_fn(user_msg: str, history: list):
     log(f"User sent {len(user_msg)} chars")
-    if not history or history[0]["role"] != "system":
         history.insert(0, {"role": "system", "content": SYSTEM_MSG})
     if MODEL_ERR:
@@ -105,21 +102,21 @@ def chat_fn(user_msg: str, history: list):
     history.append({"role": "user", "content": user_msg})
     history = trim(history)
-    prompt_lines = [
-        m["content"]
-        if m["role"] == "system"
-        else f'{"User" if m["role"]=="user" else "AI"}: {m["content"]}'
-        for m in history
-    ] + ["AI:"]
-    prompt = "\n".join(prompt_lines)
-    log(f"Prompt {len(prompt)} chars → generating")
-    t0 = time.time()
     try:
         raw = gen(prompt)[0]["generated_text"]
         reply = clean(raw.split("AI:", 1)[-1])
         reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()
-        log(f"generate() {time.time()-t0:.2f}s, reply {len(reply)} chars")
     except Exception:
         log("❌ Inference exception:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
@@ -127,15 +124,13 @@ def chat_fn(user_msg: str, history: list):
     return reply
-# ---------------------------------------------------------------------------
-# UI
-# ---------------------------------------------------------------------------
 gr.ChatInterface(
     fn=chat_fn,
     chatbot=gr.Chatbot(
         height=480,
         type="messages",
-        value=[{"role": "assistant", "content": WELCOME_MSG}],
     ),
     title="SchoolSpirit AI Chat",
     theme=gr.themes.Soft(primary_hue="blue"),

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
+# ---------- Logging ---------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
         pass
+# ---------- Config ----------------------------------------------------------
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
 MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 4, 64, 300
 SYSTEM_MSG = (
+    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
     "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
     "mascots, offers custom fine‑tuning of language models, and ships turnkey "
+    "GPU hardware to K‑12 schools.\n\n"
     "GUIDELINES:\n"
+    "• Warm, encouraging tone for students, parents, staff.\n"
+    "• Replies ≤ 4 sentences unless asked for detail.\n"
+    "• If unsure/out‑of‑scope: say so & suggest human follow‑up.\n"
+    "• No personal‑data collection or sensitive advice.\n"
     "• No profanity, politics, or mature themes."
 )
 WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
+# ---------- Model load (GPU FP‑16 → CPU fallback) ---------------------------
 hf_logging.set_verbosity_error()
 try:
     log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     if torch.cuda.is_available():
+        log("GPU detected → FP‑16")
         model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
     else:
+        log("CPU fallback")
         model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID, device_map="cpu", torch_dtype="auto", low_cpu_mem_usage=True
         )
     gen = pipeline(
     log(MODEL_ERR)
 clean = lambda t: re.sub(r"\s+", " ", t.strip()) or "…"
+def trim(hist: list):
+    """keep system + last N user/AI pairs"""
+    sys = [m for m in hist if m["role"] == "system"]
+    convo = [m for m in hist if m["role"] != "system"]
+    return sys + convo[-MAX_TURNS * 2 :]
+# ---------- Chat callback ---------------------------------------------------
 def chat_fn(user_msg: str, history: list):
+    """
+    history: list[dict] like [{'role':'assistant','content':...}, ...]
+    Return -> reply_str (Gradio appends it as assistant msg)
+    """
     log(f"User sent {len(user_msg)} chars")
+    # Ensure system message present exactly once
+    if not any(m["role"] == "system" for m in history):
         history.insert(0, {"role": "system", "content": SYSTEM_MSG})
     if MODEL_ERR:
     history.append({"role": "user", "content": user_msg})
     history = trim(history)
+    prompt = "\n".join(
+        [
+            m["content"]
+            if m["role"] == "system"
+            else f'{"User" if m["role"]=="user" else "AI"}: {m["content"]}'
+            for m in history
+        ]
+        + ["AI:"]
+    )
     try:
         raw = gen(prompt)[0]["generated_text"]
         reply = clean(raw.split("AI:", 1)[-1])
         reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()
+        log(f"Reply {len(reply)} chars")
     except Exception:
         log("❌ Inference exception:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
     return reply
+# ---------- UI --------------------------------------------------------------
 gr.ChatInterface(
     fn=chat_fn,
     chatbot=gr.Chatbot(
         height=480,
         type="messages",
+        value=[{"role": "assistant", "content": WELCOME_MSG}],  # one‑time welcome
     ),
     title="SchoolSpirit AI Chat",
     theme=gr.themes.Soft(primary_hue="blue"),