# SchoolSpiritAI / app.py
import os, re, time, datetime, traceback

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers.utils import logging as hf_logging
# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------
os.environ["HF_HOME"] = "/data/.huggingface"
LOG_FILE = "/data/requests.log"
def log(msg: str):
    # utcnow() is deprecated; use an aware UTC timestamp instead
    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    try:
        with open(LOG_FILE, "a") as f:
            f.write(line + "\n")
    except FileNotFoundError:
        # /data may be absent (e.g. persistent storage not mounted); stdout still gets the line
        pass
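# Illustrative only: log("ready") emits something like "[12:00:00.123] ready"
# to stdout and, when /data is mounted, appends the same line to requests.log.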
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 4, 64, 300  # history depth, reply length, input size cap
SYSTEM_MSG = (
    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
    "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
    "mascots, offers custom fine‑tuning of language models, and ships turnkey "
    "PCs with preinstalled language models to K‑12 schools.\n\n"
    "GUIDELINES:\n"
    "• Use a warm, encouraging tone fit for students, parents, and staff.\n"
    "• Keep replies short—no more than four sentences unless asked.\n"
    "• If you’re unsure or out of scope, say so and suggest human follow‑up.\n"
    "• Never collect personal data or provide medical, legal, or financial advice.\n"
    "• No profanity, politics, or mature themes."
)
WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
# ---------------------------------------------------------------------------
# Load model (GPU FP‑16 if available → CPU fallback)
# ---------------------------------------------------------------------------
hf_logging.set_verbosity_error()
try:
    log("Loading tokenizer …")
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    if torch.cuda.is_available():
        log("GPU detected → loading model in FP‑16")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",  # spread layers across available GPU(s)
            torch_dtype=torch.float16,
        )
    else:
        log("No GPU → loading model on CPU")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="cpu",
            torch_dtype="auto",  # follow the checkpoint's dtype rather than forcing FP‑32
            low_cpu_mem_usage=True,
        )
    gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        max_new_tokens=MAX_TOKENS,
        do_sample=True,
        temperature=0.6,
    )
    MODEL_ERR = None
    log("Model loaded ✔")
except Exception as exc:  # noqa: BLE001
    MODEL_ERR, gen = f"Model load error: {exc}", None
    log(MODEL_ERR)
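# Failure here is non-fatal by design: gen stays None and chat_fn below
# surfaces MODEL_ERR to users, so the Space still boots when the model
# cannot load.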
clean = lambda t: re.sub(r"\s+", " ", t.strip()) or "…"  # collapse whitespace; never return empty
trim = lambda m: m if len(m) <= 1 + MAX_TURNS * 2 else [m[0]] + m[-MAX_TURNS * 2 :]  # system msg + last MAX_TURNS exchanges
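# Minimal sanity sketch for the two helpers above (assumption: opt-in via
# SELF_TEST=1; not part of the original app).
if os.environ.get("SELF_TEST"):
    assert clean("  hi \n there ") == "hi there"
    assert len(trim([{"role": "system"}] + [{}] * 12)) == 1 + MAX_TURNS * 2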
# ---------------------------------------------------------------------------
# Chat logic
# ---------------------------------------------------------------------------
def chat_fn(user_msg: str, history: list):
    log(f"User sent {len(user_msg)} chars")
    if not history or history[0]["role"] != "system":
        history.insert(0, {"role": "system", "content": SYSTEM_MSG})
    if MODEL_ERR:
        return MODEL_ERR
    user_msg = clean(user_msg or "")
    if not user_msg:
        return "Please type something."
    if len(user_msg) > MAX_INPUT_CH:
        return f"Message too long (>{MAX_INPUT_CH} chars)."
    history.append({"role": "user", "content": user_msg})
    history = trim(history)
    # Flatten the message list into a plain "User:/AI:" transcript prompt
    prompt_lines = [
        m["content"]
        if m["role"] == "system"
        else f'{"User" if m["role"] == "user" else "AI"}: {m["content"]}'
        for m in history
    ] + ["AI:"]
    prompt = "\n".join(prompt_lines)
    log(f"Prompt {len(prompt)} chars → generating")
    t0 = time.time()
    try:
        raw = gen(prompt)[0]["generated_text"]
        # The pipeline echoes the prompt; slice it off rather than splitting on
        # the first "AI:", which would return a previous turn's reply instead
        # of the new one in multi-turn conversations.
        reply = clean(raw[len(prompt):])
        reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()  # drop hallucinated extra turns
        log(f"generate() {time.time()-t0:.2f}s, reply {len(reply)} chars")
    except Exception:
        log("❌ Inference exception:\n" + traceback.format_exc())
        reply = "Sorry—backend crashed. Please try again later."
    return reply
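# Optional smoke test (assumption: opt-in via SELF_TEST=1; not part of the
# original app). Exercises chat_fn once without the UI and prints either a
# model reply or MODEL_ERR.
if os.environ.get("SELF_TEST"):
    print(chat_fn("What does SchoolSpirit AI do?", []))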
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
gr.ChatInterface(
    fn=chat_fn,
    chatbot=gr.Chatbot(
        height=480,
        type="messages",
        value=[{"role": "assistant", "content": WELCOME_MSG}],
    ),
    title="SchoolSpirit AI Chat",
    theme=gr.themes.Soft(primary_hue="blue"),
    type="messages",
).launch()
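# Note (assumption): launch() defaults suffice on Hugging Face Spaces; when
# self-hosting, launch(server_name="0.0.0.0", server_port=7860) exposes the
# app beyond localhost.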