phanerozoic committed
Commit d672735 · verified · 1 Parent(s): dd7db97

Update app.py

Files changed (1): app.py +38 -43
app.py CHANGED
@@ -3,9 +3,7 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
 
-# ---------------------------------------------------------------------------
-# Logging helpers
-# ---------------------------------------------------------------------------
+# ---------- Logging ---------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
 
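The log() helper itself sits almost entirely between hunks; all the diff shows is the def log(msg: str) context header below and a bare pass at old line 21. A minimal sketch consistent with those pieces, appending timestamped lines to LOG_FILE and never letting logging crash the app, might be (a reconstruction, not the committed code):

    import time

    def log(msg: str):
        line = f"[{time.strftime('%H:%M:%S')}] {msg}"
        print(line, flush=True)
        try:
            with open(LOG_FILE, "a") as f:  # LOG_FILE = "/data/requests.log"
                f.write(line + "\n")
        except OSError:
            pass  # assumed: swallow write errors so logging can't take the app down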
@@ -21,48 +19,39 @@ def log(msg: str):
     pass
 
 
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
+# ---------- Config ----------------------------------------------------------
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
 MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 4, 64, 300
 
 SYSTEM_MSG = (
-    "You are **SchoolSpiritAI**, the digital mascot for SchoolSpiritAILLC, "
+    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
     "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
     "mascots, offers custom fine‑tuning of language models, and ships turnkey "
-    "PC's with preinstalled language models to K‑12 schools.\n\n"
+    "GPU hardware to K‑12 schools.\n\n"
     "GUIDELINES:\n"
-    "• Use a warm, encouraging tone fit for students, parents, and staff.\n"
-    "• Keep replies short—no more than four sentences unless asked.\n"
-    "• If you’re unsure or out of scope, say so and suggest human follow‑up.\n"
-    "• Never collect personal data or provide medical, legal, or financial advice.\n"
+    "• Warm, encouraging tone for students, parents, staff.\n"
+    "• Replies ≤ 4 sentences unless asked for detail.\n"
+    "• If unsure/out‑of‑scope: say so & suggest human follow‑up.\n"
+    "• No personal‑data collection or sensitive advice.\n"
     "• No profanity, politics, or mature themes."
 )
 WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
 
-# ---------------------------------------------------------------------------
-# Load model (GPU FP‑16 if available → CPU fallback)
-# ---------------------------------------------------------------------------
+# ---------- Model load (GPU FP‑16 → CPU fallback) ---------------------------
 hf_logging.set_verbosity_error()
 try:
     log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
 
     if torch.cuda.is_available():
-        log("GPU detected → loading model in FP‑16")
+        log("GPU detected → FP‑16")
         model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="auto",  # put layers on available GPU(s)
-            torch_dtype=torch.float16,
+            MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
     else:
-        log("No GPU → loading model on CPU (FP‑32)")
+        log("CPU fallback")
         model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="cpu",
-            torch_dtype="auto",
-            low_cpu_mem_usage=True,
+            MODEL_ID, device_map="cpu", torch_dtype="auto", low_cpu_mem_usage=True
        )
 
     gen = pipeline(
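The hunk breaks off at gen = pipeline(, so the committed pipeline arguments are not visible on this page. For orientation, a typical transformers text‑generation pipeline wired to the objects above would look roughly like this (everything except MAX_TOKENS is an illustrative assumption):

    gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        max_new_tokens=MAX_TOKENS,  # 64, from the config block above
        do_sample=True,             # assumed: sampling settings are not shown in the diff
        temperature=0.7,
    )

One detail worth noting: with the pipeline default return_full_text=True, generated_text echoes the whole prompt, and raw.split("AI:", 1) in chat_fn below would latch onto the first assistant turn in the history rather than the new reply; the committed call therefore plausibly passes return_full_text=False, but the diff does not show it.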
@@ -80,17 +69,25 @@ except Exception as exc: # noqa: BLE001
     log(MODEL_ERR)
 
 clean = lambda t: re.sub(r"\s+", " ", t.strip()) or "…"
-trim = lambda m: m if len(m) <= 1 + MAX_TURNS * 2 else [m[0]] + m[-MAX_TURNS * 2 :]
 
-# ---------------------------------------------------------------------------
-# Chat logic
-# ---------------------------------------------------------------------------
 
+def trim(hist: list):
+    """keep system + last N user/AI pairs"""
+    sys = [m for m in hist if m["role"] == "system"]
+    convo = [m for m in hist if m["role"] != "system"]
+    return sys + convo[-MAX_TURNS * 2 :]
 
+
+# ---------- Chat callback ---------------------------------------------------
 def chat_fn(user_msg: str, history: list):
+    """
+    history: list[dict] like [{'role':'assistant','content':...}, ...]
+    Return -> reply_str (Gradio appends it as assistant msg)
+    """
     log(f"User sent {len(user_msg)} chars")
 
-    if not history or history[0]["role"] != "system":
+    # Ensure system message present exactly once
+    if not any(m["role"] == "system" for m in history):
         history.insert(0, {"role": "system", "content": SYSTEM_MSG})
 
     if MODEL_ERR:
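The function version also fixes a quiet assumption in the deleted lambda, which preserved m[0] whether or not it was the system message. A quick sketch of the new behaviour with MAX_TURNS = 4, i.e. the last 8 conversation messages survive:

    hist = [{"role": "system", "content": SYSTEM_MSG}] + [
        {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
        for i in range(10)
    ]
    kept = trim(hist)
    # kept[0] is still the system message;
    # kept[1:] are the 8 most recent conversation messages (msg 2 … msg 9)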
@@ -105,21 +102,21 @@ def chat_fn(user_msg: str, history: list):
     history.append({"role": "user", "content": user_msg})
     history = trim(history)
 
-    prompt_lines = [
-        m["content"]
-        if m["role"] == "system"
-        else f'{"User" if m["role"]=="user" else "AI"}: {m["content"]}'
-        for m in history
-    ] + ["AI:"]
-    prompt = "\n".join(prompt_lines)
-    log(f"Prompt {len(prompt)} chars → generating")
+    prompt = "\n".join(
+        [
+            m["content"]
+            if m["role"] == "system"
+            else f'{"User" if m["role"]=="user" else "AI"}: {m["content"]}'
+            for m in history
+        ]
+        + ["AI:"]
+    )
 
-    t0 = time.time()
     try:
         raw = gen(prompt)[0]["generated_text"]
         reply = clean(raw.split("AI:", 1)[-1])
         reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()
-        log(f"generate() {time.time()-t0:.2f}s, reply {len(reply)} chars")
+        log(f"Reply {len(reply)} chars")
     except Exception:
         log("❌ Inference exception:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
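The reflowed comprehension builds the same role‑tagged plain‑text prompt as before. For a history holding just the system message and one user turn, the join yields:

    history = [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": "Hi"},
    ]
    # prompt ==
    #   <SYSTEM_MSG text>
    #   User: Hi
    #   AI:
    # The two splits after generation then keep only what the model emits
    # between this trailing "AI:" tag and any next "User:"/"AI:" tag.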
@@ -127,15 +124,13 @@ def chat_fn(user_msg: str, history: list):
     return reply
 
 
-# ---------------------------------------------------------------------------
-# UI
-# ---------------------------------------------------------------------------
+# ---------- UI --------------------------------------------------------------
 gr.ChatInterface(
     fn=chat_fn,
     chatbot=gr.Chatbot(
         height=480,
         type="messages",
-        value=[{"role": "assistant", "content": WELCOME_MSG}],
+        value=[{"role": "assistant", "content": WELCOME_MSG}],  # one‑time welcome
     ),
     title="SchoolSpirit AI Chat",
     theme=gr.themes.Soft(primary_hue="blue"),
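Seeding the Chatbot's value with an assistant message shows the greeting once, with no model call. Because the widget is in type="messages" mode, that value and the history later handed to chat_fn share the same OpenAI‑style dict shape, e.g. after the first user turn (a sketch of the runtime value, not code from this commit):

    history = [
        {"role": "assistant", "content": WELCOME_MSG},
        {"role": "user", "content": "What does SchoolSpirit AI do?"},
    ]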
 