Update app.py
app.py (CHANGED)
@@ -1,138 +1,149 @@
Old version (lines prefixed "-" were removed; some removed lines were lost in extraction and appear as bare "-"):

 import os
 import sys
 import json
 import traceback
 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient

 app = Flask(__name__)

-#
-MODEL_ID = os.getenv("MODEL_ID", "mistralai/
-API_KEY = os.getenv("API_KEY")
-
-#
-

 ALLOWED = {
-    "health_wellness",
-    "spiritual_guidance",
-    "generate_image",
-    "realtime_query",
-    "other_query",
 }

 def log(msg, **kv):
-
-    parts = [msg] + [f"{k}={v}" for k, v in kv.items()]
     print(" | ".join(parts), file=sys.stderr, flush=True)

-
-
-
-    """
-    sys_block = f"<<SYS>>{custom_instructions}<<SYS>>" if custom_instructions else ""
     return f"[INST] {sys_block}\nUser: {user_message}\nAssistant: [/INST]"

-
-
-        return ""
-    # lowercase then normalize a few British/var variants → American/neutral
-    repl = {
-        "summarise": "summarize",
-        "colour": "color",
-        "favour": "favor",
-        "centre": "center",
-    }
-    t = text.lower()
-    for k, v in repl.items():
-        t = t.replace(k, v)
-    return t
-
-def call_model(prompt: str,
-               temperature: float = 0.0,
-               max_new_tokens: int = 3,
-               top_p: float = 1.0,
-               repetition_penalty: float = 1.0,
-               stop=None) -> str:
-    """
-    Use text_generation for models that support it.
-    We keep decoding deterministic and tiny (single-token classification).
-    """
-    if stop is None:
-        stop = ["\n"]
-    out = client.text_generation(
-        prompt,  # positional first arg
-        model=MODEL_ID,
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=False,  # deterministic
-        stop=stop,
-        details=False  # return plain str
-    )
-    return (out or "").strip()
-
 def extract_category(text: str) -> str:
-
-
-
-    # Grab the first allowed token if it appears anywhere
-    for token in ALLOWED:
-        if token in raw:
-            return token
-    # or just the first word (some models emit "category: health_wellness")
     first = raw.split()[0].strip(",.;:|") if raw else ""
     return first if first in ALLOWED else "other_query"

-
 def generate_text():
     if not API_KEY:
         log("DECISION_ERR", reason="missing_api_key")
         return jsonify({"error": "Missing API_KEY"}), 400
-    if client is None:
-        log("DECISION_ERR", reason="client_not_initialized")
-        return jsonify({"error": "Client not initialized"}), 500

     data = request.get_json(silent=True) or {}
     prompt = (data.get("prompt") or "").strip()
     instructions = (data.get("instructions") or "").strip()
-
     if not prompt or not instructions:
         log("DECISION_BAD_REQ", has_prompt=bool(prompt), has_instructions=bool(instructions))
         return jsonify({"error": "Missing required fields"}), 400

     try:
-
-
-
-
-            model=MODEL_ID,
-            prompt_len=len(norm_prompt),
-            instr_len=len(instructions))
-
-        raw = call_model(
-            formatted,
-            temperature=0.0,
-            max_new_tokens=3,  # single token is enough
-            top_p=1.0,
-            repetition_penalty=1.0,
-            stop=["\n"]
-        )
-
-        token = extract_category(raw)
-
-        log("DECISION_OK", raw=raw.replace("\n", "\\n"), token=token)
-        return jsonify({"response": token}), 200
-
     except Exception as e:
-
-
-

 if __name__ == "__main__":
-    # Use Flask’s dev server; in prod use gunicorn/uvicorn with a WSGI/ASGI wrapper as appropriate.
     port = int(os.getenv("PORT", 7860))
-    log("BOOT", model=MODEL_ID, port=port,
     app.run(host="0.0.0.0", port=port)
New version (lines prefixed "+" were added):

 import os
 import sys
 import json
+import re
+import time
 import traceback
+import requests
 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient

 app = Flask(__name__)

+# --- Config ---
+MODEL_ID = os.getenv("MODEL_ID", "mistralai/Mixtral-8x7B-Instruct-v0.1").strip()
+API_KEY = os.getenv("API_KEY", "").strip()
+# If you created a private Inference Endpoint, put its full URL here:
+# e.g. https://xxxxxxxxx-abcdefg.hf.space or https://xxxx-yyy.endpoints.huggingface.cloud
+DECISION_ENDPOINT = os.getenv("DECISION_ENDPOINT", "").strip()  # optional but recommended
+TIMEOUT = 25

 ALLOWED = {
+    "health_wellness", "spiritual_guidance", "generate_image", "realtime_query", "other_query"
 }

 def log(msg, **kv):
+    parts = [msg] + [f"{k}={v}" for k, v in kv.items()]
     print(" | ".join(parts), file=sys.stderr, flush=True)

+# --- Prompt formatting ---
+def format_prompt(user_message: str, instructions: str = "") -> str:
+    sys_block = f"<<SYS>>{instructions}\nReturn EXACTLY one token from the list above. No quotes, no punctuation, no extra words.<<SYS>>" if instructions else ""
     return f"[INST] {sys_block}\nUser: {user_message}\nAssistant: [/INST]"

+# --- Extractor ---
+_token_re = re.compile(r"\b(health_wellness|spiritual_guidance|generate_image|realtime_query|other_query)\b", re.I)
 def extract_category(text: str) -> str:
+    raw = (text or "").strip().lower()
+    m = _token_re.search(raw)
+    if m:
+        return m.group(1)
     first = raw.split()[0].strip(",.;:|") if raw else ""
     return first if first in ALLOWED else "other_query"

+# --- Conversational REST call (works even if the client lacks .conversational) ---
+def hf_conversational(prompt: str) -> str:
+    url = DECISION_ENDPOINT or f"https://api-inference.huggingface.co/models/{MODEL_ID}"
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "inputs": {
+            "past_user_inputs": [],
+            "generated_responses": [],
+            "text": prompt,
+        },
+        "parameters": {
+            "max_new_tokens": 3,
+            "temperature": 0.0,
+            "top_p": 1.0,
+            "repetition_penalty": 1.0,
+            "stop": ["\n"],
+            "return_full_text": False,
+        },
+        "options": {
+            "use_cache": True,
+            "wait_for_model": True
+        }
+    }
+    for attempt in range(3):
+        r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
+        if r.status_code == 503:
+            time.sleep(2 + attempt)
+            continue
+        r.raise_for_status()
+        data = r.json()
+
+        # Common response shapes:
+        if isinstance(data, dict) and "generated_text" in data:
+            return str(data["generated_text"]).strip()
+        if isinstance(data, dict) and "choices" in data and data["choices"]:
+            ch = data["choices"][0]
+            txt = ch.get("text") or ch.get("message", {}).get("content")
+            if txt:
+                return str(txt).strip()
+        if isinstance(data, list) and data and isinstance(data[0], dict):
+            if "generated_text" in data[0]:
+                return str(data[0]["generated_text"]).strip()
+            if "generated_responses" in data[0]:
+                gresps = data[0]["generated_responses"]
+                if isinstance(gresps, list) and gresps:
+                    return str(gresps[-1]).strip()
+
+        return str(data).strip()
+    return ""
+
+# --- Text-generation (use if the provider supports it) ---
+def try_text_generation(client: InferenceClient, formatted: str) -> str:
+    return client.text_generation(
+        formatted,
+        model=MODEL_ID,
+        temperature=0.0,
+        max_new_tokens=3,
+        top_p=1.0,
+        repetition_penalty=1.0,
+        do_sample=False,
+        stop=["\n"],
+        details=False
+    )
+
+@app.post("/generate_text")
 def generate_text():
     if not API_KEY:
         log("DECISION_ERR", reason="missing_api_key")
         return jsonify({"error": "Missing API_KEY"}), 400

     data = request.get_json(silent=True) or {}
     prompt = (data.get("prompt") or "").strip()
     instructions = (data.get("instructions") or "").strip()
     if not prompt or not instructions:
         log("DECISION_BAD_REQ", has_prompt=bool(prompt), has_instructions=bool(instructions))
         return jsonify({"error": "Missing required fields"}), 400

+    formatted = format_prompt(prompt, instructions)
+    raw = ""
     try:
+        # First, try the text-generation path
+        client = InferenceClient(token=API_KEY)
+        log("DECISION_CALL_TG", model=MODEL_ID, endpoint="hf_hub_text_generation")
+        raw = try_text_generation(client, formatted)
     except Exception as e:
+        # If the provider says "Supported task: conversational", fall back to REST conversational
+        msg = str(e)
+        log("DECISION_TG_FAIL", error=msg)
+        log("DECISION_CALL_CONV", model=MODEL_ID, endpoint=(DECISION_ENDPOINT or "api-inference"))
+        try:
+            raw = hf_conversational(formatted)
+        except Exception as e2:
+            trace = traceback.format_exc().replace("\n", "\\n")
+            log("DECISION_CONV_FAIL", error=str(e2), trace=trace)
+            return jsonify({"response": "other_query", "error": str(e2)}), 200
+
+    token = extract_category(raw)
+    log("DECISION_OK", raw=raw.replace("\n", "\\n"), token=token)
+    return jsonify({"response": token}), 200

 if __name__ == "__main__":
     port = int(os.getenv("PORT", 7860))
+    log("BOOT", model=MODEL_ID, port=port, endpoint=DECISION_ENDPOINT or "api-inference")
     app.run(host="0.0.0.0", port=port)
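For a quick smoke test of the new route, a minimal client sketch follows. The field names `prompt` and `instructions` come from the handler above; the localhost URL (default PORT 7860) and the instruction wording are assumptions for illustration, not part of this commit.

import requests

# Hypothetical smoke test for POST /generate_text; the URL and the
# instruction text below are assumptions, not part of the commit.
resp = requests.post(
    "http://localhost:7860/generate_text",
    json={
        "prompt": "What herbs help with sleep?",
        "instructions": "Classify the message as one of: health_wellness, "
                        "spiritual_guidance, generate_image, realtime_query, other_query.",
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"response": "health_wellness"}

Note that when both backends fail, the handler deliberately returns HTTP 200 with {"response": "other_query", "error": ...}, so callers should check the "error" key rather than relying on the status code.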