Update app.py
app.py
CHANGED
@@ -1,143 +1,81 @@
 import os
 import sys
-import json
-import re
-import time
-import traceback
-import requests
 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient
 
 app = Flask(__name__)
 
-
-
-
-# Optional: your private Inference Endpoint URL (recommended for Mixtral)
-DECISION_ENDPOINT = os.getenv("DECISION_ENDPOINT", "").strip()
-TIMEOUT = 25
 
-
-    "health_wellness",
-
 
 def log(msg, **kv):
-    print(" | ".join([msg] + [f"{k}={v}" for k, v in kv.items()]),
-          file=sys.stderr, flush=True)
 
-
-
-    return f"[INST] {sys_block}\nUser: {user_message}\nAssistant: [/INST]"
-
-_token_re = re.compile(
-    r"\b(health_wellness|spiritual_guidance|generate_image|realtime_query|other_query)\b",
-    re.I
-)
-def extract_category(text: str) -> str:
-    raw = (text or "").strip().lower()
-    m = _token_re.search(raw)
-    if m: return m.group(1)
-    first = raw.split()[0].strip(",.;:|") if raw else ""
-    return first if first in ALLOWED else "other_query"
-
-def hf_conversational(prompt: str) -> str:
-    """Call conversational endpoint (public API or your private DECISION_ENDPOINT)."""
-    url = DECISION_ENDPOINT or f"https://api-inference.huggingface.co/models/{MODEL_ID}"
-    headers = {
-        "Authorization": f"Bearer {API_KEY}",
-        "Accept": "application/json",
-        "Content-Type": "application/json",
-    }
-    payload = {
-        "inputs": {
-            "past_user_inputs": [],
-            "generated_responses": [],
-            "text": prompt,
-        },
-        "parameters": {
-            "max_new_tokens": 3,
-            "temperature": 0.0,
-            "top_p": 1.0,
-            "repetition_penalty": 1.0,
-            "stop": ["\n"],
-            "return_full_text": False,
-        },
-        "options": {"use_cache": True, "wait_for_model": True},
-    }
-    for attempt in range(3):
-        r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
-        if r.status_code == 503:
-            time.sleep(2 + attempt)
-            continue
-        r.raise_for_status()
-        data = r.json()
-        if isinstance(data, dict) and "generated_text" in data:
-            return str(data["generated_text"]).strip()
-        if isinstance(data, dict) and "choices" in data and data["choices"]:
-            ch = data["choices"][0]
-            txt = ch.get("text") or ch.get("message", {}).get("content")
-            if txt: return str(txt).strip()
-        if isinstance(data, list) and data and isinstance(data[0], dict):
-            if "generated_text" in data[0]:
-                return str(data[0]["generated_text"]).strip()
-            if "generated_responses" in data[0]:
-                gresps = data[0]["generated_responses"]
-                if isinstance(gresps, list) and gresps:
-                    return str(gresps[-1]).strip()
-        return str(data).strip()
-    return ""
-
-def try_text_generation(client: InferenceClient, formatted: str) -> str:
-    return client.text_generation(
-        formatted,
-        model=MODEL_ID,
-        temperature=0.0,
-        max_new_tokens=3,
-        top_p=1.0,
-        repetition_penalty=1.0,
-        do_sample=False,
-        stop=["\n"],
-        details=False
-    )
 
 @app.get("/")
 def root():
-    return jsonify({"ok": True, "model": MODEL_ID})
 
 @app.post("/generate_text")
 def generate_text():
     if not API_KEY:
         log("DECISION_ERR", reason="missing_api_key")
         return jsonify({"error": "Missing API_KEY"}), 400
 
     data = request.get_json(silent=True) or {}
     prompt = (data.get("prompt") or "").strip()
-    instructions = (data.get("instructions") or "").strip()
-
-
         return jsonify({"error": "Missing required fields"}), 400
 
-
-
     try:
-
-        log("
-
-
-
-
-
-
-
-
-
 
-
-
 
 if __name__ == "__main__":
     port = int(os.getenv("PORT", 7860))
-    log("BOOT",
     app.run(host="0.0.0.0", port=port)
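Most of the removed request handler did not survive this diff view, but the helpers that did suggest the old flow: build a Mixtral-style [INST] prompt, ask the model for a category token (text-generation first, conversational API with 503 retries as fallback), then regex-extract one of the five allowed labels. A rough reconstruction, where the wiring, the `decide_category` name, and the `build_prompt` signature are all assumptions:

# Hypothetical reconstruction -- the removed handler body is mostly lost above.
def decide_category(user_message: str) -> str:
    formatted = build_prompt(user_message)  # "[INST] {sys_block}\nUser: ...\nAssistant: [/INST]"
    try:
        raw = try_text_generation(client, formatted)  # primary path: text-generation API
    except Exception:
        raw = hf_conversational(formatted)            # fallback: conversational endpoint
    return extract_category(raw)                      # map model output to one of the 5 tokens

The updated file, shown in full below, replaces all of this with a single zero-shot classification call.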
 import os
 import sys
 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient
 
 app = Flask(__name__)
 
+API_KEY = (os.getenv("API_KEY") or "").strip()
+# Multilingual zero-shot model (handles Hindi + English well)
+ZSL_MODEL_ID = os.getenv("ZSL_MODEL_ID", "joeddav/xlm-roberta-large-xnli").strip()
 
+LABELS = [
+    "health_wellness",
+    "spiritual_guidance",
+    "generate_image",
+    "realtime_query",
+    "other_query",
+]
+ALLOWED = set(LABELS)
 
 def log(msg, **kv):
+    print(" | ".join([msg] + [f"{k}={v}" for k, v in kv.items()]), file=sys.stderr, flush=True)
 
+# Init HF client once
+client = InferenceClient(token=API_KEY) if API_KEY else None
 
 @app.get("/")
 def root():
+    return jsonify({"ok": True, "model": ZSL_MODEL_ID})
 
 @app.post("/generate_text")
 def generate_text():
     if not API_KEY:
         log("DECISION_ERR", reason="missing_api_key")
         return jsonify({"error": "Missing API_KEY"}), 400
+    if client is None:
+        log("DECISION_ERR", reason="client_not_initialized")
+        return jsonify({"error": "Client not initialized"}), 500
 
     data = request.get_json(silent=True) or {}
     prompt = (data.get("prompt") or "").strip()
+    instructions = (data.get("instructions") or "").strip()  # not required here
+
+    if not prompt:
+        log("DECISION_BAD_REQ", has_prompt=False)
         return jsonify({"error": "Missing required fields"}), 400
 
+    # Fast-path: explicit image command
+    if prompt.startswith("/image "):
+        log("DECISION_FAST", token="generate_image")
+        return jsonify({"response": "generate_image"}), 200
+
     try:
+        # Zero-shot classification
+        log("DECISION_CALL_ZSL", model=ZSL_MODEL_ID, prompt_len=len(prompt))
+        zs = client.zero_shot_classification(
+            prompt,
+            LABELS,
+            model=ZSL_MODEL_ID,
+            hypothesis_template="This text is about {}.",
+            multi_label=False,  # pick the best single label
+        )
+        # Response shape: {'labels': [...], 'scores': [...], 'sequence': '...'}
+        labels = zs.get("labels") or []
+        scores = zs.get("scores") or []
+        best = labels[0] if labels else "other_query"
+        score = float(scores[0]) if scores else 0.0
 
+        token = best if best in ALLOWED else "other_query"
+        log("DECISION_OK", token=token, top_label=best, score=round(score, 4))
+        return jsonify({"response": token}), 200
+
+    except Exception as e:
+        log("DECISION_FAIL", error=str(e))
+        return jsonify({"response": "other_query", "error": str(e)}), 200
 
 if __name__ == "__main__":
     port = int(os.getenv("PORT", 7860))
+    log("BOOT", port=port, zsl_model=ZSL_MODEL_ID, api_key_set=bool(API_KEY))
     app.run(host="0.0.0.0", port=port)
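For a quick check of the updated route, a minimal client sketch (the URL assumes the Space is reachable locally on the default port 7860; the Hindi prompt merely illustrates the multilingual claim in the model comment):

import requests

resp = requests.post(
    "http://localhost:7860/generate_text",
    json={"prompt": "Mujhe sar dard ke liye koi gharelu upay batao"},  # a health question in Hindi
    timeout=30,
)
print(resp.json())  # expected shape: {"response": "health_wellness"}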
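One caveat: the handler parses the result with zs.get(...), which matches the raw Inference API payload described in the inline comment. Newer huggingface_hub releases instead return a list of ZeroShotClassificationOutputElement objects (each carrying label and score), so a version-tolerant parse could look like the sketch below (top_label is a hypothetical helper, not part of the app above):

def top_label(zs):
    """Return (label, score) from either zero-shot response shape."""
    if isinstance(zs, dict):  # raw API payload: {'labels': [...], 'scores': [...], 'sequence': ...}
        labels = zs.get("labels") or []
        scores = zs.get("scores") or []
        if labels:
            return labels[0], float(scores[0]) if scores else 0.0
    elif isinstance(zs, list) and zs:  # dataclass elements from huggingface_hub
        best = max(zs, key=lambda el: el.score)
        return best.label, float(best.score)
    return "other_query", 0.0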