Spaces:

TheVera
/

decision-making-model

Sleeping

App Files Files Community

TheVera committed 18 days ago

Commit

16db54b

verified ·

1 Parent(s): d2b7fba

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -24

app.py CHANGED Viewed

@@ -10,29 +10,29 @@ from huggingface_hub import InferenceClient
 app = Flask(__name__)
-# --- Config ---
-MODEL_ID = os.getenv("MODEL_ID", "mistralai/Mixtral-8x7B-Instruct-v0.1").strip()
 API_KEY = os.getenv("API_KEY", "").strip()
-# If you created a private Inference Endpoint, put its full URL here:
-# e.g. https://xxxxxxxxx-abcdefg.hf.space or https://xxxx-yyy.endpoints.huggingface.cloud
-DECISION_ENDPOINT = os.getenv("DECISION_ENDPOINT", "").strip()  # optional but recommended
 TIMEOUT = 25
 ALLOWED = {
-    "health_wellness","spiritual_guidance","generate_image","realtime_query","other_query"
 }
 def log(msg, **kv):
-    parts = [msg] + [f"{k}={v}" for k,v in kv.items()]
-    print(" | ".join(parts), file=sys.stderr, flush=True)
-# --- Prompt formatting ---
 def format_prompt(user_message: str, instructions: str = "") -> str:
     """Wrap a user message, plus optional instructions, in an [INST] prompt.

     When ``instructions`` is truthy it is placed in a leading block followed
     by a fixed reminder to answer with exactly one token; otherwise that
     block is the empty string.

     NOTE(review): the block is opened and closed with the same ``<<SYS>>``
     marker -- confirm whether a distinct closing marker was intended.
     """
     if not instructions:
         header = ""
     else:
         reminder = "Return EXACTLY one token from the list above. No quotes, no punctuation, no extra words."
         header = f"<<SYS>>{instructions}\n{reminder}<<SYS>>"
     pieces = ("[INST] ", header, "\nUser: ", f"{user_message}", "\nAssistant: [/INST]")
     return "".join(pieces)
-# --- Extractor ---
-_token_re = re.compile(r"\b(health_wellness|spiritual_guidance|generate_image|realtime_query|other_query)\b", re.I)
 def extract_category(text: str) -> str:
     raw = (text or "").strip().lower()
     m = _token_re.search(raw)
@@ -40,8 +40,8 @@ def extract_category(text: str) -> str:
     first = raw.split()[0].strip(",.;:|") if raw else ""
     return first if first in ALLOWED else "other_query"
-# --- Conversational REST call (works even if client lacks .conversational) ---
 def hf_conversational(prompt: str) -> str:
     url = DECISION_ENDPOINT or f"https://api-inference.huggingface.co/models/{MODEL_ID}"
     headers = {
         "Authorization": f"Bearer {API_KEY}",
@@ -62,10 +62,7 @@ def hf_conversational(prompt: str) -> str:
             "stop": ["\n"],
             "return_full_text": False,
         },
-        "options": {
-            "use_cache": True,
-            "wait_for_model": True
-        }
     }
     for attempt in range(3):
         r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
@@ -74,8 +71,6 @@ def hf_conversational(prompt: str) -> str:
             continue
         r.raise_for_status()
         data = r.json()
-        # Common shapes:
         if isinstance(data, dict) and "generated_text" in data:
             return str(data["generated_text"]).strip()
         if isinstance(data, dict) and "choices" in data and data["choices"]:
@@ -89,11 +84,9 @@ def hf_conversational(prompt: str) -> str:
                 gresps = data[0]["generated_responses"]
                 if isinstance(gresps, list) and gresps:
                     return str(gresps[-1]).strip()
         return str(data).strip()
     return ""
-# --- Text-generation (use if provider supports it) ---
 def try_text_generation(client: InferenceClient, formatted: str) -> str:
     return client.text_generation(
         formatted,
@@ -107,6 +100,10 @@ def try_text_generation(client: InferenceClient, formatted: str) -> str:
         details=False
     )
 @app.post("/generate_text")
 def generate_text():
     if not API_KEY:
@@ -123,14 +120,11 @@ def generate_text():
     formatted = format_prompt(prompt, instructions)
     raw = ""
     try:
-        # First, try text-generation path
         client = InferenceClient(token=API_KEY)
         log("DECISION_CALL_TG", model=MODEL_ID, endpoint="hf_hub_text_generation")
         raw = try_text_generation(client, formatted)
     except Exception as e:
-        # If provider says "Supported task: conversational", fallback to REST conversational
-        msg = str(e)
-        log("DECISION_TG_FAIL", error=msg)
         log("DECISION_CALL_CONV", model=MODEL_ID, endpoint=(DECISION_ENDPOINT or "api-inference"))
         try:
             raw = hf_conversational(formatted)

 app = Flask(__name__)
+# ---------- Config ----------
+MODEL_ID = os.getenv("MODEL_ID", "HuggingFaceH4/zephyr-7b-beta").strip()
 API_KEY = os.getenv("API_KEY", "").strip()
+# Optional: your private Inference Endpoint URL (recommended for Mixtral)
+DECISION_ENDPOINT = os.getenv("DECISION_ENDPOINT", "").strip()
 TIMEOUT = 25
 ALLOWED = {
+    "health_wellness", "spiritual_guidance", "generate_image", "realtime_query", "other_query"
 }
 def log(msg, **kv):
+    print(" | ".join([msg] + [f"{k}={v}" for k, v in kv.items()]),
+          file=sys.stderr, flush=True)
 def format_prompt(user_message: str, instructions: str = "") -> str:
     """Wrap a user message, plus optional instructions, in an [INST] prompt.

     When ``instructions`` is truthy it is placed in a leading block followed
     by a fixed reminder to answer with exactly one token; otherwise that
     block is the empty string.

     NOTE(review): the block is opened and closed with the same ``<<SYS>>``
     marker -- confirm whether a distinct closing marker was intended.
     """
     if not instructions:
         header = ""
     else:
         reminder = "Return EXACTLY one token from the list above. No quotes, no punctuation, no extra words."
         header = f"<<SYS>>{instructions}\n{reminder}<<SYS>>"
     pieces = ("[INST] ", header, "\nUser: ", f"{user_message}", "\nAssistant: [/INST]")
     return "".join(pieces)
+_token_re = re.compile(
+    r"\b(health_wellness|spiritual_guidance|generate_image|realtime_query|other_query)\b",
+    re.I
+)
 def extract_category(text: str) -> str:
     raw = (text or "").strip().lower()
     m = _token_re.search(raw)
     first = raw.split()[0].strip(",.;:|") if raw else ""
     return first if first in ALLOWED else "other_query"
 def hf_conversational(prompt: str) -> str:
+    """Call conversational endpoint (public API or your private DECISION_ENDPOINT)."""
     url = DECISION_ENDPOINT or f"https://api-inference.huggingface.co/models/{MODEL_ID}"
     headers = {
         "Authorization": f"Bearer {API_KEY}",
             "stop": ["\n"],
             "return_full_text": False,
         },
+        "options": {"use_cache": True, "wait_for_model": True},
     }
     for attempt in range(3):
         r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
             continue
         r.raise_for_status()
         data = r.json()
         if isinstance(data, dict) and "generated_text" in data:
             return str(data["generated_text"]).strip()
         if isinstance(data, dict) and "choices" in data and data["choices"]:
                 gresps = data[0]["generated_responses"]
                 if isinstance(gresps, list) and gresps:
                     return str(gresps[-1]).strip()
         return str(data).strip()
     return ""
 def try_text_generation(client: InferenceClient, formatted: str) -> str:
     return client.text_generation(
         formatted,
         details=False
     )
+@app.get("/")
+def root():
+    return jsonify({"ok": True, "model": MODEL_ID})
 @app.post("/generate_text")
 def generate_text():
     if not API_KEY:
     formatted = format_prompt(prompt, instructions)
     raw = ""
     try:
         client = InferenceClient(token=API_KEY)
         log("DECISION_CALL_TG", model=MODEL_ID, endpoint="hf_hub_text_generation")
         raw = try_text_generation(client, formatted)
     except Exception as e:
+        log("DECISION_TG_FAIL", error=str(e))
         log("DECISION_CALL_CONV", model=MODEL_ID, endpoint=(DECISION_ENDPOINT or "api-inference"))
         try:
             raw = hf_conversational(formatted)