TheVera committed on
Commit
8e98672
·
verified ·
1 Parent(s): f9faf91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -127
app.py CHANGED
@@ -1,143 +1,76 @@
1
  import os
2
- import re
3
- import sys
4
- import time
5
- import json
6
- from typing import Any, Dict
7
- import requests
8
  from flask import Flask, request, jsonify
 
9
 
10
  app = Flask(__name__)
11
 
 
12
  MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
13
- API_KEY = os.getenv("API_KEY")
14
- HF_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
15
- TIMEOUT = 25 # seconds
16
-
17
- ALLOWED = {
18
- "health_wellness",
19
- "spiritual_guidance",
20
- "generate_image",
21
- "realtime_query",
22
- "other_query",
23
- }
24
-
25
- _token_re = re.compile(
26
- r"\b(health_wellness|spiritual_guidance|generate_image|realtime_query|other_query)\b",
27
- re.I
28
- )
29
-
30
- def format_prompt(user_text: str, instructions: str) -> str:
31
- # One [INST] block with a <<SYS>> section is reliable for Mixtral
32
- system = (
33
- f"<<SYS>>{instructions}\n"
34
- f"Return EXACTLY one token from the list above. No quotes, no punctuation, no extra words."
35
- f"<<SYS>>"
36
- )
37
- return f"[INST] {system}\nUser: {user_text}\nAssistant: [/INST]"
38
-
39
- def extract_category(text: str) -> str:
40
- m = _token_re.search((text or "").strip().lower())
41
- return m.group(1) if m else "other_query"
42
 
43
- def hf_conversational_call(prompt: str) -> str:
44
- """
45
- Call HF Inference API using the 'conversational' task payload.
46
- Handles model warmup (503) and different response shapes.
47
- """
48
- headers = {
49
- "Authorization": f"Bearer {API_KEY}",
50
- "Accept": "application/json",
51
- "Content-Type": "application/json",
52
- }
53
 
54
- payload: Dict[str, Any] = {
55
- "inputs": {
56
- # Conversational schema — we pass a single-turn prompt
57
- "past_user_inputs": [],
58
- "generated_responses": [],
59
- "text": prompt,
60
- },
61
- "parameters": {
62
- "max_new_tokens": 3, # just enough to emit one category token
63
- "temperature": 0.0,
64
- "top_p": 1.0,
65
- "repetition_penalty": 1.0,
66
- "stop": ["\n"], # cut at first newline if it tries to add more
67
- # Some backends use 'return_full_text'; harmless if ignored
68
- "return_full_text": False,
69
- },
70
- "options": {
71
- "use_cache": True,
72
- "wait_for_model": True, # block until the model is loaded
73
- },
74
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # minimal retry on cold start
77
- for attempt in range(3):
78
- r = requests.post(HF_URL, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
79
- if r.status_code == 503:
80
- # model loading — wait and retry
81
- time.sleep(2 + attempt)
82
- continue
83
- r.raise_for_status()
84
- data = r.json()
85
-
86
- # Response can be a dict or a list (legacy). Try common shapes:
87
- # 1) dict with 'generated_text'
88
- if isinstance(data, dict) and "generated_text" in data:
89
- return str(data["generated_text"]).strip()
90
- # 2) dict with 'choices' (some backends)
91
- if isinstance(data, dict) and "choices" in data and data["choices"]:
92
- # choices[i].text or .message?.content
93
- ch = data["choices"][0]
94
- txt = ch.get("text") or ch.get("message", {}).get("content")
95
- if txt:
96
- return str(txt).strip()
97
- # 3) list with first item having 'generated_text'
98
- if isinstance(data, list) and data and isinstance(data[0], dict):
99
- if "generated_text" in data[0]:
100
- return str(data[0]["generated_text"]).strip()
101
- # sometimes 'conversation' shaped
102
- if "generated_responses" in data[0]:
103
- gresps = data[0]["generated_responses"]
104
- if isinstance(gresps, list) and gresps:
105
- return str(gresps[-1]).strip()
106
-
107
- # fallback: stringify
108
- return str(data).strip()
109
-
110
- # If all retries hit 503, give up gracefully
111
- return ""
112
-
113
- @app.post("/generate_text")
114
  def generate_text():
115
- data = request.get_json(silent=True) or {}
116
- prompt = (data.get("prompt") or "").strip()
117
- instructions = (data.get("instructions") or "").strip()
 
118
 
119
- if not API_KEY:
120
- return jsonify({"error": "Missing API_KEY"}), 400
121
- if not prompt or not instructions:
122
  return jsonify({"error": "Missing required fields"}), 400
123
 
124
- try:
125
- formatted = format_prompt(prompt, instructions)
126
- raw = hf_conversational_call(formatted)
127
- token = extract_category(raw)
128
-
129
- print("RAW_DECISION:", repr(raw), "->", token, file=sys.stderr, flush=True)
130
-
131
- if token not in ALLOWED:
132
- token = "other_query"
133
- return jsonify({"response": token})
134
-
135
- except requests.HTTPError as he:
136
- print("DECISION_HTTP_ERROR:", repr(he), file=sys.stderr, flush=True)
137
- return jsonify({"response": "other_query", "error": str(he)}), 200
138
- except Exception as e:
139
- print("DECISION_ERROR:", repr(e), file=sys.stderr, flush=True)
140
- return jsonify({"response": "other_query", "error": str(e)}), 200
141
 
142
  if __name__ == "__main__":
143
- app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
 
1
  import os
2
+ import uvicorn
 
 
 
 
 
3
  from flask import Flask, request, jsonify
4
+ from huggingface_hub import InferenceClient
5
 
6
  app = Flask(__name__)
7
 
8
+ # Fixed API URL
9
  MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Use environment variable for the API key
12
+ API_KEY = os.getenv("API_KEY")
 
 
 
 
 
 
 
 
13
 
14
def format_prompt(message, custom_instructions=None):
    """Build a Mixtral-style prompt of concatenated [INST] ... [/INST] blocks.

    When *custom_instructions* is truthy it is emitted as its own leading
    [INST] block, followed by the user *message* block. Blocks are joined
    with no separator between them.
    """
    segments = []
    if custom_instructions:
        segments.append(f"[INST] {custom_instructions} [/INST]")
    segments.append(f"[INST] {message} [/INST]")
    return "".join(segments)
20
+
21
def normalize_text(text):
    """Lowercase *text*, swap a few British spellings for American ones,
    and capitalize only the first character of the result.

    Note: str.capitalize() lowercases everything after the first character,
    so the capital letters in the replacement values below survive only when
    the replaced word sits at the very start of the string.
    """
    # British -> American spelling map; extend as needed.
    spelling_map = {
        'summarise': 'Summarize',
        'colour': 'Color',
        'favour': 'Favor',
        'centre': 'Center',
    }

    normalized = text.lower()
    for uk_form, us_form in spelling_map.items():
        normalized = normalized.replace(uk_form, us_form)

    return normalized.capitalize()
37
+
38
def Mistral7B(prompt, instructions, api_key, temperature=0.2, max_new_tokens=18, top_p=0.9, repetition_penalty=1.0):
    """Generate text from the Mixtral model via the HF Inference API.

    The *instructions* are folded into the prompt by format_prompt(). On any
    failure the exception is stringified and returned, so callers always
    receive a string.

    NOTE(review): returning str(e) makes errors indistinguishable from model
    output to the caller — consider raising instead.
    """
    try:
        # Floor temperature at 0.01 — zero/negative values are clamped.
        temperature = max(float(temperature), 1e-2)
        top_p = float(top_p)

        formatted_prompt = format_prompt(prompt, instructions)
        client = InferenceClient(token=api_key)
        # Fixed seed keeps sampled output reproducible for identical requests.
        return client.text_generation(
            formatted_prompt,
            model=MODEL_ID,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=69,
        )
    except Exception as e:
        return str(e)
59
 
60
@app.route("/generate_text", methods=["POST"])
def generate_text():
    """POST /generate_text — JSON body: {"prompt": ..., "instructions": ...}.

    Returns {"response": <generated text>} with 200, or
    {"error": "Missing required fields"} with 400 when the prompt,
    instructions, or server-side API key is absent.
    """
    # get_json(silent=True) yields None instead of raising on a missing or
    # malformed JSON body, so bad requests hit our clean JSON 400 below
    # rather than Flask's default HTML error page.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt")
    instructions = data.get("instructions")
    api_key = API_KEY  # server-side key from the environment, never the request

    if not prompt or not instructions or not api_key:
        return jsonify({"error": "Missing required fields"}), 400

    normalized_prompt = normalize_text(prompt)
    response = Mistral7B(normalized_prompt, instructions, api_key)
    print(response)  # lightweight request logging to stdout
    return jsonify({"response": response}), 200
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
if __name__ == "__main__":
    # BUG FIX: Flask is a WSGI application; uvicorn is an ASGI server and
    # cannot serve it directly (requests fail with a call-signature error).
    # Use Flask's built-in server, binding to all interfaces on $PORT
    # (default 7860, the Hugging Face Spaces convention).
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))