Update app.py
app.py CHANGED
@@ -122,6 +122,55 @@ def log():
             json.dump(existing_data, f)
         return jsonify({'message': 'Data added successfully', 'data': existing_data}), 201
 
+@app.route("/demo_inference", methods=['GET'])
+@cross_origin()
+def demo_inference():
+    args = request.args
+    # model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+    model_id = args.get('model_id', default="meta-llama/Llama-4-Scout-17B-16E-Instruct")
+    temperature = args.get('temperature', default=0.5)
+    max_new_tokens = args.get('max_new_tokens', default=1000)
+
+    hf_token, hf_url = get_credentials.get_credentials()
+
+    prompt = args.get('prompt')
+
+    API_URL = "https://router.huggingface.co/together/v1/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {hf_token}",
+    }
+
+    response = requests.post(
+        API_URL,
+        headers=headers,
+        json={
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        },
+                    ]
+                }
+            ],
+            "model": model_id,
+            'temperature': temperature,
+            'max_new_tokens': max_new_tokens,
+        }
+    )
+    try:
+        response = response.json()["choices"][0]["message"]
+        response.update({
+            'model_id': model_id,
+            'temperature': temperature,
+            'max_new_tokens': max_new_tokens,
+        })
+        return response
+    except:
+        return response.text, response.status_code
+
 if __name__=='__main__':
     debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't']
     app.run(host='0.0.0.0', port='7860', debug=debug_mode)
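For context, a minimal sketch of how the new /demo_inference route could be exercised once the app is running. The base URL and the prompt text below are placeholders, not part of this commit; the parameter names simply mirror the query arguments read by the handler.

# Hypothetical client for the new route; BASE_URL is an assumed placeholder.
import requests

BASE_URL = "http://localhost:7860"  # the app binds to 0.0.0.0:7860 in app.run()

params = {
    "prompt": "Summarize the Flask framework in one sentence.",
    "model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "temperature": 0.5,
    "max_new_tokens": 1000,
}

resp = requests.get(f"{BASE_URL}/demo_inference", params=params)
print(resp.status_code)
print(resp.json())  # the returned message dict plus the echoed model_id, temperature, max_new_tokens

Note that values supplied via the query string reach request.args as strings, so temperature and max_new_tokens are forwarded to the router as strings unless they are cast inside the handler.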