Update app.py
app.py
CHANGED
@@ -1,25 +1,25 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Request, HTTPException
+import requests
+import os
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 
 app = FastAPI()
 
+
+API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
+HF_TOKEN = os.getenv("HF_API_KEY")  # Load from environment variable
+headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+
 class PromptRequest(BaseModel):
     prompt: str
 
-# Load small LLaMA 3.2B model (or any other compatible)
-MODEL_NAME = "TheBloke/Llama-3-OpenOrca-2.2B-GGUF"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-
-@app.get("/")
-def root():
-    return {"message": "LLaMA 3.2B API for QuizForge is live!"}
 
 @app.post("/generate")
-def generate_text(data: PromptRequest):
-
-
-
-
+async def generate_text(data: PromptRequest):
+    try:
+        response = requests.post(API_URL, headers=headers, json={"inputs": data.prompt})
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
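A quick way to smoke-test the new endpoint is a short client script. The sketch below is not part of the commit: it assumes the file is saved as app.py, uvicorn is installed, and HF_API_KEY is exported; the port, URL, and prompt are illustrative.

# Sketch: exercise the new /generate route locally (hypothetical client).
# Assumes the server was started with something like:
#   export HF_API_KEY=hf_...your-token...
#   uvicorn app:app --port 8000
import requests

resp = requests.post(
    "http://localhost:8000/generate",  # illustrative local URL
    json={"prompt": "Write one quiz question about photosynthesis."},
)
resp.raise_for_status()
# The route returns the hosted Inference API's JSON unchanged; for
# text-generation models that is typically a list like
# [{"generated_text": "..."}], though the exact shape is model-dependent.
print(resp.json())

The design trade-off in this change: proxying to the hosted Inference API keeps the service lightweight, since no model weights are downloaded or held in memory, but every request now depends on the HF_API_KEY secret being set and on the hosted model's availability.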