ragul2607 committed
Commit 87d6326 · verified · 1 Parent(s): 68d3b08

Update app.py

Files changed (1): app.py (+16/-16)
app.py CHANGED
@@ -1,25 +1,25 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Request, HTTPException
+import requests
+import os
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 
 app = FastAPI()
 
+
+API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
+HF_TOKEN = os.getenv("HF_API_KEY")  # Load from environment variable
+headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+
 class PromptRequest(BaseModel):
     prompt: str
 
-# Load small LLaMA 3.2B model (or any other compatible)
-MODEL_NAME = "TheBloke/Llama-3-OpenOrca-2.2B-GGUF"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-
-@app.get("/")
-def root():
-    return {"message": "LLaMA 3.2B API for QuizForge is live!"}
 
 @app.post("/generate")
-def generate_text(data: PromptRequest):
-    inputs = tokenizer(data.prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=1024)
-    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return {"response": output_text}
+async def generate_text(data: PromptRequest):
+    try:
+        response = requests.post(API_URL, headers=headers, json={"inputs": data.prompt})
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
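
Note: the rewrite drops the old max_new_tokens=1024 cap that model.generate() applied. The hosted text-generation endpoint accepts an optional "parameters" object in the request body, so an equivalent cap could be restored there. A minimal sketch of that payload (a suggestion, not part of this commit):

    payload = {
        "inputs": data.prompt,
        # "parameters" is the Inference API's generation-options field;
        # restoring the old 1024-token cap here is an assumption, not in the commit.
        "parameters": {"max_new_tokens": 1024},
    }
    response = requests.post(API_URL, headers=headers, json=payload)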
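For local testing, a minimal client sketch against the updated route (this assumes the app is served with uvicorn on port 8000 and HF_API_KEY is exported in the server's environment; the URL, port, and prompt are illustrative assumptions, not part of the commit):

    import requests

    # Call the /generate route added above; the handler passes through
    # the raw Hugging Face Inference API JSON as its response body.
    resp = requests.post(
        "http://localhost:8000/generate",  # assumed local dev address
        json={"prompt": "Write one quiz question about photosynthesis."},
    )
    resp.raise_for_status()
    print(resp.json())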