ragul2607 committed
Commit 68d3b08 · 1 Parent(s): 80bc276

initialize project structure with Dockerfile, app.py, and requirements.txt

Files changed (3)
  1. Dockerfile +13 -0
  2. app.py +25 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,25 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ app = FastAPI()
+
+ class PromptRequest(BaseModel):
+     prompt: str
+
+ # Load small LLaMA 3.2B model (or any other compatible)
+ MODEL_NAME = "TheBloke/Llama-3-OpenOrca-2.2B-GGUF"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+ @app.get("/")
+ def root():
+     return {"message": "LLaMA 3.2B API for QuizForge is live!"}
+
+ @app.post("/generate")
+ def generate_text(data: PromptRequest):
+     inputs = tokenizer(data.prompt, return_tensors="pt")
+     outputs = model.generate(**inputs, max_new_tokens=1024)
+     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return {"response": output_text}
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ transformers
+ torch