File size: 923 Bytes
7e405ea
 
 
24242bc
 
184700f
7e405ea
 
 
184700f
 
 
24242bc
184700f
24242bc
 
7e405ea
 
 
 
 
 
184700f
7e405ea
184700f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Hugging Face access token, read from the environment (None when unset).
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Hub repository id of the model to serve; replace with your own model.
# The original author describes this checkpoint as Falcon 7B.
MODEL_NAME = "SpiceyToad/demo-falc"

# Application object that the route decorators in this module register on.
app = FastAPI()

# Tokenizer and model are loaded once, at import time, and shared by every
# request handled by this process.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_API_TOKEN,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # let the library decide device placement
)

@app.post("/generate")
async def generate_text(request: Request):
    """Generate text from a prompt using the module-level model.

    The request body must be a JSON object with these optional fields:

    * ``"prompt"`` (str, default ``""``): text handed to the tokenizer.
    * ``"max_length"`` (positive int, default ``50``): forwarded to
      ``model.generate`` as ``max_length``.

    Returns:
        A dict ``{"generated_text": <str>}`` holding the first sequence
        returned by ``model.generate``, decoded with special tokens skipped.

    Raises:
        HTTPException: 400 when the body is not valid JSON; 422 when the
            body is not a JSON object or a field has the wrong type/value.
    """
    # Imported locally so the handler is self-contained with respect to the
    # file's import block (fastapi is already a dependency of this module).
    from fastapi import HTTPException

    try:
        data = await request.json()
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from exc

    # A JSON list/string/number has no .get(); reject it explicitly instead
    # of failing with an AttributeError (HTTP 500).
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=422, detail="Request body must be a JSON object"
        )

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        raise HTTPException(status_code=422, detail="'prompt' must be a string")

    max_length = data.get("max_length", 50)
    # bool is a subclass of int, so it has to be excluded by hand.
    if (
        isinstance(max_length, bool)
        or not isinstance(max_length, int)
        or max_length < 1
    ):
        raise HTTPException(
            status_code=422, detail="'max_length' must be a positive integer"
        )
    # NOTE(review): max_length has no upper bound and generate() below runs
    # synchronously inside this async handler, so a large value keeps the
    # event loop busy for the whole generation. Consider a cap and/or
    # off-loading to a worker - confirm against deployment requirements.

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs["input_ids"],
        # Forward the tokenizer's attention mask when it provides one rather
        # than letting generate() guess it. Fields are passed explicitly
        # (not **inputs) because some tokenizers emit extra keys, e.g.
        # token_type_ids, that generate() may reject.
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": response}