Spaces:
Sleeping
Sleeping
File size: 923 Bytes
7e405ea 24242bc 184700f 7e405ea 184700f 24242bc 184700f 24242bc 7e405ea 184700f 7e405ea 184700f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import os

import torch
from fastapi import FastAPI, HTTPException, Request
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face access token read from the environment; None when unset.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Hub repository id of the checkpoint to serve; replace with your own model.
MODEL_NAME = "SpiceyToad/demo-falc"

app = FastAPI()

# Tokenizer and model are loaded once, at import time, and shared by the
# request handlers defined in this module.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_API_TOKEN,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
@app.post("/generate")
async def generate_text(request: Request):
    """Generate a continuation for the prompt given in the JSON request body.

    The body must be a JSON object. Recognised keys:
        prompt: string to continue (default ``""``).
        max_length: positive integer forwarded to ``model.generate`` as
            ``max_length`` (default ``50``).

    Returns:
        ``{"generated_text": <text>}`` where the text is the first generated
        sequence decoded with special tokens skipped.

    Raises:
        HTTPException: 400 when the body is not valid JSON; 422 when the body
            is not a JSON object or a field has an invalid type or value.
    """
    try:
        data = await request.json()
    except ValueError as exc:
        # Covers json.JSONDecodeError and undecodable bytes: a client error,
        # not a server fault.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from exc

    if not isinstance(data, dict):
        raise HTTPException(
            status_code=422, detail="Request body must be a JSON object."
        )

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        raise HTTPException(status_code=422, detail="'prompt' must be a string.")

    max_length = data.get("max_length", 50)
    # bool is a subclass of int, so JSON true/false must be rejected explicitly.
    if (
        isinstance(max_length, bool)
        or not isinstance(max_length, int)
        or max_length < 1
    ):
        raise HTTPException(
            status_code=422, detail="'max_length' must be a positive integer."
        )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Pass the attention mask along with the ids so generate() does not have
    # to guess it. The whole encoding is deliberately not unpacked: extra
    # tokenizer outputs may not be accepted by the model.
    # NOTE(review): generate() is a synchronous call inside an async handler,
    # so it occupies the event loop while it runs -- confirm this is acceptable.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": response}
|