demo-falc-api / app.py
SpiceyToad's picture
Fix API token and device handling
24242bc
raw
history blame
1.25 kB
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# Hugging Face access token, read from the environment so it is not kept in
# source. os.getenv returns None when the variable is unset.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

app = FastAPI()

# Load the Falcon 7B model and tokenizer
MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your Hugging Face repo name

# NOTE(review): newer transformers releases deprecate `use_auth_token` in
# favour of `token` -- confirm the installed version before switching.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_API_TOKEN)

# device_map="auto" delegates weight placement to accelerate, which chooses
# the target device(s) while the checkpoint is being loaded.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    use_auth_token=HF_API_TOKEN,
)

# The model is deliberately NOT moved with model.to(...): its weights were
# already placed by device_map="auto", and moving a dispatched model a second
# time is redundant and can fail when part of it has been offloaded.
# Instead, expose the device the model reports so callers can put their input
# tensors next to the weights.
device = model.device
# Ceiling applied to the client-supplied `max_length` so a single request
# cannot demand an arbitrarily long generation.
# NOTE(review): 2048 is chosen to match the context length Falcon-7B is
# documented with -- adjust if the deployed checkpoint differs.
_MAX_LENGTH_LIMIT = 2048


@app.post("/generate")
async def generate_text(request: Request):
    """Generate a continuation for the prompt supplied in the JSON body.

    Expected body: a JSON object with an optional string ``prompt``
    (default ``""``) and an optional positive integer ``max_length``
    (default ``50``, clamped to ``_MAX_LENGTH_LIMIT``). ``max_length`` is
    forwarded to ``model.generate`` unchanged in meaning.

    Returns:
        ``{"generated_text": <decoded text of the first returned sequence>}``

    Raises:
        HTTPException: 400 when the body is not valid JSON or not a JSON
            object; 422 when ``prompt`` or ``max_length`` has the wrong type
            or ``max_length`` is below 1.
    """
    # Imported here because the file header only imports FastAPI and Request.
    from fastapi import HTTPException

    # Parse input JSON; decoding failures surface as ValueError subclasses.
    try:
        data = await request.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from exc
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        raise HTTPException(status_code=422, detail="'prompt' must be a string")

    max_length = data.get("max_length", 50)
    # bool is a subclass of int, so reject it explicitly.
    if (
        isinstance(max_length, bool)
        or not isinstance(max_length, int)
        or max_length < 1
    ):
        raise HTTPException(
            status_code=422, detail="'max_length' must be a positive integer"
        )
    max_length = min(max_length, _MAX_LENGTH_LIMIT)

    # Tokenize input and generate text. The attention mask produced by the
    # tokenizer is forwarded so generate() does not have to infer it.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": generated_text}