from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = FastAPI()

# Load the model and tokenizer once at startup
model_id = "AI-Sweden/gpt-sw3-126m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# If you are running on CPU, pin the model there explicitly
device = torch.device("cpu")
model.to(device)
model.eval()  # inference mode: disables dropout etc.

# Request body schema
class Prompt(BaseModel):
    text: str
    max_new_tokens: int = 50

@app.post("/generate")
def generate_text(prompt: Prompt):
    # A plain `def` (not `async def`) lets FastAPI run this blocking
    # model call in a worker thread instead of stalling the event loop.
    inputs = tokenizer(prompt.text, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model.generate(**inputs, max_new_tokens=prompt.max_new_tokens)
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": generated}
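
For a quick sanity check you can exercise the endpoint with FastAPI's TestClient, without starting a server. This is a minimal sketch: it assumes the code above is saved as main.py (a hypothetical module name) and that the model weights have already been downloaded.

# Minimal sketch; assumes the app above lives in main.py
from fastapi.testclient import TestClient

from main import app  # hypothetical module name, adjust to your file

client = TestClient(app)
resp = client.post(
    "/generate",
    json={"text": "Stockholm är", "max_new_tokens": 30},
)
print(resp.json())  # e.g. {"response": "Stockholm är ..."}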