SpiceyToad committed on
Commit
184700f
·
1 Parent(s): 24242bc
Files changed (1)
  1. app.py +8 -16
app.py CHANGED
@@ -3,32 +3,24 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
 
-# Retrieve the Hugging Face API token from the environment
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Hugging Face API token
 
 app = FastAPI()
 
-# Load the Falcon 7B model and tokenizer
-MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your Hugging Face repo name
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_API_TOKEN)
+# Load Falcon 7B
+MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto", use_auth_token=HF_API_TOKEN
+    MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
 )
 
-# Automatically determine if CUDA is available for GPU support
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = model.to(device)
-
 @app.post("/generate")
 async def generate_text(request: Request):
-    # Parse input JSON
     data = await request.json()
     prompt = data.get("prompt", "")
     max_length = data.get("max_length", 50)
 
-    # Tokenize input and generate text
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(inputs["input_ids"], max_length=max_length)
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    return {"generated_text": generated_text}
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return {"generated_text": response}