Spaces:

SpiceyToad
/

demo-falc-api

Sleeping

SpiceyToad committed on Dec 5, 2024

Commit

1bef953

verified ·

1 Parent(s): 81fe8c1

Upload 2 files

Update configuration

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -1,24 +1,26 @@
-# Use a lightweight PyTorch image with GPU support
 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
-# Set the working directory
-WORKDIR /app
-# Set writable directories for Hugging Face cache
-ENV TRANSFORMERS_CACHE=/app/cache
-ENV HF_HOME=/app/cache
-# Create the cache directory
-RUN mkdir -p /app/cache
-# Copy the application files into the container
-COPY . /app
-# Install required Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
-# Expose the FastAPI port
 EXPOSE 7860
-# Command to run the FastAPI application
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
+# Create a new user with user ID 1000
+RUN useradd -m -u 1000 user
+# Set environment variables for cache
+ENV HF_HOME=/home/user/cache
+# pip runs as the non-root user below; when it falls back to a per-user
+# install, console scripts (including uvicorn) land in ~/.local/bin, so that
+# directory must be on PATH for the CMD below to resolve
+ENV PATH=/home/user/.local/bin:$PATH
+# Set the working directory and switch to the new user
+USER user
+WORKDIR /home/user/app
+# Create the cache directory with correct ownership and permissions
+RUN mkdir -p /home/user/cache && chmod -R 777 /home/user/cache
+# Copy application files with ownership set to the new user
+COPY --chown=user . /home/user/app
+# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
+# Expose the application port
 EXPOSE 7860
+# Run the application
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,17 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-import os
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Hugging Face API token
 app = FastAPI()
-# Load Falcon 7B
-MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
 )
 @app.post("/generate")
@@ -20,6 +29,7 @@ async def generate_text(request: Request):
     prompt = data.get("prompt", "")
     max_length = data.get("max_length", 50)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(inputs["input_ids"], max_length=max_length)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)

+import os
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Set Hugging Face cache directory
+os.environ["HF_HOME"] = "/home/user/cache"
+# Get Hugging Face API token
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+if not HF_API_TOKEN:
+    raise ValueError("HF_API_TOKEN environment variable is not set!")
 app = FastAPI()
+# Load Falcon 7B model
+MODEL_NAME = "SpiceyToad/demo-falc"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    token=HF_API_TOKEN
 )
 @app.post("/generate")
     prompt = data.get("prompt", "")
     max_length = data.get("max_length", 50)
+    # Tokenize and generate
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(inputs["input_ids"], max_length=max_length)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)