SpiceyToad committed on
Commit 1bef953 · verified · 1 Parent(s): 81fe8c1

Upload 2 files


Update configuration

Files changed (2)
  1. Dockerfile +15 -13
  2. app.py +15 -5
Dockerfile CHANGED
@@ -1,24 +1,26 @@
-# Use a lightweight PyTorch image with GPU support
 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
 
-# Set the working directory
-WORKDIR /app
+# Create a new user with user ID 1000
+RUN useradd -m -u 1000 user
 
-# Set writable directories for Hugging Face cache
-ENV TRANSFORMERS_CACHE=/app/cache
-ENV HF_HOME=/app/cache
+# Set environment variables for cache
+ENV HF_HOME=/home/user/cache
 
-# Create the cache directory
-RUN mkdir -p /app/cache
+# Set the working directory and switch to the new user
+USER user
+WORKDIR /home/user/app
 
-# Copy the application files into the container
-COPY . /app
+# Create the cache directory with correct ownership and permissions
+RUN mkdir -p /home/user/cache && chmod -R 777 /home/user/cache
 
-# Install required Python dependencies
+# Copy application files with ownership set to the new user
+COPY --chown=user . /home/user/app
+
+# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Expose the FastAPI port
+# Expose the application port
 EXPOSE 7860
 
-# Command to run the FastAPI application
+# Run the application
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,17 +1,26 @@
+import os
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-import os
 
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Hugging Face API token
+# Set Hugging Face cache directory
+os.environ["HF_HOME"] = "/home/user/cache"
+
+# Get Hugging Face API token
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+if not HF_API_TOKEN:
+    raise ValueError("HF_API_TOKEN environment variable is not set!")
 
 app = FastAPI()
 
-# Load Falcon 7B
-MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your model
+# Load Falcon 7B model
+MODEL_NAME = "SpiceyToad/demo-falc"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
+    MODEL_NAME,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    token=HF_API_TOKEN
 )
 
 @app.post("/generate")
@@ -20,6 +29,7 @@ async def generate_text(request: Request):
     prompt = data.get("prompt", "")
     max_length = data.get("max_length", 50)
 
+    # Tokenize and generate
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(inputs["input_ids"], max_length=max_length)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
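Note: once the container is running with HF_API_TOKEN set (e.g. as a secret on Spaces, or via docker run -e HF_API_TOKEN=... locally), the /generate endpoint can be exercised with a short client. The sketch below is not part of the commit; it assumes the requests package and a server reachable on port 7860, the port exposed in the Dockerfile. The diff does not show what the handler returns after building response, so the raw body is printed rather than a specific JSON field.

# client.py -- minimal sketch for exercising /generate.
import requests

resp = requests.post(
    "http://localhost:7860/generate",
    # "prompt" and "max_length" are the keys the endpoint reads above.
    json={"prompt": "Once upon a time", "max_length": 50},
    timeout=120,  # the first call may be slow while the model loads
)
print(resp.status_code)
print(resp.text)  # response shape not shown in the diff; print the raw body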