crespo12 committed on
Commit
19f7665
·
1 Parent(s): bba6aca
Files changed (3) hide show
  1. Dockerfile +7 -12
  2. app.py +24 -12
  3. requirement.txt +5 -1
Dockerfile CHANGED
@@ -1,16 +1,11 @@
1
- FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
2
 
3
- # Installer les dépendances système
4
- RUN apt-get update && apt-get install -y \
5
- python3 python3-pip git && \
6
- ln -s /usr/bin/python3 /usr/bin/python
7
 
8
- # Installer VLLM
9
- RUN pip install --upgrade pip
10
- RUN pip install vllm
11
 
12
- # Ajouter ton script
13
- COPY app.py /app.py
 
14
 
15
- # Lancer le serveur VLLM
16
- CMD ["python", "/app.py"]
 
1
+ FROM python:3.10
2
 
3
+ WORKDIR /app
 
 
 
4
 
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
 
7
 
8
+ COPY . .
9
+
10
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
11
 
 
 
app.py CHANGED
@@ -1,12 +1,24 @@
1
- import subprocess
2
-
3
- # Exemple avec un modèle léger (à adapter)
4
- model = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
5
-
6
- # Lancement de VLLM en mode API
7
- subprocess.run([
8
- "python3", "-m", "vllm.entrypoints.api_server",
9
- "--model", model,
10
- "--host", "0.0.0.0",
11
- "--port", "7860"
12
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+
6
+ app = FastAPI()
7
+
8
+ # Charger le modèle
9
+ model_name = "google/medgemma-4b-pt"
10
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
11
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
12
+
13
+ # Requête attendue
14
+ class Request(BaseModel):
15
+ prompt: str
16
+
17
+ @app.post("/generate")
18
+ def generate(request: Request):
19
+ inputs = tokenizer(request.prompt, return_tensors="pt").to(model.device)
20
+ with torch.no_grad():
21
+ outputs = model.generate(**inputs, max_new_tokens=100)
22
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
23
+ return {"response": result}
24
+
requirement.txt CHANGED
@@ -1 +1,5 @@
1
- vllm
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+