VortexHunter23 committed
Commit b1d31b7 · verified · 1 Parent(s): acb3b01

Upload 4 files

Files changed (4)
  1. Dockerfile +24 -0
  2. app.py +33 -0
  3. requirements.txt +5 -0
  4. start.sh +34 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
+FROM python:3.9-slim
+
+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set up user and environment
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR $HOME/app
+
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . .
+
+# Make the start script executable
+RUN chmod +x start.sh
+
+CMD ["./start.sh"]
app.py ADDED
@@ -0,0 +1,33 @@
+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from langchain_community.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+MODEL_NAME = 'tinyllama'
+
+def get_llm():
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)
+
+class Question(BaseModel):
+    text: str
+
+@app.get("/")
+def read_root():
+    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
+
+@app.on_event("startup")
+async def startup_event():
+    logger.info(f"Starting up with model: {MODEL_NAME}")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    logger.info("Shutting down")
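
Note: as committed, app.py imports StreamingResponse and HTTPException and defines Question and get_llm(), but no route actually calls the model yet. A minimal sketch of how those pieces could be wired together, appended to the file above, might look like the following; the /ask path and its error handling are assumptions, not part of this commit.

# Hypothetical /ask route -- not part of this commit; reuses Question, get_llm, and logger from app.py.
@app.post("/ask")
async def ask_question(question: Question):
    try:
        llm = get_llm()
        # llm.stream() yields text chunks; StreamingResponse forwards them to the client as they arrive.
        return StreamingResponse(llm.stream(question.text), media_type="text/plain")
    except Exception as exc:
        logger.error(f"Generation failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

A client could then POST {"text": "..."} to /ask and read the reply incrementally instead of waiting for the full completion.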
requirements.txt ADDED
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+langchain
+langchain_community
+ollama
start.sh ADDED
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+# Start Ollama in the background
+ollama serve &
+
+# Wait for Ollama to start up before talking to it
+max_attempts=30
+attempt=0
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within 30 seconds. Exiting."
+        exit 1
+    fi
+done
+
+echo "Ollama is ready."
+
+# Pull the model if not already present
+if ! ollama list | grep -q "tinyllama"; then
+    ollama pull tinyllama
+fi
+
+# Print the API URL
+echo "API is running on: http://0.0.0.0:7860"
+
+# Start the FastAPI server
+uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20
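
Once the container is running, a quick smoke test (a sketch, assuming the Space is reachable on localhost:7860) is to hit the root route that app.py defines:

# Smoke test: GET the root endpoint and print the JSON greeting.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.loads(resp.read()))  # expect {"Hello": "Welcome to tinyllama FastAPI"}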