VortexHunter23 committed
Commit b1d31b7 · verified · 1 Parent(s): acb3b01

Upload 4 files

Files changed (4)
  1. Dockerfile +24 -0
  2. app.py +33 -0
  3. requirements.txt +5 -0
  4. start.sh +34 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
+FROM python:3.9-slim
+
+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set up user and environment
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR $HOME/app
+
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . .
+
+# Make the start script executable
+RUN chmod +x start.sh
+
+CMD ["./start.sh"]
app.py ADDED
@@ -0,0 +1,33 @@
+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from langchain_community.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+MODEL_NAME = 'tinyllama'
+
+def get_llm():
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)
+
+class Question(BaseModel):
+    text: str
+
+@app.get("/")
+def read_root():
+    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
+
+@app.on_event("startup")
+async def startup_event():
+    logger.info(f"Starting up with model: {MODEL_NAME}")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    logger.info("Shutting down")
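
Note: as committed, app.py imports StreamingResponse and HTTPException and defines Question and get_llm(), but no route actually calls the model yet. A minimal sketch of how those pieces could be wired together, appended to the file above, might look like the following; the /ask path and its error handling are assumptions, not part of this commit.

# Hypothetical /ask route -- not part of this commit; reuses Question, get_llm, and logger from app.py.
@app.post("/ask")
async def ask_question(question: Question):
    try:
        llm = get_llm()
        # llm.stream() yields text chunks; StreamingResponse forwards them to the client as they arrive.
        return StreamingResponse(llm.stream(question.text), media_type="text/plain")
    except Exception as exc:
        logger.error(f"Generation failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

A client could then POST {"text": "..."} to /ask and read the reply incrementally instead of waiting for the full completion.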
requirements.txt ADDED
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+langchain
+langchain_community
+ollama
start.sh ADDED
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+# Start Ollama in the background
+ollama serve &
+
+# Wait for Ollama to start up before talking to it
+max_attempts=30
+attempt=0
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within 30 seconds. Exiting."
+        exit 1
+    fi
+done
+
+echo "Ollama is ready."
+
+# Pull the model if not already present
+if ! ollama list | grep -q "tinyllama"; then
+    ollama pull tinyllama
+fi
+
+# Print the API URL
+echo "API is running on: http://0.0.0.0:7860"
+
+# Start the FastAPI server
+uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20
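
Once the container is running, a quick smoke test (a sketch, assuming the Space is reachable on localhost:7860) is to hit the root route that app.py defines:

# Smoke test: GET the root endpoint and print the JSON greeting.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.loads(resp.read()))  # expect {"Hello": "Welcome to tinyllama FastAPI"}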