SpiceyToad committed on
Commit
7e405ea
Β·
1 Parent(s): 1087336

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -60
  2. README.md +30 -7
  3. app.py +24 -0
  4. requirements.txt +4 -0
Dockerfile CHANGED
@@ -1,65 +1,17 @@
1
- FROM zenmldocker/zenml-server:latest
 
2
 
3
- ENV ZENML_ANALYTICS_OPT_IN=true
4
- ENV ZENML_SERVER_DEPLOYMENT_TYPE="hf_spaces"
5
- ENV ZENML_LOGGING_VERBOSITY=DEBUG
6
 
7
- ################################################################################
8
- #
9
- # CONFIGURING YOUR ZENML HF SPACES SERVER
10
- # ---------------------------------------
11
- # By default this space is not persistent. All ZenML metadata is stored in
12
- # localstorage in a SQLite database. If you would like to make your storage
13
- # persistent, use the appropriate environment variables below to configure the
14
- # image to use a MySQL-compatible database service that is reachable from the
15
- # container. See https://docs.zenml.io/getting-started/deploying-zenml/docker
16
- # for more information on how to configure these environment variables.
17
 
18
- # You can also configure the secrets store to use for your ZenML server. Be
19
- # sure to use Huggingface Spaces' 'Repository Secrets' feature to store any
20
- # secrets referenced here. See
21
- # https://huggingface.co/docs/hub/spaces-overview#managing-secrets for more
22
- # information on how to configure these environment variables.
23
 
24
- # ENV ZENML_DEFAULT_PROJECT_NAME=""
25
- # ENV ZENML_DEFAULT_USER_NAME=""
26
- # ENV ZENML_DEFAULT_USER_PASSWORD=""
27
- # ENV ZENML_STORE_URL=""
28
- # ENV ZENML_STORE_SSL_CA=""
29
- # ENV ZENML_STORE_SSL_CERT=""
30
- # ENV ZENML_STORE_SSL_KEY=""
31
- # ENV ZENML_STORE_SSL_VERIFY_SERVER_CERT=""
32
 
33
- # ENV ZENML_LOGGING_VERBOSITY=""
34
-
35
- # # SECRETS STORE CONFIGURATION
36
- # ENV ZENML_SECRETS_STORE_TYPE=""
37
- # ENV ZENML_SECRETS_STORE_ENCRYPTION_KEY=""
38
- # ENV ZENML_SECRETS_STORE_CLASS_PATH=""
39
- # ENV ZENML_JWT_SECRET_KEY=""
40
-
41
- # # AWS Secrets Store Configuration
42
- # ENV ZENML_SECRETS_STORE_REGION_NAME=""
43
- # ENV ZENML_SECRETS_STORE_AWS_ACCESS_KEY_ID=""
44
- # ENV ZENML_SECRETS_STORE_AWS_SECRET_ACCESS_KEY=""
45
- # ENV ZENML_SECRETS_STORE_AWS_SESSION_TOKEN=""
46
- # ENV ZENML_SECRETS_STORE_SECRET_LIST_REFRESH_TIMEOUT=""
47
-
48
- # # GCP Secrets Store Configuration
49
- # ENV ZENML_SECRETS_STORE_PROJECT_ID=""
50
- # ENV GOOGLE_APPLICATION_CREDENTIALS=""
51
-
52
- # # Azure Secrets Store Configuration
53
- # ENV ZENML_SECRETS_STORE_KEY_VAULT_NAME=""
54
- # ENV ZENML_SECRETS_STORE_AZURE_CLIENT_ID=""
55
- # ENV ZENML_SECRETS_STORE_AZURE_CLIENT_SECRET=""
56
- # ENV ZENML_SECRETS_STORE_AZURE_TENANT_ID=""
57
-
58
- # # Hashicorp Secrets Store Configuration
59
- # ENV ZENML_SECRETS_STORE_VAULT_ADDR=""
60
- # ENV ZENML_SECRETS_STORE_VAULT_TOKEN=""
61
- # ENV ZENML_SECRETS_STORE_VAULT_NAMESPACE=""
62
- # ENV ZENML_SECRETS_STORE_MAX_VERSIONS=""
63
-
64
- ENTRYPOINT ["uvicorn", "zenml.zen_server.zen_server_api:app", "--log-level", "debug"]
65
- CMD ["--proxy-headers", "--port", "8080", "--host", "0.0.0.0"]
 
1
# syntax=docker/dockerfile:1

# Lightweight PyTorch runtime image with CUDA support. Note: torch ships
# preinstalled in this base, so requirements.txt does not pull a new wheel
# unless it pins a different version.
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime

# All application files live under /app (absolute WORKDIR; created if missing).
WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached
# until requirements.txt itself changes — copying the whole source tree
# before installing would reinstall dependencies on every code edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source after the dependency layer.
COPY . .

# Run as an unprivileged user; port 7860 is above 1024 so no extra
# capabilities are needed to bind it.
RUN useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app
USER appuser

# Documentation only (does not publish the port); matches the Space's
# app_port setting in README front-matter.
EXPOSE 7860

# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM from
# `docker stop` directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,10 +1,33 @@
1
- ---
2
- title: Demo Falc Api
3
- emoji: 🧘
4
- colorFrom: purple
5
  colorTo: green
6
  sdk: docker
7
- pinned: false
8
- app_port: 8080
9
  license: apache-2.0
10
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ title: Falcon 7B FastAPI Service
2
+ emoji: πŸš€
3
+ colorFrom: blue
 
4
  colorTo: green
5
  sdk: docker
6
+ pinned: true
7
+ app_port: 7860
8
  license: apache-2.0
9
+
10
+ # Falcon 7B FastAPI Service
11
+
12
+ This Space hosts a FastAPI application that serves the Falcon 7B model for text generation. The API is deployed using Docker.
13
+
14
+ ## Endpoints
15
+
16
+ ### `/generate`
17
+ **Method**: POST
18
+ **Description**: Generate text from a given prompt.
19
+
20
+ **Payload**:
21
+ ```json
22
+ {
23
+ "prompt": "Your input prompt",
24
+ "max_length": 50
25
+ }
26
+ ```
27
+
28
+ **Response**:
29
+ ```json
30
+ {
31
+ "generated_text": "Generated text from Falcon 7B"
32
+ }
33
+ ```
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = FastAPI()

# Load the Falcon 7B model and tokenizer once at import time so requests
# don't pay the load cost.
MODEL_NAME = "SpiceyToad/demo-falc"  # Replace with your Hub repo name
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# device_map="auto" lets accelerate place the weights on whatever hardware
# is available (GPU if present, otherwise CPU).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto"
)


@app.post("/generate")
async def generate_text(request: Request):
    """Generate text for a JSON payload {"prompt": str, "max_length": int}.

    Returns {"generated_text": str}. max_length bounds the TOTAL token count
    (prompt + completion) and defaults to 50.
    """
    # Parse input JSON; missing keys fall back to an empty prompt / 50 tokens.
    data = await request.json()
    prompt = data.get("prompt", "")
    max_length = data.get("max_length", 50)

    # Move inputs to wherever the model actually landed instead of a
    # hardcoded "cuda" — with device_map="auto" the model may be on CPU
    # (e.g. on GPU-less Spaces hardware) and .to("cuda") would crash.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only: no_grad avoids building autograd state; passing the
    # attention_mask gives generate() the real token boundaries instead of
    # letting it guess from pad tokens.
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_length=max_length,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return {"generated_text": response}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ torch
4
+ transformers
+ accelerate