Dan Walsh committed
Commit b089011 · 1 Parent(s): 9e707a5
Updates to Hugging Face Spaces config

Browse files
- .env +3 -0
- Dockerfile +9 -0
- README.md +10 -5
- app/services/summariser.py +128 -83
- main.py +14 -4
.env
ADDED
@@ -0,0 +1,3 @@
+TRANSFORMERS_CACHE=/tmp/huggingface_cache
+HF_HOME=/tmp/huggingface_cache
+HUGGINGFACE_HUB_CACHE=/tmp/huggingface_cache
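These variables are read by the `transformers`/`huggingface_hub` libraries when they resolve their cache location. If you also want them picked up when running the API locally outside Docker, here is a minimal sketch, assuming python-dotenv is installed (the commit itself does not show how `.env` is loaded):

```python
# Minimal sketch (assumes python-dotenv; not shown in the commit): load .env
# before importing transformers so the cache variables take effect.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

cache_dir = os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface_cache")
os.makedirs(cache_dir, exist_ok=True)
print(f"Hugging Face cache directory: {cache_dir}")
```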
Dockerfile
CHANGED
@@ -2,6 +2,15 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
+# Create a writable cache directory
+RUN mkdir -p /tmp/huggingface_cache && \
+    chmod 777 /tmp/huggingface_cache
+
+# Set environment variables for model caching
+ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache
+ENV HF_HOME=/tmp/huggingface_cache
+ENV HUGGINGFACE_HUB_CACHE=/tmp/huggingface_cache
+
 # Copy requirements first for better caching
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
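A small, hypothetical startup check (not part of this commit) that mirrors what the Dockerfile sets up: confirm the cache directory exists and is writable before the first model download starts.

```python
# Hypothetical startup probe (not in the commit): verify the cache directory
# configured by the ENV lines above is present and writable.
import os
import tempfile

cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface_cache")
os.makedirs(cache_dir, exist_ok=True)

# Writing a throwaway file proves the permissions (chmod 777) are effective.
with tempfile.NamedTemporaryFile(dir=cache_dir) as probe:
    print(f"{cache_dir} is writable (probe: {probe.name})")
```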
README.md
CHANGED
@@ -136,11 +136,16 @@ See the deployment guide in the frontend repository for detailed instructions on
 
 ### Deploying to Hugging Face Spaces
 
-… (removed: previous deployment notes, content not shown in this view)
+When deploying to Hugging Face Spaces, make sure to:
+
+1. Set the following environment variables in the Space settings:
+   - `TRANSFORMERS_CACHE=/tmp/huggingface_cache`
+   - `HF_HOME=/tmp/huggingface_cache`
+   - `HUGGINGFACE_HUB_CACHE=/tmp/huggingface_cache`
+
+2. Use the Docker SDK in your Space settings
+
+3. If you encounter memory issues, consider using a smaller model by changing the `model_name` in `summariser.py`
 
 ## Performance Optimizations
 
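For step 3 above, one hedged option (not part of this commit) is to make the model choice configurable rather than editing `summariser.py` by hand; `SUMMARISER_MODEL` below is a hypothetical variable name, and `"small"` reuses the fallback model referenced in `summariser.py`:

```python
# Hypothetical sketch: choose a smaller model via an environment variable.
# SUMMARISER_MODEL is not defined by this commit.
import os

model_options = {
    "general": "facebook/bart-large-cnn",
    "news": "facebook/bart-large-xsum",
    "long_form": "google/pegasus-large",
    "literary": "t5-large",
    "small": "sshleifer/distilbart-cnn-6-6",
}

choice = os.environ.get("SUMMARISER_MODEL", "literary")
model_name = model_options.get(choice, model_options["literary"])
print(f"Selected summarisation model: {model_name}")
```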
app/services/summariser.py
CHANGED
@@ -2,6 +2,7 @@ import numpy as np  # Import NumPy first
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import time
+import os
 import re
 
 class SummariserService:
@@ -15,35 +16,74 @@ class SummariserService:
 
         # Consider these alternative models
         model_options = {
-            "general": "facebook/bart-large-cnn",
-            "news": "facebook/bart-large-xsum",
-            "long_form": "google/pegasus-large",
-            "literary": "t5-large"
+            "general": "facebook/bart-large-cnn",
+            "news": "facebook/bart-large-xsum",
+            "long_form": "google/pegasus-large",
+            "literary": "t5-large"
         }
 
-        # Choose the most appropriate model
-        model_name = model_options["
+        # Choose the most appropriate model
+        model_name = model_options["literary"]  # Better for literary text
 
         # Update loading status
         self.model_loading_status["is_loading"] = True
         self.model_loading_status["step"] = "Initializing tokenizer"
 
-        … (removed: previous tokenizer/model loading code, content not shown in this view)
+        # Ensure cache directory exists and is writable
+        cache_dir = os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface_cache")
+        os.makedirs(cache_dir, exist_ok=True)
+
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_name,
+                cache_dir=cache_dir,
+                local_files_only=False
+            )
+
+            self.model_loading_status["step"] = "Loading model"
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                cache_dir=cache_dir,
+                force_download=False,
+                local_files_only=False
+            )
+
+            # Move to GPU if available
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model.to(self.device)
+
+        except Exception as e:
+            # Fallback to a smaller model if the main one fails
+            print(f"Error loading model {model_name}: {str(e)}")
+            print("Falling back to smaller model...")
+
+            fallback_model = "sshleifer/distilbart-cnn-6-6"  # Much smaller model
+
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                fallback_model,
+                cache_dir=cache_dir,
+                local_files_only=False
+            )
+
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                fallback_model,
+                cache_dir=cache_dir,
+                force_download=False,
+                local_files_only=False
+            )
+
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model.to(self.device)
+
+            # Update model name for metadata
+            model_name = fallback_model
 
         self.model_loading_status["is_loading"] = False
         self.model_loading_status["progress"] = 100
 
+        # Store the actual model name used
+        self.model_name = model_name
+
         # Track current processing job
         self.current_job = {
             "in_progress": False,
@@ -54,28 +94,6 @@ class SummariserService:
             "progress": 0
         }
 
-    def get_status(self):
-        """Return the current status of the summarizer service"""
-        status = {
-            "model_loading": self.model_loading_status,
-            "device": self.device,
-            "current_job": self.current_job
-        }
-
-        # Update estimated time remaining if job in progress
-        if self.current_job["in_progress"] and self.current_job["start_time"]:
-            elapsed = time.time() - self.current_job["start_time"]
-            estimated = self.current_job["estimated_time"]
-            remaining = max(0, estimated - elapsed)
-            status["current_job"]["time_remaining"] = round(remaining, 1)
-
-            # Update progress based on time
-            if estimated > 0:
-                progress = min(95, (elapsed / estimated) * 100)
-                status["current_job"]["progress"] = round(progress, 0)
-
-        return status
-
     def clean_summary(self, summary):
         """Clean and format the summary text"""
         # Remove any leading punctuation or spaces
@@ -99,6 +117,28 @@ class SummariserService:
 
         return summary
 
+    def get_status(self):
+        """Return the current status of the summarizer service"""
+        status = {
+            "model_loading": self.model_loading_status,
+            "device": self.device,
+            "current_job": self.current_job
+        }
+
+        # Update estimated time remaining if job in progress
+        if self.current_job["in_progress"] and self.current_job["start_time"]:
+            elapsed = time.time() - self.current_job["start_time"]
+            estimated = self.current_job["estimated_time"]
+            remaining = max(0, estimated - elapsed)
+            status["current_job"]["time_remaining"] = round(remaining, 1)
+
+            # Update progress based on time
+            if estimated > 0:
+                progress = min(95, (elapsed / estimated) * 100)
+                status["current_job"]["progress"] = round(progress, 0)
+
+        return status
+
     def summarise(self, text, max_length=250, min_length=100, do_sample=True, temperature=1.2):
         """
         Summarise the given text using the loaded model.
@@ -128,54 +168,59 @@ class SummariserService:
             "metadata": {
                 "input_word_count": self.current_job["input_word_count"],
                 "estimated_time_seconds": self.current_job["estimated_time"],
-                "model_used":
+                "model_used": self.model_name,
                 "processing_device": self.device
            }
        }
 
-        … (removed: previous summary generation code, content not shown in this view)
+        try:
+            # Tokenization step
+            inputs = self.tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
+            input_ids = inputs.input_ids.to(self.device)
+
+            # Update metadata with token info
+            result["metadata"]["input_token_count"] = len(input_ids[0])
+            result["metadata"]["truncated"] = len(input_ids[0]) == 1024
+
+            # Update job status
+            self.current_job["stage"] = "Generating summary"
+            self.current_job["progress"] = 30
+
+            # Enhanced generation parameters
+            summary_ids = self.model.generate(
+                input_ids,
+                max_length=max_length,
+                min_length=min_length,
+                do_sample=do_sample,
+                temperature=temperature,
+                num_beams=4,
+                early_stopping=True,
+                no_repeat_ngram_size=3,
+                length_penalty=2.0,
+            )
+
+            # Update job status
+            self.current_job["stage"] = "Post-processing summary"
+            self.current_job["progress"] = 90
+
+            summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+            # Clean and format the summary
+            summary = self.clean_summary(summary)
+
+            result["summary"] = summary
+            result["metadata"]["output_word_count"] = len(summary.split())
+            result["metadata"]["compression_ratio"] = round(len(summary.split()) / self.current_job["input_word_count"] * 100, 1)
+
+        except Exception as e:
+            # Handle errors gracefully
+            print(f"Error during summarization: {str(e)}")
+            result["summary"] = "An error occurred during summarization. Please try again with a shorter text or different parameters."
+            result["error"] = str(e)
+        finally:
+            # Complete job
+            self.current_job["in_progress"] = False
+            self.current_job["stage"] = "Complete"
+            self.current_job["progress"] = 100
 
         return result
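A rough usage sketch of the updated service (not part of the commit; it assumes `SummariserService()` takes no constructor arguments, which the visible diff does not confirm):

```python
# Hypothetical usage of the updated SummariserService. The first call downloads
# t5-large (or the distilbart fallback) into the configured cache directory.
from app.services.summariser import SummariserService

service = SummariserService()
print(service.get_status()["device"])  # "cuda" or "cpu"

result = service.summarise(
    "Paste a long passage of text here to be summarised...",
    max_length=150,
    min_length=50,
)
print(result["summary"])
print(result["metadata"].get("model_used"))
```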
main.py
CHANGED
@@ -1,18 +1,28 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-… (removed import line, content not shown in this view)
+import os
+
+# Import the router from the correct location
+# Check which router file exists and use that one
+if os.path.exists("app/api/routes.py"):
+    from app.api.routes import router as api_router
+elif os.path.exists("app/routers/api.py"):
+    from app.routers.api import router as api_router
+else:
+    raise ImportError("Could not find router file")
 
 app = FastAPI(title="AI Content Summariser API")
 
-# Configure CORS
+# Configure CORS - allow requests from the frontend
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["
+    allow_origins=["*"],  # For development - restrict this in production
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
+# Include the router
 app.include_router(api_router)
 
 @app.get("/health")
@@ -21,4 +31,4 @@ async def health_check():
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))
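A quick local smoke test (not part of the commit) using FastAPI's TestClient against the `/health` route; it assumes `main.py` is importable as `main` from the project root:

```python
# Hypothetical smoke test: hit the /health endpoint without starting uvicorn.
from fastapi.testclient import TestClient

from main import app

client = TestClient(app)
response = client.get("/health")
print(response.status_code, response.text)
```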