Dan Walsh committed
Commit 9e707a5 · 1 Parent(s): 124b5b5

Adding additional improvements & refinements

__pycache__/main.cpython-311.pyc CHANGED
Binary files a/__pycache__/main.cpython-311.pyc and b/__pycache__/main.cpython-311.pyc differ
 
app/api/__pycache__/routes.cpython-311.pyc CHANGED
Binary files a/app/api/__pycache__/routes.cpython-311.pyc and b/app/api/__pycache__/routes.cpython-311.pyc differ
 
app/api/routes.py CHANGED
@@ -5,7 +5,8 @@ from app.services.summariser import SummariserService
 from app.services.url_extractor import URLExtractorService
 from app.services.cache import hash_text, get_cached_summary, cache_summary
 
-router = APIRouter()
+router = APIRouter(prefix="/api")
+summariser_service = SummariserService()
 
 class TextSummaryRequest(BaseModel):
     text: str = Field(..., min_length=10, description="The text to summarise")
@@ -45,8 +46,7 @@ async def summarise_text(request: TextSummaryRequest):
             return cached_summary
 
         # If not in cache, generate summary
-        summariser = SummariserService()
-        summary = summariser.summarise(
+        result = summariser_service.summarise(
            text=request.text,
            max_length=request.max_length,
            min_length=request.min_length,
@@ -54,23 +54,6 @@ async def summarise_text(request: TextSummaryRequest):
            temperature=request.temperature
        )
 
-        result = {
-            "original_text_length": len(request.text),
-            "summary": summary,
-            "summary_length": len(summary),
-            "source_type": "text"
-        }
-
-        # Cache the result
-        cache_summary(
-            text_hash,
-            request.max_length,
-            request.min_length,
-            request.do_sample,
-            request.temperature,
-            result
-        )
-
        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@@ -86,8 +69,7 @@ async def summarise_url(request: URLSummaryRequest):
            raise HTTPException(status_code=422, detail="Could not extract sufficient content from the URL")
 
        # Summarise the extracted content
-        summariser = SummariserService()
-        summary = summariser.summarise(
+        result = summariser_service.summarise(
            text=content,
            max_length=request.max_length,
            min_length=request.min_length,
@@ -97,8 +79,8 @@ async def summarise_url(request: URLSummaryRequest):
 
        return {
            "original_text_length": len(content),
-            "summary": summary,
-            "summary_length": len(summary),
+            "summary": result["summary"],
+            "summary_length": len(result["summary"]),
            "source_type": "url",
            "source_url": str(request.url)
        }
@@ -106,3 +88,8 @@ async def summarise_url(request: URLSummaryRequest):
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
+
+@router.get("/status")
+async def get_status():
+    """Get the current status of the summariser service"""
+    return summariser_service.get_status()
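Note on usage: with the prefix now on the router itself and a single module-level SummariserService, the status endpoint is served at /api/status, and the text endpoint returns the dict produced by summariser_service.summarise() unchanged. A minimal client sketch follows; the localhost:8000 base URL and the /api/summarise path are assumptions (the summarise route decorators sit outside these hunks), and the sample payload is illustrative only.

    # Hypothetical client sketch; base URL and summarise path are assumed, not shown in this diff.
    import httpx

    BASE_URL = "http://localhost:8000"  # assumed local dev server

    # New endpoint added in this commit
    status = httpx.get(f"{BASE_URL}/api/status").json()
    print(status["device"], status["model_loading"])

    # Fields match TextSummaryRequest; the response is the dict returned by summariser_service.summarise()
    payload = {"text": "Some long article text. " * 40, "max_length": 250, "min_length": 100}
    result = httpx.post(f"{BASE_URL}/api/summarise", json=payload, timeout=120).json()
    print(result["summary"])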
app/services/__pycache__/summariser.cpython-311.pyc CHANGED
Binary files a/app/services/__pycache__/summariser.cpython-311.pyc and b/app/services/__pycache__/summariser.cpython-311.pyc differ
 
app/services/summariser.py CHANGED
@@ -1,19 +1,105 @@
 import numpy as np  # Import NumPy first
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import time
+import re
 
 class SummariserService:
     def __init__(self):
-        # Initialize with a smaller model for faster loading
-        model_name = "facebook/bart-large-cnn"
+        # Status tracking
+        self.model_loading_status = {
+            "is_loading": False,
+            "step": "",
+            "progress": 0
+        }
+
+        # Consider these alternative models
+        model_options = {
+            "general": "facebook/bart-large-cnn", # Better for news articles
+            "news": "facebook/bart-large-xsum", # Better for short news summaries
+            "long_form": "google/pegasus-large", # Better for long documents
+            "literary": "t5-large" # Better for literary text
+        }
+
+        # Choose the most appropriate model - let's switch to BART for better literary summaries
+        model_name = model_options["general"] # Changed from "literary" to "general"
+
+        # Update loading status
+        self.model_loading_status["is_loading"] = True
+        self.model_loading_status["step"] = "Initializing tokenizer"
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+        self.model_loading_status["step"] = "Loading model"
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(
+            model_name,
+            force_download=False,
+            local_files_only=False
+        )
 
         # Move to GPU if available
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
 
-    def summarise(self, text, max_length=150, min_length=50, do_sample=False, temperature=1.0):
+        self.model_loading_status["is_loading"] = False
+        self.model_loading_status["progress"] = 100
+
+        # Track current processing job
+        self.current_job = {
+            "in_progress": False,
+            "start_time": None,
+            "input_word_count": 0,
+            "estimated_time": 0,
+            "stage": "",
+            "progress": 0
+        }
+
+    def get_status(self):
+        """Return the current status of the summarizer service"""
+        status = {
+            "model_loading": self.model_loading_status,
+            "device": self.device,
+            "current_job": self.current_job
+        }
+
+        # Update estimated time remaining if job in progress
+        if self.current_job["in_progress"] and self.current_job["start_time"]:
+            elapsed = time.time() - self.current_job["start_time"]
+            estimated = self.current_job["estimated_time"]
+            remaining = max(0, estimated - elapsed)
+            status["current_job"]["time_remaining"] = round(remaining, 1)
+
+            # Update progress based on time
+            if estimated > 0:
+                progress = min(95, (elapsed / estimated) * 100)
+                status["current_job"]["progress"] = round(progress, 0)
+
+        return status
+
+    def clean_summary(self, summary):
+        """Clean and format the summary text"""
+        # Remove any leading punctuation or spaces
+        summary = re.sub(r'^[,.\s]+', '', summary)
+
+        # Ensure the first letter is capitalized
+        if summary and len(summary) > 0:
+            summary = summary[0].upper() + summary[1:]
+
+        # Ensure proper ending punctuation
+        if summary and not any(summary.endswith(end) for end in ['.', '!', '?']):
+            last_sentence_end = max(
+                summary.rfind('.'),
+                summary.rfind('!'),
+                summary.rfind('?')
+            )
+            if last_sentence_end > 0:
+                summary = summary[:last_sentence_end + 1]
+            else:
+                summary = summary + '.'
+
+        return summary
+
+    def summarise(self, text, max_length=250, min_length=100, do_sample=True, temperature=1.2):
         """
         Summarise the given text using the loaded model.
 
@@ -25,78 +111,71 @@ class SummariserService:
             temperature (float): Sampling temperature (higher = more random)
 
         Returns:
-            str: The generated summary
+            dict: The generated summary and processing metadata
         """
-        # Convert character lengths to approximate token counts
-        # A rough estimate is that 1 token ≈ 4 characters in English
-        max_tokens = max(1, max_length // 4)
-        min_tokens = max(1, min_length // 4)
-
-        # Ensure text is within model's max token limit
-        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
-        inputs = inputs.to(self.device)
-
-        # Set generation parameters
-        generation_params = {
-            "max_length": max_tokens,
-            "min_length": min_tokens,
-            "num_beams": 4,
-            "length_penalty": 2.0,
-            "early_stopping": True,
+        # Reset and start job tracking
+        self.current_job = {
+            "in_progress": True,
+            "start_time": time.time(),
+            "input_word_count": len(text.split()),
+            "estimated_time": max(1, min(30, len(text.split()) / 500)), # Rough estimate
+            "stage": "Tokenizing input text",
+            "progress": 5
        }
 
-        # Handle sampling and temperature
-        if do_sample:
-            try:
-                # First attempt: try with the requested temperature
-                generation_params["do_sample"] = True
-                generation_params["temperature"] = temperature
-                summary_ids = self.model.generate(
-                    inputs["input_ids"],
-                    **generation_params
-                )
-            except Exception as e:
-                # If that fails, try with default temperature (1.0)
-                print(f"Error with temperature {temperature}, falling back to default: {str(e)}")
-                generation_params["temperature"] = 1.0
-                try:
-                    summary_ids = self.model.generate(
-                        inputs["input_ids"],
-                        **generation_params
-                    )
-                except Exception:
-                    # If sampling still fails, fall back to beam search without sampling
-                    print("Sampling failed, falling back to beam search")
-                    generation_params.pop("do_sample", None)
-                    generation_params.pop("temperature", None)
-                    summary_ids = self.model.generate(
-                        inputs["input_ids"],
-                        **generation_params
-                    )
-        else:
-            # Standard beam search without sampling
-            summary_ids = self.model.generate(
-                inputs["input_ids"],
-                **generation_params
-            )
+        result = {
+            "summary": "",
+            "metadata": {
+                "input_word_count": self.current_job["input_word_count"],
+                "estimated_time_seconds": self.current_job["estimated_time"],
+                "model_used": "facebook/bart-large-cnn", # Update to match the model we're using
+                "processing_device": self.device
+            }
+        }
+
+        # Tokenization step
+        inputs = self.tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
+        input_ids = inputs.input_ids.to(self.device)
+
+        # Update metadata with token info
+        result["metadata"]["input_token_count"] = len(input_ids[0])
+        result["metadata"]["truncated"] = len(input_ids[0]) == 1024
+
+        # Update job status
+        self.current_job["stage"] = "Generating summary"
+        self.current_job["progress"] = 30
+
+        # Enhanced generation parameters - adjusted for better literary summaries
+        summary_ids = self.model.generate(
+            input_ids,
+            max_length=max_length,
+            min_length=min_length,
+            do_sample=do_sample,
+            temperature=temperature,
+            num_beams=5, # Increased from 4 to 5 for better quality
+            early_stopping=True,
+            no_repeat_ngram_size=3,
+            length_penalty=2.0,
+            top_k=50, # Added top_k parameter
+            top_p=0.95, # Added top_p parameter for nucleus sampling
+        )
+
+        # Update job status
+        self.current_job["stage"] = "Post-processing summary"
+        self.current_job["progress"] = 90
 
        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
-        # If the summary is still too long, truncate it
-        if len(summary) > max_length:
-            # Try to truncate at a sentence boundary
-            sentences = summary.split('. ')
-            truncated_summary = ''
-            for sentence in sentences:
-                if len(truncated_summary) + len(sentence) + 2 <= max_length: # +2 for '. '
-                    truncated_summary += sentence + '. '
-                else:
-                    break
+        # Clean and format the summary
+        summary = self.clean_summary(summary)
 
-            # If we couldn't even fit one sentence, just truncate at max_length
-            if not truncated_summary:
-                truncated_summary = summary[:max_length]
+        result["summary"] = summary
+        result["metadata"]["output_word_count"] = len(summary.split())
+        result["metadata"]["compression_ratio"] = round(len(summary.split()) / self.current_job["input_word_count"] * 100, 1)
 
-            summary = truncated_summary.strip()
+        # Complete job
+        self.current_job["in_progress"] = False
+        self.current_job["stage"] = "Complete"
+        self.current_job["progress"] = 100
 
-        return summary
+        return result
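Note on the new return shape: summarise() now returns a dict with "summary" plus a "metadata" block instead of a bare string, and get_status() reports model-loading and per-job progress. A small sketch of driving the service directly, assuming the model can be downloaded on first use; the input text is illustrative only.

    # Illustrative direct use of the updated service (input text is made up).
    from app.services.summariser import SummariserService

    service = SummariserService()                 # loads facebook/bart-large-cnn in __init__
    print(service.get_status()["model_loading"])  # progress reaches 100 once loading finishes

    result = service.summarise("Long input text to condense. " * 200, max_length=250, min_length=100)
    print(result["summary"])
    print(result["metadata"]["input_token_count"], result["metadata"]["compression_ratio"])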
main.py CHANGED
@@ -1,39 +1,24 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-import os
+from app.api.routes import router as api_router
 
-app = FastAPI(
-    title="AI Content Summariser API",
-    description="API for summarising text content using NLP models",
-    version="0.1.0"
-)
+app = FastAPI(title="AI Content Summariser API")
 
 # Configure CORS
-origins = os.getenv("CORS_ORIGINS", "http://localhost:3000").split(",")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=origins,
+    allow_origins=["http://localhost:3000"], # Add your frontend URL
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-@app.get("/")
-async def root():
-    return {"message": "Welcome to the AI Content Summariser API"}
+app.include_router(api_router)
 
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}
 
-# Import and include API routes
-from app.api.routes import router as api_router
-app.include_router(api_router, prefix="/api")
-
-# Import and include async API routes
-from app.api.async_routes import router as async_router
-app.include_router(async_router, prefix="/api/async")
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
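Note: main.py now mounts the /api-prefixed router directly and keeps only the /health probe at the root. A quick in-process smoke-test sketch with FastAPI's TestClient (it relies on the httpx pin already in requirements.txt); the expected values come straight from the handlers in this commit.

    # In-process smoke test sketch using FastAPI's TestClient.
    from fastapi.testclient import TestClient
    from main import app

    client = TestClient(app)
    assert client.get("/health").json() == {"status": "healthy"}
    print(client.get("/api/status").json()["device"])  # "cuda" or "cpu" depending on the host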
requirements.txt CHANGED
@@ -1,12 +1,15 @@
-fastapi==0.100.1
-uvicorn==0.23.2
-pydantic==2.1.1
-transformers==4.31.0
+numpy==1.24.3
 torch==2.0.1
+transformers==4.30.2
+huggingface_hub==0.16.4
+fastapi==0.100.0
+uvicorn==0.22.0
+pydantic==1.10.8
+beautifulsoup4==4.12.2
+requests==2.31.0
 sentencepiece==0.1.99
 python-dotenv==1.0.0
 httpx==0.24.1
 accelerate==0.21.0
-beautifulsoup4==4.12.2
 pytest==7.3.1
 pytest-cov==4.1.0