File size: 4,049 Bytes
9cf5fee
 
 
 
 
124b5b5
9cf5fee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124b5b5
 
 
 
 
 
 
 
 
 
 
 
 
 
9cf5fee
 
 
 
 
 
 
 
 
124b5b5
9cf5fee
 
 
 
 
124b5b5
 
 
 
 
 
 
 
 
 
 
 
9cf5fee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, Union
from app.services.summariser import SummariserService
from app.services.url_extractor import URLExtractorService
from app.services.cache import hash_text, get_cached_summary, cache_summary

# Router on which the summarisation endpoints of this module are registered.
router = APIRouter()

class TextSummaryRequest(BaseModel):
    """Request body for summarising text supplied directly by the client.

    Alongside the text, the body carries the generation settings that the
    endpoint passes on to the summariser.
    """

    # Required; rejected by validation when shorter than 10 characters.
    text: str = Field(
        default=...,
        min_length=10,
        description="The text to summarise",
    )
    # Upper bound for the summary; accepted range is 30..500, default 150.
    max_length: Optional[int] = Field(
        default=150,
        ge=30,
        le=500,
        description="Maximum length of the summary",
    )
    # Lower bound for the summary; accepted range is 10..200, default 50.
    min_length: Optional[int] = Field(
        default=50,
        ge=10,
        le=200,
        description="Minimum length of the summary",
    )
    # Sampling is off unless the client asks for it.
    do_sample: Optional[bool] = Field(
        default=False,
        description="Whether to use sampling for generation",
    )
    # Accepted range is 0.7..2.0, default 1.0.
    temperature: Optional[float] = Field(
        default=1.0,
        ge=0.7,
        le=2.0,
        description="Sampling temperature",
    )

class URLSummaryRequest(BaseModel):
    """Request body for summarising the content found at a URL.

    Alongside the URL, the body carries the generation settings that the
    endpoint passes on to the summariser.
    """

    # Required; validated as an HTTP(S) URL by the HttpUrl type.
    url: HttpUrl = Field(
        default=...,
        description="The URL to extract content from and summarise",
    )
    # Upper bound for the summary; accepted range is 30..500, default 150.
    max_length: Optional[int] = Field(
        default=150,
        ge=30,
        le=500,
        description="Maximum length of the summary",
    )
    # Lower bound for the summary; accepted range is 10..200, default 50.
    min_length: Optional[int] = Field(
        default=50,
        ge=10,
        le=200,
        description="Minimum length of the summary",
    )
    # Sampling is off unless the client asks for it.
    do_sample: Optional[bool] = Field(
        default=False,
        description="Whether to use sampling for generation",
    )
    # Accepted range is 0.7..2.0, default 1.0.
    temperature: Optional[float] = Field(
        default=1.0,
        ge=0.7,
        le=2.0,
        description="Sampling temperature",
    )

class SummaryResponse(BaseModel):
    """Response body shared by the summarisation endpoints in this module."""

    # len() of the input that was summarised (request text or extracted content).
    original_text_length: int
    # The generated summary.
    summary: str
    # len() of the generated summary.
    summary_length: int
    # Where the summarised content came from: "text" or "url".
    source_type: str = "text"
    # Set by the URL endpoint to the requested URL; otherwise left as None.
    source_url: Optional[str] = None

@router.post("/summarise", response_model=SummaryResponse)
async def summarise_text(request: TextSummaryRequest):
    """Summarise the text in the request body, reusing a cached result if any.

    The cache is keyed on a hash of the text together with every generation
    setting, so a change to any of them produces a fresh summary.

    Args:
        request: Validated request body with the text and generation settings.

    Returns:
        The cached entry when one is found, otherwise a dict with the keys
        ``original_text_length``, ``summary``, ``summary_length`` and
        ``source_type`` (always ``"text"``).

    Raises:
        HTTPException: Re-raised unchanged if one is raised while handling
            the request; any other exception is reported as a 500 whose
            detail is the exception message.
    """
    try:
        # Check cache first
        text_hash = hash_text(request.text)
        cached_summary = get_cached_summary(
            text_hash,
            request.max_length,
            request.min_length,
            request.do_sample,
            request.temperature,
        )

        if cached_summary:
            return cached_summary

        # If not in cache, generate summary.
        # NOTE(review): summarise() is called synchronously inside an async
        # handler; if it is slow it may hold up the event loop — confirm.
        summariser = SummariserService()
        summary = summariser.summarise(
            text=request.text,
            max_length=request.max_length,
            min_length=request.min_length,
            do_sample=request.do_sample,
            temperature=request.temperature,
        )

        result = {
            "original_text_length": len(request.text),
            "summary": summary,
            "summary_length": len(summary),
            "source_type": "text",
        }

        # Cache the result under the same key used for the lookup above.
        cache_summary(
            text_hash,
            request.max_length,
            request.min_length,
            request.do_sample,
            request.temperature,
            result,
        )

        return result
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of masking them as 500s,
        # matching the handling in summarise_url.
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

@router.post("/summarise-url", response_model=SummaryResponse)
async def summarise_url(request: URLSummaryRequest):
    """Extract the content behind a URL and summarise it.

    Args:
        request: Validated request body with the URL and generation settings.

    Returns:
        A dict with the keys ``original_text_length``, ``summary``,
        ``summary_length``, ``source_type`` (always ``"url"``) and
        ``source_url`` (the requested URL as a string).

    Raises:
        HTTPException: 422 when the extractor returns nothing or fewer than
            100 characters; 500 (detail is the exception message) for any
            other failure.
    """
    # Convert once; the string form is used for extraction and in the response.
    source_url = str(request.url)

    try:
        # Extract content from URL
        url_extractor = URLExtractorService()
        content = await url_extractor.extract_content(source_url)

        if not content or len(content) < 100:
            raise HTTPException(
                status_code=422,
                detail="Could not extract sufficient content from the URL",
            )

        # Summarise the extracted content.
        # NOTE(review): summarise() is called synchronously inside an async
        # handler; if it is slow it may hold up the event loop — confirm.
        summariser = SummariserService()
        summary = summariser.summarise(
            text=content,
            max_length=request.max_length,
            min_length=request.min_length,
            do_sample=request.do_sample,
            temperature=request.temperature,
        )

        return {
            "original_text_length": len(content),
            "summary": summary,
            "summary_length": len(summary),
            "source_type": "url",
            "source_url": source_url,
        }
    except HTTPException:
        # Let the 422 above (and any other HTTP error) through untouched.
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e