# summarizer.py
#
# Summarizes the content of each article of a specified topic using the
# Hugging Face Transformers library.

from transformers import pipeline

# Load the summarization pipeline once at module import so every call to
# generate_summary() reuses the same model instance instead of paying the
# model-load cost per call.
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
# Helper: split long text into model-sized chunks.
def split_text(text, max_tokens=512):
    """Yield successive chunks of *text*, each at most *max_tokens* words.

    "Tokens" here are whitespace-separated words — an approximation of the
    model's true tokenizer count, used to keep each chunk within the
    summarization model's input window. Yields nothing for empty or
    whitespace-only input.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk (default 512).

    Yields:
        str: The next chunk of up to *max_tokens* words, space-joined.
    """
    words = text.split()
    for start in range(0, len(words), max_tokens):
        yield ' '.join(words[start:start + max_tokens])
# Helper: normalize whitespace and drop pathological tokens.
def clean_text(text):
    """Collapse runs of whitespace and drop any word of 100+ characters.

    Extremely long "words" (URLs, extraction junk) inflate the word count
    that split_text() uses as a token proxy without adding summarizable
    content, so they are filtered out.

    Args:
        text: Raw article text.

    Returns:
        str: Single-space-separated text containing only words shorter
        than 100 characters.
    """
    # One pass both normalizes whitespace and filters long words; the
    # original split and re-joined the text twice for the same result.
    return ' '.join(word for word in text.split() if len(word) < 100)
def generate_summary(content):
    """Summarize *content* using the module-level summarization pipeline.

    The text is cleaned, split into ~512-word chunks (to respect the
    model's input window), each chunk is summarized independently, and
    the chunk summaries are joined into one consolidated summary.

    Args:
        content: Raw article text to summarize.

    Returns:
        str: The consolidated summary, "No input provided." for
        empty/whitespace input, or an "Error generating summary: ..."
        message if the pipeline fails (callers treat the return value as
        display text, so no exception escapes).
    """
    try:
        if not content.strip():
            return "No input provided."
        cleaned_text = clean_text(content)
        chunks = list(split_text(cleaned_text))
        # Bug fix: the per-chunk consolidated summary was previously
        # computed and then discarded in favor of re-summarizing the full
        # raw text, which defeated the chunking and could exceed the
        # model's input limit. Return the joined chunk summaries instead.
        summary = ' '.join(
            summarizer(chunk, do_sample=False)[0]['summary_text']
            for chunk in chunks
            if chunk.strip()
        )
        return summary
    except Exception as e:
        return f"Error generating summary: {str(e)}"