Spaces:

harao-ml
/

QuickPulse

Running

QuickPulse / summarizer.py

Upload 6 files

7c3be27 verified 27 days ago

1.49 kB

	# summarizer.py
	# This script summarizes the content of each article of the specified topic using the Hugging Face Transformers library.

	from transformers import pipeline

	# Load the summarization pipeline once, at import time, so that every call
	# to generate_summary() below reuses the same pipeline object instead of
	# reloading the model per request.
	summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")

	# Alternative setup (disabled): build the pipeline from an explicitly loaded tokenizer and model.

	#from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	#tokenizer = AutoTokenizer.from_pretrained("flant5-base")
	#model = AutoModelForSeq2SeqLM.from_pretrained("flant5-base")
	#summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

	# Function to split text into smaller chunks
	def split_text(text, max_tokens=512):
	    """Yield consecutive pieces of *text* holding at most *max_tokens* words.

	    "Words" are the whitespace-separated items produced by ``str.split()``;
	    each piece is re-joined with single spaces. Empty or whitespace-only
	    text yields nothing.
	    """
	    tokens = text.split()
	    total = len(tokens)
	    for begin in range(0, total, max_tokens):
	        end = begin + max_tokens
	        piece = tokens[begin:end]
	        yield ' '.join(piece)

	# Function to clean text
	def clean_text(text):
	    """Return *text* with whitespace normalized and over-long words removed.

	    Any run of whitespace becomes a single space, leading and trailing
	    whitespace disappears, and every whitespace-delimited word that is
	    100 characters or longer is dropped.
	    """
	    # One split() already discards all surrounding and repeated whitespace,
	    # so filtering its output and re-joining is all that is needed.
	    kept_words = [word for word in text.split() if len(word) < 100]
	    return ' '.join(kept_words)

	def generate_summary(content):
	    """Summarize *content* using the module-level ``summarizer`` pipeline.

	    The text is cleaned with ``clean_text``, cut into pieces with
	    ``split_text``, and each piece is summarized separately. The partial
	    summaries are returned joined by single spaces.

	    Args:
	        content: Raw text to summarize. ``None``, empty and whitespace-only
	            values are treated as missing input.

	    Returns:
	        The summary string; ``"No input provided."`` when there is nothing
	        to summarize; or ``"Error generating summary: <reason>"`` if any
	        step raises. Failures are reported through the return value and
	        are never propagated to the caller.
	    """
	    try:
	        if not content or not content.strip():
	            return "No input provided."
	        cleaned_text = clean_text(content)
	        # NOTE(review): split_text counts whitespace-separated words, not
	        # model tokens - confirm its default size suits the model's input.
	        partial_summaries = [
	            summarizer(chunk, do_sample=False)[0]['summary_text'].strip()
	            for chunk in split_text(cleaned_text)
	            if chunk.strip()
	        ]
	        if not partial_summaries:
	            # Cleaning removed every word, so nothing is left to summarize.
	            return "No input provided."
	        # Return the per-chunk results. Previously these were computed and
	        # then discarded in favour of a second pass over the raw,
	        # uncleaned, unchunked text.
	        return ' '.join(partial_summaries)
	    except Exception as e:
	        # Deliberate catch-all: this function reports failures as a
	        # message string rather than raising.
	        return f"Error generating summary: {str(e)}"