from transformers import AutoTokenizer, BartForConditionalGeneration
import torch
import math  # NOTE(review): unused in this view; kept in case another chunk of the file uses it


class Summarizer:
    """Chunked abstractive summarization with a distilled BART model.

    Long texts are split into word-bounded chunks, each chunk is summarized
    independently, and the partial summaries are concatenated.
    """

    def __init__(self, model_name="sshleifer/distilbart-cnn-12-6"):
        """Load tokenizer and model weights (downloaded on first use).

        Args:
            model_name: Hugging Face model id; defaults to the original
                distilbart-cnn checkpoint so existing callers are unaffected.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = BartForConditionalGeneration.from_pretrained(model_name)
        self.model.eval()  # inference only — disable dropout etc.

    def split_text(self, text, max_tokens=1024):
        """Split *text* into chunks of at most *max_tokens* whitespace words.

        NOTE(review): despite the parameter name, this counts *words*, not
        model tokens. A 1024-word chunk typically exceeds BART's 1024-token
        input limit, so summarize() passes a smaller word budget to avoid
        silent truncation.

        Returns:
            list[str]: word-joined chunks; empty list for empty/blank input.
        """
        words = text.split()
        return [
            " ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)
        ]

    def summarize(self, text):
        """Return a summary of *text*.

        Each chunk is summarized independently and the partial summaries are
        joined with spaces; an empty/blank input yields "".
        """
        if not text.strip():
            return ""  # nothing to summarize; skip model work entirely
        # ~700 words ≈ 900-1000 BART tokens, keeping every chunk under the
        # model's 1024-token input limit so truncation=True never silently
        # drops content (the original 1024-word chunks overflowed it).
        chunks = self.split_text(text, max_tokens=700)
        partial_summaries = []
        for chunk in chunks:
            inputs = self.tokenizer(
                chunk, return_tensors="pt", truncation=True, max_length=1024
            )
            with torch.no_grad():  # generation needs no autograd graph
                summary_ids = self.model.generate(
                    inputs["input_ids"],
                    # Pass the mask explicitly: avoids pad-token warnings and
                    # mis-attention if the tokenizer ever pads the batch.
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=200,
                )
            partial_summaries.append(
                self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            )
        # Final merged summary: simple concatenation of per-chunk summaries.
        return " ".join(partial_summaries)