"""Chunked text summarization using the sshleifer/distilbart-cnn-12-6 model."""
import math

import torch
from transformers import AutoTokenizer, BartForConditionalGeneration
class Summarizer:
    """Summarize long text with distilbart-cnn-12-6.

    Long input is split into word-based chunks, each chunk is summarized
    independently, and the per-chunk summaries are concatenated.
    """

    # Single source of truth for the checkpoint name (was repeated twice).
    MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

    def __init__(self):
        # Downloads/loads the checkpoint on first use; construct once and reuse.
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = BartForConditionalGeneration.from_pretrained(self.MODEL_NAME)
        # Inference only: disable dropout etc.
        self.model.eval()

    def split_text(self, text, max_tokens=1024):
        """Split *text* into chunks of at most *max_tokens* whitespace words.

        NOTE: word count only approximates the BPE token count; any chunk
        that still exceeds the model's 1024-token limit is truncated by the
        tokenizer call in summarize(). Returns [] for empty/whitespace input.
        """
        words = text.split()
        return [
            " ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)
        ]

    def summarize(self, text):
        """Return a summary of *text*; empty string for empty input."""
        chunks = self.split_text(text)
        if not chunks:
            # Nothing to summarize — avoid calling the model at all.
            return ""
        partial_summaries = []
        for chunk in chunks:
            inputs = self.tokenizer(
                chunk, return_tensors="pt", truncation=True, max_length=1024
            )
            # Fixes vs. original: pass attention_mask explicitly (transformers
            # warns without it and results can differ when padding is present)
            # and disable gradient tracking during generation to save memory.
            with torch.no_grad():
                summary_ids = self.model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=200,
                )
            partial_summaries.append(
                self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            )
        # Merge per-chunk summaries into the final summary.
        return " ".join(partial_summaries)