rcook committed on
Commit
558dbba
·
verified ·
1 Parent(s): cf3dd49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -17,5 +17,17 @@ def summarize():
17
  # return df.head()
18
  checkpoint = "google-t5/t5-small"
19
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
20
-
 
 
 
 
 
 
 
 
 
 
 
 
21
  return tokenizer
 
17
  # return df.head()
18
  checkpoint = "google-t5/t5-small"
19
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
20
+
21
+ prefix = "summarize: "
22
+
23
+
24
+ def preprocess_function(examples):
25
+ inputs = [prefix + doc for doc in examples["text"]]
26
+ model_inputs = tokenizer(inputs, max_length=1024, truncation=True)
27
+
28
+ labels = tokenizer(text_target=examples["summary"], max_length=128, truncation=True)
29
+
30
+ model_inputs["labels"] = labels["input_ids"]
31
+ return model_inputs
32
+
33
  return tokenizer