Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,16 +7,15 @@ import streamlit as st
|
|
7 |
import torch
|
8 |
|
9 |
# Load the BillSum dataset
|
10 |
-
|
11 |
-
|
12 |
ds = load_dataset("FiscalNote/billsum")
|
|
|
13 |
# Initialize models
|
14 |
sbert_model = SentenceTransformer("all-mpnet-base-v2")
|
15 |
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
|
16 |
-
t5_model =
|
17 |
|
18 |
# Prepare data and build FAISS index
|
19 |
-
texts =
|
20 |
case_embeddings = sbert_model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
|
21 |
|
22 |
# Convert embeddings to numpy array and handle deprecation warning
|
|
|
7 |
import torch
|
8 |
|
9 |
# Load the BillSum dataset
|
|
|
|
|
10 |
ds = load_dataset("FiscalNote/billsum")
|
11 |
+
|
12 |
# Initialize models
|
13 |
sbert_model = SentenceTransformer("all-mpnet-base-v2")
|
14 |
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
|
15 |
+
t5_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
|
16 |
|
17 |
# Prepare data and build FAISS index
|
18 |
+
texts = ds["train"]["text"][:100] # Limiting to 100 samples for speed, and selecting the train split.
|
19 |
case_embeddings = sbert_model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
|
20 |
|
21 |
# Convert embeddings to numpy array and handle deprecation warning
|