Spaces:
Sleeping
Sleeping
File size: 1,106 Bytes
8d1ea69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import os
from datasets import load_dataset
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import SupabaseVectorStore
from supabase import create_client
# Step 1: fetch the "train" split of the GAIA benchmark via `load_dataset`.
# NOTE(review): confirm against the dataset card that this repository exposes
# a "train" split without a config name, and that each row really carries
# lowercase "question" / "answer" keys -- TODO verify before running.
dataset = load_dataset("gaia-benchmark/GAIA", split="train")

# Step 2: wrap every example in a Document whose text is "Q: ...\nA: ...".
# The task id (None when the row has no such key) and the split name are
# stored as metadata alongside the text.
docs = [
    Document(
        page_content=f"Q: {row['question']}\nA: {row['answer']}",
        metadata={"task_id": row.get("task_id"), "split": "train"},
    )
    for row in dataset
]
# Step 3: build the embedding model first, then the Supabase client.
# Both credentials are read with os.environ[...], so a missing variable
# raises KeyError instead of silently continuing without credentials.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
supabase_url, supabase_key = (
    os.environ["SUPABASE_URL"],
    os.environ["SUPABASE_SERVICE_KEY"],
)
supabase = create_client(supabase_url, supabase_key)
# Step 4: hand the documents, the embedding model and the Supabase client to
# SupabaseVectorStore.from_documents, targeting the "documents" table.
# NOTE(review): "match_documents_langchain" is presumably the name of a
# similarity-search function that must already exist in the database --
# confirm it matches the Supabase project's SQL setup.
vectorstore = SupabaseVectorStore.from_documents(
    docs,
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)

# Report how many documents were passed to the store.
print(f"Seeded {len(docs)} GAIA examples into Supabase.")