from datasets import load_dataset
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# (older LangChain releases expose these as langchain.docstore.document,
#  langchain.vectorstores, and langchain.embeddings)

# Load the first 10% of the FEVER configuration of KILT
dataset = load_dataset("facebook/kilt_tasks", "fever", split="train[:10%]")

# Convert each claim into a LangChain Document, keeping the KILT id as metadata
documents = []
for item in dataset:
    text = item['input']
    documents.append(Document(page_content=text, metadata={"id": item['id']}))

# Embed the documents with a small sentence-transformers model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embeddings)

# Save the FAISS index to disk
db.save_local("vectorstore")
print("✅ Saved vectorstore!")
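
# A minimal sanity check for the saved index (a sketch, not part of the build
# script): reload it with the same embedding model and run a similarity search.
# Note the query string below is just an illustration, and newer LangChain
# versions require allow_dangerous_deserialization=True because the local
# index is stored with pickle.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore", embeddings, allow_dangerous_deserialization=True)

# Print the ids and a preview of the three closest claims to the query
for doc in db.similarity_search("Who wrote Hamlet?", k=3):
    print(doc.metadata["id"], doc.page_content[:80])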