Spaces:
Runtime error
Runtime error
import os
import pickle

import pinecone
from langchain.document_loaders import UnstructuredFileLoader, CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, Pinecone
from langchain.vectorstores.faiss import FAISS
# Ingestion script: load posts from a CSV file, split them into chunks,
# embed the chunks with OpenAI and store them in a Pinecone index.

# Load data.
# Alternative input (kept for reference): a single unstructured file.
# loader = UnstructuredFileLoader("output.md")
# raw_documents = loader.load()
# The "Post Title" column is passed as the source column for each loaded row.
loader = CSVLoader(file_path='./posts.csv', source_column="Post Title", encoding='utf-8')
raw_documents = loader.load()

# Split text into chunks (chunk_size=4000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# Load data to vectorstore.
embeddings = OpenAIEmbeddings()
# Alternative backend (kept for reference): build a local FAISS store and
# pickle it to disk instead of using Pinecone.
# vectorstore = FAISS.from_documents(documents, embeddings)
# with open("posts.pkl", "wb") as f:
#     pickle.dump(vectorstore, f)

# Pinecone credentials are read from the environment so that no secret lives
# in the source tree. NOTE(review): an API key was previously committed in
# this file; it should be treated as compromised and rotated.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY environment variable is not set; "
        "find the key at app.pinecone.io"
    )
# The environment name is not a secret; keep the previous value as default.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "asia-southeast1-gcp")

# Initialize pinecone.
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV,  # next to api key in console
)

index_name = "twimbit-answer"
# from_documents forwards each chunk's metadata together with its text, so
# the metadata is stored alongside the vectors instead of being dropped.
Pinecone.from_documents(documents, embeddings, index_name=index_name)

# Example query against the populated index (kept for reference):
# query = "How many neo banks are in india ?"
#
# docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
#
# docs = docsearch.similarity_search(query, include_metadata=True)