import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import serpapi
# Load API keys from a local .env file
load_dotenv()

# Point Hugging Face caches at a writable directory (e.g. /tmp on Spaces)
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface/datasets"
# LLM: LLaMA 3 8B served through Groq's OpenAI-compatible endpoint
llm = ChatOpenAI(
    model="llama3-8b-8192",
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=os.environ["GROQ_API_KEY"],
)
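# Quick sanity check (a hypothetical call, assuming GROQ_API_KEY is set):
#     print(llm.predict("Say hello in one word."))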
# Embeddings (HuggingFace)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
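# all-MiniLM-L6-v2 maps text to 384-dimensional vectors, e.g.:
#     vec = embeddings.embed_query("hello world")  # list of 384 floats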
# Load PDFs and create FAISS vectorstore
def load_vectorstore(pdf_dir="pdfs/"):
    docs = []
    for file in os.listdir(pdf_dir):
        if file.endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(pdf_dir, file))
            docs.extend(loader.load())
    # Split pages into overlapping chunks so retrieved context stays small
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embedding=embeddings)
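# A minimal retrieval sketch (hypothetical query; assumes PDFs exist in pdfs/):
# querying the FAISS index returns the chunks most similar to the question.
#     vectorstore = load_vectorstore()
#     hits = vectorstore.similarity_search("What does the report conclude?", k=3)
#     print(hits[0].page_content)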
# Custom web search tool using SerpAPI
def search_tool(query: str):
    client = serpapi.Client(api_key=os.getenv("SERPAPI_API_KEY"))
    search = client.search({
        "engine": "google",
        "q": query,
    })
    results = dict(search)
    # Guard against empty result sets before indexing into them
    organic = results.get("organic_results", [])
    if not organic:
        return "No search results found."
    return organic[0].get("snippet", "")  # return the top result's snippet
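
# A minimal end-to-end sketch, assuming PDFs are present in pdfs/ and both
# GROQ_API_KEY and SERPAPI_API_KEY are set. The question string is only an
# example. Retrieved PDF chunks and a web snippet are stuffed into a single
# prompt for the Groq-hosted LLM.
if __name__ == "__main__":
    vectorstore = load_vectorstore()
    question = "What are the key findings of the report?"  # hypothetical query
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)
    web_snippet = search_tool(question)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"PDF context:\n{context}\n\n"
        f"Web result:\n{web_snippet}\n\n"
        f"Question: {question}"
    )
    print(llm.predict(prompt))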