"""RAG helper module: a Groq-hosted LLaMA 3 chat model, HuggingFace sentence
embeddings, a FAISS vectorstore built from local PDFs, and a SerpAPI-backed
web-search tool."""

import os

from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
import serpapi

load_dotenv()

# Point all HuggingFace caches at /tmp — NOTE(review): this looks intended for
# a read-only deploy filesystem (e.g. serverless); it unconditionally overrides
# any value from the environment or .env, so confirm that is deliberate.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface/datasets"

# LLM: LLaMA 3 8B served through Groq's OpenAI-compatible endpoint.
# os.environ[...] (not .get) so a missing GROQ_API_KEY fails fast at import.
llm = ChatOpenAI(
    model="llama3-8b-8192",
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=os.environ["GROQ_API_KEY"],
)

# Embeddings: local HuggingFace sentence-transformer (no API key needed).
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


def load_vectorstore(pdf_dir: str = "pdfs/"):
    """Build a FAISS vectorstore from every PDF in *pdf_dir*.

    Args:
        pdf_dir: Directory scanned (non-recursively) for ``*.pdf`` files;
            the extension match is case-insensitive, so ``.PDF`` also loads.

    Returns:
        A FAISS vectorstore over 1000-character chunks (200 overlap) of the
        loaded pages, embedded with the module-level ``embeddings``.

    Raises:
        ValueError: If no PDF documents were found in *pdf_dir*.
        FileNotFoundError: If *pdf_dir* does not exist.
    """
    docs = []
    for file in os.listdir(pdf_dir):
        if file.lower().endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(pdf_dir, file))
            docs.extend(loader.load())
    # Explicit check: FAISS.from_documents on an empty corpus fails with an
    # opaque library error; name the real problem instead.
    if not docs:
        raise ValueError(f"No PDF documents found in {pdf_dir!r}")
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embedding=embeddings)


def search_tool(query: str) -> str:
    """Run a Google search via SerpAPI and return the top organic snippet.

    Args:
        query: The search query string.

    Returns:
        The snippet of the first organic result, or a fallback message when
        the search yields no organic results / the top result has no snippet
        (previously this path crashed with KeyError/IndexError).
    """
    client = serpapi.Client(api_key=os.getenv("SERPAPI_API_KEY"))
    search = client.search({
        "engine": "google",
        "q": query,
    })
    results = dict(search)
    # Guard against empty or snippet-less result sets so the tool always
    # returns text instead of raising inside an agent loop.
    organic = results.get("organic_results") or []
    if not organic:
        return "No search results found."
    return organic[0].get("snippet", "No snippet available for the top result.")