# Engg-SS_ChatBOT / app_GenAI Embeddings.py
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install langchain-openai
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv
# !pip install huggingface_hub
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import gradio as gr
import os
import requests
import sys
sys.path.append('../..')
# Alternative ways of loading the API keys (disabled):
'''
# For Google Colab
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# For Desktop
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''
# For Hugging Face Spaces (keys read from the Space secrets)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')
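# Minimal sanity check (an addition, not part of the original app): fail fast if a
# required secret is missing, so the error surfaces at startup rather than mid-query.
for _name, _value in {"OPENAI_API_KEY": OPENAI_API_KEY, "GEMINI_API_KEY": GEMINI_API_KEY, "fs_token": fs_token}.items():
    if not _value:
        raise RuntimeError(f"Missing required secret: {_name}")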
llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/*.pdf")
hf_file_paths = ["hf://"+ file_path for file_path in file_paths]
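# Optional sanity check (a sketch, not in the original script): log how many PDFs the
# glob found, so an empty result is easy to spot in the Space logs.
print(f"Found {len(hf_file_paths)} PDF file(s) in datasets/abhivsh/Model-TS")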
def chat_query(question):
    # Load every PDF file found on the Hub
    loaders = []
    for file_path in hf_file_paths:
        loaders.append(PyPDFLoader(file_path))
    docs = []
    for loader in loaders:
        docs.extend(loader.load())
    # Split the documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)
    # Embed the chunks with Google GenAI text embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_document", google_api_key=GEMINI_API_KEY)
    # Build a Chroma vector store over the splits for retrieval
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
    vectordb.persist()
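    # Note: the store is rebuilt on every call. A possible optimisation (a sketch, not the
    # original behaviour) is to reload the persisted store when it already exists, e.g.:
    # vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)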
    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
    # Conversation memory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # Conversational Retrieval Chain
    retriever = vectordb.as_retriever()
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
    # The question comes from the Gradio textbox instead of input()
    result = qa({"question": question})
    return result['answer']
logo_path = os.path.join(os.getcwd(), "Logo.png")
iface = gr.Interface(
    fn=chat_query,
    inputs=gr.Textbox(lines=6, placeholder="Enter your query here...", label="Query :"),
    outputs=gr.Textbox(label="Chatbot Reply : "),
    title=" -----: ChatBot :----- ",
    description="""-- This model answers your query with ChatGPT, using the uploaded PDF files as context (multiple files are supported).
    \n\n-- For a precise reply, please include `specific keywords` in your query after uploading your files.
    \n\n-- Response time depends mainly on the file size.""",
    concurrency_limit=None,
    thumbnail=logo_path,
)
iface.launch(share=True, debug=True)
# Example query: What should be the GIB height outside the GIS hall ?