# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install langchain-openai   # provides langchain_openai.ChatOpenAI, imported below
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv
# !pip install huggingface_hub   # provides HfFileSystem, imported below
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import gradio as gr
import os
import requests
import sys

sys.path.append('../..')
# API keys: the commented-out block below covers Google Colab and local
# desktop setups; the active block further down reads Hugging Face Space secrets.
'''
# For Google Colab
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# For Desktop
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''
# For Hugging Face Spaces: read keys from the Space's secrets
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')

llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"

# Collect the PDF files stored in the Model-TS dataset repo
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/*.pdf")
hf_file_paths = ["hf://" + file_path for file_path in file_paths]
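# The resulting hf:// URLs (e.g. "hf://datasets/abhivsh/Model-TS/<file>.pdf")
# are what chat_query() below hands to PyPDFLoader.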
def chat_query(question):
    # Load every PDF in the dataset repo
    loaders = [PyPDFLoader(file_path) for file_path in hf_file_paths]
    docs = []
    for loader in loaders:
        docs.extend(loader.load())

    # Split the documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Google GenAI text embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_document",
        google_api_key=GEMINI_API_KEY,
    )

    # Embed the splits into a persistent Chroma store
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    vectordb.persist()

    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)

    # Memory (note: recreated on every call, so chat history does not
    # actually carry over between queries)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Conversational retrieval chain
    retriever = vectordb.as_retriever()
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

    # The question comes from the Gradio textbox instead of input()
    result = qa({"question": question})
    return result['answer']
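# Performance note: chat_query() re-loads, re-splits, and re-embeds every PDF
# on each call. A minimal sketch of building the store once at startup
# (reusing the globals defined above; not part of the original Space) could be:
#
#   _vectordb = None
#   def get_vectordb():
#       global _vectordb
#       if _vectordb is None:
#           loaders = [PyPDFLoader(p) for p in hf_file_paths]
#           docs = [doc for loader in loaders for doc in loader.load()]
#           splits = RecursiveCharacterTextSplitter(
#               chunk_size=1500, chunk_overlap=150).split_documents(docs)
#           embeddings = GoogleGenerativeAIEmbeddings(
#               model="models/embedding-001", task_type="retrieval_document",
#               google_api_key=GEMINI_API_KEY)
#           _vectordb = Chroma.from_documents(
#               documents=splits, persist_directory='./chroma/',
#               embedding=embeddings)
#       return _vectordb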
logo_path = os.path.join(os.getcwd(), "Logo.png")

iface = gr.Interface(
    fn=chat_query,
    inputs=gr.Textbox(lines=6, placeholder="Enter your Query here....", label="Query :"),
    outputs=gr.Textbox(label="Chatbot Reply : "),
    title=" -----: ChatBot :----- ",
    description="""-- This model answers your query with ChatGPT, grounded in the uploaded PDF files (multiple files are supported).
\n\n-- For a precise reply, include `specific keywords` in your query after uploading your files.
\n\n-- Reply time depends on the file sizes. """,
    concurrency_limit=None,
    thumbnail=logo_path,
)

iface.launch(share=True, debug=True)
# Example query: What should be the GIB height outside the GIS hall?
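# Hypothetical direct call for local testing (bypasses the UI; assumes the
# environment secrets above are set):
# print(chat_query("What should be the GIB height outside the GIS hall?"))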