File size: 3,767 Bytes
9254348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3600e92
d620266
9254348
 
 
 
 
 
 
 
d620266
9254348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv
# !pip install langchain-openai
# !pip install huggingface_hub

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import GoogleGenerativeAIEmbeddings

import gradio as gr
import os
import requests

import sys
sys.path.append('../..')

# For Google Colab
'''
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# For Desktop

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''

# For Hugging Face: read the credentials from environment variables.
# os.getenv yields None for any variable that is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
hf_token = os.getenv('hf_token')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
fs_token = os.getenv('fs_token')

# Model identifiers
llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"

# List every PDF in the dataset repository and prefix each path with the
# hf:// scheme so it can be handed to the PDF loader as a URL-style path.
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/*.pdf")
hf_file_paths = [f"hf://{pdf_path}" for pdf_path in file_paths]

# (retriever, llm) pair, built lazily on the first non-empty query and then
# reused for the lifetime of the process.
_RETRIEVAL_STACK = None


def _build_retrieval_stack():
    """Load, split and embed the PDF files, then return ``(retriever, llm)``.

    This is the expensive part of answering a query (PDF download, embedding
    calls, vector-store write), so it is meant to run once per process rather
    than once per question.
    """
    # Load every page of every PDF listed in hf_file_paths
    docs = []
    for file_path in hf_file_paths:
        docs.extend(PyPDFLoader(file_path).load())

    # Splitting Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Using Google GenAI Text Embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_document",
        google_api_key=GEMINI_API_KEY,
    )

    # Create Embeddings for Searching the Splits
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    vectordb.persist()

    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
    return vectordb.as_retriever(), llm


def chat_query(question):
    """Answer ``question`` from the indexed PDF files.

    Args:
        question: The query text entered by the user.

    Returns:
        The answer string produced by the conversational retrieval chain, or
        a short prompt asking for input when ``question`` is ``None``, empty
        or only whitespace.
    """
    global _RETRIEVAL_STACK

    # Nothing to search for: skip the indexing work and the LLM call.
    if question is None or not question.strip():
        return "Please enter a query."

    # Index the documents only once. Rebuilding on every query repeats the
    # PDF loading and embedding work and writes the same chunks into the
    # same persist directory again.
    if _RETRIEVAL_STACK is None:
        _RETRIEVAL_STACK = _build_retrieval_stack()
    retriever, llm = _RETRIEVAL_STACK

    # Memory is created per call, so no chat history is carried over from
    # one query to the next.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Conversation Retrieval Chain
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

    result = qa({"question": question})
    return result['answer']


# Gradio front end: one multi-line text box for the query, one text box for
# the reply, both wired to chat_query.
query_box = gr.Textbox(lines=6, placeholder="Enter your Query here....", label="Query :")
reply_box = gr.Textbox(label="Chatbot Reply : ")

# Logo.png is looked up in the current working directory.
logo_path = os.path.join(os.getcwd(), "Logo.png")

iface = gr.Interface(
    fn=chat_query,
    inputs=query_box,
    outputs=reply_box,
    title=" -----:  ChatBot  :----- ",
    description="""-- This Model can distinctively answer your Query using ChatGPT based on the Uploaded PDF Files (Multiple Files also supported).
                   \n\n-- For precise reply, please input `Specific Keywords` in your Query, after uploading your files. \
                   \n\n-- Reply time is solely based on the File size. """,
    thumbnail=logo_path,
    concurrency_limit=None,
)


# Start the web server for the interface.
iface.launch(debug=True, share=True)

# Sample query: What should be the GIB height outside the GIS hall?