# Engg-SS_ChatBOT / app_GenAI Embeddings.py
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install langchain-openai
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv
# !pip install huggingface_hub
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import gradio as gr
import os
import requests
import sys
sys.path.append('../..')
# Alternative ways of loading the API keys (disabled):
'''
# For Google Colab
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# For Desktop
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''
# For Hugging Face Spaces (keys read from the Space secrets)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')
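# Minimal sanity check (an addition, not part of the original app): fail fast if a
# required secret is missing, so the error surfaces at startup rather than mid-query.
for _name, _value in {"OPENAI_API_KEY": OPENAI_API_KEY, "GEMINI_API_KEY": GEMINI_API_KEY, "fs_token": fs_token}.items():
    if not _value:
        raise RuntimeError(f"Missing required secret: {_name}")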
llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/*.pdf")
hf_file_paths = ["hf://"+ file_path for file_path in file_paths]
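# Optional sanity check (a sketch, not in the original script): log how many PDFs the
# glob found, so an empty result is easy to spot in the Space logs.
print(f"Found {len(hf_file_paths)} PDF file(s) in datasets/abhivsh/Model-TS")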
def chat_query(question):
    # Load every PDF file found on the Hub
    loaders = []
    for file_path in hf_file_paths:
        loaders.append(PyPDFLoader(file_path))
    docs = []
    for loader in loaders:
        docs.extend(loader.load())
    # Split the documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)
    # Embed the chunks with Google GenAI text embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_document", google_api_key=GEMINI_API_KEY)
    # Build a Chroma vector store over the splits for retrieval
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
    vectordb.persist()
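    # Note: the store is rebuilt on every call. A possible optimisation (a sketch, not the
    # original behaviour) is to reload the persisted store when it already exists, e.g.:
    # vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)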
    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
    # Conversation memory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # Conversational Retrieval Chain
    retriever = vectordb.as_retriever()
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
    # The question comes from the Gradio textbox instead of input()
    result = qa({"question": question})
    return result['answer']
logo_path = os.path.join(os.getcwd(), "Logo.png")
iface = gr.Interface(
    fn=chat_query,
    inputs=gr.Textbox(lines=6, placeholder="Enter your query here...", label="Query :"),
    outputs=gr.Textbox(label="Chatbot Reply : "),
    title=" -----: ChatBot :----- ",
    description="""-- This model answers your query with ChatGPT, using the uploaded PDF files as context (multiple files are supported).
    \n\n-- For a precise reply, please include `specific keywords` in your query after uploading your files.
    \n\n-- Response time depends mainly on the file size.""",
    concurrency_limit=None,
    thumbnail=logo_path,
)
iface.launch(share=True, debug=True)
# Example query: What should be the GIB height outside the GIS hall ?