SidiM-AI committed on
Commit
59d89c2
·
verified ·
1 Parent(s): d051d9c

Upload 3 files

Browse files
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +114 -0
  3. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Ignore environment files
2
+ .env
3
+ /venv
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
# NOTE(review): the file watcher is disabled *before* importing streamlit so the
# setting takes effect at import time — presumably to avoid watcher issues with
# heavyweight ML imports below; confirm before removing.
os.environ["STREAMLIT_SERVER_ENABLE_FILE_WATCHER"] = "false"

import logging
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_groq import ChatGroq

# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.
load_dotenv()

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
24
# Function to extract text from PDF files
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the uploaded PDFs.

    Args:
        pdf_docs: Iterable of file-like objects readable by PyPDF2.PdfReader.

    Returns:
        str: The text of all pages joined together. Pages whose text cannot
        be extracted (``extract_text()`` returns None) contribute nothing.
    """
    # str.join over a generator avoids the quadratic cost of repeated
    # string concatenation with `text +=` on large documents.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
32
+
33
# Function to split the extracted text into chunks
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Chunks are at most 1000 characters long and overlap by 200 characters
    so sentences spanning a chunk boundary are not lost.

    Args:
        text: The full extracted document text.

    Returns:
        list[str]: The chunked text.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_text(text)
    return chunks
42
+
43
# Function to create a FAISS vectorstore using Hugging Face embeddings
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in an in-memory FAISS store.

    Args:
        text_chunks: List of text chunks to embed.

    Returns:
        A FAISS vectorstore built over the chunks.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=HuggingFaceEmbeddings(model_name=model_name),
    )
48
+
49
# Function to set up the conversational retrieval chain with memory
def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain over *vectorstore* with buffer memory.

    Args:
        vectorstore: A FAISS (or compatible) vectorstore to retrieve from.

    Returns:
        The configured chain, or None on failure (callers treat a falsy
        conversation as "not ready").
    """
    try:
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            # Fail early with an actionable message instead of an opaque
            # authentication error from the Groq client.
            logging.error("GROQ_API_KEY is not set.")
            st.error("GROQ_API_KEY is missing. Add it to your .env file.")
            return None

        llm = ChatGroq(model="llama3-8b-8192", api_key=groq_api_key, temperature=0.5)

        memory = ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=True
        )

        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            memory=memory
        )

        logging.info("Conversation chain created successfully.")
        return conversation_chain
    except Exception as e:
        # logging.exception records the traceback, not just the message.
        logging.exception("Error creating conversation chain: %s", e)
        st.error("An error occurred while setting up the conversation chain.")
        return None
71
+
72
# Handle user input
def handle_userinput(user_question):
    """Route the user's question through the conversation chain and render
    the full chat transcript; warn if no documents were processed yet.

    Args:
        user_question: The question typed by the user.
    """
    chain = st.session_state.conversation
    if not chain:
        st.warning("Please process the documents first.")
        return

    response = chain({'question': user_question})

    if 'chat_history' in response:
        # Memory returns alternating human/AI messages; even indices are the user.
        for idx, message in enumerate(response['chat_history']):
            if idx % 2:
                st.write(f"**Bot:** {message.content}")
            else:
                st.write(f"**You:** {message.content}")
    else:
        st.write(f"**Bot:** {response['answer']}")
87
+
88
# Main function to run the Streamlit app
def main():
    """Entry point: configure the page, accept questions, and process uploads.

    Stores the conversation chain in ``st.session_state.conversation`` so it
    survives Streamlit reruns. (Environment variables are already loaded by
    the module-level ``load_dotenv()`` call.)
    """
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")

    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
        )
        if st.button("Process"):
            if not pdf_docs:
                # FAISS.from_texts raises on an empty chunk list; guard first.
                st.warning("Please upload at least one PDF before processing.")
                return
            with st.spinner("Processing..."):
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversation_chain(vectorstore)
112
+
113
+ if __name__ == '__main__':
114
+ main()
requirements.txt ADDED
Binary file (4.01 kB). View file