Prachidwi committed
Commit 1ab4fe8 · verified · 1 parent: 04625d8

Delete app.py

Files changed (1)
  1. app.py +0 -99
app.py DELETED
@@ -1,99 +0,0 @@
- import os
- import streamlit as st
- from dotenv import load_dotenv
- from PyPDF2 import PdfReader
- from langchain.text_splitter import CharacterTextSplitter
- from langchain_community.vectorstores import FAISS
- from langchain.memory import ConversationBufferMemory
- from langchain.chains import ConversationalRetrievalChain
- from langchain.llms import HuggingFaceHub
- from langchain.embeddings import HuggingFaceEmbeddings
-
- def get_pdf_text(pdf_docs):
-     text = ""
-     for pdf in pdf_docs:
-         try:
-             pdf_reader = PdfReader(pdf)
-             for page in pdf_reader.pages:
-                 text += page.extract_text()
-         except Exception as e:
-             st.error(f"Error reading {pdf.name}: {e}. Skipping this file.")
-     return text
-
- def get_text_chunks(text):
-     text_splitter = CharacterTextSplitter(
-         separator="\n",
-         chunk_size=1000,
-         chunk_overlap=200,
-         length_function=len
-     )
-     chunks = text_splitter.split_text(text)
-     return chunks
-
- def get_vectorstore(text_chunks):
-     try:
-         embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-         vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embedding)
-         return vectorstore
-     except Exception as e:
-         st.error(f"Error creating vector store: {e}")
-         return None
-
- def get_conversation_chain(vectorstore):
-     # Fetch the HuggingFace API token from environment variable
-     api_token = os.getenv("HUGGINGFACE_API_TOKEN")
-     if not api_token:
-         st.error("HuggingFace API token not found. Please ensure it is set in the environment variables.")
-         return None
-
-     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512}, huggingfacehub_api_token=api_token)
-
-     memory = ConversationBufferMemory(
-         memory_key='chat_history', return_messages=True)
-     conversation_chain = ConversationalRetrievalChain.from_llm(
-         llm=llm,
-         retriever=vectorstore.as_retriever(),
-         memory=memory
-     )
-     return conversation_chain
-
- def handle_userinput(user_question):
-     response = st.session_state.conversation({'question': user_question})
-     st.session_state.chat_history = response['chat_history']
-
- def main():
-     load_dotenv()
-     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
-
-     if "conversation" not in st.session_state:
-         st.session_state.conversation = None
-     if "chat_history" not in st.session_state:
-         st.session_state.chat_history = None
-
-     st.header("Chat with multiple PDFs :books:")
-     user_question = st.text_input("Ask a question about your documents:")
-     if user_question:
-         handle_userinput(user_question)
-
-     with st.sidebar:
-         st.subheader("Your documents")
-         pdf_docs = st.file_uploader(
-             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
-         if st.button("Process"):
-             with st.spinner("Processing"):
-                 # get pdf text
-                 raw_text = get_pdf_text(pdf_docs)
-
-                 if raw_text:  # Proceed only if there is valid text
-                     # get the text chunks
-                     text_chunks = get_text_chunks(raw_text)
-
-                     # create vector store
-                     vectorstore = get_vectorstore(text_chunks)
-
-                     if vectorstore:  # Check if vectorstore is valid
-                         # create conversation chain
-                         st.session_state.conversation = get_conversation_chain(vectorstore)
-
- if __name__ == '__main__':
-     main()