httpdaniel committed on
Commit
90231c1
·
1 Parent(s): 3a8a9b9

Updating UI

Browse files
Files changed (1) hide show
  1. app.py +22 -58
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_core.prompts import ChatPromptTemplate
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain.chains import create_retrieval_chain
10
 
11
- def initialise_vectorstore(pdf, progress=gr.Progress()):
12
  progress(0, desc="Reading PDF")
13
 
14
  loader = PyPDFLoader(pdf.name)
@@ -16,27 +16,21 @@ def initialise_vectorstore(pdf, progress=gr.Progress()):
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
17
  splits = text_splitter.split_documents(pages)
18
 
19
- progress(0.5, desc="Initialising Vectorstore")
20
 
21
  vectorstore = Chroma.from_documents(
22
  splits,
23
  embedding=HuggingFaceEmbeddings()
24
  )
25
 
26
- progress(1, desc="Complete")
27
-
28
- return vectorstore, progress
29
-
30
- def initialise_chain(llm, vectorstore, progress=gr.Progress()):
31
-
32
- progress(0, desc="Initialising LLM")
33
 
34
  llm = HuggingFaceEndpoint(
35
  repo_id=llm,
36
  task="text-generation",
37
  max_new_tokens=512,
38
  top_k=4,
39
- temperature=0.1
40
  )
41
 
42
  chat = ChatHuggingFace(
@@ -44,16 +38,14 @@ def initialise_chain(llm, vectorstore, progress=gr.Progress()):
44
  verbose=True
45
  )
46
 
47
- progress(0.5, desc="Initialising RAG Chain")
48
-
49
- retriever = vectorstore.as_retriever()
50
 
51
  system_prompt = (
52
  "You are an assistant for question-answering tasks. "
53
  "Use the following pieces of retrieved context to answer "
54
  "the question. If you don't know the answer, say that you "
55
- "don't know. Use three sentences maximum and keep the "
56
- "answer concise."
57
  "\n\n"
58
  "{context}"
59
  )
@@ -68,9 +60,7 @@ def initialise_chain(llm, vectorstore, progress=gr.Progress()):
68
  question_answer_chain = create_stuff_documents_chain(chat, prompt)
69
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
70
 
71
- progress(0.9, desc="Complete")
72
-
73
- return rag_chain, progress
74
 
75
  def send(message, rag_chain, chat_history):
76
  response = rag_chain.invoke({"input": message})
@@ -87,46 +77,20 @@ with gr.Blocks() as demo:
87
  gr.Markdown("<H3>Upload and ask questions about your PDF files</H3>")
88
  gr.Markdown("<H6>Note: This project uses LangChain to perform RAG (Retrieval Augmented Generation) on PDF files, allowing users to ask any questions related to their contents. When a PDF file is uploaded, it is embedded and stored in an in-memory Chroma vectorstore, which the chatbot uses as a source of knowledge when aswering user questions.</H6>")
89
 
90
- with gr.Tab("Vectorstore"):
91
- with gr.Row():
92
- input_pdf = gr.File()
93
- with gr.Row():
94
- with gr.Column(scale=1, min_width=0):
95
- pass
96
- with gr.Column(scale=2, min_width=0):
97
- initialise_vectorstore_btn = gr.Button(
98
- "Initialise Vectorstore",
99
- variant='primary'
100
- )
101
- with gr.Column(scale=1, min_width=0):
102
- pass
103
- with gr.Row():
104
- vectorstore_initialisation_progress = gr.Textbox(value="None", label="Initialization")
105
-
106
- with gr.Tab("RAG Chain"):
107
- with gr.Row():
108
- language_model = gr.Radio(["microsoft/Phi-3-mini-4k-instruct", "mistralai/Mistral-7B-Instruct-v0.2", "HuggingFaceH4/zephyr-7b-beta", "mistralai/Mixtral-8x7B-Instruct-v0.1"])
109
- with gr.Row():
110
- with gr.Column(scale=1, min_width=0):
111
- pass
112
- with gr.Column(scale=2, min_width=0):
113
- initialise_chain_btn = gr.Button(
114
- "Initialise RAG Chain",
115
- variant='primary'
116
- )
117
- with gr.Column(scale=1, min_width=0):
118
- pass
119
- with gr.Row():
120
- chain_initialisation_progress = gr.Textbox(value="None", label="Initialization")
121
-
122
- with gr.Tab("Chatbot"):
123
- with gr.Row():
124
- chatbot = gr.Chatbot()
125
- with gr.Row():
126
- message = gr.Textbox()
127
-
128
- initialise_vectorstore_btn.click(fn=initialise_vectorstore, inputs=input_pdf, outputs=[vectorstore, vectorstore_initialisation_progress])
129
- initialise_chain_btn.click(fn=initialise_chain, inputs=[language_model, vectorstore], outputs=[rag_chain, chain_initialisation_progress])
130
  message.submit(fn=send, inputs=[message, rag_chain, chatbot], outputs=[message, chatbot])
131
 
132
  demo.launch()
 
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain.chains import create_retrieval_chain
10
 
11
+ def initialise_chatbot(pdf, llm, progress=gr.Progress()):
12
  progress(0, desc="Reading PDF")
13
 
14
  loader = PyPDFLoader(pdf.name)
 
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
17
  splits = text_splitter.split_documents(pages)
18
 
19
+ progress(0.25, desc="Initialising Vectorstore")
20
 
21
  vectorstore = Chroma.from_documents(
22
  splits,
23
  embedding=HuggingFaceEmbeddings()
24
  )
25
 
26
+ progress(0.85, desc="Initialising LLM")
 
 
 
 
 
 
27
 
28
  llm = HuggingFaceEndpoint(
29
  repo_id=llm,
30
  task="text-generation",
31
  max_new_tokens=512,
32
  top_k=4,
33
+ temperature=0.05
34
  )
35
 
36
  chat = ChatHuggingFace(
 
38
  verbose=True
39
  )
40
 
41
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 8})
 
 
42
 
43
  system_prompt = (
44
  "You are an assistant for question-answering tasks. "
45
  "Use the following pieces of retrieved context to answer "
46
  "the question. If you don't know the answer, say that you "
47
+ "don't know. Use two sentences maximum and keep the "
48
+ "answer concise and to the point."
49
  "\n\n"
50
  "{context}"
51
  )
 
60
  question_answer_chain = create_stuff_documents_chain(chat, prompt)
61
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
62
 
63
+ return rag_chain, "Complete!"
 
 
64
 
65
  def send(message, rag_chain, chat_history):
66
  response = rag_chain.invoke({"input": message})
 
77
  gr.Markdown("<H3>Upload and ask questions about your PDF files</H3>")
78
  gr.Markdown("<H6>Note: This project uses LangChain to perform RAG (Retrieval Augmented Generation) on PDF files, allowing users to ask any questions related to their contents. When a PDF file is uploaded, it is embedded and stored in an in-memory Chroma vectorstore, which the chatbot uses as a source of knowledge when aswering user questions.</H6>")
79
 
80
+ with gr.Row():
81
+ with gr.Column(scale=1):
82
+ input_pdf = gr.File(label="1. Upload PDF")
83
+ language_model = gr.Radio(label="2. Choose LLM", choices=["microsoft/Phi-3-mini-4k-instruct", "mistralai/Mistral-7B-Instruct-v0.2", "HuggingFaceH4/zephyr-7b-beta", "mistralai/Mixtral-8x7B-Instruct-v0.1"])
84
+ initialise_chatbot_btn = gr.Button(value="3. Initialise Chatbot", variant='primary')
85
+ chatbot_initialisation_progress = gr.Textbox(value="Not Started", label="Initialization Progress")
86
+
87
+ with gr.Column(scale=4):
88
+ chatbot = gr.Chatbot(scale=1)
89
+ message = gr.Textbox(label="4. Ask questions about your PDF")
90
+
91
+ initialise_chatbot_btn.click(
92
+ fn=initialise_chatbot, inputs=[input_pdf, language_model], outputs=[rag_chain, chatbot_initialisation_progress]
93
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  message.submit(fn=send, inputs=[message, rag_chain, chatbot], outputs=[message, chatbot])
95
 
96
  demo.launch()