mahi2k committed on
Commit b8cf414 · verified · 1 Parent(s): d01c433

upload app.py

Files changed (1)
  1. app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-
"""RAGLLAMA.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1z-I8OtD-uFaX9KHENLECbxMmPPuPDNSp
"""

# Dependencies from the original Colab notebook. These are shell commands, not
# Python, so they are commented out here; install them beforehand (e.g. via
# requirements.txt or a terminal).
# pip install transformers==4.37.2 optimum==1.12.0 --quiet
# pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ --quiet
# pip install langchain==0.1.9 --quiet
# !pip install chromadb
# pip install sentence_transformers==2.4.0 --quiet
# pip install unstructured --quiet
# pip install pdf2image --quiet
# pip install pdfminer.six==20221105 --quiet
# pip install unstructured-inference --quiet
# pip install faiss-gpu==1.7.2 --quiet
# pip install pikepdf==8.13.0 --quiet
# pip install pypdf==4.0.2 --quiet
# pip install pillow_heif==0.15.0 --quiet

from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"

# Load the GPTQ-quantized Llama-2 13B chat model and its tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Generation settings for the text-generation pipeline
gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.max_new_tokens = 512
gen_cfg.temperature = 0.0000001  # effectively 0.0 (near-greedy decoding)
gen_cfg.return_full_text = True
gen_cfg.do_sample = True
gen_cfg.repetition_penalty = 1.11

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg
)

# Wrap the Transformers pipeline so LangChain can use it as an LLM
llm = HuggingFacePipeline(pipeline=pipe)

"""Testing LLM with Prompt Structure"""

from textwrap import fill
from langchain.prompts import PromptTemplate

template = "Question: {text}"

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

text = "how a coffee bean is roasted?"
result = llm.invoke(prompt.format(text=text))
print(fill(result.strip(), width=100))

# Colab workaround: force UTF-8 as the preferred encoding
import locale
locale.getpreferredencoding = lambda: "UTF-8"

"""Load PDF"""

from langchain.document_loaders import UnstructuredPDFLoader
from langchain.vectorstores.utils import filter_complex_metadata  # removes complex metadata that is not in str, int, float or bool format

# Replace 'your_pdf_file.pdf' with the name of your uploaded PDF file
pdf_loader = UnstructuredPDFLoader('coffee.pdf')
pdf_doc = pdf_loader.load()
updated_pdf_doc = filter_complex_metadata(pdf_doc)

# Split the document into overlapping chunks for retrieval
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
chunked_pdf_doc = text_splitter.split_documents(updated_pdf_doc)
print(len(chunked_pdf_doc))

from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

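# A quick sanity check that the embedding model loads and returns a vector.
# HuggingFaceEmbeddings falls back to its default sentence-transformers model
# here; the sample query string is only an illustration.
sample_vector = embeddings.embed_query("coffee roasting")
print(len(sample_vector))
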
"""Creating Vector Database with FAISS (Similarity search)"""

# Create the vectorized db with FAISS
from langchain.vectorstores import FAISS
db_pdf = FAISS.from_documents(chunked_pdf_doc, embeddings)

# Create the vectorized db with Chroma
# from langchain.vectorstores import Chroma
# db_pdf = Chroma.from_documents(chunked_pdf_doc, embeddings)

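# A minimal retrieval sanity check against the FAISS store built above: fetch
# the top-scoring chunks for a sample question. The query string and k value
# are illustrative assumptions.
for doc in db_pdf.similarity_search("how is coffee roasted?", k=3):
    print(doc.page_content[:200])
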
import transformers
transformers.logging.set_verbosity_error()

"""RetrievalQA - LLM Prompt Structure"""

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Use the recommended prompt style for the Llama 2 chat LLM
prompt_template = """
[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.

<</SYS>>

{context}[/INST]

Question: {question}
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # retriever=db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.8})
    # Similarity search is the default way to retrieve documents relevant to a query, but we can use MMR by setting search_type="mmr".
    # k defines how many documents are returned; it defaults to 4.
    # score_threshold sets a minimum relevance for returned documents when using the "similarity_score_threshold" search type.
    # return_source_documents=True,  # optional: also return the source documents used to answer the question
    retriever=db_pdf.as_retriever(),  # (search_kwargs={'k': 5, 'score_threshold': 0.8}),
    chain_type_kwargs={"prompt": prompt},
)

query = "what is a coffee?"
result = Chain_pdf.invoke(query)
result_text = result['result']
answer_start_index = result_text.find("Question:")  # find the start index of the answer
answer = result_text[answer_start_index:].strip()  # extract the answer
print(fill(answer, width=100))

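# Sketch of an alternative retriever using the options described in the
# comments above; the search_type names are LangChain's, while the k value is
# an illustrative assumption. It could be passed as retriever= to
# RetrievalQA.from_chain_type in place of the default similarity retriever.
retriever_mmr = db_pdf.as_retriever(search_type="mmr", search_kwargs={'k': 4})
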
# A second test query from the notebook (uncomment to run at startup):
# query = "explain about coffee brewing?"
# result = Chain_pdf.invoke(query)
# result_text = result['result']
# answer_start_index = result_text.find("Question:")
# answer = result_text[answer_start_index:].strip()
# print(fill(answer, width=100))

"""Hallucination Test"""

# Out-of-scope query used to check that the chain declines to answer from
# information that is not in the PDF (uncomment to run):
# query = "How to make a milk tea?"
# result = Chain_pdf.invoke(query)
# result_text = result['result']
# answer_start_index = result_text.find("Question:")
# answer = result_text[answer_start_index:].strip()
# print(fill(answer, width=100))

# pip install gradio transformers  (shell command; install beforehand)

import gradio as gr

# Gradio wrapper: take a question, run it through the RetrievalQA chain,
# and return the extracted answer text.
def query_qa(query):
    result = Chain_pdf.invoke(query)
    result_text = result['result']
    answer_start_index = result_text.find("Question:")
    answer = result_text[answer_start_index:].strip()
    return answer

qa_interface = gr.Interface(query_qa, "text", "text", title="Coffee Q&A")

qa_interface.launch(debug=True)

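# If the default launch settings do not suit the hosting environment, Gradio's
# launch() also accepts explicit binding options; the host and port values
# below are illustrative assumptions.
# qa_interface.launch(server_name="0.0.0.0", server_port=7860, debug=True)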