# -*- coding: utf-8 -*-
"""RAGLLAMA.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1z-I8OtD-uFaX9KHENLECbxMmPPuPDNSp
"""


from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"
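
# Note: GPTQ-quantized checkpoints like this one usually need extra packages
# (e.g. auto-gptq / optimum) installed in the runtime. The install cell is not
# part of this export, so the exact command below is an assumption:
# !pip install -q auto-gptq optimum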

model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.max_new_tokens = 512
gen_cfg.temperature = 0.0000001  # near-zero temperature, effectively greedy; 0.0 is not allowed when do_sample=True
gen_cfg.return_full_text = True  # include the prompt in the returned text; the cells below strip it
gen_cfg.do_sample = True
gen_cfg.repetition_penalty = 1.11

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg
)

llm = HuggingFacePipeline(pipeline=pipe)

"""Testing LLM with Prompt Structure

"""

from textwrap import fill
from langchain.prompts import PromptTemplate

template = "Question: {text}"

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

text = "how a coffee bean is roasted?"
result = llm.invoke(prompt.format(text=text))
print(fill(result.strip(), width=100))

import locale
locale.getpreferredencoding = lambda: "UTF-8"  # Colab workaround: force UTF-8 so the PDF loader below does not hit locale/encoding errors

"""Load PDF"""

from langchain.document_loaders import UnstructuredPDFLoader
from langchain.vectorstores.utils import filter_complex_metadata  # 'filter_complex_metadata' removes complex metadata that are not in str, int, float or bool format

# Replace 'coffee.pdf' with the name of your uploaded PDF file
pdf_loader = UnstructuredPDFLoader('coffee.pdf')
pdf_doc = pdf_loader.load()
updated_pdf_doc = filter_complex_metadata(pdf_doc)

from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
chunked_pdf_doc = text_splitter.split_documents(updated_pdf_doc)
print(f"Number of chunks: {len(chunked_pdf_doc)}")
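
# A quick sanity check (optional): peek at the first chunk to confirm the
# splitter produced sensible text; the 200-character cut-off is arbitrary.
print(chunked_pdf_doc[0].page_content[:200])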

from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()
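
# HuggingFaceEmbeddings defaults to a general-purpose sentence-transformers model.
# A minimal sketch (optional) of selecting a lighter model explicitly -- the model
# name below is just an example, not what the original notebook used:
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")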

"""Creating Vector Database with FAISS (Similarity search)"""

# Create the vectorized db with FAISS
from langchain.vectorstores import FAISS
db_pdf = FAISS.from_documents(chunked_pdf_doc, embeddings)

# Alternative: create the vectorized db with Chroma
# from langchain.vectorstores import Chroma
# db_pdf = Chroma.from_documents(chunked_pdf_doc, embeddings)
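
# Optional: persist the FAISS index so it does not have to be rebuilt on every run.
# A minimal sketch; the folder name "faiss_coffee_index" is just an illustrative choice:
# db_pdf.save_local("faiss_coffee_index")
# db_pdf = FAISS.load_local("faiss_coffee_index", embeddings)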

import transformers
transformers.logging.set_verbosity_error()

"""RetrivalQA - LLM Prompt Structure"""

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# use the recommended prompt style for the Llama 2 chat model
prompt_template = """
[INST] <<SYS>>
Use the following context to answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.

<</SYS>>

{context}[/INST]

Question: {question}
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Similarity search is the default way to retrieve documents relevant to a query,
    # but MMR can be used instead by setting search_type="mmr" (see the alternative sketch after this cell).
    # k defines how many documents are returned; it defaults to 4.
    # score_threshold sets a minimum relevance for returned documents when using the
    # "similarity_score_threshold" search type.
    # return_source_documents=True,  # optional: also return the source documents used to answer the question
    retriever=db_pdf.as_retriever(),  # e.g. as_retriever(search_kwargs={'k': 5, 'score_threshold': 0.8})
    chain_type_kwargs={"prompt": prompt},
)

query = "What is coffee?"
result = Chain_pdf.invoke(query)
result_text = result['result']
answer_start_index = result_text.find("Question:")  # the generated text echoes the prompt; keep everything from "Question:" onward
answer = result_text[answer_start_index:].strip()
print(fill(answer, width=100))
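
# Alternative retriever configuration (a sketch, not what the notebook ran):
# fetch more candidate chunks and re-rank them for diversity with MMR.
# Chain_pdf_mmr = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=db_pdf.as_retriever(search_type="mmr", search_kwargs={"k": 5}),
#     chain_type_kwargs={"prompt": prompt},
# )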

query = "Explain how coffee is brewed."
result = Chain_pdf.invoke(query)
result_text = result['result']
answer_start_index = result_text.find("Question:")
answer = result_text[answer_start_index:].strip()
print(fill(answer, width=100))

"""Hallucination Test

"""

query = "How do I make milk tea?"
result = Chain_pdf.invoke(query)
result_text = result['result']
answer_start_index = result_text.find("Question:")
answer = result_text[answer_start_index:].strip()
print(fill(answer, width=100))



import gradio as gr

def query_qa(query):
    """Run the RetrievalQA chain and return the extracted answer."""
    result = Chain_pdf.invoke(query)
    result_text = result['result']
    answer_start_index = result_text.find("Question:")
    answer = result_text[answer_start_index:].strip()
    return answer

qa_interface = gr.Interface(query_qa, "text", "text", title="Coffee Q&A")

qa_interface.launch(debug=True)
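
# When running in Colab, a temporary public link can be requested instead:
# qa_interface.launch(debug=True, share=True)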