Bryan Bimantaka (Monash University) committed
Commit d774ace · 1 Parent(s): d1161d3
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -1,6 +1,6 @@
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
- from langchain_community.document_loaders import TextLoader  # Changed
- from huggingface_hub import InferenceClient  # Make sure this is present
import transformers
from sentence_transformers import SentenceTransformer
from datasets import Dataset, Features, Value, Sequence
@@ -11,14 +11,27 @@ import torch
import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
- BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DOMAIN_DATA_DIR = "./data"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""
- TOP_K = 3

domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []
@@ -57,58 +70,62 @@ def retrieve(query, top_k=3):

    return scores, retrieved_examples['text']

- client = InferenceClient(BASE_MODEL)
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     # system_message,
-     max_tokens=256,
-     temperature=0.6,
-     top_p=0.9,
- ):
-     # Retrieve top 3 relevant documents based on the user's query
-     _, retrieved_docs = retrieve(message, top_k=TOP_K)
-
-     # Prepare the retrieved context
-     context = "\n".join([f"Dokumen {i+1}: {doc}" for i, doc in enumerate(retrieved_docs)])
-
-     messages = [{"role": "system", "content": SYS_MSG}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     # messages.append({"role": "user", "content": message})
-
-     # Append the current user message along with the retrieved context
-     user_context = f"{message}\nKonteks:\n{context}"
-     messages.append({"role": "user", "content": user_context})

-     response = ""

-     for message in client.chat_completion(
        messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-     """
-     For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-     """
demo = gr.ChatInterface(
-     respond,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
)

-
if __name__ == "__main__":
-     demo.launch()
 
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+ from langchain_community.document_loaders import TextLoader
+ from huggingface_hub import InferenceClient
import transformers
from sentence_transformers import SentenceTransformer
from datasets import Dataset, Features, Value, Sequence

import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
+ BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
DOMAIN_DATA_DIR = "./data"
+ CACHE_DIR = "./cache"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""

+ # LOGIN HF Auth
+ from huggingface_hub import login
+
+ # Get the API token from an environment variable (if it is stored in secrets)
+ import os
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ # ----------------------------------------------------------------------------------------------------------
+ # RAG PROCESS
+ TOP_K = 1
domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []


    return scores, retrieved_examples['text']

+ # END RAG
+ # ----------------------------------------------------------------------------------------------------------

+ # LLM
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )

+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, cache_dir=CACHE_DIR)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     # device_map="auto",
+     quantization_config=bnb_config,
+     cache_dir=CACHE_DIR
+ )

+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+     return PROMPT

+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     _, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
        messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     print(f"Prompt: {prompt}\n")
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
demo = gr.ChatInterface(
+     chat_function,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
)

if __name__ == "__main__":
+     demo.launch(share=True)
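Note: in each file touched by this commit, the new chat_function calls pipeline(...) and pipeline.tokenizer, but pipeline is still the factory function imported from transformers; no text-generation pipeline object is ever created, so these calls raise AttributeError at runtime. A minimal fix sketch, assuming the model and tokenizer loaded above (the name text_gen is illustrative, not from the commit):

    # Hypothetical fix, not part of the commit: build the pipeline object
    # that chat_function expects, reusing the quantized model and tokenizer.
    text_gen = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    # chat_function would then call text_gen(...) and text_gen.tokenizer
    # instead of pipeline(...) and pipeline.tokenizer.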
.ipynb_checkpoints/main-checkpoint.py ADDED
@@ -0,0 +1,128 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+ from langchain_community.document_loaders import TextLoader
+ from huggingface_hub import InferenceClient
+ import transformers
+ from sentence_transformers import SentenceTransformer
+ from datasets import Dataset, Features, Value, Sequence
+ import pandas as pd
+ import faiss
+ import os
+ import torch
+ import gradio as gr
+
+ ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
+ BASE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+ DOMAIN_DATA_DIR = "./data"
+ SYS_MSG = """
+ Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
+ Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
+ """
+ TOP_K = 1
+
+ from huggingface_hub import login
+
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
+ pages = []
+
+ for file in domain_data:
+     text_loader = TextLoader(file)
+     file_pages = text_loader.load()
+     pages.extend(file_pages)
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ splitter = RecursiveCharacterTextSplitter(
+     chunk_size=300,
+     chunk_overlap=64,
+     separators=["\n\n"]
+ )
+
+ documents = splitter.split_documents(pages)
+ content = [doc.page_content.strip() for doc in documents]
+
+ ST = SentenceTransformer(ST_MODEL)
+ embeddings = ST.encode(content)
+
+ features = Features({
+     'text': Value('string'),
+     'embeddings': Sequence(Value('float32'))
+ })
+
+ data = {'text': content, 'embeddings': [embedding.tolist() for embedding in embeddings]}
+ dataset = Dataset.from_dict(data, features=features)
+
+ dataset.add_faiss_index(column='embeddings')
+
+ def retrieve(query, top_k=3):
+     query_embedding = ST.encode([query])
+     scores, retrieved_examples = dataset.get_nearest_examples('embeddings', query_embedding, k=top_k)
+
+     return scores, retrieved_examples['text']
+
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+     quantization_config=bnb_config,
+ )
+
+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+
+     return PROMPT
+
+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     scores, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature + 0.1,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ # demo = gr.ChatInterface(
+ #     respond,
+ #     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+ # )
+
+ demo = gr.ChatInterface(
+     chat_function,
+     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
+ )
+
+ if __name__ == "__main__":
+     demo.launch(share=True)
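The retrieval path in this file can be smoke-tested on its own; a minimal sketch, assuming ./data holds at least one .txt file and the ST, dataset, and retrieve objects defined above are in scope (the query string is illustrative):

    # Hypothetical check of the FAISS-backed retrieval step, not part of the commit.
    scores, texts = retrieve("Apa itu pelecehan seksual?", top_k=1)
    for score, text in zip(scores, texts):
        # datasets' default flat FAISS index returns L2 distances: lower is closer.
        print(f"{score:.3f}  {text[:80]}")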
app.py CHANGED
@@ -12,16 +12,26 @@ import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
- # BASE_MODEL = "HuggingFaceH4/zephyr-7b-beta"
- # BASE_MODEL = "HuggingFaceH4/mistral-7b-sft-beta"
- # BASE_MODEL = "openai-community/gpt2"
DOMAIN_DATA_DIR = "./data"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""
- TOP_K = 1

domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []
@@ -60,62 +70,62 @@ def retrieve(query, top_k=3):

    return scores, retrieved_examples['text']

- client = InferenceClient(BASE_MODEL)
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     max_tokens=256,
-     temperature=0.4,
-     top_p=0.9,
- ):
-     # Retrieve top 3 relevant documents based on the user's query
-     _, retrieved_docs = retrieve(message, top_k=TOP_K)
-
-     # Prepare the retrieved context
-     context = "\n".join([f"Dokumen {i+1}: {doc}" for i, doc in enumerate(retrieved_docs)])
-
-     print(f"Feed:\n{context}")
-
-     messages = [{"role": "system", "content": SYS_MSG}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     # messages.append({"role": "user", "content": message})
-
-     # Append the current user message along with the retrieved context
-     user_context = f"{message}\nKonteks:\n{context}"
-     messages.append({"role": "user", "content": user_context})

-     response = ""

-     for message in client.chat_completion(
        messages,
-         max_tokens=max_tokens,
-         # stream=False,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         print(f"Message:\n{message}\n\n")
-         token = message.choices[0].delta.content
-
-         response += token
-         print(f"Response:\n{response}")
-         yield response
-
-     """
-     For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-     """
demo = gr.ChatInterface(
-     respond,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
)

-
if __name__ == "__main__":
    demo.launch(share=True)
 

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
DOMAIN_DATA_DIR = "./data"
+ CACHE_DIR = "./cache"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""

+ # LOGIN HF Auth
+ from huggingface_hub import login
+
+ # Get the API token from an environment variable (if it is stored in secrets)
+ import os
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ # ----------------------------------------------------------------------------------------------------------
+ # RAG PROCESS
+ TOP_K = 1
domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []


    return scores, retrieved_examples['text']

+ # END RAG
+ # ----------------------------------------------------------------------------------------------------------

+ # LLM
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )

+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, cache_dir=CACHE_DIR)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     # device_map="auto",
+     quantization_config=bnb_config,
+     cache_dir=CACHE_DIR
+ )

+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+     return PROMPT

+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     _, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
        messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     print(f"Prompt: {prompt}\n")
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
demo = gr.ChatInterface(
+     chat_function,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
)

if __name__ == "__main__":
    demo.launch(share=True)
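One last observation: gr.ChatInterface calls chat_function with only (message, history), so the max_new_tokens and temperature parameters always keep their defaults. If they are meant to be user-tunable, Gradio's additional_inputs is one option; a sketch under that assumption (slider ranges are illustrative, not from the commit):

    # Hypothetical variant, not part of the commit: expose generation knobs in the UI.
    demo = gr.ChatInterface(
        chat_function,
        textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
        chatbot=gr.Chatbot(height=400),
        additional_inputs=[
            gr.Slider(32, 1024, value=256, step=32, label="max_new_tokens"),
            gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="temperature"),
        ],
    )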