Bryan Bimantaka (Monash University) committed
Commit d774ace · 1 Parent(s): d1161d3
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -1,6 +1,6 @@
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
- from langchain_community.document_loaders import TextLoader  # Changed
- from huggingface_hub import InferenceClient  # Make sure this is present
import transformers
from sentence_transformers import SentenceTransformer
from datasets import Dataset, Features, Value, Sequence
@@ -11,14 +11,27 @@ import torch
import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
- BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DOMAIN_DATA_DIR = "./data"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""
- TOP_K = 3

domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []
@@ -57,58 +70,62 @@ def retrieve(query, top_k=3):

    return scores, retrieved_examples['text']

- client = InferenceClient(BASE_MODEL)
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     # system_message,
-     max_tokens=256,
-     temperature=0.6,
-     top_p=0.9,
- ):
-     # Retrieve top 3 relevant documents based on the user's query
-     _, retrieved_docs = retrieve(message, top_k=TOP_K)
-
-     # Prepare the retrieved context
-     context = "\n".join([f"Dokumen {i+1}: {doc}" for i, doc in enumerate(retrieved_docs)])
-
-     messages = [{"role": "system", "content": SYS_MSG}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     # messages.append({"role": "user", "content": message})
-
-     # Append the current user message along with the retrieved context
-     user_context = f"{message}\nKonteks:\n{context}"
-     messages.append({"role": "user", "content": user_context})

-     response = ""

-     for message in client.chat_completion(
        messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-     """
-     For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-     """
demo = gr.ChatInterface(
-     respond,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
)

-
if __name__ == "__main__":
-     demo.launch()
 
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+ from langchain_community.document_loaders import TextLoader
+ from huggingface_hub import InferenceClient
import transformers
from sentence_transformers import SentenceTransformer
from datasets import Dataset, Features, Value, Sequence

import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
+ BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
DOMAIN_DATA_DIR = "./data"
+ CACHE_DIR = "./cache"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""

+ # LOGIN HF Auth
+ from huggingface_hub import login
+
+ # Get the API token from an environment variable (if it is stored in secrets)
+ import os
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ # ----------------------------------------------------------------------------------------------------------
+ # RAG PROCESS
+ TOP_K = 1
domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []


    return scores, retrieved_examples['text']

+ # END RAG
+ # ----------------------------------------------------------------------------------------------------------

+ # LLM
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )

+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, cache_dir=CACHE_DIR)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     # device_map="auto",
+     quantization_config=bnb_config,
+     cache_dir=CACHE_DIR
+ )

+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+     return PROMPT

+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     _, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
        messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     print(f"Prompt: {prompt}\n")
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
demo = gr.ChatInterface(
+     chat_function,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
)

if __name__ == "__main__":
+     demo.launch(share=True)
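Note: in each file touched by this commit, the new chat_function calls pipeline(...) and pipeline.tokenizer, but pipeline is still the factory function imported from transformers; no text-generation pipeline object is ever created, so these calls raise AttributeError at runtime. A minimal fix sketch, assuming the model and tokenizer loaded above (the name text_gen is illustrative, not from the commit):

    # Hypothetical fix, not part of the commit: build the pipeline object
    # that chat_function expects, reusing the quantized model and tokenizer.
    text_gen = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    # chat_function would then call text_gen(...) and text_gen.tokenizer
    # instead of pipeline(...) and pipeline.tokenizer.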
.ipynb_checkpoints/main-checkpoint.py ADDED
@@ -0,0 +1,128 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+ from langchain_community.document_loaders import TextLoader
+ from huggingface_hub import InferenceClient
+ import transformers
+ from sentence_transformers import SentenceTransformer
+ from datasets import Dataset, Features, Value, Sequence
+ import pandas as pd
+ import faiss
+ import os
+ import torch
+ import gradio as gr
+
+ ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
+ BASE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+ DOMAIN_DATA_DIR = "./data"
+ SYS_MSG = """
+ Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
+ Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
+ """
+ TOP_K = 1
+
+ from huggingface_hub import login
+
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
+ pages = []
+
+ for file in domain_data:
+     text_loader = TextLoader(file)
+     file_pages = text_loader.load()
+     pages.extend(file_pages)
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ splitter = RecursiveCharacterTextSplitter(
+     chunk_size=300,
+     chunk_overlap=64,
+     separators=["\n\n"]
+ )
+
+ documents = splitter.split_documents(pages)
+ content = [doc.page_content.strip() for doc in documents]
+
+ ST = SentenceTransformer(ST_MODEL)
+ embeddings = ST.encode(content)
+
+ features = Features({
+     'text': Value('string'),
+     'embeddings': Sequence(Value('float32'))
+ })
+
+ data = {'text': content, 'embeddings': [embedding.tolist() for embedding in embeddings]}
+ dataset = Dataset.from_dict(data, features=features)
+
+ dataset.add_faiss_index(column='embeddings')
+
+ def retrieve(query, top_k=3):
+     query_embedding = ST.encode([query])
+     scores, retrieved_examples = dataset.get_nearest_examples('embeddings', query_embedding, k=top_k)
+
+     return scores, retrieved_examples['text']
+
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+     quantization_config=bnb_config,
+ )
+
+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+
+     return PROMPT
+
+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     scores, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature + 0.1,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ # demo = gr.ChatInterface(
+ #     respond,
+ #     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+ # )
+
+ demo = gr.ChatInterface(
+     chat_function,
+     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
+ )
+
+ if __name__ == "__main__":
+     demo.launch(share=True)
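The retrieval path in this file can be smoke-tested on its own; a minimal sketch, assuming ./data holds at least one .txt file and the ST, dataset, and retrieve objects defined above are in scope (the query string is illustrative):

    # Hypothetical check of the FAISS-backed retrieval step, not part of the commit.
    scores, texts = retrieve("Apa itu pelecehan seksual?", top_k=1)
    for score, text in zip(scores, texts):
        # datasets' default flat FAISS index returns L2 distances: lower is closer.
        print(f"{score:.3f}  {text[:80]}")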
app.py CHANGED
@@ -12,16 +12,26 @@ import gradio as gr

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
- # BASE_MODEL = "HuggingFaceH4/zephyr-7b-beta"
- # BASE_MODEL = "HuggingFaceH4/mistral-7b-sft-beta"
- # BASE_MODEL = "openai-community/gpt2"
DOMAIN_DATA_DIR = "./data"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""
- TOP_K = 1

domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []
@@ -60,62 +70,62 @@ def retrieve(query, top_k=3):

    return scores, retrieved_examples['text']

- client = InferenceClient(BASE_MODEL)
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     max_tokens=256,
-     temperature=0.4,
-     top_p=0.9,
- ):
-     # Retrieve top 3 relevant documents based on the user's query
-     _, retrieved_docs = retrieve(message, top_k=TOP_K)
-
-     # Prepare the retrieved context
-     context = "\n".join([f"Dokumen {i+1}: {doc}" for i, doc in enumerate(retrieved_docs)])
-
-     print(f"Feed:\n{context}")
-
-     messages = [{"role": "system", "content": SYS_MSG}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     # messages.append({"role": "user", "content": message})
-
-     # Append the current user message along with the retrieved context
-     user_context = f"{message}\nKonteks:\n{context}"
-     messages.append({"role": "user", "content": user_context})

-     response = ""

-     for message in client.chat_completion(
        messages,
-         max_tokens=max_tokens,
-         # stream=False,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         print(f"Message:\n{message}\n\n")
-         token = message.choices[0].delta.content
-
-         response += token
-         print(f"Response:\n{response}")
-         yield response
-
-     """
-     For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-     """
demo = gr.ChatInterface(
-     respond,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
)

-
if __name__ == "__main__":
    demo.launch(share=True)
 

ST_MODEL = "LazarusNLP/all-indo-e5-small-v4"
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
DOMAIN_DATA_DIR = "./data"
+ CACHE_DIR = "./cache"
SYS_MSG = """
Kamu adalah asisten dalam sebuah perusahaan penyedia listrik (PLN) yang membantu menjawab pertanyaan seputar 'sexual harassment' dalam Bahasa Indonesia.
Jawab dengan singkat menggunakan konteks untuk menjawab pertanyaan dalam Bahasa Indonesia.
"""

+ # LOGIN HF Auth
+ from huggingface_hub import login
+
+ # Get the API token from an environment variable (if it is stored in secrets)
+ import os
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Authenticate manually using the token
+ login(token=hf_token)
+
+ # ----------------------------------------------------------------------------------------------------------
+ # RAG PROCESS
+ TOP_K = 1
domain_data = [os.path.join(DOMAIN_DATA_DIR, f) for f in os.listdir(DOMAIN_DATA_DIR) if f.endswith('.txt')]
pages = []


    return scores, retrieved_examples['text']

+ # END RAG
+ # ----------------------------------------------------------------------------------------------------------

+ # LLM
+ # use quantization to lower GPU usage
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )

+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, cache_dir=CACHE_DIR)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.bfloat16,
+     # device_map="auto",
+     quantization_config=bnb_config,
+     cache_dir=CACHE_DIR
+ )

+ def format_prompt(prompt, retrieved_documents, k):
+     """using the retrieved documents we will prompt the model to generate our responses"""
+     PROMPT = f"Pertanyaan:{prompt}\nKonteks:"
+     for idx in range(k):
+         PROMPT += f"{retrieved_documents[idx]}\n"
+     return PROMPT

+ def chat_function(message, history, max_new_tokens=256, temperature=0.6):
+     _, retrieved_doc = retrieve(message, TOP_K)
+     formatted_prompt = format_prompt(message, retrieved_doc, TOP_K)
+
+     messages = [{"role": "system", "content": SYS_MSG},
+                 {"role": "user", "content": formatted_prompt}]
+     prompt = pipeline.tokenizer.apply_chat_template(
        messages,
+         tokenize=False,
+         add_generation_prompt=True,)
+     print(f"Prompt: {prompt}\n")
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=0.9,)
+     return outputs[0]["generated_text"][len(prompt):]
+
demo = gr.ChatInterface(
+     chat_function,
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale = 7),
+     chatbot=gr.Chatbot(height=400),
)

if __name__ == "__main__":
    demo.launch(share=True)
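One last observation: gr.ChatInterface calls chat_function with only (message, history), so the max_new_tokens and temperature parameters always keep their defaults. If they are meant to be user-tunable, Gradio's additional_inputs is one option; a sketch under that assumption (slider ranges are illustrative, not from the commit):

    # Hypothetical variant, not part of the commit: expose generation knobs in the UI.
    demo = gr.ChatInterface(
        chat_function,
        textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
        chatbot=gr.Chatbot(height=400),
        additional_inputs=[
            gr.Slider(32, 1024, value=256, step=32, label="max_new_tokens"),
            gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="temperature"),
        ],
    )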