nurqoneah committed on
Commit fbbd368 · verified · 1 Parent(s): b98531c

Update app.py

Files changed (1):
app.py +41 -9
app.py CHANGED
@@ -6,6 +6,9 @@ from langchain.prompts import PromptTemplate
 from langchain.chains import RetrievalQA, ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 import warnings
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+from langchain.llms import HuggingFacePipeline
 import os
 from dotenv import load_dotenv
 
@@ -18,7 +21,7 @@ INITIAL_MESSAGE = """Halo! 👋 Saya adalah asisten kesehatan feminacare yang si
 Silakan ajukan pertanyaan apa saja dan saya akan membantu Anda dengan informasi yang akurat."""
 
 # Model configurations
-MODEL_NAME = "SeaLLMs/SeaLLM-7B-v2"
+MODEL_NAME = "SeaLLMs/SeaLLMs-v3-7B-Chat"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 TOP_K_DOCS = 5
 
@@ -34,15 +37,44 @@ def initialize_models():
 
 def create_llm():
     """Initialize the language model with optimized parameters"""
-    return HuggingFaceHub(
-        repo_id=MODEL_NAME,
-        model_kwargs={
-            "temperature": 0.7,  # Balanced between creativity and accuracy
-            "max_new_tokens": 1024,
-            "top_p": 0.9,
-            "frequency_penalty": 0.5
-        }
-    )
+
+    # Load the model locally in 4-bit NF4 so a 7B model fits in limited GPU memory
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+    )
+
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb_config)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+    # SeaLLMs-v3 is Qwen2-based, so generation should stop at "<|im_end|>";
+    # the Llama-3-style "<|eot_id|>" token is not in this vocabulary
+    terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")]
+
+    text_generation_pipeline = pipeline(
+        model=model,
+        tokenizer=tokenizer,
+        task="text-generation",
+        temperature=0.2,
+        do_sample=True,
+        repetition_penalty=1.1,
+        return_full_text=False,
+        max_new_tokens=200,
+        eos_token_id=terminators,
+    )
+
+    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+
+    # Previous hosted-inference setup, kept for reference:
+    # return HuggingFaceHub(
+    #     repo_id=MODEL_NAME,
+    #     model_kwargs={
+    #         "temperature": 0.7,  # Balanced between creativity and accuracy
+    #         "max_new_tokens": 1024,
+    #         "top_p": 0.9,
+    #         "frequency_penalty": 0.5
+    #     }
+    # )
+    return llm
 
 # Improved prompt template with better context handling and response structure
 PROMPT_TEMPLATE = """
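
For context on the BitsAndBytesConfig added above: NF4 stores weights at about half a byte per parameter, so a 7B checkpoint shrinks from roughly 13 GiB in bfloat16 to about 3.5 GiB before KV cache and activation overhead, which is what makes in-process inference feasible here. A back-of-envelope check (illustrative only, not from the commit):

# Rough VRAM estimate for the model weights alone; illustrative numbers,
# excluding KV cache, activations, and quantization constants.
PARAMS = 7e9  # parameter count of a 7B model

print(f"bf16 weights: {PARAMS * 2.0 / 2**30:.1f} GiB")  # ~13.0 GiB (2 bytes/param)
print(f"nf4 weights : {PARAMS * 0.5 / 2**30:.1f} GiB")  # ~3.3 GiB (4 bits/param)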
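
The diff stops at create_llm(), so the retriever side of app.py is not visible here. A minimal wiring sketch under stated assumptions: the FAISS index, its "faiss_index" path, and the sample question are hypothetical, while EMBEDDING_MODEL, TOP_K_DOCS, and create_llm come from app.py above.

# Hypothetical wiring sketch -- app.py's real retriever setup is not part of
# this diff; FAISS and the "faiss_index" path are assumptions for illustration.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vectorstore = FAISS.load_local("faiss_index", embeddings)  # assumed index location
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K_DOCS})

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = ConversationalRetrievalChain.from_llm(
    llm=create_llm(),  # the quantized local pipeline from this commit
    retriever=retriever,
    memory=memory,
)
answer = chain({"question": "Apa itu menstruasi?"})["answer"]

Because the pipeline is created with return_full_text=False, the chain receives only the newly generated answer rather than the prompt echoed back, which keeps LangChain's output handling clean.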