DragonProgrammer committed on
Commit bac1dc6 · verified · 1 Parent(s): 619ec30

Update app.py

Files changed (1): app.py +12 -42
app.py CHANGED
@@ -66,52 +66,22 @@ class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
 
-        model_id = "google/gemma-2b-it"
+        # --- CHANGE 1: Switched to a smaller, CPU-friendly model ---
+        model_id = "google/flan-t5-base"
 
         try:
-            hf_auth_token = os.getenv("HF_TOKEN")
-            if not hf_auth_token:
-                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
-            else:
-                print("HF_TOKEN secret found.")
-
-            # --- CORRECTED MODEL LOADING ---
-
-            # 1. Create the 4-bit quantization configuration
-            print("Creating 4-bit quantization config...")
-            quantization_config = transformers.BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype="bfloat16"
-            )
-            print("Quantization config created.")
-
-            # 2. Load the tokenizer
-            print(f"Loading tokenizer for: {model_id}")
-            tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
-            print("Tokenizer loaded successfully.")
-
-            # 3. Load the model with the quantization config
-            print(f"Loading model '{model_id}' with quantization...")
-            model = transformers.AutoModelForCausalLM.from_pretrained(
-                model_id,
-                quantization_config=quantization_config,
-                device_map="auto", # Automatically maps model to available hardware (CPU/GPU)
-                token=hf_auth_token
-            )
-            print("Model loaded successfully.")
-
-            # 4. Create the Hugging Face pipeline with the pre-loaded model and tokenizer
-            print("Creating text-generation pipeline...")
+            hf_auth_token = os.getenv("HF_TOKEN") # Good practice to keep, but not needed for FLAN-T5
+
+            # --- CHANGE 2 & 3: Use the correct task for T5 and remove quantization ---
+            # We no longer need to load the tokenizer and model separately,
+            # as we are not applying a custom quantization config.
+            print(f"Loading model pipeline for: {model_id}")
             llm_pipeline = transformers.pipeline(
-                "text-generation",
-                model=model,
-                tokenizer=tokenizer,
-                # No need to pass quantization_config here anymore
+                "text2text-generation", # <<< IMPORTANT: Changed task for T5 models
+                model=model_id,
+                device_map="auto"
             )
-            print("Model pipeline created successfully.")
-
-            # --- END CORRECTION ---
+            print("Model pipeline loaded successfully.")
 
             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
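
For reference, a minimal standalone sketch of what the new loading path amounts to, so it can be smoke-tested outside the Space. This assumes HuggingFacePipeline comes from the langchain_huggingface package (app.py may import it from a different module), that the accelerate package is installed for device_map="auto", and the prompt string is purely illustrative:

# Minimal sketch of the updated FLAN-T5 loading path (assumptions noted above).
import transformers
from langchain_huggingface import HuggingFacePipeline  # assumed import source

model_id = "google/flan-t5-base"

# FLAN-T5 is an encoder-decoder (seq2seq) model, so the pipeline task is
# "text2text-generation" rather than the "text-generation" task used for causal Gemma.
llm_pipeline = transformers.pipeline(
    "text2text-generation",
    model=model_id,
    device_map="auto",  # GPU if available, otherwise CPU; requires accelerate
)

# Wrap the pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Illustrative smoke test; any short instruction-style prompt works.
print(llm.invoke("Summarize: LangChain wraps Hugging Face pipelines as LLMs."))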