Nithish3115 committed
Commit e0f9a34 · verified · 1 Parent(s): 1659873

Update app.py

Files changed (1):
  1. app.py +203 -147
app.py CHANGED
@@ -2,6 +2,13 @@ import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                    handlers=[logging.StreamHandler()])
+logger = logging.getLogger(__name__)
 
 # Define paths for storage - avoid persistent folder issues
 MODEL_CACHE_DIR = "./model_cache"
@@ -19,7 +26,7 @@ os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
 
 # Initialize the model and tokenizer - only when explicitly requested
 def initialize_model():
-    print("Loading model and tokenizer... This may take a few minutes.")
+    logger.info("Loading model and tokenizer... This may take a few minutes.")
 
     try:
         # Load the tokenizer
@@ -32,197 +39,246 @@ def initialize_model():
         model = AutoModelForCausalLM.from_pretrained(
             "abhinand/tamil-llama-7b-instruct-v0.2",
             device_map="auto",
-            torch_dtype="auto",
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             low_cpu_mem_usage=True,
             cache_dir=MODEL_CACHE_DIR
         )
 
-        print("Model and tokenizer loaded successfully!")
+        logger.info(f"Model device: {next(model.parameters()).device}")
+        logger.info("Model and tokenizer loaded successfully!")
         return model, tokenizer
     except Exception as e:
-        print(f"Error loading model: {e}")
+        logger.error(f"Error loading model: {e}")
         return None, None
 
 # Generate response
 def generate_response(model, tokenizer, user_input, chat_history, temperature=0.2, top_p=1.0, top_k=40):
     # Check if model and tokenizer are loaded
     if model is None or tokenizer is None:
-        return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்." # Model not loaded, please click 'Load Model' button
-
-    # System message for the Tamil LLaMA model
-    system_message = "You are a helpful assistant that provides accurate information in Tamil language."
-
-    # Create the prompt using the template from documentation
-    prompt_template = f"<|im_start|>system\n{system_message}<|im_end|>\n"
-
-    # Process conversation history - chat_history format is list of tuples [(user_msg, bot_msg), ...]
-    if chat_history:
-        for user_msg, bot_msg in chat_history:
-            if user_msg and bot_msg: # Ensure both messages exist
-                prompt_template += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
-                prompt_template += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
-
-    # Add the current user message
-    prompt_template += f"<|im_start|>user\n{user_input}<|im_end|>\n"
-    prompt_template += "<|im_start|>assistant\n"
+        return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்." # Model not loaded
 
     try:
+        logger.info(f"Generating response for input: {user_input[:50]}...")
+
+        # Simple prompt approach to test basic generation
+        prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
+
         # Tokenize input
-        inputs = tokenizer(prompt_template, return_tensors="pt", padding=True)
+        inputs = tokenizer(prompt, return_tensors="pt")
         input_ids = inputs["input_ids"].to(model.device)
         attention_mask = inputs["attention_mask"].to(model.device)
 
+        # Debug info
+        logger.info(f"Input shape: {input_ids.shape}")
+        logger.info(f"Device: {input_ids.device}")
+
         # Generate response with user-specified parameters
         with torch.no_grad():
-            output = model.generate(
+            output_ids = model.generate(
                 input_ids,
                 attention_mask=attention_mask,
-                max_new_tokens=256,
+                max_new_tokens=100, # Start with a smaller value for testing
                 do_sample=True,
                 temperature=temperature,
                 top_p=top_p,
                 top_k=top_k,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.encode("<|im_end|>", add_special_tokens=False)[0] if "<|im_end|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
+                pad_token_id=tokenizer.eos_token_id
             )
 
-        # Decode output - get only the generated part
-        prompt_length = input_ids.shape[1]
-        generated_ids = output[0][prompt_length:]
-        generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)
+        # Get only the generated part
+        new_tokens = output_ids[0, input_ids.shape[1]:]
+        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+        logger.info(f"Generated response (raw): {response}")
+
+        # Clean up response if needed
+        if "<|im_end|>" in response:
+            response = response.split("<|im_end|>")[0].strip()
+
+        logger.info(f"Final response: {response}")
+
+        # Fallback if empty response
+        if not response or response.isspace():
+            logger.warning("Empty response generated, returning fallback message")
+            return "வருந்துகிறேன், பதிலை உருவாக்குவதில் சிக்கல் உள்ளது. மீண்டும் முயற்சிக்கவும்." # Sorry, there was a problem generating a response
+
+        return response
 
-        # Extract the response by removing special tokens
-        assistant_response = generated_text.split("<|im_end|>")[0].strip() if "<|im_end|>" in generated_text else generated_text.strip()
-
-        print(f"Generated response: {assistant_response}") # Debug print
-        return assistant_response
     except Exception as e:
-        print(f"Error generating response: {e}")
-        return f"பிழை ஏற்பட்டது. மீண்டும் முயற்சிக்கவும்." # Error occurred, please try again
-
-# Function to vote/like a response
-def vote(data, vote_type, model_name):
-    # This is a placeholder for the voting functionality
-    print(f"Received {vote_type} for response: {data}")
-    return data
+        logger.error(f"Error generating response: {e}", exc_info=True)
+        return f"பிழை ஏற்பட்டது: {str(e)}" # Error occurred
 
 # Create the Gradio interface
 def create_chatbot_interface():
-    with gr.Blocks(css="css/index.css") as demo:
+    with gr.Blocks() as demo:
         title = "# தமிழ் உரையாடல் பொத்தான் (Tamil Chatbot)"
         description = "Tamil LLaMA 7B Instruct model with user-controlled generation parameters."
 
         gr.Markdown(title)
         gr.Markdown(description)
 
-        # Model loading indicator
-        with gr.Row():
-            model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
-            load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
-
-        # Model and tokenizer states
-        model = gr.State(None)
-        tokenizer = gr.State(None)
-
-        # Parameter sliders
-        with gr.Accordion("Generation Parameters", open=False):
-            temperature = gr.Slider(
-                label="temperature",
-                value=0.2,
-                minimum=0.0,
-                maximum=2.0,
-                step=0.05,
-                interactive=True,
-                info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
-            )
+        # Add a direct testing area to debug the model
+        with gr.Tab("Debug Mode"):
+            with gr.Row():
+                debug_status = gr.Markdown("⚠️ Debug Mode - Model not loaded")
+                debug_load_model_btn = gr.Button("Load Model (Debug)")
+
+            debug_model = gr.State(None)
+            debug_tokenizer = gr.State(None)
+
+            with gr.Row():
+                with gr.Column(scale=3):
+                    debug_input = gr.Textbox(label="Input Text", lines=3)
+                    debug_submit = gr.Button("Generate Response")
+                with gr.Column(scale=3):
+                    debug_output = gr.Textbox(label="Raw Output", lines=8)
+
+            def debug_load_model_fn():
+                m, t = initialize_model()
+                if m is not None and t is not None:
+                    return "✅ Debug Model loaded", m, t
+                else:
+                    return "❌ Debug Model loading failed", None, None
+
+            def debug_generate(input_text, model, tokenizer):
+                if model is None:
+                    return "Model not loaded yet. Please load the model first."
+
+                try:
+                    # Simple direct generation for testing
+                    prompt = f"<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant\n"
+                    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+                    with torch.no_grad():
+                        output_ids = model.generate(
+                            inputs["input_ids"],
+                            max_new_tokens=100,
+                            temperature=0.2,
+                            do_sample=True
+                        )
+
+                    full_output = tokenizer.decode(output_ids[0], skip_special_tokens=False)
+                    response = full_output[len(prompt):]
+
+                    # Log the full output for debugging
+                    logger.info(f"Debug full output: {full_output}")
+
+                    return f"FULL OUTPUT:\n{full_output}\n\nEXTRACTED:\n{response}"
+                except Exception as e:
+                    logger.error(f"Debug error: {e}", exc_info=True)
+                    return f"Error: {str(e)}"
 
-            top_p = gr.Slider(
-                label="top_p",
-                value=1.0,
-                minimum=0.0,
-                maximum=1.0,
-                step=0.01,
-                interactive=True,
-                info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it"
+            debug_load_model_btn.click(
+                debug_load_model_fn,
+                outputs=[debug_status, debug_model, debug_tokenizer]
            )
 
-            top_k = gr.Slider(
-                label="top_k",
-                value=40,
-                minimum=0,
-                maximum=1000,
-                step=1,
-                interactive=True,
-                info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit."
+            debug_submit.click(
+                debug_generate,
+                inputs=[debug_input, debug_model, debug_tokenizer],
+                outputs=[debug_output]
            )
 
-        # Function to load model on button click
-        def load_model_fn():
-            m, t = initialize_model()
-            if m is not None and t is not None:
-                return " மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
-            else:
-                return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
-
-        # Function to respond to user messages
-        def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
-            # Check if model is loaded
-            if model_state is None:
-                bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
-            else:
-                # Generate bot response with parameters
-                bot_message = generate_response(
-                    model_state,
-                    tokenizer_state,
-                    message,
-                    history,
-                    temperature=temp,
-                    top_p=tp,
-                    top_k=tk
+        # Regular chatbot interface
+        with gr.Tab("Chatbot"):
+            # Model loading indicator
+            with gr.Row():
+                model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
+                load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
+
+            # Model and tokenizer states
+            model = gr.State(None)
+            tokenizer = gr.State(None)
+
+            # Parameter sliders
+            with gr.Accordion("Generation Parameters", open=False):
+                temperature = gr.Slider(
+                    label="temperature",
+                    value=0.2,
+                    minimum=0.0,
+                    maximum=2.0,
+                    step=0.05,
+                    interactive=True
+                )
+
+                top_p = gr.Slider(
+                    label="top_p",
+                    value=1.0,
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.01,
+                    interactive=True
+                )
+
+                top_k = gr.Slider(
+                    label="top_k",
+                    value=40,
+                    minimum=0,
+                    maximum=1000,
+                    step=1,
+                    interactive=True
                )
 
-            # Update history with new exchange
-            history = history + [(message, bot_message)]
-            return "", history
-
-        # Create the chat interface
-        chatbot = gr.Chatbot()
-        msg = gr.Textbox(
-            show_label=False,
-            placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
-        )
-        clear = gr.Button("அழி (Clear)")
-
-        # Set up the chat interface
-        msg.submit(
-            chat_function,
-            [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
-            [msg, chatbot],
-            queue=True,
-        )
-        clear.click(lambda: None, None, chatbot, queue=False)
-
-        # Add examples
-        examples = gr.Examples(
-            examples=[
-                "வணக்கம், நீங்கள் யார்?",
-                "நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?",
-                "பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்.",
-                "சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?",
-                "விரைவாக தூங்குவது எப்படி?"
-            ],
-            inputs=msg,
-        )
-
-        # Connect the model loading button
-        load_model_btn.click(
-            load_model_fn,
-            outputs=[model_status, model, tokenizer]
-        )
-
-        # Add like functionality
-        chatbot.like(vote, None, None)
-
+            # Function to load model on button click
+            def load_model_fn():
+                m, t = initialize_model()
+                if m is not None and t is not None:
+                    return "✅ மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
+                else:
+                    return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
+
+            # Function to respond to user messages - with error handling
+            def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
+                if not message.strip():
+                    return "", history
+
+                try:
+                    # Check if model is loaded
+                    if model_state is None:
+                        bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
+                    else:
+                        # Generate bot response with parameters
+                        bot_message = generate_response(
+                            model_state,
+                            tokenizer_state,
+                            message,
+                            history,
+                            temperature=temp,
+                            top_p=tp,
+                            top_k=tk
+                        )
+
+                    # Create new history entry
+                    new_history = history + [(message, bot_message)]
+                    return "", new_history
+
+                except Exception as e:
+                    logger.error(f"Chat function error: {e}", exc_info=True)
+                    return "", history + [(message, f"Error: {str(e)}")]
+
+            # Create the chat interface
+            chatbot = gr.Chatbot()
+            msg = gr.TextArea(
+                placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
+                lines=3
+            )
+            clear = gr.Button("அழி (Clear)")
+
+            # Set up the chat interface
+            msg.submit(
+                chat_function,
+                [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
+                [msg, chatbot]
+            )
+
+            clear.click(lambda: None, None, chatbot, queue=False)
+
+            # Connect the model loading button
+            load_model_btn.click(
+                load_model_fn,
+                outputs=[model_status, model, tokenizer]
+            )
+
        return demo
 
 # Create and launch the demo
@@ -230,4 +286,4 @@ demo = create_chatbot_interface()
 
 # Launch the demo
 if __name__ == "__main__":
-    demo.queue(max_size=3).launch()
+    demo.queue(concurrency_count=1).launch()
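
Note: the rewritten generate_response() now builds a single-turn prompt and no longer folds the chat_history argument it still receives into the prompt, so multi-turn context is dropped. A minimal sketch of how the history could be reattached using the same ChatML-style template (a hypothetical helper, not part of this commit):

    # Hypothetical helper (not in this commit): rebuild a multi-turn prompt from
    # Gradio's [(user_msg, bot_msg), ...] history using the same ChatML template.
    def build_prompt(user_input, chat_history):
        prompt = ""
        for user_msg, bot_msg in chat_history or []:
            if user_msg and bot_msg:  # skip incomplete turns
                prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
                prompt += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
        prompt += f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
        return prompt

Also note that demo.queue(concurrency_count=1) is the Gradio 3.x signature; Gradio 4.x removed concurrency_count in favor of queue(default_concurrency_limit=...), so this call is version-dependent.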