Kuberwastaken committed on
Commit
bba8dbb
·
1 Parent(s): f599acb

Fixed bool iterable error

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -33,18 +33,16 @@ def initialize_vision_model():
33
  def analyze_image(image, vision_components):
34
  processor = vision_components["processor"]
35
  model = vision_components["model"]
36
-
37
- # Convert to RGB if needed
38
  if isinstance(image, np.ndarray):
39
  image = Image.fromarray(image)
40
-
41
- inputs = processor(image, return_tensors="pt")
42
-
43
- with torch.no_grad():
44
- outputs = model.generate(**inputs, max_length=30)
45
-
46
- caption = processor.decode(outputs[0], skip_special_tokens=True)
47
- return caption
48
 
49
  def initialize_llm():
50
  model_id = "meta-llama/Llama-3.2-1B-Instruct"
@@ -76,25 +74,22 @@ def initialize_llm():
76
  def generate_roast(caption, llm_components):
77
  model = llm_components["model"]
78
  tokenizer = llm_components["tokenizer"]
79
-
80
  prompt = f"""[INST] You are AsianMOM, a stereotypical Asian mother who always has high expectations. \nYou just observed your child doing this: \"{caption}\"\n \nRespond with a short, humorous roast (maximum 2-3 sentences) in the style of a stereotypical Asian mother. \nInclude at least one of these elements:\n- Comparison to more successful relatives/cousins\n- High expectations about academic success\n- Mild threats about using slippers\n- Questioning life choices\n- Asking when they'll get married or have kids\n- Commenting on appearance\n- Saying \"back in my day\" and describing hardship\n\nBe funny but not hurtful. Keep it brief. [/INST]"""
81
-
82
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
83
-
84
- with torch.no_grad():
85
- outputs = model.generate(
86
- **inputs,
87
- max_length=300,
88
- temperature=0.7,
89
- top_p=0.9,
90
- do_sample=True
91
- )
92
-
93
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
94
- # Extract just the response part, not the prompt
95
- response = response.split("[/INST]")[1].strip()
96
-
97
- return response
98
 
99
  # Parler-TTS setup
100
  parler_device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -151,6 +146,9 @@ def setup_processing_chain(video_feed, analysis_output, roast_output, audio_outp
151
  def process_webcam(image):
152
  nonlocal last_process_time
153
  current_time = time.time()
 
 
 
154
  if current_time - last_process_time >= processing_interval and image is not None:
155
  last_process_time = current_time
156
  caption, roast, audio = process_frame(
@@ -158,8 +156,11 @@ def setup_processing_chain(video_feed, analysis_output, roast_output, audio_outp
158
  vision_components,
159
  llm_components
160
  )
161
- return image, caption, roast, audio
162
- return image, None, None, None
 
 
 
163
  video_feed.change(
164
  process_webcam,
165
  inputs=[video_feed],
 
33
def analyze_image(image, vision_components):
    """Caption *image* with the vision model; return "" on any failure."""
    processor = vision_components["processor"]
    model = vision_components["model"]
    # Webcam frames arrive as numpy arrays; the processor expects a PIL image.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    try:
        model_inputs = processor(image, return_tensors="pt")
        with torch.no_grad():
            generated_ids = model.generate(**model_inputs, max_length=30)
        decoded = processor.decode(generated_ids[0], skip_special_tokens=True)
        # Guard against a non-string decode so downstream string handling
        # never receives an unexpected type.
        if isinstance(decoded, str):
            return decoded
        return ""
    except Exception:
        # Best-effort: callers treat an empty caption as "nothing to say".
        return ""  # Return empty string on error
46
 
47
  def initialize_llm():
48
  model_id = "meta-llama/Llama-3.2-1B-Instruct"
 
74
def generate_roast(caption, llm_components):
    """Generate a short "AsianMOM" roast reacting to the observed *caption*.

    Parameters:
        caption: string description of what the webcam saw.
        llm_components: dict with "model" and "tokenizer" entries.

    Returns:
        The model's reply as a string, or "" if generation fails for any
        reason (best-effort: an empty roast is handled by callers).
    """
    model = llm_components["model"]
    tokenizer = llm_components["tokenizer"]
    prompt = f"""[INST] You are AsianMOM, a stereotypical Asian mother who always has high expectations. \nYou just observed your child doing this: \"{caption}\"\n \nRespond with a short, humorous roast (maximum 2-3 sentences) in the style of a stereotypical Asian mother. \nInclude at least one of these elements:\n- Comparison to more successful relatives/cousins\n- High expectations about academic success\n- Mild threats about using slippers\n- Questioning life choices\n- Asking when they'll get married or have kids\n- Commenting on appearance\n- Saying \"back in my day\" and describing hardship\n\nBe funny but not hurtful. Keep it brief. [/INST]"""
    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # BUG FIX: the previous max_length=300 counted the prompt
                # tokens too; this long prompt left little or no room for an
                # answer. max_new_tokens bounds only the generated reply.
                max_new_tokens=150,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # The decoded text echoes the prompt; keep only the part after the
        # [/INST] marker. Fall back to the whole text if the marker is
        # missing instead of raising IndexError (which previously collapsed
        # every such response to "").
        parts = response.split("[/INST]")
        response = parts[1].strip() if len(parts) > 1 else response.strip()
        return response if isinstance(response, str) else ""
    except Exception:
        return ""  # Return empty string on error
 
 
93
 
94
  # Parler-TTS setup
95
  parler_device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
146
  def process_webcam(image):
147
  nonlocal last_process_time
148
  current_time = time.time()
149
+ default_caption = ""
150
+ default_roast = ""
151
+ default_audio = (PARLER_SAMPLE_RATE, np.zeros(1))
152
  if current_time - last_process_time >= processing_interval and image is not None:
153
  last_process_time = current_time
154
  caption, roast, audio = process_frame(
 
156
  vision_components,
157
  llm_components
158
  )
159
+ final_caption = caption if isinstance(caption, str) else default_caption
160
+ final_roast = roast if isinstance(roast, str) else default_roast
161
+ final_audio = audio if isinstance(audio, tuple) and len(audio) == 2 and isinstance(audio[1], np.ndarray) else default_audio
162
+ return image, final_caption, final_roast, final_audio
163
+ return image, default_caption, default_roast, default_audio
164
  video_feed.change(
165
  process_webcam,
166
  inputs=[video_feed],