Saiyaswanth007 committed on
Commit
4e75e2b
·
1 Parent(s): 8acaa5d

Updated HTML to JSON

Browse files
Files changed (2) hide show
  1. shared.py +87 -17
  2. ui.py +51 -4
shared.py CHANGED
@@ -455,43 +455,100 @@ class RealtimeSpeakerDiarization:
455
  return f"Settings updated: Threshold={threshold:.2f}, Max Speakers={max_speakers}"
456
 
457
  def get_formatted_conversation(self):
458
- """Get the formatted conversation"""
459
- return self.current_conversation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
 
461
  def get_status_info(self):
462
- """Get current status information"""
463
  if not self.speaker_detector:
464
- return "Speaker detector not initialized"
465
 
466
  try:
467
- status = self.speaker_detector.get_status_info()
468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
  status_lines = [
470
  f"**Current Speaker:** {status['current_speaker'] + 1}",
471
- f"**Active Speakers:** {status['active_speakers']} of {status['max_speakers']}",
472
  f"**Last Similarity:** {status['last_similarity']:.3f}",
473
- f"**Change Threshold:** {status['threshold']:.2f}",
474
- f"**Total Sentences:** {len(self.full_sentences)}",
475
- f"**Segments Processed:** {status['segment_counter']}",
476
  "",
477
  "**Speaker Activity:**"
478
  ]
479
 
480
- for i in range(status['max_speakers']):
481
- color_name = SPEAKER_COLOR_NAMES[i] if i < len(SPEAKER_COLOR_NAMES) else f"Speaker {i+1}"
482
- count = status['speaker_counts'][i]
483
- active = "🟢" if count > 0 else "⚫"
484
- status_lines.append(f"{active} Speaker {i+1} ({color_name}): {count} segments")
485
 
486
- return "\n".join(status_lines)
 
 
487
 
488
  except Exception as e:
489
- return f"Error getting status: {e}"
 
 
490
 
491
  def process_audio_chunk(self, audio_data, sample_rate=16000):
492
  """Process audio chunk from WebSocket input"""
493
  if not self.is_running or self.audio_processor is None:
494
- return
495
 
496
  try:
497
  # Convert bytes to numpy array if needed
@@ -517,6 +574,10 @@ class RealtimeSpeakerDiarization:
517
  self.audio_processor.add_audio_chunk(audio_data)
518
 
519
  # Periodically extract embeddings for speaker detection
 
 
 
 
520
  if len(self.audio_processor.audio_buffer) % (SAMPLE_RATE // 2) == 0: # Every 0.5 seconds
521
  embedding = self.audio_processor.extract_embedding_from_buffer()
522
  if embedding is not None:
@@ -527,9 +588,18 @@ class RealtimeSpeakerDiarization:
527
  with self.transcription_lock:
528
  self.full_sentences.append((f"[Audio segment {self.speaker_detector.segment_counter}]", speaker_id))
529
  self.update_conversation_display()
 
 
 
 
 
 
 
 
530
 
531
  except Exception as e:
532
  logger.error(f"Error processing audio chunk: {e}")
 
533
 
534
  def resample_audio(self, audio_bytes, from_rate, to_rate):
535
  """Resample audio to target sample rate"""
 
455
  return f"Settings updated: Threshold={threshold:.2f}, Max Speakers={max_speakers}"
456
 
457
  def get_formatted_conversation(self):
458
+ """Get the formatted conversation with structured data"""
459
+ try:
460
+ # Create conversation HTML format as before
461
+ html_content = self.current_conversation
462
+
463
+ # Create structured data
464
+ structured_data = {
465
+ "html_content": html_content,
466
+ "sentences": [],
467
+ "current_transcript": self.last_transcription,
468
+ "current_speaker": self.speaker_detector.current_speaker if self.speaker_detector else 0
469
+ }
470
+
471
+ # Add sentence data
472
+ for sentence_text, speaker_id in self.full_sentences:
473
+ color = self.speaker_detector.get_color_for_speaker(speaker_id) if self.speaker_detector else "#FFFFFF"
474
+ structured_data["sentences"].append({
475
+ "text": sentence_text,
476
+ "speaker_id": speaker_id,
477
+ "speaker_name": f"Speaker {speaker_id + 1}",
478
+ "color": color
479
+ })
480
+
481
+ return html_content
482
+ except Exception as e:
483
+ logger.error(f"Error formatting conversation: {e}")
484
+ return f"<i>Error formatting conversation: {str(e)}</i>"
485
 
486
  def get_status_info(self):
487
+ """Get current status information as structured data"""
488
  if not self.speaker_detector:
489
+ return {"error": "Speaker detector not initialized"}
490
 
491
  try:
492
+ speaker_status = self.speaker_detector.get_status_info()
493
 
494
+ # Format speaker activity
495
+ speaker_activity = []
496
+ for i in range(speaker_status['max_speakers']):
497
+ color_name = SPEAKER_COLOR_NAMES[i] if i < len(SPEAKER_COLOR_NAMES) else f"Speaker {i+1}"
498
+ count = speaker_status['speaker_counts'][i]
499
+ active = count > 0
500
+ speaker_activity.append({
501
+ "id": i,
502
+ "name": f"Speaker {i+1}",
503
+ "color": SPEAKER_COLORS[i] if i < len(SPEAKER_COLORS) else "#FFFFFF",
504
+ "color_name": color_name,
505
+ "segment_count": count,
506
+ "active": active
507
+ })
508
+
509
+ # Create structured status object
510
+ status = {
511
+ "current_speaker": speaker_status['current_speaker'],
512
+ "current_speaker_name": f"Speaker {speaker_status['current_speaker'] + 1}",
513
+ "active_speakers_count": speaker_status['active_speakers'],
514
+ "max_speakers": speaker_status['max_speakers'],
515
+ "last_similarity": speaker_status['last_similarity'],
516
+ "change_threshold": speaker_status['threshold'],
517
+ "total_sentences": len(self.full_sentences),
518
+ "segments_processed": speaker_status['segment_counter'],
519
+ "speaker_activity": speaker_activity,
520
+ "timestamp": time.time()
521
+ }
522
+
523
+ # Also create a formatted text version for UI display
524
  status_lines = [
525
  f"**Current Speaker:** {status['current_speaker'] + 1}",
526
+ f"**Active Speakers:** {status['active_speakers_count']} of {status['max_speakers']}",
527
  f"**Last Similarity:** {status['last_similarity']:.3f}",
528
+ f"**Change Threshold:** {status['change_threshold']:.2f}",
529
+ f"**Total Sentences:** {status['total_sentences']}",
530
+ f"**Segments Processed:** {status['segments_processed']}",
531
  "",
532
  "**Speaker Activity:**"
533
  ]
534
 
535
+ for speaker in status["speaker_activity"]:
536
+ active = "🟢" if speaker["active"] else ""
537
+ status_lines.append(f"{active} Speaker {speaker['id']+1} ({speaker['color_name']}): {speaker['segment_count']} segments")
 
 
538
 
539
+ status["formatted_text"] = "\n".join(status_lines)
540
+
541
+ return status
542
 
543
  except Exception as e:
544
+ error_msg = f"Error getting status: {e}"
545
+ logger.error(error_msg)
546
+ return {"error": error_msg, "formatted_text": error_msg}
547
 
548
  def process_audio_chunk(self, audio_data, sample_rate=16000):
549
  """Process audio chunk from WebSocket input"""
550
  if not self.is_running or self.audio_processor is None:
551
+ return {"status": "not_running"}
552
 
553
  try:
554
  # Convert bytes to numpy array if needed
 
574
  self.audio_processor.add_audio_chunk(audio_data)
575
 
576
  # Periodically extract embeddings for speaker detection
577
+ embedding = None
578
+ speaker_id = self.speaker_detector.current_speaker
579
+ similarity = 1.0
580
+
581
  if len(self.audio_processor.audio_buffer) % (SAMPLE_RATE // 2) == 0: # Every 0.5 seconds
582
  embedding = self.audio_processor.extract_embedding_from_buffer()
583
  if embedding is not None:
 
588
  with self.transcription_lock:
589
  self.full_sentences.append((f"[Audio segment {self.speaker_detector.segment_counter}]", speaker_id))
590
  self.update_conversation_display()
591
+
592
+ # Return processing result
593
+ return {
594
+ "status": "processed",
595
+ "buffer_size": len(self.audio_processor.audio_buffer),
596
+ "speaker_id": speaker_id,
597
+ "similarity": similarity if embedding is not None else None
598
+ }
599
 
600
  except Exception as e:
601
  logger.error(f"Error processing audio chunk: {e}")
602
+ return {"status": "error", "message": str(e)}
603
 
604
  def resample_audio(self, audio_bytes, from_rate, to_rate):
605
  """Resample audio to target sample rate"""
ui.py CHANGED
@@ -173,10 +173,57 @@ def build_ui():
173
  };
174
 
175
  wsConnection.onmessage = (event) => {
176
- document.getElementById("conversation").innerHTML = event.data;
177
- // Auto-scroll to bottom
178
- const container = document.getElementById("conversation");
179
- container.scrollTop = container.scrollHeight;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  };
181
 
182
  wsConnection.onerror = (error) => {
 
173
  };
174
 
175
  wsConnection.onmessage = (event) => {
176
+ try {
177
+ // Parse the JSON message
178
+ const message = JSON.parse(event.data);
179
+
180
+ // Process different message types
181
+ switch(message.type) {
182
+ case 'transcription':
183
+ // Handle transcription data
184
+ if (message.data && typeof message.data === 'object') {
185
+ document.getElementById("conversation").innerHTML = message.data.conversation_html ||
186
+ JSON.stringify(message.data);
187
+ }
188
+ break;
189
+
190
+ case 'connection':
191
+ console.log('Connection status:', message.status);
192
+ updateStatus(message.status === 'connected' ? 'connected' : 'warning');
193
+ break;
194
+
195
+ case 'conversation_update':
196
+ if (message.conversation_html) {
197
+ document.getElementById("conversation").innerHTML = message.conversation_html;
198
+ }
199
+ break;
200
+
201
+ case 'conversation_cleared':
202
+ document.getElementById("conversation").innerHTML =
203
+ "<i>Conversation cleared. Start speaking again...</i>";
204
+ break;
205
+
206
+ case 'error':
207
+ console.error('Error message from server:', message.message);
208
+ updateStatus('warning', message.message);
209
+ break;
210
+
211
+ default:
212
+ // If it's just HTML content without proper JSON structure (legacy format)
213
+ document.getElementById("conversation").innerHTML = event.data;
214
+ }
215
+
216
+ // Auto-scroll to bottom
217
+ const container = document.getElementById("conversation");
218
+ container.scrollTop = container.scrollHeight;
219
+ } catch (e) {
220
+ // Fallback for non-JSON messages (legacy format)
221
+ document.getElementById("conversation").innerHTML = event.data;
222
+
223
+ // Auto-scroll to bottom
224
+ const container = document.getElementById("conversation");
225
+ container.scrollTop = container.scrollHeight;
226
+ }
227
  };
228
 
229
  wsConnection.onerror = (error) => {