Omarrran committed on
Commit
968d3a5
·
verified ·
1 Parent(s): 0b8958e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -41
app.py CHANGED
@@ -145,7 +145,7 @@ class TTSDatasetCollector:
145
  continue
146
 
147
  # Split on common sentence endings
148
- parts = line.replace('!', '.').replace('?', '.').split('.')
149
  for part in parts:
150
  part = part.strip()
151
  if part:
@@ -160,7 +160,7 @@ class TTSDatasetCollector:
160
 
161
  try:
162
  # Try NLTK first
163
- self.sentences = nltk.sent_tokenize(text.strip())
164
  except Exception as e:
165
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
166
  # Fallback to simple splitting
@@ -191,8 +191,7 @@ class TTSDatasetCollector:
191
  if not file.name.endswith('.txt'):
192
  return False, "Only .txt files are supported"
193
 
194
- with open(file.name, 'r', encoding='utf-8') as f:
195
- text = f.read()
196
 
197
  return self.process_text(text)
198
 
@@ -214,7 +213,8 @@ class TTSDatasetCollector:
214
  def set_font(self, font_style: str) -> Tuple[bool, str]:
215
  """Set the current font style"""
216
  if font_style not in FONT_STYLES and font_style not in self.custom_fonts:
217
- return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys()) + ', ' + ', '.join(self.custom_fonts.keys())}"
 
218
  self.current_font = font_style
219
  return True, f"Font style set to {font_style}"
220
 
@@ -231,7 +231,7 @@ class TTSDatasetCollector:
231
 
232
  # Save the font file
233
  with open(font_dest, 'wb') as f:
234
- f.write(font_file.read())
235
 
236
  # Add to custom fonts
237
  self.custom_fonts[font_family] = {
@@ -469,7 +469,7 @@ Font_Style: {metadata['font_style']}
469
  metadata = json.load(f)
470
  # Flatten statistics for display
471
  total_sentences = len(self.sentences)
472
- recorded = len(set(metadata['speakers'][list(metadata['speakers'].keys())[0]]['datasets'][list(metadata['speakers'][list(metadata['speakers'].keys())[0]]['datasets'].keys())[0]]['recorded_sentences'])) if metadata['speakers'] else 0
473
  remaining = total_sentences - recorded
474
  stats = {
475
  "Total Sentences": total_sentences,
@@ -512,8 +512,8 @@ def create_interface():
512
  max-width: 1200px !important;
513
  }
514
  .record-button {
515
- font-size: 1.2em !important;
516
- padding: 20px !important;
517
  }
518
  .sentence-display {
519
  font-size: 1.4em !important;
@@ -523,6 +523,9 @@ def create_interface():
523
  margin: 10px 0 !important;
524
  min-height: 100px !important;
525
  }
 
 
 
526
  """
527
 
528
  # Add font-face declarations
@@ -544,38 +547,57 @@ def create_interface():
544
 
545
  with gr.Row():
546
  # Left column - Configuration and Input
547
- with gr.Column():
548
  text_input = gr.Textbox(
549
  label="Paste Text",
550
  placeholder="Paste your text here...",
551
- lines=5
 
552
  )
553
  file_input = gr.File(
554
  label="Or Upload Text File (.txt)",
555
- file_types=[".txt"]
 
556
  )
557
  speaker_id = gr.Textbox(
558
  label="Speaker ID",
559
- placeholder="Enter unique speaker identifier (letters and numbers only)"
 
560
  )
561
  dataset_name = gr.Textbox(
562
  label="Dataset Name",
563
- placeholder="Enter dataset name (letters and numbers only)"
 
564
  )
565
  font_select = gr.Dropdown(
566
  choices=list(FONT_STYLES.keys()),
567
  value="english_serif",
568
- label="Select Font Style"
 
569
  )
570
  # Custom font upload
571
  font_file_input = gr.File(
572
  label="Upload Custom Font (.ttf)",
573
- file_types=[".ttf"]
 
574
  )
575
  add_font_btn = gr.Button("Add Custom Font")
576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
  # Right column - Recording
578
- with gr.Column():
579
  current_text = gr.HTML(
580
  label="Current Sentence",
581
  elem_classes=["sentence-display"]
@@ -584,7 +606,7 @@ def create_interface():
584
  label="Next Sentence",
585
  elem_classes=["sentence-display"]
586
  )
587
- progress = gr.Markdown("")
588
 
589
  audio_recorder = gr.Audio(
590
  label="Record Audio",
@@ -594,26 +616,13 @@ def create_interface():
594
  # Controls
595
  with gr.Row():
596
  prev_btn = gr.Button("Previous", variant="secondary")
597
- save_btn = gr.Button("Save Recording", variant="primary", elem_classes=["record-button"])
598
  next_btn = gr.Button("Next", variant="primary")
599
 
600
- # Status and Progress
601
- status = gr.Textbox(
602
- label="Status",
603
- interactive=False,
604
- max_lines=3
605
- )
606
-
607
- # Dataset Info and Download Links
608
- with gr.Row():
609
- dataset_info = gr.JSON(
610
- label="Dataset Statistics",
611
- value={}
612
- )
613
-
614
- with gr.Row():
615
- download_audio = gr.File(label="Download Audio", interactive=False)
616
- download_transcript = gr.File(label="Download Transcript", interactive=False)
617
 
618
  def process_pasted_text(text):
619
  """Handle pasted text input"""
@@ -637,7 +646,7 @@ def create_interface():
637
  }
638
 
639
  nav_info = collector.get_navigation_info()
640
- progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
641
  return {
642
  current_text: nav_info['current'],
643
  next_text: nav_info['next'],
@@ -681,7 +690,7 @@ def create_interface():
681
  }
682
 
683
  nav_info = collector.get_navigation_info()
684
- progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
685
  return {
686
  current_text: nav_info['current'],
687
  next_text: nav_info['next'],
@@ -717,7 +726,7 @@ def create_interface():
717
 
718
  # Auto-advance to next sentence after successful save
719
  nav_info = collector.navigate("next")
720
- progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
721
  return {
722
  current_text: nav_info['current'],
723
  next_text: nav_info['next'],
@@ -731,7 +740,7 @@ def create_interface():
731
  def navigate_sentences(direction):
732
  """Handle navigation between sentences"""
733
  nav_info = collector.navigate(direction)
734
- progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
735
  return {
736
  current_text: nav_info['current'],
737
  next_text: nav_info['next'],
@@ -816,4 +825,4 @@ if __name__ == "__main__":
816
  except Exception as e:
817
  logger.error(f"Failed to launch interface: {str(e)}")
818
  logger.error(traceback.format_exc())
819
- raise
 
145
  continue
146
 
147
  # Split on common sentence endings
148
+ parts = re.split(r'[.!?]', line)
149
  for part in parts:
150
  part = part.strip()
151
  if part:
 
160
 
161
  try:
162
  # Try NLTK first
163
+ self.sentences = nltk.sent_tokenize(text.strip(), language='english')
164
  except Exception as e:
165
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
166
  # Fallback to simple splitting
 
191
  if not file.name.endswith('.txt'):
192
  return False, "Only .txt files are supported"
193
 
194
+ text = file.read().decode('utf-8')
 
195
 
196
  return self.process_text(text)
197
 
 
213
  def set_font(self, font_style: str) -> Tuple[bool, str]:
214
  """Set the current font style"""
215
  if font_style not in FONT_STYLES and font_style not in self.custom_fonts:
216
+ available_fonts = ', '.join(list(FONT_STYLES.keys()) + list(self.custom_fonts.keys()))
217
+ return False, f"Invalid font style. Available styles: {available_fonts}"
218
  self.current_font = font_style
219
  return True, f"Font style set to {font_style}"
220
 
 
231
 
232
  # Save the font file
233
  with open(font_dest, 'wb') as f:
234
+ f.write(font_file.getvalue())
235
 
236
  # Add to custom fonts
237
  self.custom_fonts[font_family] = {
 
469
  metadata = json.load(f)
470
  # Flatten statistics for display
471
  total_sentences = len(self.sentences)
472
+ recorded = sum(len(dataset['recorded_sentences']) for speaker in metadata['speakers'].values() for dataset in speaker['datasets'].values())
473
  remaining = total_sentences - recorded
474
  stats = {
475
  "Total Sentences": total_sentences,
 
512
  max-width: 1200px !important;
513
  }
514
  .record-button {
515
+ font-size: 1em !important;
516
+ padding: 10px !important;
517
  }
518
  .sentence-display {
519
  font-size: 1.4em !important;
 
523
  margin: 10px 0 !important;
524
  min-height: 100px !important;
525
  }
526
+ .small-input {
527
+ max-width: 300px !important;
528
+ }
529
  """
530
 
531
  # Add font-face declarations
 
547
 
548
  with gr.Row():
549
  # Left column - Configuration and Input
550
+ with gr.Column(scale=1):
551
  text_input = gr.Textbox(
552
  label="Paste Text",
553
  placeholder="Paste your text here...",
554
+ lines=5,
555
+ elem_classes=["small-input"]
556
  )
557
  file_input = gr.File(
558
  label="Or Upload Text File (.txt)",
559
+ file_types=[".txt"],
560
+ elem_classes=["small-input"]
561
  )
562
  speaker_id = gr.Textbox(
563
  label="Speaker ID",
564
+ placeholder="Enter unique speaker identifier (letters and numbers only)",
565
+ elem_classes=["small-input"]
566
  )
567
  dataset_name = gr.Textbox(
568
  label="Dataset Name",
569
+ placeholder="Enter dataset name (letters and numbers only)",
570
+ elem_classes=["small-input"]
571
  )
572
  font_select = gr.Dropdown(
573
  choices=list(FONT_STYLES.keys()),
574
  value="english_serif",
575
+ label="Select Font Style",
576
+ elem_classes=["small-input"]
577
  )
578
  # Custom font upload
579
  font_file_input = gr.File(
580
  label="Upload Custom Font (.ttf)",
581
+ file_types=[".ttf"],
582
+ elem_classes=["small-input"]
583
  )
584
  add_font_btn = gr.Button("Add Custom Font")
585
 
586
+ status = gr.Textbox(
587
+ label="Status",
588
+ interactive=False,
589
+ max_lines=3
590
+ )
591
+
592
+ # Dataset Info
593
+ with gr.Accordion("Dataset Statistics", open=False):
594
+ dataset_info = gr.JSON(
595
+ label="",
596
+ value={}
597
+ )
598
+
599
  # Right column - Recording
600
+ with gr.Column(scale=2):
601
  current_text = gr.HTML(
602
  label="Current Sentence",
603
  elem_classes=["sentence-display"]
 
606
  label="Next Sentence",
607
  elem_classes=["sentence-display"]
608
  )
609
+ progress = gr.HTML("")
610
 
611
  audio_recorder = gr.Audio(
612
  label="Record Audio",
 
616
  # Controls
617
  with gr.Row():
618
  prev_btn = gr.Button("Previous", variant="secondary")
619
+ save_btn = gr.Button("Save Recording", variant="primary")
620
  next_btn = gr.Button("Next", variant="primary")
621
 
622
+ # Download Links
623
+ with gr.Row():
624
+ download_audio = gr.File(label="Download Audio", interactive=False)
625
+ download_transcript = gr.File(label="Download Transcript", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
626
 
627
  def process_pasted_text(text):
628
  """Handle pasted text input"""
 
646
  }
647
 
648
  nav_info = collector.get_navigation_info()
649
+ progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
650
  return {
651
  current_text: nav_info['current'],
652
  next_text: nav_info['next'],
 
690
  }
691
 
692
  nav_info = collector.get_navigation_info()
693
+ progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
694
  return {
695
  current_text: nav_info['current'],
696
  next_text: nav_info['next'],
 
726
 
727
  # Auto-advance to next sentence after successful save
728
  nav_info = collector.navigate("next")
729
+ progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
730
  return {
731
  current_text: nav_info['current'],
732
  next_text: nav_info['next'],
 
740
  def navigate_sentences(direction):
741
  """Handle navigation between sentences"""
742
  nav_info = collector.navigate(direction)
743
+ progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
744
  return {
745
  current_text: nav_info['current'],
746
  next_text: nav_info['next'],
 
825
  except Exception as e:
826
  logger.error(f"Failed to launch interface: {str(e)}")
827
  logger.error(traceback.format_exc())
828
+ raise