Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -145,7 +145,7 @@ class TTSDatasetCollector:
|
|
145 |
continue
|
146 |
|
147 |
# Split on common sentence endings
|
148 |
-
parts =
|
149 |
for part in parts:
|
150 |
part = part.strip()
|
151 |
if part:
|
@@ -160,7 +160,7 @@ class TTSDatasetCollector:
|
|
160 |
|
161 |
try:
|
162 |
# Try NLTK first
|
163 |
-
self.sentences = nltk.sent_tokenize(text.strip())
|
164 |
except Exception as e:
|
165 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
166 |
# Fallback to simple splitting
|
@@ -191,8 +191,7 @@ class TTSDatasetCollector:
|
|
191 |
if not file.name.endswith('.txt'):
|
192 |
return False, "Only .txt files are supported"
|
193 |
|
194 |
-
|
195 |
-
text = f.read()
|
196 |
|
197 |
return self.process_text(text)
|
198 |
|
@@ -214,7 +213,8 @@ class TTSDatasetCollector:
|
|
214 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
215 |
"""Set the current font style"""
|
216 |
if font_style not in FONT_STYLES and font_style not in self.custom_fonts:
|
217 |
-
|
|
|
218 |
self.current_font = font_style
|
219 |
return True, f"Font style set to {font_style}"
|
220 |
|
@@ -231,7 +231,7 @@ class TTSDatasetCollector:
|
|
231 |
|
232 |
# Save the font file
|
233 |
with open(font_dest, 'wb') as f:
|
234 |
-
f.write(font_file.
|
235 |
|
236 |
# Add to custom fonts
|
237 |
self.custom_fonts[font_family] = {
|
@@ -469,7 +469,7 @@ Font_Style: {metadata['font_style']}
|
|
469 |
metadata = json.load(f)
|
470 |
# Flatten statistics for display
|
471 |
total_sentences = len(self.sentences)
|
472 |
-
recorded = len(
|
473 |
remaining = total_sentences - recorded
|
474 |
stats = {
|
475 |
"Total Sentences": total_sentences,
|
@@ -512,8 +512,8 @@ def create_interface():
|
|
512 |
max-width: 1200px !important;
|
513 |
}
|
514 |
.record-button {
|
515 |
-
font-size:
|
516 |
-
padding:
|
517 |
}
|
518 |
.sentence-display {
|
519 |
font-size: 1.4em !important;
|
@@ -523,6 +523,9 @@ def create_interface():
|
|
523 |
margin: 10px 0 !important;
|
524 |
min-height: 100px !important;
|
525 |
}
|
|
|
|
|
|
|
526 |
"""
|
527 |
|
528 |
# Add font-face declarations
|
@@ -544,38 +547,57 @@ def create_interface():
|
|
544 |
|
545 |
with gr.Row():
|
546 |
# Left column - Configuration and Input
|
547 |
-
with gr.Column():
|
548 |
text_input = gr.Textbox(
|
549 |
label="Paste Text",
|
550 |
placeholder="Paste your text here...",
|
551 |
-
lines=5
|
|
|
552 |
)
|
553 |
file_input = gr.File(
|
554 |
label="Or Upload Text File (.txt)",
|
555 |
-
file_types=[".txt"]
|
|
|
556 |
)
|
557 |
speaker_id = gr.Textbox(
|
558 |
label="Speaker ID",
|
559 |
-
placeholder="Enter unique speaker identifier (letters and numbers only)"
|
|
|
560 |
)
|
561 |
dataset_name = gr.Textbox(
|
562 |
label="Dataset Name",
|
563 |
-
placeholder="Enter dataset name (letters and numbers only)"
|
|
|
564 |
)
|
565 |
font_select = gr.Dropdown(
|
566 |
choices=list(FONT_STYLES.keys()),
|
567 |
value="english_serif",
|
568 |
-
label="Select Font Style"
|
|
|
569 |
)
|
570 |
# Custom font upload
|
571 |
font_file_input = gr.File(
|
572 |
label="Upload Custom Font (.ttf)",
|
573 |
-
file_types=[".ttf"]
|
|
|
574 |
)
|
575 |
add_font_btn = gr.Button("Add Custom Font")
|
576 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
577 |
# Right column - Recording
|
578 |
-
with gr.Column():
|
579 |
current_text = gr.HTML(
|
580 |
label="Current Sentence",
|
581 |
elem_classes=["sentence-display"]
|
@@ -584,7 +606,7 @@ def create_interface():
|
|
584 |
label="Next Sentence",
|
585 |
elem_classes=["sentence-display"]
|
586 |
)
|
587 |
-
progress = gr.
|
588 |
|
589 |
audio_recorder = gr.Audio(
|
590 |
label="Record Audio",
|
@@ -594,26 +616,13 @@ def create_interface():
|
|
594 |
# Controls
|
595 |
with gr.Row():
|
596 |
prev_btn = gr.Button("Previous", variant="secondary")
|
597 |
-
save_btn = gr.Button("Save Recording", variant="primary"
|
598 |
next_btn = gr.Button("Next", variant="primary")
|
599 |
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
max_lines=3
|
605 |
-
)
|
606 |
-
|
607 |
-
# Dataset Info and Download Links
|
608 |
-
with gr.Row():
|
609 |
-
dataset_info = gr.JSON(
|
610 |
-
label="Dataset Statistics",
|
611 |
-
value={}
|
612 |
-
)
|
613 |
-
|
614 |
-
with gr.Row():
|
615 |
-
download_audio = gr.File(label="Download Audio", interactive=False)
|
616 |
-
download_transcript = gr.File(label="Download Transcript", interactive=False)
|
617 |
|
618 |
def process_pasted_text(text):
|
619 |
"""Handle pasted text input"""
|
@@ -637,7 +646,7 @@ def create_interface():
|
|
637 |
}
|
638 |
|
639 |
nav_info = collector.get_navigation_info()
|
640 |
-
progress_bar =
|
641 |
return {
|
642 |
current_text: nav_info['current'],
|
643 |
next_text: nav_info['next'],
|
@@ -681,7 +690,7 @@ def create_interface():
|
|
681 |
}
|
682 |
|
683 |
nav_info = collector.get_navigation_info()
|
684 |
-
progress_bar =
|
685 |
return {
|
686 |
current_text: nav_info['current'],
|
687 |
next_text: nav_info['next'],
|
@@ -717,7 +726,7 @@ def create_interface():
|
|
717 |
|
718 |
# Auto-advance to next sentence after successful save
|
719 |
nav_info = collector.navigate("next")
|
720 |
-
progress_bar =
|
721 |
return {
|
722 |
current_text: nav_info['current'],
|
723 |
next_text: nav_info['next'],
|
@@ -731,7 +740,7 @@ def create_interface():
|
|
731 |
def navigate_sentences(direction):
|
732 |
"""Handle navigation between sentences"""
|
733 |
nav_info = collector.navigate(direction)
|
734 |
-
progress_bar =
|
735 |
return {
|
736 |
current_text: nav_info['current'],
|
737 |
next_text: nav_info['next'],
|
@@ -816,4 +825,4 @@ if __name__ == "__main__":
|
|
816 |
except Exception as e:
|
817 |
logger.error(f"Failed to launch interface: {str(e)}")
|
818 |
logger.error(traceback.format_exc())
|
819 |
-
raise
|
|
|
145 |
continue
|
146 |
|
147 |
# Split on common sentence endings
|
148 |
+
parts = re.split(r'[.!?]', line)
|
149 |
for part in parts:
|
150 |
part = part.strip()
|
151 |
if part:
|
|
|
160 |
|
161 |
try:
|
162 |
# Try NLTK first
|
163 |
+
self.sentences = nltk.sent_tokenize(text.strip(), language='english')
|
164 |
except Exception as e:
|
165 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
166 |
# Fallback to simple splitting
|
|
|
191 |
if not file.name.endswith('.txt'):
|
192 |
return False, "Only .txt files are supported"
|
193 |
|
194 |
+
text = file.read().decode('utf-8')
|
|
|
195 |
|
196 |
return self.process_text(text)
|
197 |
|
|
|
213 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
214 |
"""Set the current font style"""
|
215 |
if font_style not in FONT_STYLES and font_style not in self.custom_fonts:
|
216 |
+
available_fonts = ', '.join(list(FONT_STYLES.keys()) + list(self.custom_fonts.keys()))
|
217 |
+
return False, f"Invalid font style. Available styles: {available_fonts}"
|
218 |
self.current_font = font_style
|
219 |
return True, f"Font style set to {font_style}"
|
220 |
|
|
|
231 |
|
232 |
# Save the font file
|
233 |
with open(font_dest, 'wb') as f:
|
234 |
+
f.write(font_file.getvalue())
|
235 |
|
236 |
# Add to custom fonts
|
237 |
self.custom_fonts[font_family] = {
|
|
|
469 |
metadata = json.load(f)
|
470 |
# Flatten statistics for display
|
471 |
total_sentences = len(self.sentences)
|
472 |
+
recorded = sum(len(dataset['recorded_sentences']) for speaker in metadata['speakers'].values() for dataset in speaker['datasets'].values())
|
473 |
remaining = total_sentences - recorded
|
474 |
stats = {
|
475 |
"Total Sentences": total_sentences,
|
|
|
512 |
max-width: 1200px !important;
|
513 |
}
|
514 |
.record-button {
|
515 |
+
font-size: 1em !important;
|
516 |
+
padding: 10px !important;
|
517 |
}
|
518 |
.sentence-display {
|
519 |
font-size: 1.4em !important;
|
|
|
523 |
margin: 10px 0 !important;
|
524 |
min-height: 100px !important;
|
525 |
}
|
526 |
+
.small-input {
|
527 |
+
max-width: 300px !important;
|
528 |
+
}
|
529 |
"""
|
530 |
|
531 |
# Add font-face declarations
|
|
|
547 |
|
548 |
with gr.Row():
|
549 |
# Left column - Configuration and Input
|
550 |
+
with gr.Column(scale=1):
|
551 |
text_input = gr.Textbox(
|
552 |
label="Paste Text",
|
553 |
placeholder="Paste your text here...",
|
554 |
+
lines=5,
|
555 |
+
elem_classes=["small-input"]
|
556 |
)
|
557 |
file_input = gr.File(
|
558 |
label="Or Upload Text File (.txt)",
|
559 |
+
file_types=[".txt"],
|
560 |
+
elem_classes=["small-input"]
|
561 |
)
|
562 |
speaker_id = gr.Textbox(
|
563 |
label="Speaker ID",
|
564 |
+
placeholder="Enter unique speaker identifier (letters and numbers only)",
|
565 |
+
elem_classes=["small-input"]
|
566 |
)
|
567 |
dataset_name = gr.Textbox(
|
568 |
label="Dataset Name",
|
569 |
+
placeholder="Enter dataset name (letters and numbers only)",
|
570 |
+
elem_classes=["small-input"]
|
571 |
)
|
572 |
font_select = gr.Dropdown(
|
573 |
choices=list(FONT_STYLES.keys()),
|
574 |
value="english_serif",
|
575 |
+
label="Select Font Style",
|
576 |
+
elem_classes=["small-input"]
|
577 |
)
|
578 |
# Custom font upload
|
579 |
font_file_input = gr.File(
|
580 |
label="Upload Custom Font (.ttf)",
|
581 |
+
file_types=[".ttf"],
|
582 |
+
elem_classes=["small-input"]
|
583 |
)
|
584 |
add_font_btn = gr.Button("Add Custom Font")
|
585 |
|
586 |
+
status = gr.Textbox(
|
587 |
+
label="Status",
|
588 |
+
interactive=False,
|
589 |
+
max_lines=3
|
590 |
+
)
|
591 |
+
|
592 |
+
# Dataset Info
|
593 |
+
with gr.Accordion("Dataset Statistics", open=False):
|
594 |
+
dataset_info = gr.JSON(
|
595 |
+
label="",
|
596 |
+
value={}
|
597 |
+
)
|
598 |
+
|
599 |
# Right column - Recording
|
600 |
+
with gr.Column(scale=2):
|
601 |
current_text = gr.HTML(
|
602 |
label="Current Sentence",
|
603 |
elem_classes=["sentence-display"]
|
|
|
606 |
label="Next Sentence",
|
607 |
elem_classes=["sentence-display"]
|
608 |
)
|
609 |
+
progress = gr.HTML("")
|
610 |
|
611 |
audio_recorder = gr.Audio(
|
612 |
label="Record Audio",
|
|
|
616 |
# Controls
|
617 |
with gr.Row():
|
618 |
prev_btn = gr.Button("Previous", variant="secondary")
|
619 |
+
save_btn = gr.Button("Save Recording", variant="primary")
|
620 |
next_btn = gr.Button("Next", variant="primary")
|
621 |
|
622 |
+
# Download Links
|
623 |
+
with gr.Row():
|
624 |
+
download_audio = gr.File(label="Download Audio", interactive=False)
|
625 |
+
download_transcript = gr.File(label="Download Transcript", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
626 |
|
627 |
def process_pasted_text(text):
|
628 |
"""Handle pasted text input"""
|
|
|
646 |
}
|
647 |
|
648 |
nav_info = collector.get_navigation_info()
|
649 |
+
progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
|
650 |
return {
|
651 |
current_text: nav_info['current'],
|
652 |
next_text: nav_info['next'],
|
|
|
690 |
}
|
691 |
|
692 |
nav_info = collector.get_navigation_info()
|
693 |
+
progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
|
694 |
return {
|
695 |
current_text: nav_info['current'],
|
696 |
next_text: nav_info['next'],
|
|
|
726 |
|
727 |
# Auto-advance to next sentence after successful save
|
728 |
nav_info = collector.navigate("next")
|
729 |
+
progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
|
730 |
return {
|
731 |
current_text: nav_info['current'],
|
732 |
next_text: nav_info['next'],
|
|
|
740 |
def navigate_sentences(direction):
|
741 |
"""Handle navigation between sentences"""
|
742 |
nav_info = collector.navigate(direction)
|
743 |
+
progress_bar = progress.update(value=f"<progress value='{collector.current_index + 1}' max='{len(collector.sentences)}'></progress> {nav_info['progress']}")
|
744 |
return {
|
745 |
current_text: nav_info['current'],
|
746 |
next_text: nav_info['next'],
|
|
|
825 |
except Exception as e:
|
826 |
logger.error(f"Failed to launch interface: {str(e)}")
|
827 |
logger.error(traceback.format_exc())
|
828 |
+
raise
|