Omarrran committed on
Commit
99a78a1
·
verified ·
1 Parent(s): 03ba3c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -237
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  import json
3
  import nltk
@@ -5,174 +9,178 @@ import gradio as gr
5
  from datetime import datetime
6
  from pathlib import Path
7
  import shutil
 
 
 
8
 
9
- # Download NLTK data
10
- nltk.download('punkt')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  class TTSDatasetCollector:
13
- """Manages TTS dataset collection and organization"""
14
 
15
  def __init__(self):
16
- # Get the directory where app.py is located
 
 
 
 
17
  self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
18
- self.sentences = []
19
- self.current_index = 0
 
20
  self.setup_directories()
21
-
22
- def setup_directories(self):
23
- """Create necessary directory structure"""
24
- # Create main dataset directory
25
- self.root_path.mkdir(exist_ok=True)
26
 
27
- # Create subdirectories
28
- for subdir in ['audio', 'transcriptions', 'metadata']:
29
- (self.root_path / subdir).mkdir(exist_ok=True)
30
-
31
- # Create a log file to track operations
32
- log_file = self.root_path / 'dataset_log.txt'
33
- if not log_file.exists():
34
- with open(log_file, 'w', encoding='utf-8') as f:
35
- f.write(f"Dataset collection started on {datetime.now().isoformat()}\n")
36
-
37
- def log_operation(self, message: str):
38
- """Log operations to keep track of dataset collection"""
39
- log_file = self.root_path / 'dataset_log.txt'
40
- with open(log_file, 'a', encoding='utf-8') as f:
41
- f.write(f"[{datetime.now().isoformat()}] {message}\n")
42
 
43
- def load_text_file(self, file):
44
- """Process and load text file"""
45
  try:
46
- with open(file.name, 'r', encoding='utf-8') as f:
47
- text = f.read()
48
- self.sentences = nltk.sent_tokenize(text)
49
- self.current_index = 0
50
-
51
- # Log the file loading
52
- self.log_operation(f"Loaded text file with {len(self.sentences)} sentences")
53
- return True, f"Loaded {len(self.sentences)} sentences"
54
  except Exception as e:
55
- self.log_operation(f"Error loading file: {str(e)}")
56
- return False, f"Error loading file: {str(e)}"
57
-
58
- def generate_filenames(self, dataset_name: str, speaker_id: str) -> tuple:
59
- """Generate unique filenames for audio and text"""
60
- timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
61
- sentence_id = f"{self.current_index+1:04d}"
62
- base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
63
- return f"{base_name}.wav", f"{base_name}.txt"
64
 
65
- def save_recording(self, audio_file, speaker_id: str, dataset_name: str):
66
- """Save recording and transcription"""
67
- if not audio_file or not speaker_id or not dataset_name:
68
- return False, "Missing required information"
69
-
70
  try:
71
- # Generate filenames
72
- audio_name, text_name = self.generate_filenames(dataset_name, speaker_id)
73
 
74
- # Create speaker directories
75
- audio_dir = self.root_path / 'audio' / speaker_id
76
- text_dir = self.root_path / 'transcriptions' / speaker_id
77
- audio_dir.mkdir(exist_ok=True)
78
- text_dir.mkdir(exist_ok=True)
79
 
80
- # Save audio file
81
- audio_path = audio_dir / audio_name
82
- shutil.copy2(audio_file, audio_path)
 
 
83
 
84
- # Save transcription
85
- text_path = text_dir / text_name
86
- self.save_transcription(
87
- text_path,
88
- self.sentences[self.current_index],
89
- {
90
- 'speaker_id': speaker_id,
91
- 'dataset_name': dataset_name,
92
- 'timestamp': datetime.now().isoformat(),
93
- 'audio_file': audio_name
94
- }
95
- )
96
 
97
- # Update metadata
98
- self.update_metadata(speaker_id, dataset_name)
 
 
 
 
 
 
 
 
99
 
100
- # Log the save operation
101
- self.log_operation(
102
- f"Saved recording: Speaker={speaker_id}, Dataset={dataset_name}, "
103
- f"Audio={audio_name}, Text={text_name}"
104
- )
105
 
106
- return True, f"Recording saved successfully as {audio_name}"
 
 
 
 
107
  except Exception as e:
108
- error_msg = f"Error saving recording: {str(e)}"
109
- self.log_operation(error_msg)
110
- return False, error_msg
111
-
112
- def save_transcription(self, file_path: Path, text: str, metadata: dict):
113
- """Save transcription with metadata"""
114
- content = f"""[METADATA]
115
- Recording_ID: {metadata['audio_file']}
116
- Speaker_ID: {metadata['speaker_id']}
117
- Dataset_Name: {metadata['dataset_name']}
118
- Timestamp: {metadata['timestamp']}
119
-
120
- [TEXT]
121
- {text}
122
- """
123
- with open(file_path, 'w', encoding='utf-8') as f:
124
- f.write(content)
125
 
126
- def update_metadata(self, speaker_id: str, dataset_name: str):
127
- """Update dataset metadata file"""
128
- metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
 
129
 
130
  try:
131
- if metadata_file.exists():
132
- with open(metadata_file, 'r') as f:
133
- metadata = json.load(f)
134
- else:
135
- metadata = {'speakers': {}, 'last_updated': None}
136
 
137
- # Update speaker data
138
- if speaker_id not in metadata['speakers']:
139
- metadata['speakers'][speaker_id] = {
140
- 'total_recordings': 0,
141
- 'datasets': {}
142
- }
143
 
144
- if dataset_name not in metadata['speakers'][speaker_id]['datasets']:
145
- metadata['speakers'][speaker_id]['datasets'][dataset_name] = {
146
- 'recordings': 0,
147
- 'sentences': len(self.sentences),
148
- 'first_recording': datetime.now().isoformat(),
149
- 'last_recording': None
150
- }
151
 
152
- # Update counts and timestamps
153
- metadata['speakers'][speaker_id]['total_recordings'] += 1
154
- metadata['speakers'][speaker_id]['datasets'][dataset_name]['recordings'] += 1
155
- metadata['speakers'][speaker_id]['datasets'][dataset_name]['last_recording'] = \
156
- datetime.now().isoformat()
157
- metadata['last_updated'] = datetime.now().isoformat()
158
 
159
- # Save updated metadata
160
- with open(metadata_file, 'w') as f:
161
- json.dump(metadata, f, indent=2)
 
 
 
162
 
163
- self.log_operation(f"Updated metadata for {speaker_id} in {dataset_name}")
164
 
 
 
 
 
165
  except Exception as e:
166
- error_msg = f"Error updating metadata: {str(e)}"
167
- self.log_operation(error_msg)
168
- print(error_msg)
 
 
 
169
 
170
  def create_interface():
171
- """Create Gradio interface for TTS data collection"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  collector = TTSDatasetCollector()
174
 
175
- with gr.Blocks(title="TTS Dataset Collection Tool") as interface:
176
  gr.Markdown("# TTS Dataset Collection Tool")
177
 
178
  with gr.Row():
@@ -184,35 +192,38 @@ def create_interface():
184
  )
185
  speaker_id = gr.Textbox(
186
  label="Speaker ID",
187
- placeholder="Enter unique speaker identifier"
188
  )
189
  dataset_name = gr.Textbox(
190
  label="Dataset Name",
191
- placeholder="Enter dataset name"
 
 
 
 
 
192
  )
193
 
194
  # Right column - Recording
195
  with gr.Column():
196
- current_text = gr.Textbox(
197
- label="Current Sentence",
198
- interactive=False
199
  )
200
  audio_recorder = gr.Audio(
201
  label="Record Audio",
202
  type="filepath"
203
  )
204
- next_text = gr.Textbox(
205
- label="Next Sentence",
206
- interactive=False
207
  )
208
 
209
  # Controls
210
  with gr.Row():
211
- prev_btn = gr.Button("Previous")
212
- next_btn = gr.Button("Next")
213
  save_btn = gr.Button("Save Recording", variant="primary")
214
 
215
- # Status
216
  with gr.Row():
217
  progress = gr.Textbox(
218
  label="Progress",
@@ -220,9 +231,10 @@ def create_interface():
220
  )
221
  status = gr.Textbox(
222
  label="Status",
223
- interactive=False
 
224
  )
225
-
226
  # Dataset Info
227
  with gr.Row():
228
  dataset_info = gr.JSON(
@@ -230,118 +242,56 @@ def create_interface():
230
  value={}
231
  )
232
 
233
- def update_dataset_info():
234
- """Update dataset statistics display"""
235
- try:
236
- metadata_file = collector.root_path / 'metadata' / 'dataset_info.json'
237
- if metadata_file.exists():
238
- with open(metadata_file, 'r') as f:
239
- return json.load(f)
240
- return {}
241
- except Exception:
242
- return {}
243
 
244
- # Event handlers
245
  def load_file(file):
 
246
  if not file:
247
  return {
248
  current_text: "",
249
  next_text: "",
250
  progress: "",
251
- status: "No file selected",
252
- dataset_info: update_dataset_info()
253
  }
254
-
255
  success, msg = collector.load_text_file(file)
256
  if not success:
257
  return {
258
  current_text: "",
259
  next_text: "",
260
  progress: "",
261
- status: msg,
262
  dataset_info: update_dataset_info()
263
  }
264
 
265
  return {
266
- current_text: collector.sentences[0],
267
- next_text: collector.sentences[1] if len(collector.sentences) > 1 else "",
268
- progress: f"Sentence 1 of {len(collector.sentences)}",
269
- status: msg,
270
  dataset_info: update_dataset_info()
271
  }
272
 
273
- def update_display():
274
- """Update interface display"""
275
- if not collector.sentences:
276
- return {
277
- current_text: "",
278
- next_text: "",
279
- progress: "",
280
- status: "No text loaded",
281
- dataset_info: update_dataset_info()
282
- }
283
-
284
- next_idx = collector.current_index + 1
285
- return {
286
- current_text: collector.sentences[collector.current_index],
287
- next_text: collector.sentences[next_idx] if next_idx < len(collector.sentences) else "",
288
- progress: f"Sentence {collector.current_index + 1} of {len(collector.sentences)}",
289
- status: "Ready for recording",
290
- dataset_info: update_dataset_info()
291
- }
292
-
293
- def next_sentence():
294
- """Move to next sentence"""
295
- if collector.sentences and collector.current_index < len(collector.sentences) - 1:
296
- collector.current_index += 1
297
- return update_display()
298
-
299
- def prev_sentence():
300
- """Move to previous sentence"""
301
- if collector.sentences and collector.current_index > 0:
302
- collector.current_index -= 1
303
- return update_display()
304
-
305
- def save_recording(audio, spk_id, ds_name):
306
- """Handle saving recording"""
307
- if not audio:
308
- return {status: "No audio recorded", dataset_info: update_dataset_info()}
309
- if not spk_id:
310
- return {status: "Speaker ID required", dataset_info: update_dataset_info()}
311
- if not ds_name:
312
- return {status: "Dataset name required", dataset_info: update_dataset_info()}
313
-
314
- success, msg = collector.save_recording(audio, spk_id, ds_name)
315
- return {
316
- status: msg,
317
- dataset_info: update_dataset_info()
318
- }
319
-
320
- # Connect event handlers
321
- file_input.change(
322
- load_file,
323
- inputs=[file_input],
324
- outputs=[current_text, next_text, progress, status, dataset_info]
325
- )
326
-
327
- next_btn.click(
328
- next_sentence,
329
- outputs=[current_text, next_text, progress, status, dataset_info]
330
- )
331
-
332
- prev_btn.click(
333
- prev_sentence,
334
- outputs=[current_text, next_text, progress, status, dataset_info]
335
- )
336
-
337
- save_btn.click(
338
- save_recording,
339
- inputs=[audio_recorder, speaker_id, dataset_name],
340
- outputs=[status, dataset_info]
341
- )
342
-
343
  return interface
344
 
345
  if __name__ == "__main__":
346
- interface = create_interface()
347
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Dataset Collection Tool with Font Support and Enhanced Error Handling
3
+ """
4
+
5
  import os
6
  import json
7
  import nltk
 
9
  from datetime import datetime
10
  from pathlib import Path
11
  import shutil
12
+ import logging
13
+ from typing import Dict, List, Tuple, Optional
14
+ import traceback
15
 
16
# Logging setup: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def _font(name, family, css):
    # Small builder so each font preset reads as one line below.
    return {"name": name, "family": family, "css": css}


# Display-font presets offered in the UI dropdown; keys are the dropdown
# choices, `css` is injected into the styled sentence HTML.
FONT_STYLES = {
    "english_serif": _font(
        "Times New Roman", "serif",
        "font-family: 'Times New Roman', serif;"
    ),
    "english_sans": _font(
        "Arial", "sans-serif",
        "font-family: Arial, sans-serif;"
    ),
    "nastaliq": _font(
        "Nastaliq", "Jameel Noori Nastaleeq",
        "font-family: 'Jameel Noori Nastaleeq', serif;"
    ),
    "naskh": _font(
        "Naskh", "Traditional Arabic",
        "font-family: 'Traditional Arabic', serif;"
    ),
}
46
 
47
class TTSDatasetCollector:
    """Manages TTS dataset collection and organization with enhanced features."""

    def __init__(self):
        """Initialize the collector: NLTK tokenizer, dataset paths, and state."""
        # Initialize NLTK before anything else; sentence tokenization depends on it.
        self._initialize_nltk()

        # Dataset lives next to this script so relative launches behave the same.
        self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
        self.sentences: List[str] = []        # tokenized sentences awaiting recording
        self.current_index: int = 0           # index of the sentence being recorded
        self.current_font: str = "english_serif"  # key into FONT_STYLES
        self.setup_directories()

        logger.info("TTS Dataset Collector initialized")

    def _initialize_nltk(self) -> None:
        """Download the punkt sentence tokenizer, raising a clear error on failure.

        Raises:
            RuntimeError: if the tokenizer data cannot be downloaded.
        """
        try:
            nltk.download('punkt', quiet=True)
            # NLTK >= 3.8.2 splits the tokenizer tables into 'punkt_tab';
            # fetch it too, but tolerate absence on older NLTK versions.
            try:
                nltk.download('punkt_tab', quiet=True)
            except Exception:
                pass
            logger.info("NLTK punkt tokenizer downloaded successfully")
        except Exception as e:
            logger.error(f"Failed to download NLTK data: {str(e)}")
            logger.error(traceback.format_exc())
            # Chain the original cause so the traceback stays informative.
            raise RuntimeError(
                "Failed to initialize NLTK. Please check your internet connection."
            ) from e

    def setup_directories(self) -> None:
        """Create the dataset directory tree and the operations log file.

        Raises:
            RuntimeError: if any directory or the log file cannot be created.
        """
        try:
            # parents=True so a missing intermediate path does not abort startup.
            self.root_path.mkdir(parents=True, exist_ok=True)

            for subdir in ['audio', 'transcriptions', 'metadata', 'fonts']:
                (self.root_path / subdir).mkdir(exist_ok=True)

            # Initialize the append-only operations log exactly once.
            log_file = self.root_path / 'dataset_log.txt'
            if not log_file.exists():
                with open(log_file, 'w', encoding='utf-8') as f:
                    f.write(f"Dataset collection initialized on {datetime.now().isoformat()}\n")

            logger.info("Directory structure created successfully")

        except Exception as e:
            logger.error(f"Failed to create directory structure: {str(e)}")
            logger.error(traceback.format_exc())
            raise RuntimeError("Failed to initialize directory structure") from e

    def log_operation(self, message: str, level: str = "info") -> None:
        """Append a timestamped entry to the dataset log and mirror it to the logger.

        Args:
            message: text to record.
            level: "info" (default) or "error"; controls the logger channel.
        """
        try:
            log_file = self.root_path / 'dataset_log.txt'
            timestamp = datetime.now().isoformat()

            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(f"[{timestamp}] [{level.upper()}] {message}\n")

            if level.lower() == "error":
                logger.error(message)
            else:
                logger.info(message)

        except Exception as e:
            # Logging must never crash collection; report and continue.
            logger.error(f"Failed to log operation: {str(e)}")

    def load_text_file(self, file) -> Tuple[bool, str]:
        """Load a .txt upload, tokenize it into sentences, and reset progress.

        Args:
            file: uploaded file object exposing a ``.name`` path (Gradio File).

        Returns:
            (success, message) suitable for the UI status box.
        """
        if not file:
            return False, "No file provided"

        try:
            # Case-insensitive extension check so '.TXT' uploads are accepted too.
            if not file.name.lower().endswith('.txt'):
                return False, "Only .txt files are supported"

            with open(file.name, 'r', encoding='utf-8') as f:
                text = f.read()

            # Validate text content before tokenizing.
            if not text.strip():
                return False, "File is empty"

            self.sentences = nltk.sent_tokenize(text)
            if not self.sentences:
                return False, "No valid sentences found in file"

            self.current_index = 0

            self.log_operation(
                f"Loaded text file: {file.name} with {len(self.sentences)} sentences"
            )

            return True, f"Successfully loaded {len(self.sentences)} sentences"

        except UnicodeDecodeError:
            error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
            self.log_operation(error_msg, "error")
            return False, error_msg
        except Exception as e:
            error_msg = f"Error loading file: {str(e)}"
            self.log_operation(error_msg, "error")
            logger.error(traceback.format_exc())
            return False, error_msg

    # Remaining methods (set_font, get_styled_text, save_recording, ...) go here ...
156
 
157
def create_interface():
    """Create the Gradio interface with font support and enhanced status reporting.

    Returns:
        gr.Blocks: the assembled (unlaunched) interface.
    """

    # Base CSS: widen the app container and enlarge the record button.
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
    }
    .record-button {
        font-size: 1.2em !important;
        padding: 20px !important;
    }
    """

    # Add @font-face declarations for the non-Latin fonts served from ./fonts.
    for font_style, font_info in FONT_STYLES.items():
        if font_style in ['nastaliq', 'naskh']:
            custom_css += f"""
            @font-face {{
                font-family: '{font_info["family"]}';
                src: url('fonts/{font_info["family"]}.ttf') format('truetype');
            }}
            """

    collector = TTSDatasetCollector()

    with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
        gr.Markdown("# TTS Dataset Collection Tool")

        with gr.Row():
            # Left column - Configuration
            # NOTE(review): the opening of this column is hidden in the diff
            # view; reconstructed minimally — verify against the full file.
            with gr.Column():
                file_input = gr.File(
                    label="Upload Text File (.txt)"
                )
                speaker_id = gr.Textbox(
                    label="Speaker ID",
                    placeholder="Enter unique speaker identifier (letters and numbers only)"
                )
                dataset_name = gr.Textbox(
                    label="Dataset Name",
                    placeholder="Enter dataset name (letters and numbers only)"
                )
                font_select = gr.Dropdown(
                    choices=list(FONT_STYLES.keys()),
                    value="english_serif",
                    label="Select Font Style"
                )

            # Right column - Recording
            with gr.Column():
                current_text = gr.HTML(
                    label="Current Sentence"
                )
                audio_recorder = gr.Audio(
                    label="Record Audio",
                    type="filepath"
                )
                next_text = gr.HTML(
                    label="Next Sentence"
                )

        # Controls
        with gr.Row():
            prev_btn = gr.Button("Previous", variant="secondary")
            next_btn = gr.Button("Next", variant="secondary")
            save_btn = gr.Button("Save Recording", variant="primary")

        # Status and Progress
        with gr.Row():
            progress = gr.Textbox(
                label="Progress",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )

        # Dataset Info
        with gr.Row():
            dataset_info = gr.JSON(
                label="Dataset Statistics",
                value={}
            )

        def update_dataset_info():
            """Read current dataset statistics from the metadata file.

            Reintroduced: this commit removed the helper, but load_file still
            calls it, which raises NameError at runtime. Returns {} on any
            failure so the UI never crashes over a stats read.
            """
            try:
                metadata_file = collector.root_path / 'metadata' / 'dataset_info.json'
                if metadata_file.exists():
                    with open(metadata_file, 'r') as f:
                        return json.load(f)
                return {}
            except Exception:
                return {}

        def update_font(font_style):
            """Update font and refresh display."""
            # NOTE(review): collector.set_font and update_display are expected
            # from the still-missing "remaining methods" — confirm they exist
            # before wiring this handler, otherwise it raises AttributeError.
            success, msg = collector.set_font(font_style)
            if not success:
                return {status: msg}

            return update_display()

        def load_file(file):
            """Handle file loading with enhanced error reporting."""
            if not file:
                # Refresh dataset_info here too so every branch returns the
                # same output keys (the original omitted it in this branch).
                return {
                    current_text: "",
                    next_text: "",
                    progress: "",
                    status: "⚠️ No file selected",
                    dataset_info: update_dataset_info()
                }

            success, msg = collector.load_text_file(file)
            if not success:
                return {
                    current_text: "",
                    next_text: "",
                    progress: "",
                    status: f"❌ {msg}",
                    dataset_info: update_dataset_info()
                }

            return {
                current_text: collector.get_styled_text(collector.sentences[0]),
                next_text: collector.get_styled_text(collector.sentences[1]) if len(collector.sentences) > 1 else "",
                progress: f"📖 Sentence 1 of {len(collector.sentences)}",
                status: f"✅ {msg}",
                dataset_info: update_dataset_info()
            }

        # Remaining methods and event handlers go here ...

    return interface
284
 
285
if __name__ == "__main__":
    try:
        interface = create_interface()
        # Gradio 4.x removed the launch(enable_queue=...) kwarg; queueing is
        # now enabled by calling .queue() before launch.
        interface.queue()
        interface.launch(
            server_name="0.0.0.0",  # listen on all interfaces (required on Spaces)
            server_port=7860,
            share=True
        )
    except Exception as e:
        logger.error(f"Failed to launch interface: {str(e)}")
        logger.error(traceback.format_exc())
        raise