Omarrran committed on
Commit
99a78a1
·
verified ·
1 Parent(s): 03ba3c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -237
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  import json
3
  import nltk
@@ -5,174 +9,178 @@ import gradio as gr
5
  from datetime import datetime
6
  from pathlib import Path
7
  import shutil
 
 
 
8
 
9
- # Download NLTK data
10
- nltk.download('punkt')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  class TTSDatasetCollector:
13
- """Manages TTS dataset collection and organization"""
14
 
15
  def __init__(self):
16
- # Get the directory where app.py is located
 
 
 
 
17
  self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
18
- self.sentences = []
19
- self.current_index = 0
 
20
  self.setup_directories()
21
-
22
- def setup_directories(self):
23
- """Create necessary directory structure"""
24
- # Create main dataset directory
25
- self.root_path.mkdir(exist_ok=True)
26
 
27
- # Create subdirectories
28
- for subdir in ['audio', 'transcriptions', 'metadata']:
29
- (self.root_path / subdir).mkdir(exist_ok=True)
30
-
31
- # Create a log file to track operations
32
- log_file = self.root_path / 'dataset_log.txt'
33
- if not log_file.exists():
34
- with open(log_file, 'w', encoding='utf-8') as f:
35
- f.write(f"Dataset collection started on {datetime.now().isoformat()}\n")
36
-
37
- def log_operation(self, message: str):
38
- """Log operations to keep track of dataset collection"""
39
- log_file = self.root_path / 'dataset_log.txt'
40
- with open(log_file, 'a', encoding='utf-8') as f:
41
- f.write(f"[{datetime.now().isoformat()}] {message}\n")
42
 
43
- def load_text_file(self, file):
44
- """Process and load text file"""
45
  try:
46
- with open(file.name, 'r', encoding='utf-8') as f:
47
- text = f.read()
48
- self.sentences = nltk.sent_tokenize(text)
49
- self.current_index = 0
50
-
51
- # Log the file loading
52
- self.log_operation(f"Loaded text file with {len(self.sentences)} sentences")
53
- return True, f"Loaded {len(self.sentences)} sentences"
54
  except Exception as e:
55
- self.log_operation(f"Error loading file: {str(e)}")
56
- return False, f"Error loading file: {str(e)}"
57
-
58
- def generate_filenames(self, dataset_name: str, speaker_id: str) -> tuple:
59
- """Generate unique filenames for audio and text"""
60
- timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
61
- sentence_id = f"{self.current_index+1:04d}"
62
- base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
63
- return f"{base_name}.wav", f"{base_name}.txt"
64
 
65
- def save_recording(self, audio_file, speaker_id: str, dataset_name: str):
66
- """Save recording and transcription"""
67
- if not audio_file or not speaker_id or not dataset_name:
68
- return False, "Missing required information"
69
-
70
  try:
71
- # Generate filenames
72
- audio_name, text_name = self.generate_filenames(dataset_name, speaker_id)
73
 
74
- # Create speaker directories
75
- audio_dir = self.root_path / 'audio' / speaker_id
76
- text_dir = self.root_path / 'transcriptions' / speaker_id
77
- audio_dir.mkdir(exist_ok=True)
78
- text_dir.mkdir(exist_ok=True)
79
 
80
- # Save audio file
81
- audio_path = audio_dir / audio_name
82
- shutil.copy2(audio_file, audio_path)
 
 
83
 
84
- # Save transcription
85
- text_path = text_dir / text_name
86
- self.save_transcription(
87
- text_path,
88
- self.sentences[self.current_index],
89
- {
90
- 'speaker_id': speaker_id,
91
- 'dataset_name': dataset_name,
92
- 'timestamp': datetime.now().isoformat(),
93
- 'audio_file': audio_name
94
- }
95
- )
96
 
97
- # Update metadata
98
- self.update_metadata(speaker_id, dataset_name)
 
 
 
 
 
 
 
 
99
 
100
- # Log the save operation
101
- self.log_operation(
102
- f"Saved recording: Speaker={speaker_id}, Dataset={dataset_name}, "
103
- f"Audio={audio_name}, Text={text_name}"
104
- )
105
 
106
- return True, f"Recording saved successfully as {audio_name}"
 
 
 
 
107
  except Exception as e:
108
- error_msg = f"Error saving recording: {str(e)}"
109
- self.log_operation(error_msg)
110
- return False, error_msg
111
-
112
- def save_transcription(self, file_path: Path, text: str, metadata: dict):
113
- """Save transcription with metadata"""
114
- content = f"""[METADATA]
115
- Recording_ID: {metadata['audio_file']}
116
- Speaker_ID: {metadata['speaker_id']}
117
- Dataset_Name: {metadata['dataset_name']}
118
- Timestamp: {metadata['timestamp']}
119
-
120
- [TEXT]
121
- {text}
122
- """
123
- with open(file_path, 'w', encoding='utf-8') as f:
124
- f.write(content)
125
 
126
- def update_metadata(self, speaker_id: str, dataset_name: str):
127
- """Update dataset metadata file"""
128
- metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
 
129
 
130
  try:
131
- if metadata_file.exists():
132
- with open(metadata_file, 'r') as f:
133
- metadata = json.load(f)
134
- else:
135
- metadata = {'speakers': {}, 'last_updated': None}
136
 
137
- # Update speaker data
138
- if speaker_id not in metadata['speakers']:
139
- metadata['speakers'][speaker_id] = {
140
- 'total_recordings': 0,
141
- 'datasets': {}
142
- }
143
 
144
- if dataset_name not in metadata['speakers'][speaker_id]['datasets']:
145
- metadata['speakers'][speaker_id]['datasets'][dataset_name] = {
146
- 'recordings': 0,
147
- 'sentences': len(self.sentences),
148
- 'first_recording': datetime.now().isoformat(),
149
- 'last_recording': None
150
- }
151
 
152
- # Update counts and timestamps
153
- metadata['speakers'][speaker_id]['total_recordings'] += 1
154
- metadata['speakers'][speaker_id]['datasets'][dataset_name]['recordings'] += 1
155
- metadata['speakers'][speaker_id]['datasets'][dataset_name]['last_recording'] = \
156
- datetime.now().isoformat()
157
- metadata['last_updated'] = datetime.now().isoformat()
158
 
159
- # Save updated metadata
160
- with open(metadata_file, 'w') as f:
161
- json.dump(metadata, f, indent=2)
 
 
 
162
 
163
- self.log_operation(f"Updated metadata for {speaker_id} in {dataset_name}")
164
 
 
 
 
 
165
  except Exception as e:
166
- error_msg = f"Error updating metadata: {str(e)}"
167
- self.log_operation(error_msg)
168
- print(error_msg)
 
 
 
169
 
170
  def create_interface():
171
- """Create Gradio interface for TTS data collection"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  collector = TTSDatasetCollector()
174
 
175
- with gr.Blocks(title="TTS Dataset Collection Tool") as interface:
176
  gr.Markdown("# TTS Dataset Collection Tool")
177
 
178
  with gr.Row():
@@ -184,35 +192,38 @@ def create_interface():
184
  )
185
  speaker_id = gr.Textbox(
186
  label="Speaker ID",
187
- placeholder="Enter unique speaker identifier"
188
  )
189
  dataset_name = gr.Textbox(
190
  label="Dataset Name",
191
- placeholder="Enter dataset name"
 
 
 
 
 
192
  )
193
 
194
  # Right column - Recording
195
  with gr.Column():
196
- current_text = gr.Textbox(
197
- label="Current Sentence",
198
- interactive=False
199
  )
200
  audio_recorder = gr.Audio(
201
  label="Record Audio",
202
  type="filepath"
203
  )
204
- next_text = gr.Textbox(
205
- label="Next Sentence",
206
- interactive=False
207
  )
208
 
209
  # Controls
210
  with gr.Row():
211
- prev_btn = gr.Button("Previous")
212
- next_btn = gr.Button("Next")
213
  save_btn = gr.Button("Save Recording", variant="primary")
214
 
215
- # Status
216
  with gr.Row():
217
  progress = gr.Textbox(
218
  label="Progress",
@@ -220,9 +231,10 @@ def create_interface():
220
  )
221
  status = gr.Textbox(
222
  label="Status",
223
- interactive=False
 
224
  )
225
-
226
  # Dataset Info
227
  with gr.Row():
228
  dataset_info = gr.JSON(
@@ -230,118 +242,56 @@ def create_interface():
230
  value={}
231
  )
232
 
233
- def update_dataset_info():
234
- """Update dataset statistics display"""
235
- try:
236
- metadata_file = collector.root_path / 'metadata' / 'dataset_info.json'
237
- if metadata_file.exists():
238
- with open(metadata_file, 'r') as f:
239
- return json.load(f)
240
- return {}
241
- except Exception:
242
- return {}
243
 
244
- # Event handlers
245
  def load_file(file):
 
246
  if not file:
247
  return {
248
  current_text: "",
249
  next_text: "",
250
  progress: "",
251
- status: "No file selected",
252
- dataset_info: update_dataset_info()
253
  }
254
-
255
  success, msg = collector.load_text_file(file)
256
  if not success:
257
  return {
258
  current_text: "",
259
  next_text: "",
260
  progress: "",
261
- status: msg,
262
  dataset_info: update_dataset_info()
263
  }
264
 
265
  return {
266
- current_text: collector.sentences[0],
267
- next_text: collector.sentences[1] if len(collector.sentences) > 1 else "",
268
- progress: f"Sentence 1 of {len(collector.sentences)}",
269
- status: msg,
270
  dataset_info: update_dataset_info()
271
  }
272
 
273
- def update_display():
274
- """Update interface display"""
275
- if not collector.sentences:
276
- return {
277
- current_text: "",
278
- next_text: "",
279
- progress: "",
280
- status: "No text loaded",
281
- dataset_info: update_dataset_info()
282
- }
283
-
284
- next_idx = collector.current_index + 1
285
- return {
286
- current_text: collector.sentences[collector.current_index],
287
- next_text: collector.sentences[next_idx] if next_idx < len(collector.sentences) else "",
288
- progress: f"Sentence {collector.current_index + 1} of {len(collector.sentences)}",
289
- status: "Ready for recording",
290
- dataset_info: update_dataset_info()
291
- }
292
-
293
- def next_sentence():
294
- """Move to next sentence"""
295
- if collector.sentences and collector.current_index < len(collector.sentences) - 1:
296
- collector.current_index += 1
297
- return update_display()
298
-
299
- def prev_sentence():
300
- """Move to previous sentence"""
301
- if collector.sentences and collector.current_index > 0:
302
- collector.current_index -= 1
303
- return update_display()
304
-
305
- def save_recording(audio, spk_id, ds_name):
306
- """Handle saving recording"""
307
- if not audio:
308
- return {status: "No audio recorded", dataset_info: update_dataset_info()}
309
- if not spk_id:
310
- return {status: "Speaker ID required", dataset_info: update_dataset_info()}
311
- if not ds_name:
312
- return {status: "Dataset name required", dataset_info: update_dataset_info()}
313
-
314
- success, msg = collector.save_recording(audio, spk_id, ds_name)
315
- return {
316
- status: msg,
317
- dataset_info: update_dataset_info()
318
- }
319
-
320
- # Connect event handlers
321
- file_input.change(
322
- load_file,
323
- inputs=[file_input],
324
- outputs=[current_text, next_text, progress, status, dataset_info]
325
- )
326
-
327
- next_btn.click(
328
- next_sentence,
329
- outputs=[current_text, next_text, progress, status, dataset_info]
330
- )
331
-
332
- prev_btn.click(
333
- prev_sentence,
334
- outputs=[current_text, next_text, progress, status, dataset_info]
335
- )
336
-
337
- save_btn.click(
338
- save_recording,
339
- inputs=[audio_recorder, speaker_id, dataset_name],
340
- outputs=[status, dataset_info]
341
- )
342
-
343
  return interface
344
 
345
  if __name__ == "__main__":
346
- interface = create_interface()
347
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Dataset Collection Tool with Font Support and Enhanced Error Handling
3
+ """
4
+
5
  import os
6
  import json
7
  import nltk
 
9
  from datetime import datetime
10
  from pathlib import Path
11
  import shutil
12
+ import logging
13
+ from typing import Dict, List, Tuple, Optional
14
+ import traceback
15
 
16
# Logging setup: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def _font(name, family, css):
    # Small builder so each font preset reads as one line below.
    return {"name": name, "family": family, "css": css}


# Display-font presets offered in the UI dropdown; keys are the dropdown
# choices, `css` is injected into the styled sentence HTML.
FONT_STYLES = {
    "english_serif": _font(
        "Times New Roman", "serif",
        "font-family: 'Times New Roman', serif;"
    ),
    "english_sans": _font(
        "Arial", "sans-serif",
        "font-family: Arial, sans-serif;"
    ),
    "nastaliq": _font(
        "Nastaliq", "Jameel Noori Nastaleeq",
        "font-family: 'Jameel Noori Nastaleeq', serif;"
    ),
    "naskh": _font(
        "Naskh", "Traditional Arabic",
        "font-family: 'Traditional Arabic', serif;"
    ),
}
46
 
47
class TTSDatasetCollector:
    """Manages TTS dataset collection and organization with enhanced features."""

    def __init__(self):
        """Initialize the collector: NLTK tokenizer, dataset paths, and state."""
        # Initialize NLTK before anything else; sentence tokenization depends on it.
        self._initialize_nltk()

        # Dataset lives next to this script so relative launches behave the same.
        self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
        self.sentences: List[str] = []        # tokenized sentences awaiting recording
        self.current_index: int = 0           # index of the sentence being recorded
        self.current_font: str = "english_serif"  # key into FONT_STYLES
        self.setup_directories()

        logger.info("TTS Dataset Collector initialized")

    def _initialize_nltk(self) -> None:
        """Download the punkt sentence tokenizer, raising a clear error on failure.

        Raises:
            RuntimeError: if the tokenizer data cannot be downloaded.
        """
        try:
            nltk.download('punkt', quiet=True)
            # NLTK >= 3.8.2 splits the tokenizer tables into 'punkt_tab';
            # fetch it too, but tolerate absence on older NLTK versions.
            try:
                nltk.download('punkt_tab', quiet=True)
            except Exception:
                pass
            logger.info("NLTK punkt tokenizer downloaded successfully")
        except Exception as e:
            logger.error(f"Failed to download NLTK data: {str(e)}")
            logger.error(traceback.format_exc())
            # Chain the original cause so the traceback stays informative.
            raise RuntimeError(
                "Failed to initialize NLTK. Please check your internet connection."
            ) from e

    def setup_directories(self) -> None:
        """Create the dataset directory tree and the operations log file.

        Raises:
            RuntimeError: if any directory or the log file cannot be created.
        """
        try:
            # parents=True so a missing intermediate path does not abort startup.
            self.root_path.mkdir(parents=True, exist_ok=True)

            for subdir in ['audio', 'transcriptions', 'metadata', 'fonts']:
                (self.root_path / subdir).mkdir(exist_ok=True)

            # Initialize the append-only operations log exactly once.
            log_file = self.root_path / 'dataset_log.txt'
            if not log_file.exists():
                with open(log_file, 'w', encoding='utf-8') as f:
                    f.write(f"Dataset collection initialized on {datetime.now().isoformat()}\n")

            logger.info("Directory structure created successfully")

        except Exception as e:
            logger.error(f"Failed to create directory structure: {str(e)}")
            logger.error(traceback.format_exc())
            raise RuntimeError("Failed to initialize directory structure") from e

    def log_operation(self, message: str, level: str = "info") -> None:
        """Append a timestamped entry to the dataset log and mirror it to the logger.

        Args:
            message: text to record.
            level: "info" (default) or "error"; controls the logger channel.
        """
        try:
            log_file = self.root_path / 'dataset_log.txt'
            timestamp = datetime.now().isoformat()

            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(f"[{timestamp}] [{level.upper()}] {message}\n")

            if level.lower() == "error":
                logger.error(message)
            else:
                logger.info(message)

        except Exception as e:
            # Logging must never crash collection; report and continue.
            logger.error(f"Failed to log operation: {str(e)}")

    def load_text_file(self, file) -> Tuple[bool, str]:
        """Load a .txt upload, tokenize it into sentences, and reset progress.

        Args:
            file: uploaded file object exposing a ``.name`` path (Gradio File).

        Returns:
            (success, message) suitable for the UI status box.
        """
        if not file:
            return False, "No file provided"

        try:
            # Case-insensitive extension check so '.TXT' uploads are accepted too.
            if not file.name.lower().endswith('.txt'):
                return False, "Only .txt files are supported"

            with open(file.name, 'r', encoding='utf-8') as f:
                text = f.read()

            # Validate text content before tokenizing.
            if not text.strip():
                return False, "File is empty"

            self.sentences = nltk.sent_tokenize(text)
            if not self.sentences:
                return False, "No valid sentences found in file"

            self.current_index = 0

            self.log_operation(
                f"Loaded text file: {file.name} with {len(self.sentences)} sentences"
            )

            return True, f"Successfully loaded {len(self.sentences)} sentences"

        except UnicodeDecodeError:
            error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
            self.log_operation(error_msg, "error")
            return False, error_msg
        except Exception as e:
            error_msg = f"Error loading file: {str(e)}"
            self.log_operation(error_msg, "error")
            logger.error(traceback.format_exc())
            return False, error_msg

    # Remaining methods (set_font, get_styled_text, save_recording, ...) go here ...
156
 
157
def create_interface():
    """Create the Gradio interface with font support and enhanced status reporting.

    Returns:
        gr.Blocks: the assembled (unlaunched) interface.
    """

    # Base CSS: widen the app container and enlarge the record button.
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
    }
    .record-button {
        font-size: 1.2em !important;
        padding: 20px !important;
    }
    """

    # Add @font-face declarations for the non-Latin fonts served from ./fonts.
    for font_style, font_info in FONT_STYLES.items():
        if font_style in ['nastaliq', 'naskh']:
            custom_css += f"""
            @font-face {{
                font-family: '{font_info["family"]}';
                src: url('fonts/{font_info["family"]}.ttf') format('truetype');
            }}
            """

    collector = TTSDatasetCollector()

    with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
        gr.Markdown("# TTS Dataset Collection Tool")

        with gr.Row():
            # Left column - Configuration
            # NOTE(review): the opening of this column is hidden in the diff
            # view; reconstructed minimally — verify against the full file.
            with gr.Column():
                file_input = gr.File(
                    label="Upload Text File (.txt)"
                )
                speaker_id = gr.Textbox(
                    label="Speaker ID",
                    placeholder="Enter unique speaker identifier (letters and numbers only)"
                )
                dataset_name = gr.Textbox(
                    label="Dataset Name",
                    placeholder="Enter dataset name (letters and numbers only)"
                )
                font_select = gr.Dropdown(
                    choices=list(FONT_STYLES.keys()),
                    value="english_serif",
                    label="Select Font Style"
                )

            # Right column - Recording
            with gr.Column():
                current_text = gr.HTML(
                    label="Current Sentence"
                )
                audio_recorder = gr.Audio(
                    label="Record Audio",
                    type="filepath"
                )
                next_text = gr.HTML(
                    label="Next Sentence"
                )

        # Controls
        with gr.Row():
            prev_btn = gr.Button("Previous", variant="secondary")
            next_btn = gr.Button("Next", variant="secondary")
            save_btn = gr.Button("Save Recording", variant="primary")

        # Status and Progress
        with gr.Row():
            progress = gr.Textbox(
                label="Progress",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )

        # Dataset Info
        with gr.Row():
            dataset_info = gr.JSON(
                label="Dataset Statistics",
                value={}
            )

        def update_dataset_info():
            """Read current dataset statistics from the metadata file.

            Reintroduced: this commit removed the helper, but load_file still
            calls it, which raises NameError at runtime. Returns {} on any
            failure so the UI never crashes over a stats read.
            """
            try:
                metadata_file = collector.root_path / 'metadata' / 'dataset_info.json'
                if metadata_file.exists():
                    with open(metadata_file, 'r') as f:
                        return json.load(f)
                return {}
            except Exception:
                return {}

        def update_font(font_style):
            """Update font and refresh display."""
            # NOTE(review): collector.set_font and update_display are expected
            # from the still-missing "remaining methods" — confirm they exist
            # before wiring this handler, otherwise it raises AttributeError.
            success, msg = collector.set_font(font_style)
            if not success:
                return {status: msg}

            return update_display()

        def load_file(file):
            """Handle file loading with enhanced error reporting."""
            if not file:
                # Refresh dataset_info here too so every branch returns the
                # same output keys (the original omitted it in this branch).
                return {
                    current_text: "",
                    next_text: "",
                    progress: "",
                    status: "⚠️ No file selected",
                    dataset_info: update_dataset_info()
                }

            success, msg = collector.load_text_file(file)
            if not success:
                return {
                    current_text: "",
                    next_text: "",
                    progress: "",
                    status: f"❌ {msg}",
                    dataset_info: update_dataset_info()
                }

            return {
                current_text: collector.get_styled_text(collector.sentences[0]),
                next_text: collector.get_styled_text(collector.sentences[1]) if len(collector.sentences) > 1 else "",
                progress: f"📖 Sentence 1 of {len(collector.sentences)}",
                status: f"✅ {msg}",
                dataset_info: update_dataset_info()
            }

        # Remaining methods and event handlers go here ...

    return interface
284
 
285
if __name__ == "__main__":
    try:
        interface = create_interface()
        # Gradio 4.x removed the launch(enable_queue=...) kwarg; queueing is
        # now enabled by calling .queue() before launch.
        interface.queue()
        interface.launch(
            server_name="0.0.0.0",  # listen on all interfaces (required on Spaces)
            server_port=7860,
            share=True
        )
    except Exception as e:
        logger.error(f"Failed to launch interface: {str(e)}")
        logger.error(traceback.format_exc())
        raise