cristinae, ssolito committed
Commit 8cdbd94 · verified · 1 Parent(s): 9905a05

Update whisper_cs.py (#26)


- Update whisper_cs.py (a413b59f2b8b32465ca45c89d8058e998c60b2af)


Co-authored-by: Sarah Solito <ssolito@users.noreply.huggingface.co>

Files changed (1)
  1. whisper_cs.py  +11 -9
whisper_cs.py CHANGED
@@ -25,9 +25,15 @@ def clean_text(input_text):
 
 
 def split_stereo_channels(audio_path):
+    ext = os.path.splitext(audio_path)[1].lower()
+
+    if ext == ".wav":
+        audio = AudioSegment.from_wav(audio_path)
+    elif ext == ".mp3":
+        audio = AudioSegment.from_file(audio_path, format="mp3")
+    else:
+        raise ValueError(f"Unsupported file format: {audio_path}")
 
-    audio = AudioSegment.from_wav(audio_path)
-
     channels = audio.split_to_mono()
     if len(channels) != 2:
         raise ValueError(f"Audio {audio_path} does not have 2 channels.")
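Judging by the API in this hunk (`from_wav`, `from_file`, `split_to_mono`), `AudioSegment` is pydub's class. pydub's generic loader already infers the container format itself (via ffmpeg), so the per-extension branching could also collapse into a single call. Below is a minimal sketch of that alternative: the extension whitelist and both error messages come from the diff, while the pydub import and the temp-file export step are assumptions (the export lines sit outside this hunk's context, and the diff does not show which channel lands in which file):

```python
import os
from pydub import AudioSegment  # assumed import; matches the API used above

def split_stereo_channels(audio_path):
    # Same extension whitelist as the patched version.
    ext = os.path.splitext(audio_path)[1].lower()
    if ext not in (".wav", ".mp3"):
        raise ValueError(f"Unsupported file format: {audio_path}")

    # from_file() detects .wav and .mp3 on its own, so no per-format branches.
    audio = AudioSegment.from_file(audio_path)

    channels = audio.split_to_mono()
    if len(channels) != 2:
        raise ValueError(f"Audio {audio_path} does not have 2 channels.")

    # Assumed export step: generate() later reads these two temp files,
    # but the channel-to-speaker mapping is not visible in this diff.
    channels[0].export("temp_mono_speaker1.wav", format="wav")
    channels[1].export("temp_mono_speaker2.wav", format="wav")
```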
@@ -127,10 +133,8 @@ def post_process_transcription(transcription, max_repeats=2):
 
     return cleaned_transcription
 
-def post_merge_consecutive_segments(input_file, output_file): #check
-    with open(input_file, "r") as f:
-        transcription_text = f.read()
 
+def post_merge_consecutive_segments_from_text(transcription_text: str) -> str:
     segments = re.split(r'(\[SPEAKER_\d{2}\])', transcription_text)
     merged_transcription = ''
     current_speaker = None
@@ -153,8 +157,7 @@ def post_merge_consecutive_segments(input_file, output_file): #check
     if current_speaker is not None:
         merged_transcription += f'[SPEAKER_{current_speaker}] {" ".join(current_segment)}\n'
 
-    with open(output_file, "w") as f:
-        f.write(merged_transcription.strip())
+    return merged_transcription.strip()
 
 def cleanup_temp_files(*file_paths):
     for path in file_paths:
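The refactor in these two hunks turns the file-based `post_merge_consecutive_segments(input_file, output_file)` into a pure text-in/text-out function, so `generate()` can run the merge on an in-memory string (see the final hunk) instead of round-tripping through disk. If any external caller still depended on the removed signature, a thin shim could restore it; the wrapper below is hypothetical, only the two signatures come from the diff:

```python
# Hypothetical compatibility shim, not part of this commit.
def post_merge_consecutive_segments(input_file, output_file):
    with open(input_file, "r") as f:
        transcription_text = f.read()
    merged = post_merge_consecutive_segments_from_text(transcription_text)
    with open(output_file, "w") as f:
        f.write(merged)
```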
@@ -262,8 +265,6 @@ def generate(audio_path, use_v2):
         model = load_whisper_model(MODEL_PATH_2)
     split_stereo_channels(audio_path)
 
-    audio_id = os.path.splitext(os.path.basename(audio_path))[0]
-
     left_channel_path = "temp_mono_speaker2.wav"
     right_channel_path = "temp_mono_speaker1.wav"
 
@@ -309,6 +310,7 @@ def generate(audio_path, use_v2):
     clean_output = ""
     for line in aligned_text:
         clean_output += f"{line}\n"
+    clean_output = post_merge_consecutive_segments_from_text(clean_output)
     cleanup_temp_files(mono_audio_path,tmp_full_path)
 
     cleanup_temp_files(
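With the new call in place, the speaker-turn merge happens on `clean_output` directly, before the temp files are removed. Based on the split regex `(\[SPEAKER_\d{2}\])` and the `" ".join(current_segment)` flush shown above, the expected behavior is roughly the following; the input and output here are illustrative assumptions, not taken from the repo:

```python
text = "[SPEAKER_00] hola [SPEAKER_00] bon dia [SPEAKER_01] hello"
print(post_merge_consecutive_segments_from_text(text))
# Assumed result: consecutive same-speaker segments collapse into one turn.
# [SPEAKER_00] hola bon dia
# [SPEAKER_01] hello
```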
 