NeoPy committed on
Commit
f474942
·
verified ·
1 Parent(s): cb921f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -420
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import requests
3
  import random
4
  import os
5
- import zipfile
6
  import librosa
7
  import time
8
  from infer_rvc_python import BaseLoader
@@ -12,46 +12,18 @@ import edge_tts
12
  import tempfile
13
  from audio_separator.separator import Separator
14
  import model_handler
15
- import psutil
16
- import cpuinfo
 
17
 
18
- language_dict = tts_order_voice
 
 
19
 
20
- async def text_to_speech_edge(text, language_code):
21
- voice = language_dict[language_code]
22
- communicate = edge_tts.Communicate(text, voice)
23
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
24
- tmp_path = tmp_file.name
25
-
26
- await communicate.save(tmp_path)
27
-
28
- return tmp_path
29
-
30
- try:
31
- import spaces
32
- spaces_status = True
33
- except ImportError:
34
- spaces_status = False
35
-
36
- separator = Separator()
37
- converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
38
-
39
- global pth_file
40
- global index_file
41
-
42
- pth_file = "model.pth"
43
- index_file = "model.index"
44
-
45
- #CONFIGS
46
  TEMP_DIR = "temp"
47
  MODEL_PREFIX = "model"
48
- PITCH_ALGO_OPT = [
49
- "pm",
50
- "harvest",
51
- "crepe",
52
- "rmvpe",
53
- "rmvpe+",
54
- ]
55
  UVR_5_MODELS = [
56
  {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
57
  {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
@@ -63,130 +35,78 @@ UVR_5_MODELS = [
63
  MODELS = [
64
  {"model": "model.pth", "index": "model.index", "model_name": "Test Model"},
65
  ]
 
 
66
 
67
  os.makedirs(TEMP_DIR, exist_ok=True)
68
 
69
- def unzip_file(file):
70
- filename = os.path.basename(file).split(".")[0]
71
- with zipfile.ZipFile(file, 'r') as zip_ref:
72
- zip_ref.extractall(os.path.join(TEMP_DIR, filename))
73
- return True
74
-
75
-
76
- def progress_bar(total, current):
77
- return "[" + "=" * int(current / total * 20) + ">" + " " * (20 - int(current / total * 20)) + "] " + str(int(current / total * 100)) + "%"
78
-
79
- def contains_bad_word(text, bad_words):
80
- text_lower = text.lower()
81
- for word in bad_words:
82
- if word.lower() in text_lower:
83
- return True
84
- return False
85
 
86
- bad_words = ['puttana', 'whore', 'badword3', 'badword4']
 
87
 
88
  class BadWordError(Exception):
89
- def __init__(self, msg):
90
- super().__init__(msg)
91
- self.word = word
92
 
93
- def download_from_url(url, name=None):
94
- if name is None:
95
- raise ValueError("The model name must be provided")
96
- if "/blob/" in url:
97
- url = url.replace("/blob/", "/resolve/")
98
- if "huggingface" not in url:
99
- return ["The URL must be from huggingface", "Failed", "Failed"]
100
- if contains_bad_word(url, bad_words):
101
- return BadWordError("The file url has a bad word.")
102
- if contains_bad_word(name, bad_words):
103
- return BadWordError("The file name has a bad word.")
104
- filename = os.path.join(TEMP_DIR, MODEL_PREFIX + str(random.randint(1, 1000)) + ".zip")
105
- response = requests.get(url)
106
- total = int(response.headers.get('content-length', 0))
107
- if total > 500000000:
108
 
109
- return ["The file is too large. You can only download files up to 500 MB in size.", "Failed", "Failed"]
110
- current = 0
111
- with open(filename, "wb") as f:
112
- for data in response.iter_content(chunk_size=4096):
113
- f.write(data)
114
- current += len(data)
115
- print(progress_bar(total, current), end="\r") #
116
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
-
119
  try:
120
- unzip_file(filename)
 
121
  except Exception as e:
122
- return ["Failed to unzip the file", "Failed", "Failed"]
 
 
123
  unzipped_dir = os.path.join(TEMP_DIR, os.path.basename(filename).split(".")[0])
124
- pth_files = []
125
- index_files = []
126
- for root, dirs, files in os.walk(unzipped_dir):
127
- for file in files:
128
- if file.endswith(".pth"):
129
- pth_files.append(os.path.join(root, file))
130
- elif file.endswith(".index"):
131
- index_files.append(os.path.join(root, file))
132
 
133
- print(pth_files, index_files)
134
- global pth_file
135
- global index_file
136
  pth_file = pth_files[0]
137
  index_file = index_files[0]
138
-
139
- print(pth_file)
140
- print(index_file)
141
-
142
- if name == "":
143
- name = pth_file.split(".")[0]
144
-
145
  MODELS.append({"model": pth_file, "index": index_file, "model_name": name})
146
- return ["Downloaded as " + name, pth_files[0], index_files[0]]
147
-
148
- def inference(audio, model_name):
149
- output_data = inf_handler(audio, model_name)
150
- vocals = output_data[0]
151
- inst = output_data[1]
152
-
153
- return vocals, inst
154
-
155
- if spaces_status:
156
- @spaces.GPU()
157
- def convert_now(audio_files, random_tag, converter):
158
- return converter(
159
- audio_files,
160
- random_tag,
161
- overwrite=False,
162
- parallel_workers=8
163
- )
164
-
165
-
166
- else:
167
- def convert_now(audio_files, random_tag, converter):
168
- return converter(
169
- audio_files,
170
- random_tag,
171
- overwrite=False,
172
- parallel_workers=8
173
- )
174
-
175
- def calculate_remaining_time(epochs, seconds_per_epoch):
176
- total_seconds = epochs * seconds_per_epoch
177
-
178
- hours = total_seconds // 3600
179
- minutes = (total_seconds % 3600) // 60
180
- seconds = total_seconds % 60
181
 
182
- if hours == 0:
183
- return f"{int(minutes)} minutes"
184
- elif hours == 1:
185
- return f"{int(hours)} hour and {int(minutes)} minutes"
186
- else:
187
- return f"{int(hours)} hours and {int(minutes)} minutes"
188
-
189
- def inf_handler(audio, model_name):
190
  model_found = False
191
  for model_info in UVR_5_MODELS:
192
  if model_info["model_name"] == model_name:
@@ -196,60 +116,27 @@ def inf_handler(audio, model_name):
196
  if not model_found:
197
  separator.load_model()
198
  output_files = separator.separate(audio)
199
- vocals = output_files[0]
200
- inst = output_files[1]
201
- return vocals, inst
202
 
203
-
204
- def run(
205
- model,
206
- audio_files,
207
- pitch_alg,
208
- pitch_lvl,
209
- index_inf,
210
- r_m_f,
211
- e_r,
212
- c_b_p,
213
- ):
214
  if not audio_files:
215
- raise ValueError("The audio pls")
216
-
217
  if isinstance(audio_files, str):
218
  audio_files = [audio_files]
219
-
220
- try:
221
- duration_base = librosa.get_duration(filename=audio_files[0])
222
- print("Duration:", duration_base)
223
- except Exception as e:
224
- print(e)
225
-
226
- random_tag = "USER_"+str(random.randint(10000000, 99999999))
227
-
228
  file_m = model
229
- print("File model:", file_m)
230
-
231
- # get from MODELS
232
- for model in MODELS:
233
- if model["model_name"] == file_m:
234
- print(model)
235
- file_m = model["model"]
236
- file_index = model["index"]
237
  break
238
 
239
  if not file_m.endswith(".pth"):
240
- raise ValueError("The model file must be a .pth file")
241
-
242
-
243
- print("ILARIA RVC: mod by NeoDev 💖")
244
- print("Random tag:", random_tag)
245
- print("File model:", file_m)
246
- print("Pitch algorithm:", pitch_alg)
247
- print("Pitch level:", pitch_lvl)
248
- print("File index:", file_index)
249
- print("Index influence:", index_inf)
250
- print("Respiration median filtering:", r_m_f)
251
- print("Envelope ratio:", e_r)
252
-
253
  converter.apply_conf(
254
  tag=random_tag,
255
  file_model=file_m,
@@ -260,249 +147,109 @@ def run(
260
  respiration_median_filtering=r_m_f,
261
  envelope_ratio=e_r,
262
  consonant_breath_protection=c_b_p,
263
- resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
264
  )
265
  time.sleep(0.1)
266
-
267
  result = convert_now(audio_files, random_tag, converter)
268
- print("Result:", result)
269
-
270
  return result[0]
271
 
272
- def upload_model(index_file, pth_file, model_name):
273
- pth_file = pth_file.name
274
- index_file = index_file.name
275
- MODELS.append({"model": pth_file, "index": index_file, "model_name": model_name})
276
- return "Uploaded!"
277
-
278
-
279
 
280
- with gr.Blocks(theme=gr.themes.Base(primary_hue="pink", secondary_hue="rose"), title="Ilaria RVC 💖") as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  gr.Markdown("# Ilaria RVC 💖")
282
- gr.Markdown("**Help keeping up the GPU donating on [Ko-Fi](https://ko-fi.com/ilariaowo)**")
283
- with gr.Tab("Inference"):
284
-
285
- def update():
286
- print(MODELS)
287
- return gr.Dropdown(label="Model",choices=[model["model_name"] for model in MODELS],visible=True,interactive=True, value=MODELS[0]["model_name"],)
288
- with gr.Row(equal_height=True):
289
- models_dropdown = gr.Dropdown(label="Model",choices=[model["model_name"] for model in MODELS],visible=True,interactive=True, value=MODELS[0]["model_name"],)
290
- refresh_button = gr.Button("Refresh Models")
291
- refresh_button.click(update, outputs=[models_dropdown])
292
 
293
- sound_gui = gr.Audio(value=None,type="filepath",autoplay=False,visible=True,)
294
-
295
- with gr.Accordion("Ilaria TTS", open=False):
296
- text_tts = gr.Textbox(label="Text", placeholder="Hello!", lines=3, interactive=True,)
297
- dropdown_tts = gr.Dropdown(label="Language and Model",choices=list(language_dict.keys()),interactive=True, value=list(language_dict.keys())[0])
298
-
299
- button_tts = gr.Button("Speak", variant="primary",)
300
- button_tts.click(text_to_speech_edge, inputs=[text_tts, dropdown_tts], outputs=[sound_gui])
301
 
302
- with gr.Accordion("Settings", open=False):
303
- pitch_algo_conf = gr.Radio(choices=[PITCH_ALGO_OPT],value=PITCH_ALGO_OPT[4],label="Pitch algorithm",visible=True,interactive=True) # Dropdown is 🤡
304
- with gr.Row(equal_height=True):
305
- pitch_lvl_conf = gr.Slider(label="Pitch level (lower -> 'male' while higher -> 'female')",minimum=-24,maximum=24,step=1,value=0,visible=True,interactive=True,)
306
- index_inf_conf = gr.Slider(minimum=0,maximum=1,label="Index influence -> How much accent is applied",value=0.75,)
307
- with gr.Row(equal_height=True):
308
- respiration_filter_conf = gr.Slider(minimum=0,maximum=7,label="Respiration median filtering",value=3,step=1,interactive=True,)
309
- envelope_ratio_conf = gr.Slider(minimum=0,maximum=1,label="Envelope ratio",value=0.25,interactive=True,)
310
- consonant_protec_conf = gr.Slider(minimum=0,maximum=0.5,label="Consonant breath protection",value=0.5,interactive=True,)
311
 
312
- with gr.Row(equal_height=True):
313
- button_conf = gr.Button("Convert",variant="primary",)
314
- output_conf = gr.Audio(type="filepath",label="Output",)
315
-
316
- button_conf.click(lambda :None, None, output_conf)
317
- button_conf.click(
318
- run,
319
- inputs=[
320
- models_dropdown,
321
- sound_gui,
322
- pitch_algo_conf,
323
- pitch_lvl_conf,
324
- index_inf_conf,
325
- respiration_filter_conf,
326
- envelope_ratio_conf,
327
- consonant_protec_conf,
328
- ],
329
- outputs=[output_conf],
330
- )
331
 
 
 
 
 
 
 
 
 
 
332
 
333
- with gr.Tab("Model Loader (Download and Upload)"):
334
- with gr.Accordion("Model Downloader", open=False):
335
- gr.Markdown(
336
- "Download the model from the following URL and upload it here. (Huggingface RVC model)"
337
- )
338
- model = gr.Textbox(lines=1, label="Model URL")
339
- name = gr.Textbox(lines=1, label="Model Name", placeholder="Model Name")
340
- download_button = gr.Button("Download Model")
341
- status = gr.Textbox(lines=1, label="Status", placeholder="Waiting....", interactive=False)
342
- model_pth = gr.Textbox(lines=1, label="Model pth file", placeholder="Waiting....", interactive=False)
343
- index_pth = gr.Textbox(lines=1, label="Index pth file", placeholder="Waiting....", interactive=False)
344
- download_button.click(download_from_url, [model, name], outputs=[status, model_pth, index_pth])
345
- with gr.Accordion("Upload A Model", open=False):
 
 
 
 
346
  index_file_upload = gr.File(label="Index File (.index)")
347
  pth_file_upload = gr.File(label="Model File (.pth)")
348
-
349
- model_name = gr.Textbox(label="Model Name", placeholder="Model Name")
350
- upload_button = gr.Button("Upload Model")
351
- upload_status = gr.Textbox(lines=1, label="Status", placeholder="Waiting....", interactive=False)
352
-
353
- upload_button.click(upload_model, [index_file_upload, pth_file_upload, model_name], upload_status)
354
-
355
-
356
- with gr.Tab("Vocal Separator (UVR)"):
357
- gr.Markdown("Separate vocals and instruments from an audio file using UVR models. - This is only on CPU due to ZeroGPU being ZeroGPU :(")
358
- uvr5_audio_file = gr.Audio(label="Audio File",type="filepath")
359
-
360
  with gr.Row():
361
- uvr5_model = gr.Dropdown(label="Model", choices=[model["model_name"] for model in UVR_5_MODELS])
362
- uvr5_button = gr.Button("Separate Vocals", variant="primary",)
363
-
364
- uvr5_output_voc = gr.Audio(type="filepath", label="Output 1",)
365
- uvr5_output_inst = gr.Audio(type="filepath", label="Output 2",)
366
-
367
- uvr5_button.click(inference, [uvr5_audio_file, uvr5_model], [uvr5_output_voc, uvr5_output_inst])
368
-
369
- with gr.Tab("Extra"):
370
- with gr.Accordion("Model Information", open=False):
371
- def json_to_markdown_table(json_data):
372
- table = "| Key | Value |\n| --- | --- |\n"
373
- for key, value in json_data.items():
374
- table += f"| {key} | {value} |\n"
375
- return table
376
- def model_info(name):
377
- for model in MODELS:
378
- if model["model_name"] == name:
379
- print(model["model"])
380
- info = model_handler.model_info(model["model"])
381
- info2 = {
382
- "Model Name": model["model_name"],
383
- "Model Config": info['config'],
384
- "Epochs Trained": info['epochs'],
385
- "Sample Rate": info['sr'],
386
- "Pitch Guidance": info['f0'],
387
- "Model Precision": info['size'],
388
- }
389
- return gr.Markdown(json_to_markdown_table(info2))
390
-
391
- return "Model not found"
392
- def update():
393
- print(MODELS)
394
- return gr.Dropdown(label="Model", choices=[model["model_name"] for model in MODELS])
395
- with gr.Row():
396
- model_info_dropdown = gr.Dropdown(label="Model", choices=[model["model_name"] for model in MODELS])
397
- refresh_button = gr.Button("Refresh Models")
398
- refresh_button.click(update, outputs=[model_info_dropdown])
399
- model_info_button = gr.Button("Get Model Information")
400
- model_info_output = gr.Textbox(value="Waiting...",label="Output", interactive=False)
401
- model_info_button.click(model_info, [model_info_dropdown], [model_info_output])
402
-
403
-
404
-
405
- with gr.Accordion("Training Time Calculator", open=False):
406
- with gr.Column():
407
- epochs_input = gr.Number(label="Number of Epochs")
408
- seconds_input = gr.Number(label="Seconds per Epoch")
409
- calculate_button = gr.Button("Calculate Time Remaining")
410
- remaining_time_output = gr.Textbox(label="Remaining Time", interactive=False)
411
-
412
- calculate_button.click(calculate_remaining_time,inputs=[epochs_input, seconds_input],outputs=[remaining_time_output])
413
-
414
- with gr.Accordion("Model Fusion", open=False):
415
- with gr.Group():
416
- def merge(ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0, version_2):
417
- for model in MODELS:
418
- if model["model_name"] == ckpt_a:
419
- ckpt_a = model["model"]
420
- if model["model_name"] == ckpt_b:
421
- ckpt_b = model["model"]
422
-
423
- path = model_handler.merge(ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0, version_2)
424
- if path == "Fail to merge the models. The model architectures are not the same.":
425
- return "Fail to merge the models. The model architectures are not the same."
426
- else:
427
- MODELS.append({"model": path, "index": None, "model_name": name_to_save0})
428
- return "Merged, saved as " + name_to_save0
429
-
430
- gr.Markdown(value="Strongly suggested to use only very clean models.")
431
- with gr.Row():
432
- def update():
433
- print(MODELS)
434
- return gr.Dropdown(label="Model A", choices=[model["model_name"] for model in MODELS]), gr.Dropdown(label="Model B", choices=[model["model_name"] for model in MODELS])
435
- refresh_button_fusion = gr.Button("Refresh Models")
436
- ckpt_a = gr.Dropdown(label="Model A", choices=[model["model_name"] for model in MODELS])
437
- ckpt_b = gr.Dropdown(label="Model B", choices=[model["model_name"] for model in MODELS])
438
- refresh_button_fusion.click(update, outputs=[ckpt_a, ckpt_b])
439
- alpha_a = gr.Slider(
440
- minimum=0,
441
- maximum=1,
442
- label="Weight of the first model over the second",
443
- value=0.5,
444
- interactive=True,
445
- )
446
- with gr.Group():
447
- with gr.Row():
448
- sr_ = gr.Radio(
449
- label="Sample rate of both models",
450
- choices=["32k","40k", "48k"],
451
- value="32k",
452
- interactive=True,
453
- )
454
- if_f0_ = gr.Radio(
455
- label="Pitch Guidance",
456
- choices=["Yes", "Nah"],
457
- value="Yes",
458
- interactive=True,
459
- )
460
- info__ = gr.Textbox(
461
- label="Add informations to the model",
462
- value="",
463
- max_lines=8,
464
- interactive=True,
465
- visible=False
466
- )
467
- name_to_save0 = gr.Textbox(
468
- label="Final Model name",
469
- value="",
470
- max_lines=1,
471
- interactive=True,
472
- )
473
- version_2 = gr.Radio(
474
- label="Versions of the models",
475
- choices=["v1", "v2"],
476
- value="v2",
477
- interactive=True,
478
- )
479
- with gr.Group():
480
- with gr.Row():
481
- but6 = gr.Button("Fuse the two models", variant="primary")
482
- info4 = gr.Textbox(label="Output", value="", max_lines=8)
483
- but6.click(
484
- merge,
485
- [ckpt_a,ckpt_b,alpha_a,sr_,if_f0_,info__,name_to_save0,version_2,],info4,api_name="ckpt_merge",)
486
-
487
- with gr.Accordion("Model Quantization", open=False):
488
- gr.Markdown("Quantize the model to a lower precision. - soon™ or never™ 😎")
489
-
490
-
491
- with gr.Tab("Credits"):
492
- gr.Markdown(
493
- """
494
- Ilaria RVC made by [Ilaria](https://huggingface.co/TheStinger) suport her on [ko-fi](https://ko-fi.com/ilariaowo)
495
-
496
- The Inference code is made by [r3gm](https://huggingface.co/r3gm) (his module helped form this space 💖)
497
-
498
- made with ❤️ by [mikus](https://github.com/cappuch) - made the ui!
499
-
500
- ## In loving memory of JLabDX 🕊️
501
- """
502
- )
503
- with gr.Tab(("")):
504
- gr.Markdown('''
505
- ![ilaria](https://i.ytimg.com/vi/5PWqt2Wg-us/maxresdefault.jpg)
506
- ''')
507
 
508
- app.queue(api_open=False).launch(show_api=False)
 
2
  import requests
3
  import random
4
  import os
5
+ import zipfile
6
  import librosa
7
  import time
8
  from infer_rvc_python import BaseLoader
 
12
  import tempfile
13
  from audio_separator.separator import Separator
14
  import model_handler
15
+ import logging
16
+ import aiohttp
17
+ import asyncio
18
 
19
+ # Configure logging
20
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
21
+ logger = logging.getLogger(__name__)
22
 
23
+ # Constants
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  TEMP_DIR = "temp"
25
  MODEL_PREFIX = "model"
26
+ PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
 
 
 
 
 
 
27
  UVR_5_MODELS = [
28
  {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
29
  {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
 
35
  MODELS = [
36
  {"model": "model.pth", "index": "model.index", "model_name": "Test Model"},
37
  ]
38
+ BAD_WORDS = ['puttana', 'whore', 'badword3', 'badword4']
39
+ MAX_FILE_SIZE = 500_000_000 # 500 MB
40
 
41
  os.makedirs(TEMP_DIR, exist_ok=True)
42
 
43
# ``spaces`` is an optional dependency: remember whether it could be imported
# so the rest of the module can adapt.
try:
    import spaces
    spaces_status = True
except ImportError:
    spaces_status = False
    logger.warning("Spaces module not found; running in CPU mode")

# Module-level instances used by ``inf_handler`` (separator) and ``run`` (converter).
separator = Separator()
# CPU-only conversion is forced whenever ``spaces`` is unavailable.
converter = BaseLoader(only_cpu=not spaces_status, hubert_path=None, rmvpe_path=None)
52
 
53
class BadWordError(Exception):
    """Raised when a user-supplied URL or model name contains a restricted word."""
 
 
55
 
56
async def text_to_speech_edge(text, language_code):
    """Synthesize ``text`` to an MP3 file with edge-tts and return its path.

    Args:
        text: Text to speak; must contain at least one non-whitespace character.
        language_code: Key into ``tts_order_voice``. Unknown keys fall back to
            the first configured voice.

    Returns:
        Path of a temporary ``.mp3`` file. It is created with ``delete=False``
        so it outlives this call.

    Raises:
        ValueError: If ``text`` is ``None``/blank or no voice is configured.
    """
    if not text or not text.strip():
        raise ValueError("Text input cannot be empty")

    voice = tts_order_voice.get(language_code)
    if voice is None:
        # Only compute the fallback when it is needed, and fail with a clear
        # message instead of an IndexError when the mapping is empty.
        if not tts_order_voice:
            raise ValueError("No TTS voices are configured")
        voice = next(iter(tts_order_voice.values()))

    communicate = edge_tts.Communicate(text, voice)
    # The temp file only reserves a unique path; edge-tts writes the audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Do not leak an empty temp file when synthesis fails or is cancelled.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return tmp_path
 
 
 
 
 
 
65
 
66
async def download_from_url(url, name, progress=gr.Progress()):
    """Download a zipped RVC model from Hugging Face and register it in ``MODELS``.

    Args:
        url: ``https://huggingface.co/...`` link to a zip archive. ``/blob/``
            links are rewritten to their ``/resolve/`` form before download.
        name: Display name stored with the model; must not be blank.
        progress: Gradio progress tracker; updated only while the total size
            is known.

    Returns:
        ``[status message, path of the .pth file, path of the .index file]``.

    Raises:
        ValueError: Invalid URL or name, failed or oversized download,
            unreadable archive, or an archive lacking a ``.pth`` or ``.index``
            file.
        BadWordError: ``url`` or ``name`` contains an entry of ``BAD_WORDS``.
    """
    import shutil
    from urllib.parse import urlsplit

    url = (url or "").strip()
    name = (name or "").strip()

    # A plain prefix test would also accept hosts such as
    # "huggingface.co.example.com", so compare the parsed hostname instead.
    parts = urlsplit(url)
    if parts.scheme != "https" or parts.hostname != "huggingface.co":
        raise ValueError("URL must be from Hugging Face")
    if not name:
        raise ValueError("Model name cannot be empty")
    lowered_url, lowered_name = url.lower(), name.lower()
    if any(bad_word in lowered_url or bad_word in lowered_name for bad_word in BAD_WORDS):
        raise BadWordError("Input contains restricted words")

    size_error = f"File size exceeds {MAX_FILE_SIZE / 1_000_000} MB limit"

    # One fresh directory per download: a small random numeric suffix can
    # collide with an earlier download and pick up its stale model files.
    work_dir = tempfile.mkdtemp(prefix=MODEL_PREFIX, dir=TEMP_DIR)
    filename = os.path.join(work_dir, "model.zip")
    unzipped_dir = os.path.join(work_dir, "model")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url.replace("/blob/", "/resolve/")) as response:
                if response.status != 200:
                    raise ValueError("Failed to download file")
                # 0 means the server did not announce a size.
                total = int(response.headers.get('content-length', 0))
                if total > MAX_FILE_SIZE:
                    raise ValueError(size_error)
                current = 0
                with open(filename, "wb") as f:
                    async for data in response.content.iter_chunked(4096):
                        current += len(data)
                        # Enforce the cap on the bytes actually received; the
                        # header may be missing or wrong.
                        if current > MAX_FILE_SIZE:
                            raise ValueError(size_error)
                        f.write(data)
                        if total > 0:  # avoid dividing by zero without a size
                            progress(min(current / total, 1.0), desc="Downloading model")

        try:
            with zipfile.ZipFile(filename, 'r') as zip_ref:
                zip_ref.extractall(unzipped_dir)
        except Exception as e:
            logger.error("Failed to unzip file: %s", e)
            raise ValueError("Failed to unzip file") from e

        # Single walk collecting both kinds of file.
        pth_files, index_files = [], []
        for root, _, files in os.walk(unzipped_dir):
            for file in files:
                if file.endswith(".pth"):
                    pth_files.append(os.path.join(root, file))
                elif file.endswith(".index"):
                    index_files.append(os.path.join(root, file))

        if not pth_files or not index_files:
            raise ValueError("No .pth or .index files found in the zip")
    except BaseException:
        # Leave nothing half-downloaded or half-extracted behind on failure.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise

    pth_file = pth_files[0]
    index_file = index_files[0]
    MODELS.append({"model": pth_file, "index": index_file, "model_name": name})
    return [f"Downloaded as {name}", pth_file, index_file]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ def inf_handler(audio, model_name):
 
 
 
 
 
 
 
110
  model_found = False
111
  for model_info in UVR_5_MODELS:
112
  if model_info["model_name"] == model_name:
 
116
  if not model_found:
117
  separator.load_model()
118
  output_files = separator.separate(audio)
119
+ return output_files[0], output_files[1]
 
 
120
 
121
+ def run(model, audio_files, pitch_alg, pitch_lvl, index_inf, r_m_f, e_r, c_b_p):
 
 
 
 
 
 
 
 
 
 
122
  if not audio_files:
123
+ raise ValueError("Please upload an audio file")
 
124
  if isinstance(audio_files, str):
125
  audio_files = [audio_files]
126
+
127
+ random_tag = f"USER_{random.randint(10000000, 99999999)}"
 
 
 
 
 
 
 
128
  file_m = model
129
+ file_index = None
130
+ for m in MODELS:
131
+ if m["model_name"] == file_m:
132
+ file_m = m["model"]
133
+ file_index = m["index"]
 
 
 
134
  break
135
 
136
  if not file_m.endswith(".pth"):
137
+ raise ValueError("Model file must be a .pth file")
138
+
139
+ logger.info(f"Running inference with model: {file_m}, tag: {random_tag}")
 
 
 
 
 
 
 
 
 
 
140
  converter.apply_conf(
141
  tag=random_tag,
142
  file_model=file_m,
 
147
  respiration_median_filtering=r_m_f,
148
  envelope_ratio=e_r,
149
  consonant_breath_protection=c_b_p,
150
+ resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
151
  )
152
  time.sleep(0.1)
 
153
  result = convert_now(audio_files, random_tag, converter)
 
 
154
  return result[0]
155
 
156
def convert_now(audio_files, random_tag, converter):
    """Run ``converter`` on ``audio_files`` using the configuration tagged ``random_tag``.

    Args:
        audio_files: List of input audio paths.
        random_tag: Tag under which the conversion settings were registered
            with ``converter.apply_conf``.
        converter: Callable converter instance.

    Returns:
        Whatever ``converter`` returns; ``run`` uses its first element.
    """
    return converter(
        audio_files,
        random_tag,
        overwrite=False,
        parallel_workers=8,
    )


if spaces_status:
    # ``converter`` is created with GPU enabled whenever ``spaces`` is
    # importable, and on ZeroGPU Spaces a GPU is only attached inside
    # functions wrapped with ``spaces.GPU``. Restore the wrapper.
    convert_now = spaces.GPU()(convert_now)
163
 
164
def upload_model(index_file, pth_file, model_name):
    """Register an uploaded model/index pair in ``MODELS``.

    Args:
        index_file: Uploaded ``.index`` file, either an object exposing
            ``.name`` or a plain path string.
        pth_file: Uploaded ``.pth`` file, same accepted forms.
        model_name: Display name for the model; must not be blank.

    Returns:
        A status message for the UI.

    Raises:
        ValueError: If a file is missing or the name is ``None``/blank.
    """
    if not index_file or not pth_file:
        raise ValueError("Both index and model files are required")
    display_name = (model_name or "").strip()
    if not display_name:
        raise ValueError("Model name cannot be empty")

    # Accept an object with ``.name`` (what the original code assumed) as
    # well as a plain path string.
    pth_path = getattr(pth_file, "name", pth_file)
    index_path = getattr(index_file, "name", index_file)
    MODELS.append({"model": pth_path, "index": index_path, "model_name": display_name})
    return "Model uploaded successfully!"
171
+
172
def json_to_markdown_table(json_data):
    """Render a flat mapping as a two-column Markdown table.

    Args:
        json_data: Mapping of keys to values; both are converted with ``str``.

    Returns:
        The table as a string: a header row, a separator row, then one row
        per item, each terminated by a newline.
    """
    def _cell(value):
        # A raw "|" would start a new cell and a newline would end the row,
        # so neutralise both before embedding the text in the table.
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    rows = [f"| {_cell(key)} | {_cell(value)} |\n" for key, value in json_data.items()]
    return "| Key | Value |\n| --- | --- |\n" + "".join(rows)
177
+
178
def model_info(name):
    """Describe the registered model called ``name`` as a Markdown table.

    Looks up the first entry of ``MODELS`` whose ``model_name`` equals
    ``name``, reads its metadata through ``model_handler.model_info`` and
    formats the result with ``json_to_markdown_table``.

    Returns:
        The Markdown table, or ``"Model not found"`` when no entry matches.
    """
    entry = next((m for m in MODELS if m["model_name"] == name), None)
    if entry is None:
        return "Model not found"

    details = model_handler.model_info(entry["model"])
    summary = {
        "Model Name": entry["model_name"],
        "Model Config": details['config'],
        "Epochs Trained": details['epochs'],
        "Sample Rate": details['sr'],
        "Pitch Guidance": details['f0'],
        "Model Precision": details['size'],
    }
    return json_to_markdown_table(summary)
192
+
193
# Gradio UI: three tabs (inference, model loading, vocal separation).
# NOTE(review): the nesting below is reconstructed from a flattened view of
# the file -- confirm the layout against the repository before relying on it.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink", secondary_hue="rose"), title="Ilaria RVC 💖") as app:
    gr.Markdown("# Ilaria RVC 💖")
    gr.Markdown("Support the project by donating on [Ko-Fi](https://ko-fi.com/ilariaowo)")

    with gr.Tab("Inference"):
        with gr.Group():
            models_dropdown = gr.Dropdown(label="Select Model", choices=[m["model_name"] for m in MODELS], value=MODELS[0]["model_name"])
            refresh_button = gr.Button("Refresh Models", variant="secondary")
            # Re-read MODELS so models added via download/upload show up.
            refresh_button.click(lambda: gr.Dropdown(choices=[m["model_name"] for m in MODELS]), outputs=models_dropdown)

        # Input audio for conversion; also the target of the TTS output below.
        sound_gui = gr.Audio(label="Input Audio", type="filepath")

        with gr.Accordion("Text-to-Speech", open=False):
            text_tts = gr.Textbox(label="Text Input", placeholder="Enter text to convert to speech", lines=3)
            dropdown_tts = gr.Dropdown(label="Language and Voice", choices=list(tts_order_voice.keys()), value=list(tts_order_voice.keys())[0])
            button_tts = gr.Button("Generate Speech", variant="primary")
            # The generated speech file is written into the input audio widget.
            button_tts.click(text_to_speech_edge, inputs=[text_tts, dropdown_tts], outputs=sound_gui)

        with gr.Accordion("Conversion Settings", open=False):
            # Defaults to the last entry of PITCH_ALGO_OPT.
            pitch_algo_conf = gr.Dropdown(choices=PITCH_ALGO_OPT, value=PITCH_ALGO_OPT[4], label="Pitch Algorithm", info="Select the algorithm for pitch detection")
            with gr.Row():
                pitch_lvl_conf = gr.Slider(label="Pitch Level", minimum=-24, maximum=24, step=1, value=0, info="Adjust pitch: negative for male, positive for female")
                index_inf_conf = gr.Slider(minimum=0, maximum=1, value=0.75, label="Index Influence", info="Controls how much accent is applied")
            with gr.Row():
                respiration_filter_conf = gr.Slider(minimum=0, maximum=7, value=3, step=1, label="Respiration Median Filtering")
                envelope_ratio_conf = gr.Slider(minimum=0, maximum=1, value=0.25, label="Envelope Ratio")
                consonant_protec_conf = gr.Slider(minimum=0, maximum=0.5, value=0.5, label="Consonant Breath Protection")

        with gr.Row():
            button_conf = gr.Button("Convert Audio", variant="primary")
            output_conf = gr.Audio(type="filepath", label="Converted Audio")
        # Input order must match the positional parameters of ``run``.
        button_conf.click(run, inputs=[models_dropdown, sound_gui, pitch_algo_conf, pitch_lvl_conf, index_inf_conf, respiration_filter_conf, envelope_ratio_conf, consonant_protec_conf], outputs=output_conf)

    with gr.Tab("Model Loader"):
        with gr.Accordion("Download Model", open=False):
            gr.Markdown("Download a model from Hugging Face (RVC model, max 500 MB)")
            model_url = gr.Textbox(label="Hugging Face Model URL", placeholder="https://huggingface.co/username/model")
            model_name = gr.Textbox(label="Model Name", placeholder="Enter a unique model name")
            download_button = gr.Button("Download Model", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)
            model_pth = gr.Textbox(label="Model .pth File", interactive=False)
            index_pth = gr.Textbox(label="Index .index File", interactive=False)
            # The three outputs receive the three-element list the handler returns.
            download_button.click(download_from_url, [model_url, model_name], [status, model_pth, index_pth])

        with gr.Accordion("Upload Model", open=False):
            index_file_upload = gr.File(label="Index File (.index)")
            pth_file_upload = gr.File(label="Model File (.pth)")
            model_name_upload = gr.Textbox(label="Model Name", placeholder="Enter a unique model name")
            upload_button = gr.Button("Upload Model", variant="primary")
            upload_status = gr.Textbox(label="Status", interactive=False)
            # Input order (index, pth, name) matches ``upload_model``'s signature.
            upload_button.click(upload_model, [index_file_upload, pth_file_upload, model_name_upload], upload_status)

    with gr.Tab("Vocal Separator"):
        gr.Markdown("Separate vocals and instruments using UVR models (CPU only)")
        uvr5_audio_file = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            uvr5_model = gr.Dropdown(label="UVR Model", choices=[m["model_name"] for m in UVR_5_MODELS])
            uvr5_button = gr.Button("Separate", variant="primary")
        uvr5_output_voc = gr.Audio(label="Vocals", type="filepath")
        uvr5_output_inst = gr.Audio(label="Instrumental", type="filepath")
        # ``inf_handler`` returns two file paths, shown as vocals and instrumental.
        uvr5_button.click(inf_handler, [uvr5_audio_file, uvr5_model], [uvr5_output_voc, uvr5_output_inst])

# Enable the request queue and start the server with the API docs hidden.
app.queue(api_open=False).launch(show_api=False)