Spaces:

JacobLinCool
/

tc5-exp

Running on Zero

App Files Files Community

JacobLinCool committed on Jun 6

Commit

db8b2d5

1 Parent(s): 812b01c

Add offset parameter to TJA writing functions and update inference methods for TC5, TC6, and TC7

Browse files

Files changed (4) hide show

app.py +54 -16
tc5/infer.py +2 -2
tc6/infer.py +2 -2
tc7/infer.py +2 -2

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import torch
 from tc5.config import SAMPLE_RATE, HOP_LENGTH
@@ -30,7 +31,7 @@ tc7.eval()
 synthesizer = Client("ryanlinjui/taiko-music-generator")
-def infer_tc5(audio, nps, bpm):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
@@ -58,7 +59,7 @@ def infer_tc5(audio, nps, bpm):
         output_frame_hop_sec,
     )
     # Generate TJA content
-    tja_content = tc5infer.write_tja(onsets, bpm=bpm, audio=filename)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
@@ -70,7 +71,7 @@ def infer_tc5(audio, nps, bpm):
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
-        param_4=5,
         param_5=5,
         param_6=5,
         param_7=5,
@@ -90,7 +91,7 @@ def infer_tc5(audio, nps, bpm):
     return oni_audio, plot, tja_content
-def infer_tc6(audio, nps, bpm, difficulty, level):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
@@ -121,7 +122,7 @@ def infer_tc6(audio, nps, bpm, difficulty, level):
         output_frame_hop_sec,
     )
     # Generate TJA content
-    tja_content = tc6infer.write_tja(onsets, bpm=bpm, audio=filename)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
@@ -133,7 +134,7 @@ def infer_tc6(audio, nps, bpm, difficulty, level):
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
-        param_4=5,
         param_5=5,
         param_6=5,
         param_7=5,
@@ -153,7 +154,7 @@ def infer_tc6(audio, nps, bpm, difficulty, level):
     return oni_audio, plot, tja_content
-def infer_tc7(audio, nps, bpm, difficulty, level):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
@@ -184,7 +185,7 @@ def infer_tc7(audio, nps, bpm, difficulty, level):
         output_frame_hop_sec,
     )
     # Generate TJA content
-    tja_content = tc7infer.write_tja(onsets, bpm=bpm, audio=filename)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
@@ -196,7 +197,7 @@ def infer_tc7(audio, nps, bpm, difficulty, level):
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
-        param_4=5,
         param_5=5,
         param_6=5,
         param_7=5,
@@ -216,17 +217,38 @@ def infer_tc7(audio, nps, bpm, difficulty, level):
     return oni_audio, plot, tja_content
-def run_inference(audio, model_choice, nps, bpm, difficulty, level):
     if model_choice == "TC5":
-        return infer_tc5(audio, nps, bpm)
     elif model_choice == "TC6":
-        return infer_tc6(audio, nps, bpm, difficulty, level)
     else:  # TC7
-        return infer_tc7(audio, nps, bpm, difficulty, level)
 with gr.Blocks() as demo:
-    gr.Markdown("# Taiko Conformer 5/7 Demo")
     with gr.Row():
         audio_input = gr.Audio(sources="upload", type="filepath", label="Input Audio")
@@ -253,6 +275,14 @@ with gr.Blocks() as demo:
             step=1,
             label="BPM (Used by TJA Quantization)",
         )
     with gr.Row():
         difficulty = gr.Slider(
@@ -274,10 +304,18 @@ with gr.Blocks() as demo:
             info="Difficulty level from 1 to 10",
         )
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     plot_output = gr.Plot(label="Onset/Energy Plot")
     tja_output = gr.Textbox(label="TJA File Content", show_copy_button=True)
-    run_btn = gr.Button("Run Inference")
     # Update visibility of TC7-specific controls based on model selection
     def update_visibility(model_choice):
@@ -292,7 +330,7 @@ with gr.Blocks() as demo:
     run_btn.click(
         run_inference,
-        inputs=[audio_input, model_choice, nps, bpm, difficulty, level],
         outputs=[audio_output, plot_output, tja_output],
     )

+import spaces
 import gradio as gr
 import torch
 from tc5.config import SAMPLE_RATE, HOP_LENGTH
 synthesizer = Client("ryanlinjui/taiko-music-generator")
+def infer_tc5(audio, nps, bpm, offset):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
         output_frame_hop_sec,
     )
     # Generate TJA content
+    tja_content = tc5infer.write_tja(onsets, bpm=bpm, audio=filename, offset=offset)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
+        param_4=7,
         param_5=5,
         param_6=5,
         param_7=5,
     return oni_audio, plot, tja_content
+def infer_tc6(audio, nps, bpm, offset, difficulty, level):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
         output_frame_hop_sec,
     )
     # Generate TJA content
+    tja_content = tc6infer.write_tja(onsets, bpm=bpm, audio=filename, offset=offset)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
+        param_4=7,
         param_5=5,
         param_6=5,
         param_7=5,
     return oni_audio, plot, tja_content
+def infer_tc7(audio, nps, bpm, offset, difficulty, level):
     audio_path = audio
     filename = audio_path.split("/")[-1]
     # Preprocess
         output_frame_hop_sec,
     )
     # Generate TJA content
+    tja_content = tc7infer.write_tja(onsets, bpm=bpm, audio=filename, offset=offset)
     # write TJA content to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tja") as temp_tja_file:
         param_1=handle_file(audio_path),
         param_2="達人譜面 / Master",
         param_3=16,
+        param_4=7,
         param_5=5,
         param_6=5,
         param_7=5,
     return oni_audio, plot, tja_content
+@spaces.GPU
+def run_inference_gpu(audio, model_choice, nps, bpm, offset, difficulty, level):
     if model_choice == "TC5":
+        return infer_tc5(audio, nps, bpm, offset)
     elif model_choice == "TC6":
+        return infer_tc6(audio, nps, bpm, offset, difficulty, level)
     else:  # TC7
+        return infer_tc7(audio, nps, bpm, offset, difficulty, level)
+def run_inference_cpu(audio, model_choice, nps, bpm, offset, difficulty, level):
+    if model_choice == "TC5":
+        return infer_tc5(audio, nps, bpm, offset)
+    elif model_choice == "TC6":
+        return infer_tc6(audio, nps, bpm, offset, difficulty, level)
+    else:  # TC7
+        return infer_tc7(audio, nps, bpm, offset, difficulty, level)
+def run_inference(with_gpu, audio, model_choice, nps, bpm, offset, difficulty, level):
+    if with_gpu:
+        return run_inference_gpu(
+            audio, model_choice, nps, bpm, offset, difficulty, level
+        )
+    else:
+        return run_inference_cpu(
+            audio, model_choice, nps, bpm, offset, difficulty, level
+        )
 with gr.Blocks() as demo:
+    gr.Markdown("# Taiko Conformer 5/6/7 Demo")
     with gr.Row():
         audio_input = gr.Audio(sources="upload", type="filepath", label="Input Audio")
             step=1,
             label="BPM (Used by TJA Quantization)",
         )
+        offset = gr.Slider(
+            value=0.0,
+            minimum=-5.0,
+            maximum=5.0,
+            step=0.01,
+            label="Offset (in seconds)",
+            info="Adjust the offset for TJA",
+        )
     with gr.Row():
         difficulty = gr.Slider(
             info="Difficulty level from 1 to 10",
         )
+    with gr.Row():
+        with_gpu = gr.Checkbox(
+            value=True,
+            label="Use GPU for Inference",
+            info="Enable this to use GPU for faster inference (if available)",
+        )
+    run_btn = gr.Button("Run Inference", variant="primary")
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     plot_output = gr.Plot(label="Onset/Energy Plot")
     tja_output = gr.Textbox(label="TJA File Content", show_copy_button=True)
     # Update visibility of TC7-specific controls based on model selection
     def update_visibility(model_choice):
     run_btn.click(
         run_inference,
+        # Gradio binds `inputs` to the handler's parameters positionally, and
+        # run_inference's first parameter is `with_gpu`; the checkbox must
+        # therefore lead the list or every argument shifts by one and `level`
+        # goes missing.
+        inputs=[with_gpu, audio_input, model_choice, nps, bpm, offset, difficulty, level],
         outputs=[audio_output, plot_output, tja_output],
     )

tc5/infer.py CHANGED Viewed

@@ -258,7 +258,7 @@ def plot_results(
         return fig
-def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
@@ -336,7 +336,7 @@ def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     tja_content.append(f"TITLE:{audio} (TC5, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
-    tja_content.append("OFFSET:0")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):

         return fig
+def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav", offset=0):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
     tja_content.append(f"TITLE:{audio} (TC5, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
+    tja_content.append(f"OFFSET:{offset}")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):

tc6/infer.py CHANGED Viewed

@@ -257,7 +257,7 @@ def plot_results(
         return fig
-def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
@@ -334,7 +334,7 @@ def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     tja_content.append(f"TITLE:{audio} (TC6, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
-    tja_content.append("OFFSET:0")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):

         return fig
+def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav", offset=0):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
     tja_content.append(f"TITLE:{audio} (TC6, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
+    tja_content.append(f"OFFSET:{offset}")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):

tc7/infer.py CHANGED Viewed

@@ -257,7 +257,7 @@ def plot_results(
         return fig
-def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
@@ -334,7 +334,7 @@ def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav"):
     tja_content.append(f"TITLE:{audio} (TC7, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
-    tja_content.append("OFFSET:0")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):

         return fig
+def write_tja(onsets, out_path=None, bpm=160, quantize=96, audio="audio.wav", offset=0):
     # TJA types: 0:no note, 1:Don, 2:Ka, 3:BigDon, 4:BigKa, 5:DrumrollStart, 8:DrumrollEnd
     # Model output types: 1:Don, 2:Ka, 5:Drumroll (interpreted as start/single)
     sec_per_beat = 60 / bpm
     tja_content.append(f"TITLE:{audio} (TC7, {time.strftime('%Y-%m-%d %H:%M:%S')})")
     tja_content.append(f"BPM:{bpm}")
     tja_content.append(f"WAVE:{audio}")
+    tja_content.append(f"OFFSET:{offset}")
     tja_content.append("COURSE:Oni\nLEVEL:9\n")
     tja_content.append("#START")
     for i in range(max_measure_idx + 1):