MusicGen

Paused

App Files Files Community

ford442 committed on Feb 19

Commit

ae140bb

verified ·

1 Parent(s): e7e9ea1

Update demos/musicgen_app.py

Browse files

Files changed (1) hide show

demos/musicgen_app.py +73 -3

demos/musicgen_app.py CHANGED Viewed

@@ -332,7 +332,7 @@ def ui_full(launch_kwargs):
             inputs=[text, melody, model, decoder],
             outputs=[output]
         )
-        gr.Markdown(
             """
             ### More details
@@ -358,7 +358,77 @@ def ui_full(launch_kwargs):
             We present 10 model variations:
             1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
-                on text and melody inputs. **Note**, you can also use text only.
             2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
             3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
-            4

             inputs=[text, melody, model, decoder],
             outputs=[output]
         )
+gr.Markdown(
             """
             ### More details
             We present 10 model variations:
             1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
+                on text and melody inputs.  **Note**, you can also use text only.
             2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
             3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
+            4. facebook/musicgen-large -- a 3.3B transformer decoder conditioned on text only.
+            5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
+            6. facebook/musicgen-stereo-small -- a 300M transformer decoder conditioned on text only, fine tuned for stereo output.
+            7. facebook/musicgen-stereo-medium -- a 1.5B transformer decoder conditioned on text only, fine tuned for stereo output.
+            8. facebook/musicgen-stereo-melody -- a 1.5B transformer decoder conditioned on text and melody, fine tuned for stereo output.
+            9. facebook/musicgen-stereo-large -- a 3.3B transformer decoder conditioned on text only, fine tuned for stereo output.
+           10. facebook/musicgen-stereo-melody-large -- a 3.3B transformer decoder conditioned on text and melody, fine tuned for stereo output.
+            We also present two ways of decoding the audio tokens:
+            1. Use the default GAN based compression model.  It can suffer from artifacts especially
+                for crashes, snares etc.
+            2. Use [MultiBand Diffusion](https://arxiv.org/abs/2308.02560).  Should improve the audio quality,
+                at an extra computational cost.  When this is selected, we provide both the GAN based decoded
+                audio, and the one obtained with MBD.
+            See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft/blob/main/docs/MUSICGEN.md)
+            for more details.
+            """
+        )
+        interface.queue().launch(**launch_kwargs)
+# --- Main Entry Point: turn CLI flags into launch kwargs, then run ui_full() ---
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--listen',
+        type=str,
+        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',  # all interfaces only when SPACE_ID is set
+        help='IP to listen on for connections to Gradio',
+    )
+    parser.add_argument(
+        '--username', type=str, default='', help='Username for authentication'
+    )
+    parser.add_argument(
+        '--password', type=str, default='', help='Password for authentication'
+    )
+    parser.add_argument(
+        '--server_port',
+        type=int,
+        default=0,  # 0 is falsy, so server_port is then left out of launch_kwargs below
+        help='Port to run the server listener on',
+    )
+    parser.add_argument(
+        '--inbrowser', action='store_true', help='Open in browser'
+    )
+    parser.add_argument(
+        '--share', action='store_true', help='Share the gradio UI'
+    )
+    args = parser.parse_args()
+    launch_kwargs = {}  # only options that were actually supplied are added
+    launch_kwargs['server_name'] = args.listen
+    if args.username and args.password:  # auth is set only when both are non-empty
+        launch_kwargs['auth'] = (args.username, args.password)
+    if args.server_port:
+        launch_kwargs['server_port'] = args.server_port
+    if args.inbrowser:
+        launch_kwargs['inbrowser'] = args.inbrowser
+    if args.share:
+        launch_kwargs['share'] = args.share
+    logging.basicConfig(level=logging.INFO, stream=sys.stderr)
+    # Added predictor shutdown: the finally clause runs whether ui_full() returns or raises.
+    try:
+        ui_full(launch_kwargs)
+    finally:
+        if _predictor is not None:  # _predictor is defined outside this hunk
+            _predictor.shutdown()