ford442 committed on
Commit
ae140bb
·
verified ·
1 Parent(s): e7e9ea1

Update demos/musicgen_app.py

Browse files
Files changed (1) hide show
  1. demos/musicgen_app.py +73 -3
demos/musicgen_app.py CHANGED
@@ -332,7 +332,7 @@ def ui_full(launch_kwargs):
332
  inputs=[text, melody, model, decoder],
333
  outputs=[output]
334
  )
335
- gr.Markdown(
336
  """
337
  ### More details
338
 
@@ -358,7 +358,77 @@ def ui_full(launch_kwargs):
358
 
359
  We present 10 model variations:
360
  1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
361
- on text and melody inputs. **Note**, you can also use text only.
362
  2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
363
  3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
364
- 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  inputs=[text, melody, model, decoder],
333
  outputs=[output]
334
  )
335
+ gr.Markdown(
336
  """
337
  ### More details
338
 
 
358
 
359
  We present 10 model variations:
360
  1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
361
+ on text and melody inputs. **Note**, you can also use text only.
362
  2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
363
  3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
364
+ 4. facebook/musicgen-large -- a 3.3B transformer decoder conditioned on text only.
365
+ 5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
366
+ 6. facebook/musicgen-stereo-small -- a 300M transformer decoder conditioned on text only, fine tuned for stereo output.
367
+ 7. facebook/musicgen-stereo-medium -- a 1.5B transformer decoder conditioned on text only, fine tuned for stereo output.
368
+ 8. facebook/musicgen-stereo-melody -- a 1.5B transformer decoder conditioned on text and melody, fine tuned for stereo output.
369
+ 9. facebook/musicgen-stereo-large -- a 3.3B transformer decoder conditioned on text only, fine tuned for stereo output.
370
+ 10. facebook/musicgen-stereo-melody-large -- a 3.3B transformer decoder conditioned on text and melody, fine tuned for stereo output.
371
+
372
+ We also present two ways of decoding the audio tokens:
373
+ 1. Use the default GAN based compression model. It can suffer from artifacts especially
374
+ for crashes, snares etc.
375
+ 2. Use [MultiBand Diffusion](https://arxiv.org/abs/2308.02560). Should improve the audio quality,
376
+ at an extra computational cost. When this is selected, we provide both the GAN based decoded
377
+ audio, and the one obtained with MBD.
378
+
379
+ See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft/blob/main/docs/MUSICGEN.md)
380
+ for more details.
381
+ """
382
+ )
383
+
384
+ interface.queue().launch(**launch_kwargs)
385
+
386
+ # --- Main Entry Point ---
387
+ if __name__ == '__main__':
388
+ parser = argparse.ArgumentParser()
389
+ parser.add_argument(
390
+ '--listen',
391
+ type=str,
392
+ default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
393
+ help='IP to listen on for connections to Gradio',
394
+ )
395
+ parser.add_argument(
396
+ '--username', type=str, default='', help='Username for authentication'
397
+ )
398
+ parser.add_argument(
399
+ '--password', type=str, default='', help='Password for authentication'
400
+ )
401
+ parser.add_argument(
402
+ '--server_port',
403
+ type=int,
404
+ default=0,
405
+ help='Port to run the server listener on',
406
+ )
407
+ parser.add_argument(
408
+ '--inbrowser', action='store_true', help='Open in browser'
409
+ )
410
+ parser.add_argument(
411
+ '--share', action='store_true', help='Share the gradio UI'
412
+ )
413
+
414
+ args = parser.parse_args()
415
+
416
+ launch_kwargs = {}
417
+ launch_kwargs['server_name'] = args.listen
418
+
419
+ if args.username and args.password:
420
+ launch_kwargs['auth'] = (args.username, args.password)
421
+ if args.server_port:
422
+ launch_kwargs['server_port'] = args.server_port
423
+ if args.inbrowser:
424
+ launch_kwargs['inbrowser'] = args.inbrowser
425
+ if args.share:
426
+ launch_kwargs['share'] = args.share
427
+
428
+ logging.basicConfig(level=logging.INFO, stream=sys.stderr)
429
+ # Added predictor shutdown
430
+ try:
431
+ ui_full(launch_kwargs)
432
+ finally:
433
+ if _predictor is not None:
434
+ _predictor.shutdown()