# Text-to-audio demo: generate a short clip from a text prompt with AudioLDM2
# and play it back inline (intended for a Jupyter/IPython session).

import torch
from diffusers import AudioLDM2Pipeline
from IPython.display import Audio

# Hugging Face Hub identifier of the pretrained checkpoint to load.
model = "cvssp/audioldm2"

# Load the pipeline with half-precision weights and move it to the GPU.
# NOTE(review): this requires a CUDA-capable device; there is no CPU fallback.
pipe = AudioLDM2Pipeline.from_pretrained(
    model,
    torch_dtype=torch.float16,
).to("cuda")

prompt = "A cheerful ukulele strumming in a beachside jam."

# Fixed seed on a CUDA generator so repeated runs use the same random stream.
generator = torch.Generator("cuda").manual_seed(0)

# Request 10.24 seconds of audio and keep the first (only) generated clip.
audio = pipe(
    prompt,
    audio_length_in_s=10.24,
    generator=generator,
).audios[0]

# Bare final expression so a notebook renders the audio player widget.
# NOTE(review): rate=16000 is assumed to match the pipeline's output sampling
# rate (16 kHz) -- confirm against the AudioLDM2 documentation.
Audio(audio, rate=16000)