|
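Connect to `wss://…/ws/jam` and send a **JSON control stream**. In `rt` mode the server continuously emits ~2 s audio chunks: base64‑encoded WAV inside JSON `chunk` messages by default, or raw binary WAV frames each followed by a `chunk_meta` message when `binary_audio` is `true`. |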
|
|
|
### Start (client → server) |
|
```jsonc |
|
{ |
|
"type": "start", |
|
"mode": "rt", |
|
"binary_audio": false, // true → raw WAV bytes + separate chunk_meta |
|
"params": { |
|
"styles": "heavy metal", // or "jazz, hiphop" |
|
"style_weights": "1.0,1.0", // optional, auto‑normalized |
|
"temperature": 1.1, |
|
"topk": 40, |
|
"guidance_weight": 1.1, |
|
"pace": "realtime", // "realtime" | "asap" (default) |
|
"max_decode_frames": 50 // 50≈2.0s; try 36–45 on smaller GPUs |
|
} |
|
} |
|
``` |
|
|
|
### Server events (server → client) |
|
- `{"type":"started","mode":"rt"}` – handshake |
|
- `{"type":"chunk","audio_base64":"…","metadata":{…}}` – base64 WAV |
|
- `metadata.sample_rate` *(int)* – usually 48000 |
|
- `metadata.chunk_frames` *(int)* – e.g., 50 |
|
- `metadata.chunk_seconds` *(float)* – frames / 25.0 |
|
- `metadata.crossfade_seconds` *(float)* – typically 0.04 |
|
- `{"type":"chunk_meta","metadata":{…}}` – sent **after** a binary frame when `binary_audio=true` |
|
- `{"type":"status",…}`, `{"type":"error",…}`, `{"type":"stopped"}` |
|
|
|
### Update (client → server) |
|
```jsonc |
|
{ |
|
"type": "update", |
|
"styles": "jazz, hiphop", |
|
"style_weights": "1.0,0.8", |
|
"temperature": 1.2, |
|
"topk": 64, |
|
"guidance_weight": 1.0, |
|
"pace": "realtime", // optional live flip |
|
"max_decode_frames": 40 // optional; <= 50 |
|
} |
|
``` |
|
|
|
### Stop / ping |
|
```json |
|
{"type":"stop"} |
|
{"type":"ping"} |
|
``` |
|
|
|
### Browser quick‑start (schedules seamlessly with 25–40 ms crossfade) |
|
```html |
|
<script> |
|
// Crossfade length in seconds, applied to both edges of every chunk.
const XFADE = 0.025; // 25 ms

// Shared playback state, populated by start().
let ctx;
let gain;
let ws;
let nextTime = 0; // AudioContext time at which the next chunk should begin

/**
 * Open the jam WebSocket and play incoming base64 WAV chunks back-to-back,
 * overlapping consecutive chunks by a short linear crossfade.
 *
 * Call this from a user gesture (e.g. a click handler): browsers may keep an
 * AudioContext suspended until the page has been interacted with.
 *
 * @returns {Promise<void>} Resolves once the socket has been created and the
 *   audio context has been asked to resume.
 */
async function start() {
  ctx = new (window.AudioContext || window.webkitAudioContext)();
  gain = ctx.createGain();
  gain.connect(ctx.destination);

  // Chunks are decoded and scheduled strictly one after another. Scheduling
  // happens after an await, so without this queue a chunk that decodes faster
  // than its predecessor would be placed on the timeline out of order.
  let queue = Promise.resolve();

  /** Decode one base64 WAV chunk and schedule it at `nextTime`. */
  const playChunk = async (audioBase64) => {
    const bytes = Uint8Array.from(atob(audioBase64), (ch) => ch.charCodeAt(0));
    const audio = await ctx.decodeAudioData(bytes.buffer);

    const src = ctx.createBufferSource();
    const fadeGain = ctx.createGain();
    src.buffer = audio;
    src.connect(fadeGain);
    fadeGain.connect(gain);

    // First chunk, or playback fell behind: restart slightly in the future so
    // the chunk is not scheduled in the past.
    if (nextTime < ctx.currentTime + 0.05) {
      nextTime = ctx.currentTime + 0.12;
    }

    const startAt = nextTime;
    const dur = audio.duration;
    // Never let the fade-in and fade-out ramps overlap on very short chunks.
    const fade = Math.min(XFADE, dur / 2);
    const fadeOutAt = startAt + dur - fade;

    // The next chunk begins while this one fades out, producing the crossfade.
    nextTime = fadeOutAt;

    fadeGain.gain.setValueAtTime(0, startAt);
    fadeGain.gain.linearRampToValueAtTime(1, startAt + fade);
    fadeGain.gain.setValueAtTime(1, fadeOutAt);
    fadeGain.gain.linearRampToValueAtTime(0, startAt + dur);
    src.start(startAt);
  };

  ws = new WebSocket("wss://YOUR_SPACE/ws/jam");

  ws.onopen = () => {
    ws.send(
      JSON.stringify({
        type: "start",
        mode: "rt",
        binary_audio: false,
        params: {
          styles: "warmup",
          temperature: 1.1,
          topk: 40,
          guidance_weight: 1.1,
          pace: "realtime",
        },
      }),
    );
  };

  ws.onmessage = (ev) => {
    // Only JSON text frames are handled here; binary frames are ignored.
    if (typeof ev.data !== "string") return undefined;

    let msg;
    try {
      msg = JSON.parse(ev.data);
    } catch (err) {
      console.error("jam: ignoring malformed message", err);
      return undefined;
    }

    if (msg.type === "error") {
      console.error("jam: server error", msg);
      return undefined;
    }
    if (msg.type !== "chunk" || !msg.audio_base64) return undefined;

    // A failed chunk is logged and skipped so later chunks keep playing.
    queue = queue
      .then(() => playChunk(msg.audio_base64))
      .catch((err) => console.error("jam: failed to play chunk", err));
    return queue;
  };

  // Resume last so `ws` is assigned synchronously, as before.
  if (ctx.state === "suspended") {
    await ctx.resume();
  }
}
|
</script> |
|
``` |
|
|
|
### Python client (async) |
|
```python |
|
import asyncio, json, websockets, base64, soundfile as sf, io |
|
async def run(url):
    """Start a realtime jam session on ``url`` and print each received chunk.

    Sends the ``start`` control message, then reads server messages until the
    server reports ``stopped`` or ``error``, or closes the connection.
    """
    start_msg = {
        "type": "start",
        "mode": "rt",
        "binary_audio": False,
        "params": {
            "styles": "warmup",
            "temperature": 1.1,
            "topk": 40,
            "guidance_weight": 1.1,
            "pace": "realtime",
        },
    }
    async with websockets.connect(url) as ws:
        await ws.send(json.dumps(start_msg))
        # Iterating the connection ends cleanly when the server closes it
        # normally, instead of raising out of a bare recv() loop.
        async for raw in ws:
            if isinstance(raw, bytes):
                # Only JSON text frames are expected with binary_audio=False.
                continue
            msg = json.loads(raw)
            kind = msg.get("type")
            if kind == "chunk":
                wav = base64.b64decode(msg["audio_base64"])  # bytes of a WAV
                x, sr = sf.read(io.BytesIO(wav), dtype="float32")
                print("chunk", x.shape, sr)
            elif kind == "error":
                # Surface the server's error payload before giving up.
                print("error", msg)
                break
            elif kind == "stopped":
                break
|
asyncio.run(run("wss://YOUR_SPACE/ws/jam")) |
|
``` |