|
|
|
import threading, time, base64, io, uuid |
|
from dataclasses import dataclass, field |
|
import numpy as np |
|
import soundfile as sf |
|
from magenta_rt import audio as au |
|
from threading import RLock |
|
from utils import ( |
|
match_loudness_to_reference, stitch_generated, hard_trim_seconds, |
|
apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail, |
|
resample_and_snap, wav_bytes_base64 |
|
) |
|
|
|
@dataclass |
|
class JamParams: |
|
bpm: float |
|
beats_per_bar: int |
|
bars_per_chunk: int |
|
target_sr: int |
|
loudness_mode: str = "auto" |
|
headroom_db: float = 1.0 |
|
style_vec: np.ndarray | None = None |
|
ref_loop: any = None |
|
combined_loop: any = None |
|
guidance_weight: float = 1.1 |
|
temperature: float = 1.1 |
|
topk: int = 40 |
|
|
|
@dataclass
class JamChunk:
    """A single emitted slice of audio together with its description."""

    # Sequence number of the chunk within the session.
    index: int
    # Encoded audio payload as a base64 string.
    audio_base64: str
    # Free-form description of the payload (tempo, sample rate, ...).
    metadata: dict
|
|
|
class JamWorker(threading.Thread): |
|
def __init__(self, mrt, params: JamParams): |
|
super().__init__(daemon=True) |
|
self.mrt = mrt |
|
self.params = params |
|
self.state = mrt.init_state() |
|
|
|
|
|
self._lock = threading.Lock() |
|
self._original_context_tokens = None |
|
|
|
if params.combined_loop is not None: |
|
self._setup_context_from_combined_loop() |
|
|
|
self.idx = 0 |
|
self.outbox: list[JamChunk] = [] |
|
self._stop_event = threading.Event() |
|
|
|
self._stream = None |
|
self._next_emit_start = 0 |
|
|
|
|
|
self._last_delivered_index = 0 |
|
self._max_buffer_ahead = 5 |
|
|
|
|
|
self.last_chunk_started_at = None |
|
self.last_chunk_completed_at = None |
|
|
|
self._pending_reseed = None |
|
self._needs_bar_realign = False |
|
self._reseed_ref_loop = None |
|
|
|
|
|
def _setup_context_from_combined_loop(self):
    """Derive the model's context tokens from ``params.combined_loop``.

    Best effort: any exception is caught and reported with ``print`` so that
    construction of the worker never fails because of the loop.
    """
    try:
        from utils import make_bar_aligned_context, take_bar_aligned_tail

        codec = self.mrt.codec
        config = self.mrt.config
        session = self.params

        # Context window length expressed in seconds of audio.
        fps = float(codec.frame_rate)
        window_seconds = float(config.context_length_frames) / fps

        # Tail of the loop, then its codec tokens limited to the decoder depth.
        loop_tail = take_bar_aligned_tail(
            session.combined_loop, session.bpm, session.beats_per_bar, window_seconds
        )
        encoded = codec.encode(loop_tail).astype(np.int32)
        decoder_tokens = encoded[:, :config.decoder_codec_rvq_depth]

        context_tokens = make_bar_aligned_context(
            decoder_tokens,
            bpm=session.bpm,
            fps=float(codec.frame_rate),
            ctx_frames=config.context_length_frames,
            beats_per_bar=session.beats_per_bar,
        )

        self.state.context_tokens = context_tokens
        print(f"✅ JamWorker: Set up fresh context from combined loop")

        # Remember the very first context (only once) for later splicing.
        with self._lock:
            if getattr(self, "_original_context_tokens", None) is None:
                self._original_context_tokens = np.copy(context_tokens)

    except Exception as e:
        print(f"❌ Failed to setup context from combined loop: {e}")
|
|
|
def stop(self): |
|
self._stop_event.set() |
|
|
|
def update_knobs(self, *, guidance_weight=None, temperature=None, topk=None): |
|
with self._lock: |
|
if guidance_weight is not None: self.params.guidance_weight = float(guidance_weight) |
|
if temperature is not None: self.params.temperature = float(temperature) |
|
if topk is not None: self.params.topk = int(topk) |
|
|
|
def get_next_chunk(self) -> JamChunk | None: |
|
"""Get the next sequential chunk (blocks/waits if not ready)""" |
|
target_index = self._last_delivered_index + 1 |
|
|
|
|
|
max_wait = 30.0 |
|
start_time = time.time() |
|
|
|
while time.time() - start_time < max_wait and not self._stop_event.is_set(): |
|
with self._lock: |
|
|
|
for chunk in self.outbox: |
|
if chunk.index == target_index: |
|
self._last_delivered_index = target_index |
|
print(f"📦 Delivered chunk {target_index}") |
|
return chunk |
|
|
|
|
|
time.sleep(0.1) |
|
|
|
|
|
return None |
|
|
|
def mark_chunk_consumed(self, chunk_index: int): |
|
"""Mark a chunk as consumed by the frontend""" |
|
with self._lock: |
|
self._last_delivered_index = max(self._last_delivered_index, chunk_index) |
|
print(f"✅ Chunk {chunk_index} consumed") |
|
|
|
def _should_generate_next_chunk(self) -> bool: |
|
"""Check if we should generate the next chunk (don't get too far ahead)""" |
|
with self._lock: |
|
|
|
if self.idx > self._last_delivered_index + self._max_buffer_ahead: |
|
return False |
|
return True |
|
|
|
def _seconds_per_bar(self) -> float: |
|
return self.params.beats_per_bar * (60.0 / self.params.bpm) |
|
|
|
def _snap_and_encode(self, y, seconds, target_sr, bars):
    """Resample a waveform to ``target_sr``, encode it, and describe it.

    Args:
        y: Object exposing ``samples`` (1-D or 2-D array).
        seconds: Duration handed to ``resample_and_snap``.
        target_sr: Sample rate of the encoded audio.
        bars: Number of bars the audio represents (reported in the metadata).

    Returns:
        ``(b64, meta)``: the base64 payload and a metadata dictionary.
    """
    # Promote a 1-D signal to a single-column 2-D array.
    samples = y.samples
    if samples.ndim != 2:
        samples = samples[:, None]

    model_sr = int(self.mrt.sample_rate)
    snapped = resample_and_snap(
        samples, cur_sr=model_sr, target_sr=target_sr, seconds=seconds
    )
    b64, total_samples, channels = wav_bytes_base64(snapped, target_sr)

    session = self.params
    bar_seconds = self._seconds_per_bar()
    meta = dict(
        bpm=int(round(session.bpm)),
        bars=int(bars),
        beats_per_bar=int(session.beats_per_bar),
        sample_rate=int(target_sr),
        channels=channels,
        total_samples=total_samples,
        seconds_per_bar=bar_seconds,
        loop_duration_seconds=bars * bar_seconds,
        guidance_weight=session.guidance_weight,
        temperature=session.temperature,
        topk=session.topk,
    )
    return b64, meta
|
|
|
def _append_model_chunk_to_stream(self, wav): |
|
"""Incrementally append a model chunk with equal-power crossfade.""" |
|
xfade_s = float(self.mrt.config.crossfade_length) |
|
sr = int(self.mrt.sample_rate) |
|
xfade_n = int(round(xfade_s * sr)) |
|
|
|
s = wav.samples if wav.samples.ndim == 2 else wav.samples[:, None] |
|
|
|
if getattr(self, "_stream", None) is None: |
|
|
|
if s.shape[0] > xfade_n: |
|
self._stream = s[xfade_n:].astype(np.float32, copy=True) |
|
else: |
|
self._stream = np.zeros((0, s.shape[1]), dtype=np.float32) |
|
self._next_emit_start = 0 |
|
return |
|
|
|
|
|
if s.shape[0] <= xfade_n or self._stream.shape[0] < xfade_n: |
|
|
|
self._stream = np.concatenate([self._stream, s], axis=0) |
|
return |
|
|
|
tail = self._stream[-xfade_n:] |
|
head = s[:xfade_n] |
|
|
|
|
|
t = np.linspace(0, np.pi/2, xfade_n, endpoint=False, dtype=np.float32)[:, None] |
|
eq_in, eq_out = np.sin(t), np.cos(t) |
|
mixed = tail * eq_out + head * eq_in |
|
|
|
self._stream = np.concatenate([self._stream[:-xfade_n], mixed, s[xfade_n:]], axis=0) |
|
|
|
def reseed_from_waveform(self, wav):
    """Replace the generation state with one seeded from ``wav``.

    A fresh model state is created, its context tokens are computed from the
    tail of ``wav`` covering the model's context window, and the stream is
    reset for the hand-off.
    """
    fresh_state = self.mrt.init_state()

    from utils import take_bar_aligned_tail

    # Context window length in seconds, derived from the codec frame rate.
    window_frames = float(self.mrt.config.context_length_frames)
    window_seconds = window_frames / float(self.mrt.codec.frame_rate)

    session = self.params
    wav_tail = take_bar_aligned_tail(
        wav, session.bpm, session.beats_per_bar, window_seconds
    )

    # Encoding + depth slice + context construction is shared with the
    # splice path, so reuse that helper instead of repeating it here.
    fresh_state.context_tokens = self._make_recent_tokens_from_wave(wav_tail)

    self.state = fresh_state
    self._prepare_stream_for_reseed_handoff()
|
|
|
def _frames_per_bar(self) -> int: |
|
|
|
fps = float(self.mrt.codec.frame_rate) |
|
sec_per_bar = (60.0 / float(self.params.bpm)) * float(self.params.beats_per_bar) |
|
return int(round(fps * sec_per_bar)) |
|
|
|
def _ctx_frames(self) -> int: |
|
|
|
return int(self.mrt.config.context_length_frames) |
|
|
|
def _make_recent_tokens_from_wave(self, wav) -> np.ndarray:
    """Encode ``wav`` and build a bar-aligned context token window from it.

    The codec output is cast to ``int32`` and limited to the decoder's RVQ
    depth before being handed to ``make_bar_aligned_context``.
    """
    from utils import make_bar_aligned_context

    codec = self.mrt.codec
    config = self.mrt.config

    encoded = codec.encode(wav).astype(np.int32)
    decoder_tokens = encoded[:, :config.decoder_codec_rvq_depth]

    return make_bar_aligned_context(
        decoder_tokens,
        bpm=self.params.bpm,
        fps=float(codec.frame_rate),
        ctx_frames=config.context_length_frames,
        beats_per_bar=self.params.beats_per_bar,
    )
|
|
|
def _bar_aligned_tail(self, tokens: np.ndarray, bars: float) -> np.ndarray: |
|
""" |
|
Take a tail slice that is an integer number of codec frames corresponding to `bars`. |
|
We round to nearest frame to stay phase-consistent with codec grid. |
|
""" |
|
frames_per_bar = self._frames_per_bar() |
|
want = max(frames_per_bar * int(round(bars)), 0) |
|
if want == 0: |
|
return tokens[:0] |
|
if tokens.shape[0] <= want: |
|
return tokens |
|
return tokens[-want:] |
|
|
|
def _splice_context(self, original_tokens: np.ndarray, recent_tokens: np.ndarray, |
|
anchor_bars: float) -> np.ndarray: |
|
import math |
|
ctx_frames = self._ctx_frames() |
|
depth = original_tokens.shape[1] |
|
frames_per_bar = self._frames_per_bar() |
|
|
|
|
|
anchor = self._bar_aligned_tail(original_tokens, math.floor(anchor_bars)) |
|
|
|
|
|
a = anchor.shape[0] |
|
remain = max(ctx_frames - a, 0) |
|
|
|
recent = recent_tokens[:0] |
|
used_recent = 0 |
|
if remain > 0: |
|
bars_fit = remain // frames_per_bar |
|
if bars_fit >= 1: |
|
want_recent_frames = int(bars_fit * frames_per_bar) |
|
used_recent = min(want_recent_frames, recent_tokens.shape[0]) |
|
recent = recent_tokens[-used_recent:] if used_recent > 0 else recent_tokens[:0] |
|
else: |
|
used_recent = min(remain, recent_tokens.shape[0]) |
|
recent = recent_tokens[-used_recent:] if used_recent > 0 else recent_tokens[:0] |
|
|
|
|
|
if anchor.size or recent.size: |
|
out = np.concatenate([anchor, recent], axis=0) |
|
else: |
|
|
|
out = recent_tokens[-ctx_frames:] |
|
|
|
|
|
if out.shape[0] > ctx_frames: |
|
out = out[-ctx_frames:] |
|
|
|
|
|
if frames_per_bar > 0: |
|
max_bar_aligned = (out.shape[0] // frames_per_bar) * frames_per_bar |
|
else: |
|
max_bar_aligned = out.shape[0] |
|
if max_bar_aligned > 0 and out.shape[0] != max_bar_aligned: |
|
out = out[-max_bar_aligned:] |
|
|
|
|
|
deficit = ctx_frames - out.shape[0] |
|
if deficit > 0: |
|
left_parts = [] |
|
|
|
|
|
if used_recent < recent_tokens.shape[0]: |
|
take = min(deficit, recent_tokens.shape[0] - used_recent) |
|
if used_recent > 0: |
|
left_parts.append(recent_tokens[-(used_recent + take) : -used_recent]) |
|
else: |
|
left_parts.append(recent_tokens[-take:]) |
|
|
|
|
|
if sum(p.shape[0] for p in left_parts) < deficit and anchor.shape[0] > 0: |
|
need = deficit - sum(p.shape[0] for p in left_parts) |
|
a_len = anchor.shape[0] |
|
avail = max(original_tokens.shape[0] - a_len, 0) |
|
take2 = min(need, avail) |
|
if take2 > 0: |
|
left_parts.append(original_tokens[-(a_len + take2) : -a_len]) |
|
|
|
|
|
have = sum(p.shape[0] for p in left_parts) |
|
if have < deficit: |
|
base = out if out.shape[0] > 0 else (recent_tokens if recent_tokens.shape[0] > 0 else original_tokens) |
|
reps = int(np.ceil((deficit - have) / max(1, base.shape[0]))) |
|
left_parts.append(np.tile(base, (reps, 1))[: (deficit - have)]) |
|
|
|
left = np.concatenate(left_parts, axis=0) |
|
out = np.concatenate([left[-deficit:], out], axis=0) |
|
|
|
|
|
if out.shape[0] > ctx_frames: |
|
out = out[-ctx_frames:] |
|
elif out.shape[0] < ctx_frames: |
|
reps = int(np.ceil(ctx_frames / max(1, out.shape[0]))) |
|
out = np.tile(out, (reps, 1))[-ctx_frames:] |
|
|
|
|
|
if out.shape[1] != depth: |
|
out = out[:, :depth] |
|
return out |
|
|
|
|
|
def _realign_emit_pointer_to_bar(self, sr_model: int): |
|
"""Advance _next_emit_start to the next bar boundary in model-sample space.""" |
|
bar_samps = int(round(self._seconds_per_bar() * sr_model)) |
|
if bar_samps <= 0: |
|
return |
|
phase = self._next_emit_start % bar_samps |
|
if phase != 0: |
|
self._next_emit_start += (bar_samps - phase) |
|
|
|
def _prepare_stream_for_reseed_handoff(self): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self._stream = None |
|
|
|
self._next_emit_start = 0 |
|
self._needs_bar_realign = True |
|
|
|
def reseed_splice(self, recent_wav, anchor_bars: float):
    """Queue a token-splice reseed for the next boundary between chunks.

    The new context is built from ``floor(anchor_bars)`` bars of the original
    context plus tokens encoded from ``recent_wav``, and stored in
    ``_pending_reseed`` for the worker loop to install after the chunk it is
    currently producing.

    Args:
        recent_wav: Audio whose tokens form the recent part of the context.
        anchor_bars: Bars of the original context to keep as an anchor.
    """
    with self._lock:
        # Capture the original context once, the first time it is needed.
        if getattr(self, "_original_context_tokens", None) is None:
            self._original_context_tokens = np.copy(self.state.context_tokens)

        recent_tokens = self._make_recent_tokens_from_wave(recent_wav)
        new_ctx = self._splice_context(
            self._original_context_tokens, recent_tokens, anchor_bars
        )

        # Hand the new context to the worker loop.
        self._pending_reseed = {"ctx": new_ctx, "ref": recent_wav}

        # Only touch the live state directly while the worker thread is not
        # running. When it is running, swapping ``state`` / ``_stream`` from
        # this thread races with generation and leaves the loop's own buffer
        # bookkeeping stale; the queued install above handles that case.
        if not self.is_alive():
            new_state = self.mrt.init_state()
            new_state.context_tokens = new_ctx
            self.state = new_state
            self._prepare_stream_for_reseed_handoff()

        # Counter of requested intro drops (incremented once per reseed).
        self._pending_drop_intro_bars = getattr(self, "_pending_drop_intro_bars", 0) + 1
|
|
|
def run(self):
    """Generate audio continuously and publish fixed-length chunks.

    Main worker loop:
      * Generate audio at the model's native rate (``sr_in``) into
        ``self._stream``.
      * Keep an input-domain emit pointer (used for bar re-alignment).
      * Feed newly generated input through one long-lived streaming
        resampler into ``self._stream_out`` (``sr_out``, 44100 when
        ``params.target_sr`` is unset); no per-chunk resampling.
      * Emit ``bars_per_chunk`` bars per chunk at ``sr_out`` using a
        fractional phase accumulator so rounding never drifts.
      * After each chunk, install a queued reseed (if any) and trim buffers.

    The loop ends when ``stop()`` is called; the resampler is flushed on exit.
    """
    from math import ceil, floor

    from utils import match_loudness_to_reference, wav_bytes_base64

    # --- timing -----------------------------------------------------------
    spb = self._seconds_per_bar()
    chunk_secs = float(self.params.bars_per_chunk) * float(spb)

    # --- sample rates / layout ---------------------------------------------
    sr_in = int(self.mrt.sample_rate)
    sr_out = int(getattr(self.params, "target_sr", 44100) or 44100)
    self.params.target_sr = sr_out
    # ``channels`` is optional on the params object; fall back to stereo
    # instead of raising AttributeError when it is absent.
    n_channels = int(getattr(self.params, "channels", None) or 2)

    xfade_seconds = float(self.mrt.config.crossfade_length)

    # --- fractional step accumulators (input and output domain) ------------
    chunk_step_in_f = chunk_secs * sr_in
    self._emit_phase = float(getattr(self, "_emit_phase", 0.0))

    chunk_step_out_f = chunk_secs * sr_out
    self._emit_phase_out = float(getattr(self, "_emit_phase_out", 0.0))
    self._next_emit_start_out = int(getattr(self, "_next_emit_start_out", 0))

    def _new_resampler():
        """Create a fresh streaming resampler, or None when rates match."""
        if sr_out == sr_in:
            return None
        # Imported lazily: only needed when the rates differ.
        from utils import StreamingResampler
        return StreamingResampler(
            in_sr=sr_in, out_sr=sr_out, channels=n_channels, quality="VHQ"
        )

    def _empty_output():
        """Return an empty output-domain buffer."""
        return np.zeros((0, n_channels), dtype=np.float32)

    self._resampler = _new_resampler()
    self._stream_out = _empty_output()

    # --- carry over state that may have been set before start() ------------
    self._stream = getattr(self, "_stream", None)
    self._next_emit_start = int(getattr(self, "_next_emit_start", 0))
    self._needs_bar_realign = bool(getattr(self, "_needs_bar_realign", True))

    # Number of input samples already pushed through the resampler.
    input_consumed = int(getattr(self, "_input_consumed", 0))

    self.idx = int(getattr(self, "idx", 0))
    self._last_delivered_index = int(getattr(self, "_last_delivered_index", 0))
    self.outbox = getattr(self, "outbox", [])

    print("🚀 JamWorker started (bar-aligned streaming, stateful resampler)…")

    def _need_input(first_chunk_extra: bool = False) -> int:
        """Input-domain samples still missing before the next slice.

        Mirrors the fractional step maths without mutating ``_emit_phase``.
        """
        total = 0 if self._stream is None else self._stream.shape[0]
        start = int(getattr(self, "_next_emit_start", 0))
        have = max(0, total - start)

        want = int(floor(chunk_step_in_f + float(getattr(self, "_emit_phase", 0.0))))
        if first_chunk_extra:
            # Two extra bars of material are requested before the first chunk.
            want += int(ceil(2.0 * spb * sr_in))

        return max(0, want - have)

    def _feed_resampler_as_needed():
        """Resample input appended since the last call into ``_stream_out``."""
        nonlocal input_consumed
        total_in = 0 if self._stream is None else self._stream.shape[0]
        if total_in <= input_consumed:
            return

        new_in = self._stream[input_consumed:total_in]
        if new_in.size == 0:
            return

        if self._resampler is not None:
            y_out = self._resampler.process(new_in, final=False)
        else:
            # Same rate: pass the samples through unchanged.
            y_out = new_in

        if y_out.size:
            if self._stream_out.size == 0:
                self._stream_out = y_out
            else:
                self._stream_out = np.vstack([self._stream_out, y_out])

        input_consumed = total_in

    def _output_have():
        """Output-domain samples available from the current emit pointer."""
        total_out = 0 if self._stream_out is None else self._stream_out.shape[0]
        return max(0, total_out - self._next_emit_start_out)

    def _compute_step_out() -> int:
        """Integer output-domain step for the next emission (non-mutating)."""
        return int(floor(chunk_step_out_f + float(getattr(self, "_emit_phase_out", 0.0))))

    def _advance_input_pointer():
        """Advance the input pointer by the integer step; carry the fraction."""
        step_total = chunk_step_in_f + self._emit_phase
        step_int = int(floor(step_total))
        self._emit_phase = float(step_total - step_int)
        self._next_emit_start += step_int

    def _advance_output_pointer():
        """Advance the output pointer by the integer step; carry the fraction."""
        step_total = chunk_step_out_f + self._emit_phase_out
        step_int = int(floor(step_total))
        self._emit_phase_out = float(step_total - step_int)
        self._next_emit_start_out += step_int

    def _trim_buffers_if_needed():
        """Drop already-emitted audio while keeping all indices consistent."""
        # Without this declaration the ``-=`` below makes the name local to
        # this helper and the first read raises UnboundLocalError.
        nonlocal input_consumed

        # Output domain: drop everything before the emit pointer once it is
        # more than three chunk steps into the buffer.
        if self._next_emit_start_out > 3 * int(chunk_step_out_f or sr_out):
            cut = int(self._next_emit_start_out)
            self._stream_out = self._stream_out[cut:]
            self._next_emit_start_out -= cut

        # Input domain: only samples that were both resampled and passed by
        # the emit pointer may go; keep 4 s once more than 8 s are droppable.
        # NOTE(review): original nesting of this section was not recoverable
        # from the source formatting; it is treated as independent of the
        # output trim above - confirm.
        if self._stream is not None:
            head_can_drop = min(input_consumed, self._next_emit_start)
            if head_can_drop > sr_in * 8:
                drop = head_can_drop - int(sr_in * 4)
                if drop > 0:
                    self._stream = self._stream[drop:]
                    self._next_emit_start -= drop
                    input_consumed -= drop

    def _generate_and_append():
        """Generate one model chunk, append it, and resample the new input."""
        # Snapshot the knobs under the lock; generation itself runs unlocked
        # so consumers and ``update_knobs`` are not blocked meanwhile.
        with self._lock:
            style_vec = self.params.style_vec
            self.mrt.guidance_weight = float(self.params.guidance_weight)
            self.mrt.temperature = float(self.params.temperature)
            self.mrt.topk = int(self.params.topk)

        wav, self.state = self.mrt.generate_chunk(state=self.state, style=style_vec)
        self._append_model_chunk_to_stream(wav)
        _feed_resampler_as_needed()

    while not self._stop_event.is_set():
        # Back-pressure: do not run too far ahead of the consumer.
        if not self._should_generate_next_chunk():
            time.sleep(0.25)
            continue

        # 1) Make sure enough model-rate input exists for the next slice.
        while (
            _need_input(first_chunk_extra=(self.idx == 0)) > 0
            and not self._stop_event.is_set()
        ):
            _generate_and_append()

        if self._stop_event.is_set():
            break

        # 2) After a reseed, snap the input pointer to a bar boundary once.
        if self._needs_bar_realign:
            self._realign_emit_pointer_to_bar(sr_in)
            self._emit_phase = 0.0
            input_consumed = max(input_consumed, self._next_emit_start)
            self._needs_bar_realign = False

        # 3) Push any outstanding input through the resampler.
        _feed_resampler_as_needed()

        # 4) Make sure a full output-domain step is available.
        step_out_int = _compute_step_out()
        while _output_have() < step_out_int and not self._stop_event.is_set():
            _feed_resampler_as_needed()
            if _output_have() < step_out_int:
                _generate_and_append()

        if self._stop_event.is_set():
            break

        start_out = int(self._next_emit_start_out)
        end_out = start_out + int(step_out_int)

        total_out = 0 if self._stream_out is None else self._stream_out.shape[0]
        if end_out > total_out:
            # Not enough output yet; retry shortly.
            time.sleep(0.01)
            continue

        y_send = self._stream_out[start_out:end_out]

        # Loudness is matched to the reference loop for the first chunk only.
        if self.idx == 0 and getattr(self.params, "ref_loop", None) is not None:
            y_send, _ = match_loudness_to_reference(
                self.params.ref_loop, y_send,
                method=getattr(self.params, "loudness_mode", "integrated"),
                headroom_db=getattr(self.params, "headroom_db", 1.0),
            )

        b64, total_samples, out_channels = wav_bytes_base64(y_send, sr_out)
        meta = {
            "bpm": float(self.params.bpm),
            "bars": int(self.params.bars_per_chunk),
            "seconds": float(chunk_secs),
            "sample_rate": int(sr_out),
            "samples": int(total_samples),
            "channels": int(out_channels),
            "xfade_seconds": float(xfade_seconds),
        }

        with self._lock:
            self.idx += 1
            self.outbox.append(JamChunk(index=self.idx, audio_base64=b64, metadata=meta))

            # Bound the outbox: forget chunks well behind the delivery cursor.
            if len(self.outbox) > 10:
                cutoff = self._last_delivered_index - 5
                self.outbox = [ch for ch in self.outbox if ch.index > cutoff]

            # Take the queued reseed atomically so a concurrent request
            # cannot be lost between the read and the reset.
            pkg = getattr(self, "_pending_reseed", None)
            self._pending_reseed = None

        if pkg is not None:
            new_state = self.mrt.init_state()
            new_state.context_tokens = pkg["ctx"]
            self.state = new_state

            # Start a new input stream for the reseeded state.
            self._stream = None
            self._next_emit_start = 0
            # Explicit None check: the reference is an audio object whose
            # truth value should not decide the fallback.
            ref = pkg.get("ref")
            self._reseed_ref_loop = ref if ref is not None else self.params.combined_loop
            self._needs_bar_realign = True

            # Reset the output domain and the resampler's internal state.
            self._stream_out = _empty_output()
            self._next_emit_start_out = 0
            self._emit_phase_out = 0.0
            input_consumed = 0
            self._resampler = _new_resampler()

            print("🔁 Reseed installed at bar boundary; will realign before next slice")

        # NOTE(review): the pointers are advanced even right after a reseed
        # reset them to 0, which skips the first chunk-length of the new
        # stream - kept as in the original; confirm this is intended.
        _advance_input_pointer()
        _advance_output_pointer()

        _trim_buffers_if_needed()

        print(f"✅ Completed chunk {self.idx}")

    # Drain whatever the resampler still holds.
    if self._resampler is not None:
        tail = self._resampler.flush()
        if tail.size:
            if self._stream_out.size == 0:
                self._stream_out = tail
            else:
                self._stream_out = np.vstack([self._stream_out, tail])

    print("🛑 JamWorker stopped")
|
|