Commit 1a7ea3c
Parent(s): d1afbc8
transparency update
app.py
CHANGED
@@ -17,6 +17,206 @@ from utils import (
 from jam_worker import JamWorker, JamParams, JamChunk
 import uuid, threading

+import gradio as gr
+
+def create_documentation_interface():
+    """Create a Gradio interface for documentation and transparency"""
+
+    with gr.Blocks(title="MagentaRT Research API", theme=gr.themes.Soft()) as interface:
+
+        gr.Markdown("""
+        # 🎵 MagentaRT Live Music Generation Research API
+
+        **Research-only implementation for iOS app development**
+
+        This API uses Google's [MagentaRT](https://github.com/magenta/magenta-realtime) to generate
+        continuous music based on input audio loops for experimental iOS app development.
+        """)
+
+        with gr.Tabs():
+            with gr.Tab("About This Research"):
+                gr.Markdown("""
+                ## What This API Does
+
+                We're exploring AI-assisted, loop-based music creation for mobile apps. WebSockets are awkward to manage in iOS Swift apps, so this API offers an HTTP workflow tailored to the loop-based structure of an existing Swift app. It provides:
+
+                ### 🎹 Single Generation (`/generate`)
+                - Upload audio loop + BPM + style parameters
+                - Returns 4-8 bars of AI-generated continuation
+                - **Performance**: 4 bars in ~9s, 8 bars in ~16s (L40S GPU)
+
+                ### Continuous Jamming (`/jam/*`)
+                - `/jam/start` - Begin continuous generation session
+                - `/jam/next` - Get next bar-aligned chunk
+                - `/jam/stop` - End session
+                - **Performance**: Real-time 8-bar chunks after warmup
+
+                ## Technical Specs
+                - **Model**: MagentaRT (800M parameter transformer)
+                - **Quality**: 48kHz stereo output
+                - **Context**: 10-second audio analysis window
+                - **Styles**: Text descriptions (e.g., "acid house, techno")
+
+                ## Research Goals
+                - Seamless AI music generation for loop-based composition
+                - Real-time parameter adjustment during generation
+                - Mobile-optimized music creation workflows
+                """)
+
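For reference alongside the curl examples in the next tab, a single `/generate` call from Python might look like the sketch below. The base URL, output filename, and use of `requests` are assumptions; the parameter names and the `audio_base64` response field follow the documentation in this interface.

```python
# Minimal sketch of one /generate request (not part of the commit).
import base64
import requests

BASE_URL = "http://localhost:8000"  # assumption: wherever app.py is served

def generate_once(loop_path: str, bpm: int = 120, bars: int = 8) -> bytes:
    """POST a loop and return the decoded WAV bytes from audio_base64."""
    with open(loop_path, "rb") as f:
        resp = requests.post(
            f"{BASE_URL}/generate",
            files={"loop_audio": f},
            data={
                "bpm": bpm,
                "bars": bars,
                "styles": "acid house,techno",
                "guidance_weight": 5.0,
                "temperature": 1.1,
            },
            timeout=120,
        )
    resp.raise_for_status()
    return base64.b64decode(resp.json()["audio_base64"])

if __name__ == "__main__":
    audio = generate_once("drum_loop.wav")
    with open("generated.wav", "wb") as out:
        out.write(audio)
```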
+            with gr.Tab("🔧 API Documentation"):
+                gr.Markdown("""
+                ## Single Generation Example
+                ```bash
+                curl -X POST "/generate" \\
+                  -F "loop_audio=@drum_loop.wav" \\
+                  -F "bpm=120" \\
+                  -F "bars=8" \\
+                  -F "styles=acid house,techno" \\
+                  -F "guidance_weight=5.0" \\
+                  -F "temperature=1.1"
+                ```
+
+                ## Continuous Jamming Example
+                ```bash
+                # 1. Start session
+                SESSION=$(curl -X POST "/jam/start" \\
+                  -F "loop_audio=@loop.wav" \\
+                  -F "bpm=120" \\
+                  -F "bars_per_chunk=8" | jq -r .session_id)
+
+                # 2. Get chunks in real-time
+                curl "/jam/next?session_id=$SESSION"
+
+                # 3. Stop when done
+                curl -X POST "/jam/stop" \\
+                  -H "Content-Type: application/json" \\
+                  -d "{\\"session_id\\": \\"$SESSION\\"}"
+                ```
+
+                ## Key Parameters
+                - **bpm**: 60-200 (beats per minute)
+                - **bars**: 1-16 (bars to generate)
+                - **styles**: Text descriptions, comma-separated
+                - **guidance_weight**: 0.1-10.0 (style adherence)
+                - **temperature**: 0.1-2.0 (randomness)
+                - **intro_bars_to_drop**: Skip N bars from start
+
+                ## Response Format
+                ```json
+                {
+                  "audio_base64": "...",
+                  "metadata": {
+                    "bpm": 120,
+                    "bars": 8,
+                    "sample_rate": 48000,
+                    "loop_duration_seconds": 16.0
+                  }
+                }
+                ```
+                """)
+
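As a rough Python counterpart to the curl walkthrough above, the jam flow could be driven like this. It is a sketch only: the base URL, pacing, and error handling are assumptions, and it assumes `/jam/next` returns the same `audio_base64` field documented for `/generate`.

```python
# Sketch of the /jam/start -> /jam/next -> /jam/stop flow (illustrative, not part of the commit).
import base64
import time
import requests

BASE_URL = "http://localhost:8000"  # assumption

def jam(loop_path: str, chunks_to_fetch: int = 4) -> None:
    # Start a session with the loop and basic parameters.
    with open(loop_path, "rb") as f:
        start = requests.post(
            f"{BASE_URL}/jam/start",
            files={"loop_audio": f},
            data={"bpm": 120, "bars_per_chunk": 8},
            timeout=120,
        )
    start.raise_for_status()
    session_id = start.json()["session_id"]

    try:
        # Fetch a few bar-aligned chunks and write them to disk.
        for i in range(chunks_to_fetch):
            chunk = requests.get(
                f"{BASE_URL}/jam/next",
                params={"session_id": session_id},
                timeout=120,
            )
            chunk.raise_for_status()
            audio = base64.b64decode(chunk.json()["audio_base64"])
            with open(f"chunk_{i:03d}.wav", "wb") as out:
                out.write(audio)
            time.sleep(1)  # assumption: simple pacing between polls
    finally:
        # Always end the session.
        requests.post(f"{BASE_URL}/jam/stop", json={"session_id": session_id}, timeout=30)
```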
+            with gr.Tab("📱 iOS App Integration"):
+                gr.Markdown("""
+                ## How Our iOS App Uses This API
+
+                ### User Flow
+                1. **Record/Import**: User provides drum or instrument loop
+                2. **Parameter Setup**: Set BPM, style, generation settings
+                3. **Continuous Generation**: App calls `/jam/start`
+                4. **Real-time Playback**: App fetches chunks via `/jam/next`
+                5. **Seamless Mixing**: Generated audio mixed into live stream
+
+                ### Technical Implementation
+                - **Audio Format**: 48kHz WAV for consistency
+                - **Chunk Size**: 8 bars (~16 seconds at 120 BPM)
+                - **Buffer Management**: 3-5 chunks ahead for smooth playback
+                - **Style Updates**: Real-time parameter adjustment via `/jam/update`
+
+                ### Networking Considerations
+                - **Latency**: ~2-3 seconds per chunk after warmup
+                - **Bandwidth**: ~500KB per 8-bar chunk (compressed)
+                - **Reliability**: Automatic retry with exponential backoff
+                - **Caching**: Local buffer for offline resilience
+                """)
+
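The buffering and retry behaviour described in that tab lives on the client. A simplified Python sketch of "fetch ahead with exponential backoff" is shown below; the buffer depth, backoff constants, and `fetch_chunk` callable are illustrative assumptions, not the app's actual Swift code.

```python
# Illustrative buffer-ahead loop with exponential backoff (not the iOS implementation).
import time
from collections import deque
from typing import Callable

def fill_buffer(fetch_chunk: Callable[[], bytes],
                target_depth: int = 4,   # "3-5 chunks ahead" per the notes above
                max_retries: int = 5) -> deque:
    """Keep fetching chunks until the buffer holds target_depth of them."""
    buffer: deque[bytes] = deque()
    while len(buffer) < target_depth:
        delay = 1.0
        for attempt in range(max_retries):
            try:
                buffer.append(fetch_chunk())
                break
            except Exception:
                if attempt == max_retries - 1:
                    raise
                time.sleep(delay)
                delay *= 2  # exponential backoff before retrying
    return buffer
```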
+            with gr.Tab("⚖️ Licensing & Legal"):
+                gr.Markdown("""
+                ## MagentaRT Licensing
+
+                This project uses Google's MagentaRT model under:
+                - **Source Code**: Apache License 2.0
+                - **Model Weights**: Creative Commons Attribution 4.0 International
+                - **Usage Terms**: [See MagentaRT repository](https://github.com/magenta/magenta-realtime)
+
+                ### Key Requirements
+                - ✅ **Attribution**: Credit MagentaRT in derivative works
+                - ✅ **Responsible Use**: Don't infringe copyrights
+                - ✅ **No Warranties**: Use at your own risk
+                - ✅ **Patent License**: Explicit patent grants included
+
+                ## Our Implementation
+                - **Purpose**: Research and development only
+                - **Non-Commercial**: Experimental iOS app development
+                - **Open Source**: Will release implementation under Apache 2.0
+                - **Attribution**: Proper credit to Google Research team
+
+                ### Required Attribution
+                ```
+                Generated using MagentaRT
+                Copyright 2024 Google LLC
+                Licensed under Apache 2.0 and CC-BY 4.0
+                Implementation for research purposes
+                ```
+                """)
+
+            with gr.Tab("Performance & Limits"):
+                gr.Markdown("""
+                ## Current Performance (L40S 48GB)
+
+                ### ⚡ Single Generation
+                - **4 bars @ 100 BPM**: ~9 seconds
+                - **8 bars @ 100 BPM**: ~16 seconds
+                - **Memory usage**: ~40GB VRAM during generation
+
+                ### Continuous Jamming
+                - **Warmup**: ~10-15 seconds first chunk
+                - **8-bar chunks @ 120 BPM**: Real-time delivery
+                - **Buffer ahead**: 3-5 chunks for smooth playback
+
+                ## Known Limitations
+
+                ### 🎵 Model Limitations (MagentaRT)
+                - **Context**: 10-second maximum memory
+                - **Training**: Primarily Western instrumental music
+                - **Vocals**: Non-lexical only, no lyric conditioning
+                - **Structure**: No long-form song arrangement
+                - **Inside Swift**: After a few consecutive chunks, the Swift app works best if you restart the jam from the combined audio so far; restarting this way keeps the result closer to a real jam.
+
+                ### 🖥️ Infrastructure Limitations
+                - **Concurrency**: Single user jam sessions only
+                - **GPU Memory**: 40GB+ VRAM required for stable operation
+                - **Latency**: 2+ second minimum for style changes
+                - **Uptime**: Research setup, no SLA guarantees
+
+                ## Resource Requirements
+                - **Minimum**: 24GB VRAM (basic operation; too slow to deliver new chunks in real time)
+                - **Recommended**: 48GB VRAM (stable performance)
+                - **CPU**: 8+ cores
+                - **System RAM**: 32GB+
+                - **Storage**: 50GB+ for model weights
+                """)
+
+        gr.Markdown("""
+        ---
+
+        **🔬 Research Project** | **📱 iOS Development** | **🎵 Powered by MagentaRT**
+
+        This API is part of ongoing research into AI-assisted music creation for mobile devices.
+        For technical details, see the API documentation tabs above.
+        """)
+
+    return interface
+
 jam_registry: dict[str, JamWorker] = {}
 jam_lock = threading.Lock()

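This hunk defines `create_documentation_interface()` but does not show where the interface is mounted on the FastAPI app. With Gradio that is typically done via `gr.mount_gradio_app`, roughly as sketched below; the `/ui` path and the placement are assumptions, and the actual wiring may live elsewhere in app.py.

```python
# Sketch only: mounting the documentation UI onto the existing FastAPI `app`.
# Assumes the surrounding app.py context (app, create_documentation_interface).
import gradio as gr

interface = create_documentation_interface()
app = gr.mount_gradio_app(app, interface, path="/ui")  # path is an assumption
```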
@@ -433,4 +633,31 @@ def jam_status(session_id: str):

 @app.get("/health")
 def health():
-    return {"ok": True}
+    return {"ok": True}
+
+@app.get("/", response_class=Response)
+def read_root():
+    """Root endpoint that explains what this API does"""
+    html_content = """
+    <!DOCTYPE html>
+    <html>
+    <head><title>MagentaRT Research API</title></head>
+    <body style="font-family: Arial; max-width: 800px; margin: 50px auto; padding: 20px;">
+        <h1>🎵 MagentaRT Research API</h1>
+        <p><strong>Purpose:</strong> AI music generation for iOS app research using Google's MagentaRT</p>
+        <h2>Available Endpoints:</h2>
+        <ul>
+            <li><code>POST /generate</code> - Generate 4-8 bars of music</li>
+            <li><code>POST /jam/start</code> - Start continuous jamming</li>
+            <li><code>GET /jam/next</code> - Get next chunk</li>
+            <li><code>GET /jam/consume</code> - Confirm a chunk as consumed</li>
+            <li><code>POST /jam/stop</code> - End session</li>
+            <li><code>GET /docs</code> - API documentation</li>
+        </ul>
+        <p><strong>Research Only:</strong> Experimental implementation for iOS app development.</p>
+        <p><strong>Licensing:</strong> Uses MagentaRT (Apache 2.0 + CC-BY 4.0). Users are responsible for outputs.</p>
+        <p>Visit <a href="/docs">/docs</a> for detailed API documentation.</p>
+    </body>
+    </html>
+    """
+    return Response(content=html_content, media_type="text/html")
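One detail not visible in this hunk: `read_root` uses `Response` both as the `response_class` and as its return type, so app.py presumably already imports it from FastAPI. If it does not, the route would need an import along these lines:

```python
from fastapi import Response  # assumed import; not shown in this diff
```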