thecollabagepatch committed
Commit 1a7ea3c · 1 Parent(s): d1afbc8

transparency update

Files changed (1):
  1. app.py +228 -1

app.py CHANGED
@@ -17,6 +17,206 @@ from utils import (
 from jam_worker import JamWorker, JamParams, JamChunk
 import uuid, threading
 
+import gradio as gr
+
+def create_documentation_interface():
+    """Create a Gradio interface for documentation and transparency"""
+
+    with gr.Blocks(title="MagentaRT Research API", theme=gr.themes.Soft()) as interface:
+
+        gr.Markdown("""
+        # 🎵 MagentaRT Live Music Generation Research API
+
+        **Research-only implementation for iOS app development**
+
+        This API uses Google's [MagentaRT](https://github.com/magenta/magenta-realtime) to generate
+        continuous music based on input audio loops for experimental iOS app development.
+        """)
+
+        with gr.Tabs():
+            with gr.Tab("📖 About This Research"):
+                gr.Markdown("""
+                ## What This API Does
+
+                We're exploring AI-assisted, loop-based music creation for mobile apps. WebSockets are notoriously annoying in iOS Swift apps, so this API offers an HTTP-based workflow tailored to the loop-based nature of an existing Swift app. It provides:
+
+                ### 🎹 Single Generation (`/generate`)
+                - Upload audio loop + BPM + style parameters
+                - Returns 4-8 bars of AI-generated continuation
+                - **Performance**: 4 bars in ~9s, 8 bars in ~16s (L40S GPU)
+
+                ### 🔄 Continuous Jamming (`/jam/*`)
+                - `/jam/start` - Begin continuous generation session
+                - `/jam/next` - Get next bar-aligned chunk
+                - `/jam/stop` - End session
+                - **Performance**: Real-time 8-bar chunks after warmup
+
+                ## Technical Specs
+                - **Model**: MagentaRT (800M parameter transformer)
+                - **Quality**: 48kHz stereo output
+                - **Context**: 10-second audio analysis window
+                - **Styles**: Text descriptions (e.g., "acid house, techno")
+
+                ## Research Goals
+                - Seamless AI music generation for loop-based composition
+                - Real-time parameter adjustment during generation
+                - Mobile-optimized music creation workflows
+                """)
+
+            with gr.Tab("🔧 API Documentation"):
+                gr.Markdown("""
+                ## Single Generation Example
+                ```bash
+                curl -X POST "/generate" \\
+                  -F "loop_audio=@drum_loop.wav" \\
+                  -F "bpm=120" \\
+                  -F "bars=8" \\
+                  -F "styles=acid house,techno" \\
+                  -F "guidance_weight=5.0" \\
+                  -F "temperature=1.1"
+                ```
+
+                ## Continuous Jamming Example
+                ```bash
+                # 1. Start session
+                SESSION=$(curl -X POST "/jam/start" \\
+                  -F "loop_audio=@loop.wav" \\
+                  -F "bpm=120" \\
+                  -F "bars_per_chunk=8" | jq -r .session_id)
+
+                # 2. Get chunks in real-time
+                curl "/jam/next?session_id=$SESSION"
+
+                # 3. Stop when done
+                curl -X POST "/jam/stop" \\
+                  -H "Content-Type: application/json" \\
+                  -d "{\\"session_id\\": \\"$SESSION\\"}"
+                ```
+
+                ## Key Parameters
+                - **bpm**: 60-200 (beats per minute)
+                - **bars**: 1-16 (bars to generate)
+                - **styles**: Text descriptions, comma-separated
+                - **guidance_weight**: 0.1-10.0 (style adherence)
+                - **temperature**: 0.1-2.0 (randomness)
+                - **intro_bars_to_drop**: Skip N bars from start
+
+                ## Response Format
+                ```json
+                {
+                  "audio_base64": "...",
+                  "metadata": {
+                    "bpm": 120,
+                    "bars": 8,
+                    "sample_rate": 48000,
+                    "loop_duration_seconds": 16.0
+                  }
+                }
+                ```
+                """)
+
+            with gr.Tab("📱 iOS App Integration"):
+                gr.Markdown("""
+                ## How Our iOS App Uses This API
+
+                ### User Flow
+                1. **Record/Import**: User provides drum or instrument loop
+                2. **Parameter Setup**: Set BPM, style, generation settings
+                3. **Continuous Generation**: App calls `/jam/start`
+                4. **Real-time Playback**: App fetches chunks via `/jam/next`
+                5. **Seamless Mixing**: Generated audio mixed into live stream
+
+                ### Technical Implementation
+                - **Audio Format**: 48kHz WAV for consistency
+                - **Chunk Size**: 8 bars (~16 seconds at 120 BPM)
+                - **Buffer Management**: 3-5 chunks ahead for smooth playback
+                - **Style Updates**: Real-time parameter adjustment via `/jam/update`
+
+                ### Networking Considerations
+                - **Latency**: ~2-3 seconds per chunk after warmup
+                - **Bandwidth**: ~500KB per 8-bar chunk (compressed)
+                - **Reliability**: Automatic retry with exponential backoff
+                - **Caching**: Local buffer for offline resilience
+                """)
+
+            with gr.Tab("⚖️ Licensing & Legal"):
+                gr.Markdown("""
+                ## MagentaRT Licensing
+
+                This project uses Google's MagentaRT model under:
+                - **Source Code**: Apache License 2.0
+                - **Model Weights**: Creative Commons Attribution 4.0 International
+                - **Usage Terms**: [See MagentaRT repository](https://github.com/magenta/magenta-realtime)
+
+                ### Key Requirements
+                - ✅ **Attribution**: Credit MagentaRT in derivative works
+                - ✅ **Responsible Use**: Don't infringe copyrights
+                - ✅ **No Warranties**: Use at your own risk
+                - ✅ **Patent License**: Explicit patent grants included
+
+                ## Our Implementation
+                - **Purpose**: Research and development only
+                - **Non-Commercial**: Experimental iOS app development
+                - **Open Source**: Will release implementation under Apache 2.0
+                - **Attribution**: Proper credit to the Google Research team
+
+                ### Required Attribution
+                ```
+                Generated using MagentaRT
+                Copyright 2024 Google LLC
+                Licensed under Apache 2.0 and CC-BY 4.0
+                Implementation for research purposes
+                ```
+                """)
+
+            with gr.Tab("📊 Performance & Limits"):
+                gr.Markdown("""
+                ## Current Performance (L40S 48GB)
+
+                ### ⚡ Single Generation
+                - **4 bars @ 100 BPM**: ~9 seconds
+                - **8 bars @ 100 BPM**: ~16 seconds
+                - **Memory usage**: ~40GB VRAM during generation
+
+                ### 🔄 Continuous Jamming
+                - **Warmup**: ~10-15 seconds for the first chunk
+                - **8-bar chunks @ 120 BPM**: Real-time delivery
+                - **Buffer ahead**: 3-5 chunks for smooth playback
+
+                ## Known Limitations
+
+                ### 🎵 Model Limitations (MagentaRT)
+                - **Context**: 10-second maximum memory
+                - **Training**: Primarily Western instrumental music
+                - **Vocals**: Non-lexical only, no lyric conditioning
+                - **Structure**: No long-form song arrangement
+                - **Inside Swift**: After a few rounds of continuous chunks, the Swift app works best if you restart the jam from the combined audio; this way you can end up with a real jam.
+
+                ### 🖥️ Infrastructure Limitations
+                - **Concurrency**: Single-user jam sessions only
+                - **GPU Memory**: 40GB+ VRAM required for stable operation
+                - **Latency**: 2+ second minimum for style changes
+                - **Uptime**: Research setup, no SLA guarantees
+
+                ## Resource Requirements
+                - **Minimum**: 24GB VRAM (basic operation; cannot generate new chunks fast enough for real-time jamming)
+                - **Recommended**: 48GB VRAM (stable performance)
+                - **CPU**: 8+ cores
+                - **System RAM**: 32GB+
+                - **Storage**: 50GB+ for model weights
+                """)
+
+        gr.Markdown("""
+        ---
+
+        **🔬 Research Project** | **📱 iOS Development** | **🎵 Powered by MagentaRT**
+
+        This API is part of ongoing research into AI-assisted music creation for mobile devices.
+        For technical details, see the API documentation tabs above.
+        """)
+
+    return interface
+
 jam_registry: dict[str, JamWorker] = {}
 jam_lock = threading.Lock()
 
@@ -433,4 +633,31 @@ def jam_status(session_id: str):
 
 @app.get("/health")
 def health():
-    return {"ok": True}
+    return {"ok": True}
+
+@app.get("/", response_class=Response)
+def read_root():
+    """Root endpoint that explains what this API does"""
+    html_content = """
+    <!DOCTYPE html>
+    <html>
+    <head><title>MagentaRT Research API</title></head>
+    <body style="font-family: Arial; max-width: 800px; margin: 50px auto; padding: 20px;">
+        <h1>🎵 MagentaRT Research API</h1>
+        <p><strong>Purpose:</strong> AI music generation for iOS app research using Google's MagentaRT</p>
+        <h2>Available Endpoints:</h2>
+        <ul>
+            <li><code>POST /generate</code> - Generate 4-8 bars of music</li>
+            <li><code>POST /jam/start</code> - Start continuous jamming</li>
+            <li><code>GET /jam/next</code> - Get next chunk</li>
+            <li><code>GET /jam/consume</code> - Confirm a chunk as consumed</li>
+            <li><code>POST /jam/stop</code> - End session</li>
+            <li><code>GET /docs</code> - API documentation</li>
+        </ul>
+        <p><strong>Research Only:</strong> Experimental implementation for iOS app development.</p>
+        <p><strong>Licensing:</strong> Uses MagentaRT (Apache 2.0 + CC-BY 4.0). Users are responsible for outputs.</p>
+        <p>Visit <a href="/docs">/docs</a> for detailed API documentation.</p>
+    </body>
+    </html>
+    """
+    return Response(content=html_content, media_type="text/html")
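
Note: the first hunk defines `create_documentation_interface()`, but neither hunk shows where the interface is attached to the FastAPI app; that presumably happens elsewhere in app.py. As a rough sketch only (the `/ui` mount path and the `app` variable name are assumptions, not shown in this diff), Gradio's `mount_gradio_app` is the usual way to serve a Blocks UI from an existing FastAPI app:

```python
# Sketch, not part of the commit: mount the docs UI onto the existing FastAPI app.
# Assumes `app` is the FastAPI instance defined earlier in app.py and that the
# UI is served at /ui so it does not collide with the "/" HTML route added above.
import gradio as gr

interface = create_documentation_interface()
app = gr.mount_gradio_app(app, interface, path="/ui")
```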
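
For readers trying the `/generate` endpoint outside of curl, the documented form fields and response shape translate directly into a small Python client. A hedged sketch, assuming the API is reachable at a placeholder `BASE_URL` and that `requests` is installed:

```python
# Sketch, not part of the commit: call /generate and save the returned audio.
# BASE_URL and the file names are placeholders for illustration only.
import base64
import requests

BASE_URL = "http://localhost:7860"

with open("drum_loop.wav", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/generate",
        files={"loop_audio": f},
        data={
            "bpm": 120,
            "bars": 8,
            "styles": "acid house,techno",
            "guidance_weight": 5.0,
            "temperature": 1.1,
        },
        timeout=120,
    )
resp.raise_for_status()
payload = resp.json()

# "audio_base64" and "metadata" follow the Response Format documented above.
with open("generated.wav", "wb") as out:
    out.write(base64.b64decode(payload["audio_base64"]))
print(payload["metadata"])
```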
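
The continuous-jamming flow can be exercised the same way. The sketch below mirrors the curl example (start, poll `/jam/next`, stop); the pacing loop and any field other than `session_id` are assumptions, since the chunk JSON is not shown in this diff:

```python
# Sketch, not part of the commit: drive a jam session the way the curl example does.
# BASE_URL is a placeholder; the JSON shape of a chunk is not documented in this diff.
import time
import requests

BASE_URL = "http://localhost:7860"

with open("loop.wav", "rb") as f:
    start = requests.post(
        f"{BASE_URL}/jam/start",
        files={"loop_audio": f},
        data={"bpm": 120, "bars_per_chunk": 8},
    ).json()
session_id = start["session_id"]

try:
    for _ in range(4):  # fetch a few chunks, buffering ahead as the iOS notes describe
        chunk = requests.get(
            f"{BASE_URL}/jam/next", params={"session_id": session_id}
        ).json()
        print("received chunk fields:", list(chunk))
        # A real client would also call /jam/consume here to confirm playback;
        # its exact parameters are not shown in this diff.
        time.sleep(1.0)  # crude pacing; the app aligns fetches to bar boundaries
finally:
    requests.post(f"{BASE_URL}/jam/stop", json={"session_id": session_id})
```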