speaker-anonymization-v2

Running

App Files Files Community

jempf committed on Apr 29

Commit

fd30b9c

1 Parent(s): cdeb51f

updated the UI

Browse files

Files changed (1) hide show

app.py +48 -30

app.py CHANGED Viewed

@@ -89,45 +89,63 @@ class VPInterface:
 model = VPInterface()
 article = """
-This demo allows you to anonymize your input speech by defining the single models for ASR, anonymization and TTS. If
-you want to know more about each model, please read the paper linked above. Every time you click the *submit* button,
-you should receive a new voice.
-Note that for *pool* anonymization in this demo, we are using a different scaling approach (
-sklearn.preprocessing.StandardScaler instead of sklearn.preprocessing.MinMaxScaler) because we are processing only
-one sample at a time and would otherwise always end up with the same voice.
-This demo is still work in progress, so please be lenient with possible low quality and errors. Also, be aware that
-this Huggingface space runs on CPU which makes the demo quite slow.
-For more information about this system, visit our Github page: [https://github.com/DigitalPhonetics/speaker-anonymization](https://github.com/DigitalPhonetics/speaker-anonymization)
 """
 description = """
-## Test demo corresponding to the models in our paper [Speaker Anonymization with Phonetic Intermediate Representations](https://arxiv.org/abs/2207.04834)
 """
 css = """
-.gr-button-primary {background-color: green !important, border-color: green}
 """
-iface = gr.Interface(fn=model.read,
-                     inputs=[gr.inputs.Audio(source='microphone', type='numpy', label='Say a sentence in English.'),
-                             gr.inputs.Dropdown(['phones', 'STT', 'TTS'], type='value', default='phones',
-                                                label='ASR model'),
-                             gr.inputs.Dropdown(['pool', 'random', 'pool raw'], type='value', default='pool',
-                                                label='Anonymization'),
-                             gr.inputs.Dropdown(['Libri100', 'Libri100 + finetuned', 'Libri600',
-                                                 'Libri600 + finetuned'], type='value', default='Libri100',
-                                                label='TTS model')
-                             ],
-                     outputs=gr.outputs.Audio(type='numpy', label=None),
-                     layout='vertical',
-                     title='IMS Speaker Anonymization',
-                     description=description,
-                     theme='default',
-                     allow_flagging='never',
-                     article=article,
-                     allow_screenshot=False)
-iface.launch(enable_queue=True)

 model = VPInterface()
+# ── UI copy & look-and-feel for “anamedi Ghost” ────────────────────────────────
+# Markdown for the long-form explanation; passed to gr.Interface as `article`.
 article = """
+**anamedi Ghost** lets you **anonymize _and_ pseudonymize** short speech samples in a single click.
+Under the hood the demo chains three micro-models:
+* **ASR engine** → turns speech into text
+* **Voice-privacy module** → scrambles the speaker embedding (pool/random/pool-raw)
+* **TTS synthesizer** → renders the same words with a surrogate voice
+Every time you hit **Submit** you’ll get a new, privacy-safe version of your input.
+> **Heads-up**
+> • This Space runs on CPU, so inference can take ~20 s.
+> • The “pool” setting uses an ad-hoc scaler (`StandardScaler`) because we process just one sample at a time; in production Ghost uses a batch-optimised `MinMaxScaler`.
+> • Quality is still work-in-progress—please report glitches!
+🔗 Source & docs: <https://github.com/anamedi/ghost>
 """
+# Markdown heading and short intro; passed to gr.Interface as `description`.
 description = """
+## anamedi Ghost – Voice Privacy Demo (v0.1)
+Try it out: record a short English sentence, pick your privacy mode, then listen to the anonymized result.
 """
+# Stylesheet rule targeting the `.gr-button-primary` class (teal background and border).
 css = """
+.gr-button-primary {
+    background-color: #00b7c2 !important;  /* anamedi teal */
+    border-color: #00b7c2 !important;
+}
 """
+# Build the demo UI: one microphone recording plus three dropdown choices are
+# handed to model.read, and the audio it returns is shown as the output.
+iface = gr.Interface(
+    fn=model.read,
+    inputs=[
+        gr.inputs.Audio(source="microphone", type="numpy",
+                        label="🎙️ Speak a sentence (English)"),
+        gr.inputs.Dropdown(
+            ["phones", "STT", "TTS"],
+            type="value", default="phones",
+            label="ASR engine"),
+        gr.inputs.Dropdown(
+            ["pool", "random", "pool raw"],
+            type="value", default="pool",
+            label="Privacy mode"),
+        gr.inputs.Dropdown(
+            ["Libri100", "Libri100 + finetuned", "Libri600",
+             "Libri600 + finetuned"],
+            type="value", default="Libri100",
+            label="TTS voice")
+    ],
+    outputs=gr.outputs.Audio(type="numpy", label="🔊 Anonymized audio"),
+    layout="vertical",
+    title="anamedi Ghost – Voice Privacy Demo",
+    description=description,
+    theme="default",
+    allow_flagging="never",
+    article=article,
+    # The `css` string defined above was never handed to the interface, so the
+    # primary-button colour override had no effect; pass it explicitly.
+    css=css,
+    allow_screenshot=False
+)
+# Start the app with request queueing enabled.
+iface.launch(enable_queue=True)