jempf committed on
Commit
fd30b9c
·
1 Parent(s): cdeb51f

updated the UI

Browse files
Files changed (1) hide show
  1. app.py +48 -30
app.py CHANGED
@@ -89,45 +89,63 @@ class VPInterface:
89
 
90
  model = VPInterface()
91
 
 
92
  article = """
93
- This demo allows you to anonymize your input speech by defining the single models for ASR, anonymization and TTS. If
94
- you want to know more about each model, please read the paper linked above. Every time you click the *submit* button,
95
- you should receive a new voice.
96
 
97
- Note that for *pool* anonymization in this demo, we are using a different scaling approach (
98
- sklearn.preprocessing.StandardScaler instead of sklearn.preprocessing.MinMaxScaler) because we are processing only
99
- one sample at a time and would otherwise always end up with the same voice.
100
 
101
- This demo is still work in progress, so please be lenient with possible low quality and errors. Also, be aware that
102
- this Huggingface space runs on CPU which makes the demo quite slow.
103
 
104
- For more information about this system, visit our Github page: [https://github.com/DigitalPhonetics/speaker-anonymization](https://github.com/DigitalPhonetics/speaker-anonymization)
 
 
 
 
 
105
  """
106
 
107
  description = """
108
- ## Test demo corresponding to the models in our paper [Speaker Anonymization with Phonetic Intermediate Representations](https://arxiv.org/abs/2207.04834)
 
109
  """
110
 
111
  css = """
112
- .gr-button-primary {background-color: green !important, border-color: green}
 
 
 
113
  """
114
 
115
- iface = gr.Interface(fn=model.read,
116
- inputs=[gr.inputs.Audio(source='microphone', type='numpy', label='Say a sentence in English.'),
117
- gr.inputs.Dropdown(['phones', 'STT', 'TTS'], type='value', default='phones',
118
- label='ASR model'),
119
- gr.inputs.Dropdown(['pool', 'random', 'pool raw'], type='value', default='pool',
120
- label='Anonymization'),
121
- gr.inputs.Dropdown(['Libri100', 'Libri100 + finetuned', 'Libri600',
122
- 'Libri600 + finetuned'], type='value', default='Libri100',
123
- label='TTS model')
124
- ],
125
- outputs=gr.outputs.Audio(type='numpy', label=None),
126
- layout='vertical',
127
- title='IMS Speaker Anonymization',
128
- description=description,
129
- theme='default',
130
- allow_flagging='never',
131
- article=article,
132
- allow_screenshot=False)
133
- iface.launch(enable_queue=True)
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  model = VPInterface()
91
 
92
+ # ── UI copy & look-and-feel for “anamedi Ghost” ────────────────────────────────
93
  article = """
94
+ **anamedi Ghost** lets you **anonymize _and_ pseudonymize** short speech samples in a single click.
95
+ Under the hood the demo chains three micro-models:
 
96
 
97
+ * **ASR engine** → turns speech into text
98
+ * **Voice-privacy module** → scrambles the speaker embedding (pool/random/pool-raw)
99
+ * **TTS synthesizer** → renders the same words with a surrogate voice
100
 
101
+ Every time you hit **Submit** you’ll get a new, privacy-safe version of your input.
 
102
 
103
+ > **Heads-up**
104
+ > • This Space runs on CPU, so inference can take ~20 s.
105
+ > • The “pool” setting uses an ad-hoc scaler (`StandardScaler`) because we process just one sample at a time; in production Ghost uses a batch-optimised `MinMaxScaler`.
106
+ > • Quality is still work-in-progress—please report glitches!
107
+
108
+ 🔗 Source & docs: <https://github.com/anamedi/ghost>
109
  """
110
 
111
  description = """
112
+ ## anamedi Ghost – Voice Privacy Demo (v0.1)
113
+ Try it out: record a short English sentence, pick your privacy mode, then listen to the anonymized result.
114
  """
115
 
116
  css = """
117
+ .gr-button-primary {
118
+ background-color: #00b7c2 !important; /* anamedi teal */
119
+ border-color: #00b7c2 !important;
120
+ }
121
  """
122
 
123
+ iface = gr.Interface(
124
+ fn=model.read,
125
+ inputs=[
126
+ gr.inputs.Audio(source="microphone", type="numpy",
127
+ label="🎙️ Speak a sentence (English)"),
128
+ gr.inputs.Dropdown(
129
+ ["phones", "STT", "TTS"],
130
+ type="value", default="phones",
131
+ label="ASR engine"),
132
+ gr.inputs.Dropdown(
133
+ ["pool", "random", "pool raw"],
134
+ type="value", default="pool",
135
+ label="Privacy mode"),
136
+ gr.inputs.Dropdown(
137
+ ["Libri100", "Libri100 + finetuned", "Libri600",
138
+ "Libri600 + finetuned"],
139
+ type="value", default="Libri100",
140
+ label="TTS voice")
141
+ ],
142
+ outputs=gr.outputs.Audio(type="numpy", label="🔊 Anonymized audio"),
143
+ layout="vertical",
144
+ title="anamedi Ghost – Voice Privacy Demo",
145
+ description=description,
146
+ theme="default",
147
+ allow_flagging="never",
148
+ article=article,
149
+ allow_screenshot=False
150
+ )
151
+ iface.launch(enable_queue=True)