Spaces:
Sleeping
Sleeping
speed improvements and documentation
Browse files
InferenceInterfaces/ControllableInterface.py
CHANGED
@@ -72,7 +72,7 @@ class ControllableInterface:
|
|
72 |
energy=energy,
|
73 |
durations=durations)
|
74 |
wavs.append(wav)
|
75 |
-
wav = wavs
|
76 |
else:
|
77 |
self.model.set_utterance_embedding(reference_audio)
|
78 |
|
|
|
72 |
energy=energy,
|
73 |
durations=durations)
|
74 |
wavs.append(wav)
|
75 |
+
wav = sum(wavs)/len(wavs)
|
76 |
else:
|
77 |
self.model.set_utterance_embedding(reference_audio)
|
78 |
|
Modules/GeneralLayers/Conformer.py
CHANGED
@@ -136,7 +136,7 @@ class Conformer(torch.nn.Module):
|
|
136 |
projected_lang_embs = self.language_embedding_projection(lang_embs).unsqueeze(-1).transpose(1, 2)
|
137 |
projected_lang_embs = self.language_emb_norm(projected_lang_embs)
|
138 |
proj_lang_embs_s.append(projected_lang_embs)
|
139 |
-
xs = xs + proj_lang_embs_s
|
140 |
|
141 |
xs = self.pos_enc(xs)
|
142 |
|
|
|
136 |
projected_lang_embs = self.language_embedding_projection(lang_embs).unsqueeze(-1).transpose(1, 2)
|
137 |
projected_lang_embs = self.language_emb_norm(projected_lang_embs)
|
138 |
proj_lang_embs_s.append(projected_lang_embs)
|
139 |
+
xs = xs + (sum(proj_lang_embs_s)/len(proj_lang_embs_s)) # offset phoneme representation by language specific offset
|
140 |
|
141 |
xs = self.pos_enc(xs)
|
142 |
|