Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
b8368df
1
Parent(s):
a692a02
update
Browse files
- cosyvoice/cli/cosyvoice.py +2 -16
- cosyvoice/cli/frontend.py +12 -2
cosyvoice/cli/cosyvoice.py
CHANGED
@@ -63,13 +63,7 @@ class CosyVoice:
|
|
63 |
|
64 |
@spaces.GPU
|
65 |
def reload_frontend(self):
|
66 |
-
self.frontend = CosyVoiceFrontEnd(self.configs['get_tokenizer'],
|
67 |
-
self.configs['feat_extractor'],
|
68 |
-
'{}/campplus.onnx'.format(self.model_dir),
|
69 |
-
'{}/speech_tokenizer_v1.onnx'.format(self.model_dir),
|
70 |
-
'{}/spk2info.pt'.format(self.model_dir),
|
71 |
-
self.instruct,
|
72 |
-
self.configs['allowed_special'])
|
73 |
|
74 |
@spaces.GPU
|
75 |
def inference_sft(self, tts_text, spk_id, stream=False, speed=1.0):
|
@@ -198,12 +192,4 @@ class CosyVoice2(CosyVoice):
|
|
198 |
self.model.load_trt('{}/flow.decoder.estimator.fp16.l20.plan'.format(model_dir))
|
199 |
del configs
|
200 |
|
201 |
-
|
202 |
-
def reload_frontend(self):
|
203 |
-
self.frontend = CosyVoiceFrontEnd(self.configs['get_tokenizer'],
|
204 |
-
self.configs['feat_extractor'],
|
205 |
-
'{}/campplus.onnx'.format(self.model_dir),
|
206 |
-
'{}/speech_tokenizer_v2.onnx'.format(self.model_dir),
|
207 |
-
'{}/spk2info.pt'.format(self.model_dir),
|
208 |
-
self.instruct,
|
209 |
-
self.configs['allowed_special'])
|
|
|
63 |
|
64 |
@spaces.GPU
|
65 |
def reload_frontend(self):
|
66 |
+
self.frontend.reload_onnx()
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
@spaces.GPU
|
69 |
def inference_sft(self, tts_text, spk_id, stream=False, speed=1.0):
|
|
|
192 |
self.model.load_trt('{}/flow.decoder.estimator.fp16.l20.plan'.format(model_dir))
|
193 |
del configs
|
194 |
|
195 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cosyvoice/cli/frontend.py
CHANGED
@@ -51,11 +51,16 @@ class CosyVoiceFrontEnd:
|
|
51 |
option = onnxruntime.SessionOptions()
|
52 |
option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
|
53 |
option.intra_op_num_threads = 1
|
|
|
|
|
|
|
54 |
self.campplus_session = onnxruntime.InferenceSession(campplus_model, sess_options=option, providers=["CPUExecutionProvider"])
|
55 |
print("load campplus model from {}".format(campplus_model))
|
|
|
|
|
|
|
56 |
self.speech_tokenizer_session = onnxruntime.InferenceSession(speech_tokenizer_model, sess_options=option,
|
57 |
-
providers=["CUDAExecutionProvider" if torch.cuda.is_available() else
|
58 |
-
"CPUExecutionProvider"])
|
59 |
print("load speech-tokenizer model from {}".format(speech_tokenizer_model))
|
60 |
if os.path.exists(spk2info):
|
61 |
self.spk2info = torch.load(spk2info, map_location=self.device)
|
@@ -75,6 +80,11 @@ class CosyVoiceFrontEnd:
|
|
75 |
self.zh_tn_model = ZhNormalizer(remove_erhua=False, full_to_half=False)
|
76 |
self.en_tn_model = EnNormalizer()
|
77 |
|
|
|
|
|
|
|
|
|
|
|
78 |
def _extract_text_token(self, text):
|
79 |
text_token = self.tokenizer.encode(text, allowed_special=self.allowed_special)
|
80 |
text_token = torch.tensor([text_token], dtype=torch.int32).to(self.device)
|
|
|
51 |
option = onnxruntime.SessionOptions()
|
52 |
option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
|
53 |
option.intra_op_num_threads = 1
|
54 |
+
self.campplus_model = campplus_model
|
55 |
+
self.option = option
|
56 |
+
self.speech_tokenizer_model = speech_tokenizer_model
|
57 |
self.campplus_session = onnxruntime.InferenceSession(campplus_model, sess_options=option, providers=["CPUExecutionProvider"])
|
58 |
print("load campplus model from {}".format(campplus_model))
|
59 |
+
# self.speech_tokenizer_session = onnxruntime.InferenceSession(speech_tokenizer_model, sess_options=option,
|
60 |
+
# providers=["CUDAExecutionProvider" if torch.cuda.is_available() else
|
61 |
+
# "CPUExecutionProvider"])
|
62 |
self.speech_tokenizer_session = onnxruntime.InferenceSession(speech_tokenizer_model, sess_options=option,
|
63 |
+
providers=["CPUExecutionProvider"])
|
|
|
64 |
print("load speech-tokenizer model from {}".format(speech_tokenizer_model))
|
65 |
if os.path.exists(spk2info):
|
66 |
self.spk2info = torch.load(spk2info, map_location=self.device)
|
|
|
80 |
self.zh_tn_model = ZhNormalizer(remove_erhua=False, full_to_half=False)
|
81 |
self.en_tn_model = EnNormalizer()
|
82 |
|
83 |
+
def reload_onnx(self):
|
84 |
+
self.campplus_session = onnxruntime.InferenceSession(self.campplus_model, sess_options=self.option, providers=["CPUExecutionProvider"])
|
85 |
+
self.speech_tokenizer_session = onnxruntime.InferenceSession(self.speech_tokenizer_model, sess_options=self.option,
|
86 |
+
providers=["CPUExecutionProvider"])
|
87 |
+
|
88 |
def _extract_text_token(self, text):
|
89 |
text_token = self.tokenizer.encode(text, allowed_special=self.allowed_special)
|
90 |
text_token = torch.tensor([text_token], dtype=torch.int32).to(self.device)
|