kemuriririn committed
Commit ba791a8 · 1 Parent(s): 9eda15b
Files changed (1):
  1. cosyvoice/cli/cosyvoice.py +7 -0
cosyvoice/cli/cosyvoice.py CHANGED

@@ -55,10 +55,12 @@ class CosyVoice:
             self.model.load_onnx('{}/flow.decoder.estimator.fp32.onnx'.format(model_dir))
         del configs

+    @spaces.GPU
     def list_avaliable_spks(self):
         spks = list(self.frontend.spk2info.keys())
         return spks

+    @spaces.GPU
     def inference_sft(self, tts_text, spk_id, stream=False, speed=1.0):
         for i in tqdm(self.frontend.text_normalize(tts_text, split=True)):
             model_input = self.frontend.frontend_sft(i, spk_id)
@@ -70,6 +72,7 @@ class CosyVoice:
             yield model_output
             start_time = time.time()

+    @spaces.GPU
     def inference_zero_shot(self, tts_text, prompt_text, prompt_speech_16k, stream=False, speed=1.0):
         prompt_text = self.frontend.text_normalize(prompt_text, split=False)
         for i in tqdm(self.frontend.text_normalize(tts_text, split=True)):
@@ -84,6 +87,7 @@ class CosyVoice:
             yield model_output
             start_time = time.time()

+    @spaces.GPU
     def inference_cross_lingual(self, tts_text, prompt_speech_16k, stream=False, speed=1.0):
         if self.frontend.instruct is True:
             raise ValueError('{} do not support cross_lingual inference'.format(self.model_dir))
@@ -97,6 +101,7 @@ class CosyVoice:
             yield model_output
             start_time = time.time()

+    @spaces.GPU
     def inference_instruct(self, tts_text, spk_id, instruct_text, stream=False, speed=1.0):
         if self.frontend.instruct is False:
             raise ValueError('{} do not support instruct inference'.format(self.model_dir))
@@ -111,6 +116,7 @@ class CosyVoice:
             yield model_output
             start_time = time.time()

+    @spaces.GPU
     def inference_instruct2(self, tts_text, instruct_text, prompt_speech_16k, stream=False, speed=1.0):
         for i in tqdm(self.frontend.text_normalize(tts_text, split=True)):
             model_input = self.frontend.frontend_instruct2(i, instruct_text, prompt_speech_16k, self.sample_rate)
@@ -122,6 +128,7 @@ class CosyVoice:
             yield model_output
             start_time = time.time()

+    @spaces.GPU
     def inference_vc(self, source_speech_16k, prompt_speech_16k, stream=False, speed=1.0):
         model_input = self.frontend.frontend_vc(source_speech_16k, prompt_speech_16k, self.sample_rate)
         start_time = time.time()
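
The only change in this commit is the addition of the `@spaces.GPU` decorator to the CosyVoice inference entry points. `spaces.GPU` comes from Hugging Face's `spaces` package and, on a ZeroGPU Space, attaches a GPU to the process for the duration of each decorated call; outside a Space the decorator is effectively a no-op. Below is a minimal usage sketch of the decorated API, assuming `import spaces` is already present in cosyvoice/cli/cosyvoice.py (the import itself is not part of this diff); the checkpoint directory, the example text, and the 'tts_speech' output key follow the upstream CosyVoice examples and should be treated as placeholders.

# Minimal sketch: calling the @spaces.GPU-decorated streaming API from a Space.
import torchaudio
from cosyvoice.cli.cosyvoice import CosyVoice

# Hypothetical checkpoint path; substitute the model directory you actually use.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')

# list_avaliable_spks (upstream spelling) returns the built-in speaker ids.
spk_id = cosyvoice.list_avaliable_spks()[0]

# inference_sft is a generator; each decorated call requests GPU time on ZeroGPU.
for idx, out in enumerate(cosyvoice.inference_sft('Hello, this is CosyVoice.', spk_id)):
    # Each yielded dict is expected to carry the waveform under 'tts_speech'.
    torchaudio.save('sft_{}.wav'.format(idx), out['tts_speech'], cosyvoice.sample_rate)

One thing worth verifying against the installed `spaces` version is generator support: every decorated inference_* method yields its outputs rather than returning them, so the decorator must handle generator functions for streaming to keep working.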