Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import os | |
| import sys | |
| import requests | |
| import json | |
| from huggingface_hub import HfApi | |
| # start xVASynth service (no HTTP) | |
| import resources.app.no_server as xvaserver | |
| from gr_client import BlocksDemo | |
# model
hf_model_name = "Pendrokar/xvapitch_nvidia"
model_repo = HfApi()

# xVAPitch voices: the first commit returned by the Hub names the snapshot
# folder inside the local Hugging Face cache.
# NOTE(review): this only builds the path; nothing here downloads the
# snapshot, so it is presumably fetched elsewhere - confirm.
commits = model_repo.list_repo_commits(repo_id=hf_model_name)
latest_commit_sha = commits[0].commit_id
hf_cache_models_path = (
    '/home/user/.cache/huggingface/hub/'
    'models--Pendrokar--xvapitch_nvidia/'
    f'snapshots/{latest_commit_sha}/'
)
print(hf_cache_models_path)

# Lojban voice: same lookup against its own repository.
# `commits` and `latest_commit_sha` are rebound here and keep the Lojban
# values from this point on.
commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_lojban')
latest_commit_sha = commits[0].commit_id
hf_cache_lojban_models_path = (
    '/home/user/.cache/huggingface/hub/'
    'models--Pendrokar--xvasynth_lojban/'
    f'snapshots/{latest_commit_sha}/'
)
print(hf_cache_lojban_models_path)

# Directory prefix used for every non-Lojban voice.
models_path = hf_cache_models_path

# Module-level state: name of the voice considered loaded, and the default
# speaker embedding returned when a load fails.
current_voice_model = None
base_speaker_emb = ''
def load_model(voice_model_name):
    """Load a voice model into the in-process xVASynth service.

    Args:
        voice_model_name: File stem of the voice model (no extension).
            ``'x_selpahi'`` is resolved inside the Lojban snapshot and loaded
            as ``'FastPitch1.1'``; every other name is resolved inside
            ``models_path`` and loaded as ``'xVAPitch'``.

    Returns:
        The ``base_speaker_emb`` entry of the first ``games`` item in the
        model's ``.json`` file, or the module-level ``base_speaker_emb``
        default when the service call raised a ``RequestException``.

    Raises:
        OSError, json.JSONDecodeError, KeyError, IndexError: if the model's
            ``.json`` metadata cannot be read or lacks the expected entry.

    Note:
        This function does not update the module-level
        ``current_voice_model``; a plain assignment here would only bind a
        function-local name, so tracking the loaded voice is left to the
        caller.
    """
    if voice_model_name == 'x_selpahi':
        # Lojban
        model_path = hf_cache_lojban_models_path + voice_model_name
        model_type = 'FastPitch1.1'
    else:
        model_path = models_path + voice_model_name
        model_type = 'xVAPitch'

    data = {
        'outputs': None,
        'version': '3.0',
        'model': model_path,
        'modelType': model_type,
        # seems to have no effect if generated text is from a different language
        'base_lang': 'en',
        'pluginsContext': '{}',
    }

    print('Loading voice model...')
    try:
        xvaserver.loadModel(data)
    except requests.exceptions.RequestException as err:
        # NOTE(review): the service is called in-process (no HTTP), so it may
        # never raise this exception type - confirm what loadModel raises.
        print(f'FAILED to load voice model: {err}')
        return base_speaker_emb

    # The speaker embedding is stored next to the model file as "<model>.json".
    with open(model_path + '.json', 'r', encoding='utf-8') as f:
        voice_model_json = json.load(f)
    return voice_model_json['games'][0]['base_speaker_emb']
class LocalBlocksDemo(BlocksDemo):
    """Demo variant whose synthesis calls go straight to ``xvaserver``."""

    def predict(
        self,
        input_text,
        voice,
        lang,
        pacing,
        pitch,
        energy,
        anger,
        happy,
        sad,
        surprise,
        use_deepmoji
    ):
        """Synthesize ``input_text`` with ``voice`` and describe the result.

        Args:
            input_text: Text to speak; only the first 1000 characters are used.
            voice: Voice model name, passed to ``load_model`` when it differs
                from the currently loaded one.
            lang: Sent to the synthesizer as ``base_lang``.
            pacing: Sent as ``pace``; a falsy value becomes ``1.0``.
            pitch: Accepted for interface compatibility; not used here.
            energy: Accepted for interface compatibility; not used here.
            anger, happy, sad, surprise: Emotion values; anything not above
                zero is sent as ``0`` in ``mantella_settings``.
            use_deepmoji: Sent as ``run_model`` in ``mantella_settings``.

        Returns:
            A list of ``[audio path, ARPAbet HTML, angry, happy, sad,
            surprise, json_data]``, the four emotion values rounded to two
            decimals. After a failed synthesis the audio path is ``''`` and
            the values come from a placeholder ``json_data``.
        """
        # Without this declaration the assignments below would create
        # method-locals: the loaded voice would never be remembered and
        # `base_speaker_emb` would be unbound whenever the load is skipped.
        global current_voice_model, base_speaker_emb

        # grab only the first 1000 characters
        input_text = input_text[:1000]

        # load voice model if not the current model
        if current_voice_model != voice:
            base_speaker_emb = load_model(voice)
            current_voice_model = voice

        # NOTE(review): load_model loads 'x_selpahi' as 'FastPitch1.1', yet
        # the synthesis request always states 'xVAPitch' - confirm intended.
        model_type = 'xVAPitch'
        pace = pacing if pacing else 1.0
        save_path = '/tmp/xvapitch_audio_sample.wav'
        language = lang
        use_sr = 0
        use_cleanup = 0

        pluginsContext = {}
        pluginsContext["mantella_settings"] = {
            "emAngry": (anger if anger > 0 else 0),
            "emHappy": (happy if happy > 0 else 0),
            "emSad": (sad if sad > 0 else 0),
            "emSurprise": (surprise if surprise > 0 else 0),
            "run_model": use_deepmoji
        }

        data = {
            'pluginsContext': json.dumps(pluginsContext),
            'modelType': model_type,
            # pad with whitespaces as a workaround to avoid cutoffs
            'sequence': input_text.center(len(input_text) + 2, ' '),
            'pace': pace,
            'outfile': save_path,
            'vocoder': 'n/a',
            'base_lang': language,
            'base_emb': base_speaker_emb,
            'useSR': use_sr,
            'useCleanup': use_cleanup,
        }

        print('Synthesizing...')
        try:
            json_data = xvaserver.synthesize(data)
        except requests.exceptions.RequestException as err:
            # NOTE(review): synthesis runs in-process (no HTTP), so this
            # exception type may never be raised - confirm what it raises.
            print(f'FAILED to synthesize: {err}')
            save_path = ''
            # Placeholder shaped like the fields read below: a '|'-separated
            # symbol string and one-element emotion lists.
            json_data = {
                'arpabet': 'Failed',
                'durations': [0],
                'em_angry': [anger],
                'em_happy': [happy],
                'em_sad': [sad],
                'em_surprise': [surprise],
            }

        html_parts = ['<h6>ARPAbet & Phoneme lengths</h6>']
        arpabet_symbols = json_data['arpabet'].split('|')
        utter_time = 0
        # zip pairs each symbol with its duration and stops at the shorter
        # sequence instead of indexing past the end of the symbol list.
        for symbol, duration in zip(arpabet_symbols, json_data['durations']):
            # skip PAD symbol
            if symbol == '<PAD>':
                continue
            length = float(duration)
            # horizontal padding is half the phoneme length, in em
            arpa_length = round(length / 2, 1)
            html_parts.append(
                f'<strong class="arpabet" style="padding: 0 {arpa_length}em"'
                f' title="{utter_time} + {length}">{symbol}</strong> '
            )
            utter_time += round(length, 1)
        arpabet_html = ''.join(html_parts)

        return [
            save_path,
            arpabet_html,
            round(json_data['em_angry'][0], 2),
            round(json_data['em_happy'][0], 2),
            round(json_data['em_sad'][0], 2),
            round(json_data['em_surprise'][0], 2),
            json_data
        ]
if __name__ == "__main__":
    print('running custom Gradio interface')
    # The Lojban snapshot directory is bound to `hf_cache_lojban_models_path`;
    # no `lojban_models_path` name is defined in the visible module, so
    # passing it would raise NameError.
    demo = LocalBlocksDemo(models_path, hf_cache_lojban_models_path)
    demo.block.launch()