Spaces:
Sleeping
Sleeping
File size: 2,693 Bytes
cfc4ac5 308f38a cfc4ac5 308f38a 67ca6ac cfc4ac5 5ed3033 cfc4ac5 67ca6ac 308f38a 67ca6ac cfc4ac5 1cb05db cfc4ac5 6dff63d cfc4ac5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
from functools import lru_cache

from elevenlabs import clone, generate, play
from elevenlabs import set_api_key
from huggingface_hub.hf_api import HfFolder
from pydub import AudioSegment
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# SECURITY: a Hugging Face access token used to be hard-coded on these lines.
# Any token that has been committed to source control must be treated as
# leaked and revoked. The token is now read from the environment (on Hugging
# Face Spaces, configure it as a secret named HF_TOKEN).
access_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
if access_token:
    # Persist the token so later `use_auth_token=True` calls can find it.
    HfFolder.save_token(access_token)
import streamlit as st
# Language translation helpers


@lru_cache(maxsize=1)
def _load_translation_model(model_path="."):
    """Load the tokenizer and seq2seq model from ``model_path`` exactly once.

    Loading is expensive, so the pair is cached for the lifetime of the
    process instead of being rebuilt on every ``translate`` call.

    NOTE(review): an earlier commented-out line pointed at
    "facebook/nllb-200-distilled-600M", so the checkpoint stored in "." is
    presumably that NLLB model -- confirm.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_auth_token=True)
    return tokenizer, model


def translate(text):
    """Translate ``text`` into Hindi (language code ``hin_Deva``).

    Args:
        text: The source text to translate.

    Returns:
        The decoded translation of the first (only) generated sequence.
        Generation is capped at 100 tokens, so long inputs may be truncated.
    """
    tokenizer, model = _load_translation_model()
    inputs = tokenizer(text, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        # `lang_code_to_id` is deprecated/removed in recent transformers
        # releases; looking the language code up as a token works on both
        # old and new versions.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("hin_Deva"),
        max_length=100,
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
def save_uploaded_file_as_mp3(uploaded_file, output_file_path):
    """Decode ``uploaded_file`` with pydub and write it out as an MP3.

    Args:
        uploaded_file: Audio source accepted by ``AudioSegment.from_file``.
        output_file_path: Destination path for the exported MP3.
    """
    AudioSegment.from_file(uploaded_file).export(output_file_path, format="mp3")
def ui():
    """Render the Streamlit page: upload a voice sample, clone it, speak Hindi.

    Flow:
        1. The user uploads an audio sample, which is converted to MP3 on disk.
        2. The user supplies an Eleven Labs API key in the sidebar.
        3. The user enters English text; it is translated to Hindi and
           synthesised with a voice cloned from the uploaded sample.

    Each step is skipped until its prerequisites are available, and the
    cloned voice is kept in ``st.session_state`` so that Streamlit's
    rerun-on-every-interaction model does not create a new voice each time.
    """
    st.title('Multi Lingual Voice Cloner')
    st.markdown("Made by vageesh")

    # Audio input box. Each extension must be its own list entry; the previous
    # "wav,Mp4" entry was a single bogus extension.
    audio_file = st.file_uploader(
        "Upload an audio file that needs to be cloned",
        type=["wav", "mp4", "mp3"],
    )
    user_api_key = st.sidebar.text_input(
        label="#### Your Eleven Labs API key here 👇",
        placeholder="Paste your Eleven Labs API key API key, sk-",
        type="password",
    )
    in_text = st.text_input("Paste the text you want to hear from english to hindi")

    if audio_file is None:
        # Nothing to clone yet.
        return

    output_file_path = "./output_audio.mp3"
    sample_id = (audio_file.name, audio_file.size)
    if st.session_state.get("cloned_sample_id") != sample_id:
        # New (or first) sample: convert it and invalidate any earlier clone.
        save_uploaded_file_as_mp3(audio_file, output_file_path)
        st.session_state.pop("cloned_voice", None)
        st.session_state["cloned_sample_id"] = sample_id
    st.success(f"Audio file saved as {output_file_path}")

    if user_api_key is None or user_api_key.strip() == "":
        # Eleven Labs calls below need a key.
        return
    set_api_key(user_api_key.strip())

    if in_text is None or in_text.strip() == "":
        return

    # Clone the voice once per uploaded sample instead of on every rerun.
    voice = st.session_state.get("cloned_voice")
    if voice is None:
        voice = clone(
            name="Alex",
            description="An middle aged American male voice with a slight hoarseness in his throat. Perfect for npodcast",  # Optional
            # The SDK expects a list of sample paths, not a bare string.
            files=[output_file_path],
        )
        st.session_state["cloned_voice"] = voice

    hin_text = translate(in_text)
    # The model is selected at generation time; the multilingual model is
    # needed for Hindi output.
    audio = generate(text=hin_text, voice=voice, model='eleven_multilingual_v1')
    st.audio(audio)
# Script entry point: build the page when this file is executed directly.
if __name__ == "__main__":
    ui()
|