Spaces:

Krishna086
/

Multi-language-translation-with-MarianMT

Running

App Files Files Community

Multi-language-translation-with-MarianMT / app.py

Krishna086

Update app.py

b5c7107 verified 2 months ago

raw

history blame

9.63 kB

	import streamlit as st
	import importlib
	from io import BytesIO
	import docx
	from PyPDF2 import PdfReader
	import time

	# Configure Streamlit page settings (browser-tab title, icon and wide layout).
	# This is the first Streamlit call made by this script.
	st.set_page_config(page_title="Multilingual Translator", page_icon="🌐", layout="wide")

	# Import language definitions from translation module. If the module cannot
	# be imported, show the error in the UI and halt the current script run so
	# the code below never executes without LANGUAGES.
	try:
	    from translation import LANGUAGES
	except ImportError as e:
	    st.error(f"Failed to import translation module: {e}")
	    st.stop()

	# Extract text from uploaded files (PDF, DOCX, TXT)
	def extract_text_from_file(uploaded_file):
	    """Return the plain text contained in an uploaded file.

	    The file kind is chosen from ``uploaded_file.type`` (a MIME type string):
	    PDF, DOCX and plain text are handled; any other type yields ``""``.

	    Args:
	        uploaded_file: File-like object exposing ``type`` and, for plain
	            text, ``read()`` returning bytes. PDF and DOCX files are handed
	            to ``PdfReader`` and ``docx.Document`` respectively.

	    Returns:
	        The extracted text with surrounding whitespace stripped, or ``""``
	        when the type is unsupported or extraction fails (the failure is
	        reported through ``st.error``).
	    """
	    try:
	        mime_type = uploaded_file.type
	        if mime_type == "application/pdf":
	            pdf_reader = PdfReader(uploaded_file)
	            # Join pages with a newline so the last word of one page is not
	            # glued to the first word of the next.
	            text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
	        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
	            doc = docx.Document(uploaded_file)
	            text = "\n".join(para.text for para in doc.paragraphs)
	        elif mime_type == "text/plain":
	            # "utf-8-sig" also drops a leading byte-order mark, which
	            # str.strip() would otherwise leave in the text.
	            return uploaded_file.read().decode("utf-8-sig", errors="ignore").strip()
	        else:
	            return ""
	        # Drop characters that cannot be encoded as UTF-8 (e.g. lone
	        # surrogates). The error handler must sit on encode(): a strict
	        # encode() would raise before decode() could ignore anything.
	        return text.encode("utf-8", errors="ignore").decode("utf-8").strip()
	    except Exception as e:
	        st.error(f"Error extracting text from file: {e}")
	        return ""

	# Update input text when a file is uploaded
	def on_file_upload():
	    """Copy the text of the uploaded file into the input text box.

	    Reads the uploader widget value from ``st.session_state["file_input"]``.
	    Files smaller than 1 MiB are run through ``extract_text_from_file`` and
	    the result is stored in ``st.session_state.user_input_text``; larger
	    files are rejected with an error message. Any exception is reported via
	    ``st.error`` instead of propagating.
	    """
	    max_bytes = 1024 * 1024
	    try:
	        upload = st.session_state.get("file_input")
	        if not upload:
	            # Nothing selected (or the selection was cleared): nothing to do.
	            return
	        if upload.size < max_bytes:
	            extracted = extract_text_from_file(upload)
	            st.session_state.user_input_text = extracted
	            st.success(f"File '{upload.name}' uploaded successfully!")
	            return
	        if upload.size >= max_bytes:
	            st.error("File size must be less than 1 MB")
	    except Exception as e:
	        st.error(f"Error processing file upload: {e}")
	# Main application function
	def main():
	    """Render the translator page: header, CSS, input and output columns, footer.

	    The left column holds the source-language selector, the input text area,
	    the optional file uploader and the Translate button. The right column
	    holds the target-language selector and, once
	    ``st.session_state.translated_text`` exists, the read-only output area
	    and the audio button.

	    The source selector defaults to the language reported by
	    ``lang_detect.detect_language`` for the current input text. When there
	    is no input, the detector returns nothing, or the detected name has no
	    entry in ``LANGUAGES``, it falls back to "Auto-detect" instead of
	    raising.

	    Any exception raised while building the page is shown with ``st.error``.
	    """
	    try:
	        # Import required modules dynamically
	        translation_module = importlib.import_module("translation")
	        language_detector = importlib.import_module("lang_detect")
	        audio_processor_module = importlib.import_module("audio_processor")

	        # Display application header
	        st.markdown("<h1 style='text-align: center; color: #4285F4;'>Multilingual Translator</h1>", unsafe_allow_html=True)
	        st.markdown("<p style='text-align: center; color: #666;'>Effortless Multilingual Translation</p>", unsafe_allow_html=True)

	        # Apply custom CSS for UI enhancements (string content kept verbatim)
	        st.markdown(
	            """
	<style>
	.stFileUploader > div > div > div[role="button"] { display: none !important; }
	.stFileUploader label { display: none !important; }
	.stFileUploader [data-testid="stFileUploaderDropzone"] {
	border: 2px dashed #ccc !important; padding: 10px !important;
	text-align: center !important; font-size: 0 !important;
	}
	.stFileUploader [data-testid="stFileUploaderDropzone"]::after {
	content: 'Drag and drop TXT, DOCX, or PDF here or ' !important;
	font-size: 1em !important; color: #666 !important;
	}
	.stFileUploader [data-testid="stFileUploaderButton"] {
	font-size: 1em !important; color: #1E90FF !important;
	background: none !important; border: none !important; padding: 0 !important;
	}
	.stFileUploader [data-testid="stFileUploader"] { background: none !important; }
	.stRadio > div { margin-bottom: 0 !important; }
	.stTextArea { margin-top: 0 !important; }
	</style>
	""",
	            unsafe_allow_html=True,
	        )

	        # Create symmetric layout with two columns
	        left_col, right_col = st.columns([1, 1])
	        with left_col:
	            # Detect language of input text if available
	            input_text = st.session_state.get("user_input_text", "").strip()
	            detected_options = language_detector.detect_language(input_text) if input_text else []
	            if not detected_options:
	                # No input, or the detector produced nothing usable.
	                detected_options = [("Auto-detect", 1.0, "Auto-detect")]
	            top_guess = detected_options[0]
	            source_language = top_guess[2] if top_guess[0] != "Auto-detect" else "Auto-detect"

	            source_options = ["Auto-detect"] + [f"{v[0]} ({v[1]})" for v in LANGUAGES.values()]
	            default_source_index = 0
	            if source_language != "Auto-detect":
	                # Build the label from the matching LANGUAGES entry itself so
	                # it is guaranteed to be one of source_options. A detected
	                # name with no entry keeps "Auto-detect" rather than raising
	                # ValueError from list.index().
	                detected_label = next(
	                    (f"{v[0]} ({v[1]})" for v in LANGUAGES.values() if v[1] == source_language),
	                    None,
	                )
	                if detected_label is not None:
	                    default_source_index = source_options.index(detected_label)
	            st.selectbox("Source Language", options=source_options, index=default_source_index, key="source_lang")

	            st.text_area("Input Text", height=200, key="user_input_text", placeholder="Enter text here", label_visibility="hidden")
	            input_type = st.radio("Input Type", ["Text", "File"], horizontal=True, label_visibility="hidden", key="input_type")
	            if input_type == "File":
	                st.file_uploader("Upload File", type=["txt", "docx", "pdf"], key="file_input", on_change=on_file_upload, label_visibility="hidden")
	            st.button("Translate", key="translate_btn", on_click=trigger_translation, args=(translation_module, language_detector, audio_processor_module))

	        with right_col:
	            # Set target language options based on source language: the
	            # selected source (the part before " (") is removed from the list.
	            source_lang_display = st.session_state.source_lang.split(" (")[0]
	            target_options = [f"{v[0]} ({v[1]})" for v in LANGUAGES.values() if v[0] != source_lang_display and v[1] != source_lang_display]

	            # Default to English unless the source itself is English; fall
	            # back to the first option if the English label is not offered.
	            default_target_index = 0
	            if "English" not in source_lang_display:
	                english_label = f"{LANGUAGES['en'][0]} ({LANGUAGES['en'][1]})"
	                if english_label in target_options:
	                    default_target_index = target_options.index(english_label)
	            st.selectbox("Target Language", options=target_options, index=default_target_index, key="target_lang")

	            if "translated_text" in st.session_state:
	                st.text_area("Output Text", value=st.session_state.translated_text, height=200, key="output_text", disabled=True, label_visibility="hidden")
	                # Trigger audio playback on button click (handled in the callback)
	                st.button("🔊", key="audio_btn", on_click=play_audio, args=(audio_processor_module,), help="Play audio", use_container_width=False)

	        # Display application footer
	        st.markdown("""
	<p style="font-size: small; color: grey; text-align: center;">
	Developed By: Krishna Prakash
	<a href="https://www.linkedin.com/in/krishnaprakash-profile/" target="_blank">
	<img src="https://img.icons8.com/ios-filled/30/0077b5/linkedin.png" alt="LinkedIn" style="vertical-align: middle; margin: 0 5px;"/>
	</a>
	</p>
	""", unsafe_allow_html=True)
	    except Exception as e:
	        st.error(f"Application error occurred: {e}")

	# Map the language detector's top guess to a LANGUAGES key ("en" if unknown)
	def _detected_language_code(detected_options):
	    """Return the ``LANGUAGES`` key for the first entry of ``detected_options``.

	    Lookups are tried in order, stopping at the first hit:
	    1. item 0 of the guess equals a language's second name field,
	    2. item 0 of the guess is itself a ``LANGUAGES`` key,
	    3. item 2 of the guess equals a language's second name field (the
	       comparison ``main`` uses for the same tuples).
	    NOTE(review): the exact tuple layout returned by ``detect_language`` is
	    not visible here; the fallbacks exist because the two call sites in this
	    file index it differently. Confirm against ``lang_detect``.
	    """
	    if not detected_options:
	        return "en"
	    top_guess = detected_options[0]
	    by_first_item = next((k for k, v in LANGUAGES.items() if v[1] == top_guess[0]), None)
	    if by_first_item is not None:
	        return by_first_item
	    if top_guess[0] in LANGUAGES:
	        return top_guess[0]
	    if len(top_guess) > 2:
	        return next((k for k, v in LANGUAGES.items() if v[1] == top_guess[2]), "en")
	    return "en"

	# Translate the current input text and store the result in session state
	def trigger_translation(translation_module, language_detector, audio_processor_module):
	    """Translate ``st.session_state.user_input_text`` into the target language.

	    Args:
	        translation_module: Module exposing ``translate(text, src, tgt)``.
	        language_detector: Module exposing ``detect_language(text)``; used
	            only when the source selector is set to "Auto-detect".
	        audio_processor_module: Unused here; kept so the button's ``args``
	            tuple keeps working.

	    The result is written to ``st.session_state.translated_text``. The input
	    text is stored instead when the translation is empty, when the call took
	    longer than 20 seconds (checked after it returns; the call itself is not
	    interrupted), or when any exception occurs. Nothing happens for empty
	    input.
	    """
	    # Bound before the try block so the except handler can always read it.
	    user_input_text = ""
	    try:
	        user_input_text = st.session_state.get("user_input_text", "").strip()
	        if not user_input_text:
	            return
	        with st.spinner("Translating..."):
	            # Monotonic clock: elapsed time is unaffected by system clock changes.
	            start_time = time.monotonic()
	            source_lang = st.session_state.source_lang.split(" (")[0]
	            target_lang = st.session_state.target_lang.split(" (")[0]
	            if source_lang == "Auto-detect":
	                detected_options = language_detector.detect_language(user_input_text)
	                source_lang_code = _detected_language_code(detected_options)
	            else:
	                source_lang_code = next((k for k, v in LANGUAGES.items() if v[0] == source_lang), "en")
	            target_lang_code = next((k for k, v in LANGUAGES.items() if v[0] == target_lang), "en")
	            translated_text = translation_module.translate(user_input_text, source_lang_code, target_lang_code)
	            if time.monotonic() - start_time > 20:  # Translation exceeded the 20-second budget
	                st.error("Translation took too long, reverting to input.")
	                translated_text = user_input_text
	            # Accept any non-blank result. Requiring more than two
	            # whitespace-separated words rejected valid short translations
	            # and whole sentences in scripts written without spaces.
	            if translated_text and translated_text.strip():
	                st.session_state.translated_text = translated_text
	            else:
	                st.session_state.translated_text = user_input_text
	    except Exception as e:
	        st.error(f"Translation process failed: {e}")
	        st.session_state.translated_text = user_input_text

	# Handle audio playback with error checking
	def play_audio(audio_processor_module):
	    """Synthesize speech for the current translation and embed an audio player.

	    Args:
	        audio_processor_module: Module exposing
	            ``text_to_speech(text, lang_code)``; the result is used as a
	            buffer object (``getbuffer()`` is called on it).

	    Does nothing when ``st.session_state.translated_text`` is missing or
	    empty. Shows an error when no audio bytes were produced or when any
	    exception is raised.
	    """
	    try:
	        if "translated_text" not in st.session_state or not st.session_state.translated_text:
	            return

	        # Resolve the LANGUAGES key whose first name field matches the
	        # selected target label (text before " ("); default to "en".
	        lang_code = "en"
	        for code, names in LANGUAGES.items():
	            if names[0] == st.session_state.target_lang.split(" (")[0]:
	                lang_code = code
	                break

	        speech = audio_processor_module.text_to_speech(st.session_state.translated_text, lang_code)
	        if not speech or not speech.getbuffer().nbytes > 0:
	            st.error("Failed to generate audio. Please try again.")
	        else:
	            st.audio(speech, format="audio/mp3")
	    except Exception as e:
	        st.error(f"Audio playback failed: {e}")

	# Build the page only when this file is executed as the main script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()