from langdetect import detect_langs

# Texts shorter than this are not sent to the detector at all.
_MIN_DETECTION_LENGTH = 10

# A candidate must reach at least this probability to be reported.
_CONFIDENCE_THRESHOLD = 0.7

# Upper bound on the number of candidates returned to the caller.
_MAX_OPTIONS = 3

# Result used when a code is detected that is not in _NATIVE_LANG_MAP.
_UNKNOWN_LANGUAGE = ("Unknown", "English")

# Base language code -> (native name, supported-language name).
_NATIVE_LANG_MAP = {
    "en": ("English", "English"),
    "fr": ("Français", "French"),
    "es": ("Español", "Spanish"),
    "de": ("Deutsch", "German"),
    "hi": ("हिन्दी", "Hindi"),
    "zh": ("中文", "Chinese"),
    "ar": ("العربية", "Arabic"),
    "ru": ("Русский", "Russian"),
    "ja": ("日本語", "Japanese"),
}


def detect_language(text):
    """Detect the language of *text* and map it to a supported language.

    Args:
        text: The string to analyse.

    Returns:
        A list of at most three ``(language, confidence, native_name)``
        tuples, in the order produced by ``detect_langs``:

        * ``[("English", 1.0, "English")]`` when *text* is shorter than
          10 characters (detection is skipped).
        * One tuple per detected candidate whose probability is at least
          0.7. A candidate whose code is not in the mapping is reported
          as ``("English", <probability>, "Unknown")``.
        * ``[("English", 0.5, "English")]`` when no candidate reaches the
          threshold, or when anything in the detection path raises.

    The function never raises to the caller: any exception is reported via
    ``st.error`` and replaced by the low-confidence English fallback.
    """
    try:
        if len(text) < _MIN_DETECTION_LENGTH:
            return [("English", 1.0, "English")]

        detected_options = []
        for candidate in detect_langs(text):
            if candidate.prob < _CONFIDENCE_THRESHOLD:
                continue
            # langdetect reports Chinese with a region suffix ("zh-cn",
            # "zh-tw"), so reduce the code to its base subtag before the
            # lookup; otherwise the "zh" entry can never match.
            base_code = candidate.lang.split("-", 1)[0].lower()
            native_name, mapped_lang = _NATIVE_LANG_MAP.get(
                base_code, _UNKNOWN_LANGUAGE
            )
            detected_options.append((mapped_lang, candidate.prob, native_name))

        if not detected_options:
            # Nothing was confident enough: low-confidence English fallback.
            return [("English", 0.5, "English")]
        return detected_options[:_MAX_OPTIONS]
    except Exception as e:
        # Best-effort boundary: surface the problem in the UI and fall back.
        # NOTE(review): `st` is not imported in the visible source; presumably
        # `import streamlit as st` lives elsewhere in the module — confirm,
        # otherwise this handler itself raises NameError.
        st.error(f"Language detection failed: {str(e)}")
        return [("English", 0.5, "English")]