File size: 1,768 Bytes
b937c3e
0329cdf
 
b937c3e
0329cdf
b937c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42904fe
b937c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from langdetect import detect_langs

# Maps a base language code to (native display name, supported language name).
_NATIVE_LANG_MAP = {
    "en": ("English", "English"),
    "fr": ("Français", "French"),
    "es": ("Español", "Spanish"),
    "de": ("Deutsch", "German"),
    "hi": ("हिन्दी", "Hindi"),
    "zh": ("中文", "Chinese"),
    "ar": ("العربية", "Arabic"),
    "ru": ("Русский", "Russian"),
    "ja": ("日本語", "Japanese"),
}


def _report_detection_error(message):
    """Surface *message* through ``st.error`` when available, else log it."""
    import logging

    # ``st`` is not imported next to this function; look it up defensively so
    # a missing UI module can never turn a handled failure into a NameError.
    ui = globals().get("st")
    if ui is not None and hasattr(ui, "error"):
        ui.error(message)
    else:
        logging.getLogger(__name__).error(message)


def _lookup_language(lang_code):
    """Return (native_name, mapped_lang) for *lang_code*, ignoring region tags."""
    if lang_code in _NATIVE_LANG_MAP:
        return _NATIVE_LANG_MAP[lang_code]
    # The detector reports region-qualified codes such as "zh-cn" / "zh-tw";
    # fall back to the base code before giving up.
    base_code = lang_code.split("-", 1)[0].lower()
    return _NATIVE_LANG_MAP.get(base_code, ("Unknown", "English"))


def detect_language(text, *, min_confidence=0.7, max_options=3):
    """Detect the language of *text* and return candidate languages.

    Args:
        text: The string to analyse.
        min_confidence: Minimum detector probability a candidate needs in
            order to be returned.
        max_options: Maximum number of candidates to return.

    Returns:
        A list of ``(language, confidence, native_name)`` tuples in the order
        produced by the detector. Text shorter than 10 characters yields
        ``[("English", 1.0, "English")]`` without running detection. When no
        candidate reaches ``min_confidence``, or when detection fails for any
        reason, ``[("English", 0.5, "English")]`` is returned. Codes that are
        not in the mapping are reported as ``("English", conf, "Unknown")``.
    """
    try:
        if len(text) < 10:  # Minimum length for reliable detection
            return [("English", 1.0, "English")]

        detected_options = []
        for candidate in detect_langs(text):
            if candidate.prob < min_confidence:
                continue
            native_name, mapped_lang = _lookup_language(candidate.lang)
            detected_options.append((mapped_lang, candidate.prob, native_name))

        # Default to English with low confidence if nothing met the threshold.
        if not detected_options:
            return [("English", 0.5, "English")]
        return detected_options[:max_options]

    except Exception as exc:
        # Best-effort boundary: detection must never crash the caller, so any
        # failure (bad input type, detector error) degrades to the fallback.
        _report_detection_error(f"Language detection failed: {exc}")
        return [("English", 0.5, "English")]  # Fallback with low confidence