File size: 1,623 Bytes
3011301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import logging

logger = logging.getLogger(__name__)


def get_stopwords(use_lite: bool = False) -> set:
    """
    Return a set of Tibetan stopwords imported from a sibling module.

    The stopwords are read from the ``STOPWORDS`` variable of either
    ``stopwords_lite_bo.py`` or ``stopwords_bo.py`` in this package. The object
    returned on success is the module's own ``STOPWORDS`` object, not a copy.

    Args:
        use_lite (bool): If True, load the smaller, less aggressive list from
                         stopwords_lite_bo.py.
                         Otherwise, load the full list from stopwords_bo.py.

    Returns:
        set: A set of stopword strings. Returns an empty set when the module
             cannot be imported; the failure is logged rather than raised.
    """
    # Bind the module name *before* the try block: every log message below
    # refers to it, including those emitted from the exception handlers.
    source_module_name = ".stopwords_lite_bo" if use_lite else ".stopwords_bo"
    module_file = f"{source_module_name.lstrip('.')}.py"

    stopwords_set = set()
    try:
        if use_lite:
            from .stopwords_lite_bo import STOPWORDS
        else:
            from .stopwords_bo import STOPWORDS
        stopwords_set = STOPWORDS

        logger.info(f"Successfully loaded {len(stopwords_set)} stopwords from {module_file}")
    except ImportError:
        # Covers a missing/broken module as well as a module that lacks the
        # STOPWORDS name (``from x import NAME`` reports that as ImportError).
        logger.error(
            f"Failed to import STOPWORDS from {module_file}. "
            f"Ensure the file exists in the 'pipeline' directory, is a Python module (ends in .py), "
            f"and is importable (e.g., no syntax errors)."
        )
    except AttributeError:
        # Defensive fallback: kept so an AttributeError raised while the
        # stopword module is being executed is still reported, not propagated.
        logger.error(
            f"Variable 'STOPWORDS' (all caps) not found in {module_file}. "
            f"Please ensure the stopword set is defined with this name within the module."
        )
    except Exception as e:
        # Best-effort loader: any other failure is logged and the current
        # value of stopwords_set is returned instead of crashing the caller.
        logger.error(f"An unexpected error occurred while loading stopwords from {module_file}: {e}")

    return stopwords_set