Spaces:
Running
Running
import logging

# Module-level logger named after this module, used by the functions below.
logger = logging.getLogger(__name__)
def get_stopwords(use_lite: bool = False) -> set:
    """Return the Tibetan stopword set from the matching sibling module.

    The stopwords are read from the ``STOPWORDS`` variable of either
    ``stopwords_lite_bo.py`` or ``stopwords_bo.py``, imported relative to
    this module's package. Failures are logged, never raised.

    Args:
        use_lite: If True, load the smaller, less aggressive list from
            ``stopwords_lite_bo.py``; otherwise load the full list from
            ``stopwords_bo.py``.

    Returns:
        A new set of stopword strings, or an empty set if loading failed.
    """
    # Function-scope import keeps this function self-contained.
    import importlib

    # Resolve the module name before the ``try`` so that every log message
    # below (success and failure alike) can safely refer to it.
    source_module_name = ".stopwords_lite_bo" if use_lite else ".stopwords_bo"
    module_file = f"{source_module_name.lstrip('.')}.py"

    stopwords_set = set()
    try:
        module = importlib.import_module(source_module_name, package=__package__)
        # Attribute access (instead of ``from ... import``) lets a missing
        # STOPWORDS variable surface as AttributeError, distinct from a
        # missing or broken module. Copy so callers cannot mutate the
        # module-level collection.
        stopwords_set = set(module.STOPWORDS)
        logger.info(
            "Successfully loaded %d stopwords from %s",
            len(stopwords_set),
            module_file,
        )
    except ImportError:
        logger.error(
            "Failed to import STOPWORDS from %s. "
            "Ensure the file exists in the 'pipeline' directory, is a Python module (ends in .py), "
            "and is importable (e.g., no syntax errors).",
            module_file,
        )
    except AttributeError:
        logger.error(
            "Variable 'STOPWORDS' (all caps) not found in %s. "
            "Please ensure the stopword set is defined with this name within the module.",
            module_file,
        )
    except Exception as e:
        # Best-effort loader: report anything unexpected and fall through to
        # the empty-set result instead of crashing the caller.
        logger.error(
            "An unexpected error occurred while loading stopwords from %s: %s",
            module_file,
            e,
        )
    return stopwords_set