Spaces:
Running
Running
import functools
import os
import re

from confusables import is_confusable, confusable_regex
# Location of the brand list: a file named 'scam_brands.txt' in the same
# directory as this module.
SCAM_BRANDS_FILE = os.path.join(
    os.path.dirname(__file__),
    'scam_brands.txt',
)
def load_scam_brands(path=None):
    """Load the scam brand names from a text file, one brand per line.

    Surrounding whitespace and double quotes are removed from every line.
    Lines that are blank, or that contain nothing but quotes and whitespace
    (for example ``""``), are skipped so the result never holds an empty
    brand name.

    Args:
        path: File to read. When omitted, ``SCAM_BRANDS_FILE`` is used, so
            the original zero-argument call keeps working.

    Returns:
        A list of brand strings, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    if path is None:
        path = SCAM_BRANDS_FILE
    with open(path, encoding='utf-8') as f:
        # Strip again after removing the quotes: '" Brand "' must become
        # 'Brand', and '""' must collapse to '' so it can be filtered out.
        cleaned = (line.strip().strip('"').strip() for line in f)
        return [brand for brand in cleaned if brand]
# The brand list is read from disk once, when this module is imported.
SCAM_BRANDS = load_scam_brands()
@functools.lru_cache(maxsize=None)
def _compiled_brand_pattern(brand):
    """Return the compiled confusable-matching regex for *brand*.

    The pattern depends only on the brand string, so it is cached instead of
    being regenerated and recompiled for every brand on every message.
    """
    return re.compile(confusable_regex(brand, include_character_padding=True))


def find_confusable_brand(message):
    """Return the first scam brand found in *message*, or None.

    For each entry of ``SCAM_BRANDS`` a regex is built with
    ``confusable_regex`` (character padding enabled) and searched for
    anywhere in the message. That regex is meant to match the brand itself
    as well as its confusable/homoglyph variants, so an exact spelling of
    the brand is reported too.

    Args:
        message: Text to scan. ``None`` or an empty string yields ``None``.

    Returns:
        The matching brand exactly as listed in ``SCAM_BRANDS`` (the first
        one in list order that matches), or ``None`` when nothing matches.
    """
    if not message:
        # Nothing to scan; also avoids a TypeError from re on None.
        return None
    for brand in SCAM_BRANDS:
        if not brand:
            # An empty entry names no brand to report; skip it rather than
            # build a pattern from an empty string.
            continue
        if _compiled_brand_pattern(brand).search(message):
            return brand
    return None
def test_find_confusable_brand():
    """
    Run find_confusable_brand over a fixed set of sample messages and print
    one [ALERT] or [OK] line per message. Nothing is asserted; this is a
    manual demo of the detector.
    """
    sample_messages = (
        # Greek omicron in place of the Latin "o"
        "This is a message from Amazοn support.",
        "Your Apple account has been locked.",
        # Digit zero in place of the letter "o"
        "Contact S0ciété Générale for more info.",
        "Welcome to Netflix!",
        "This is a message from a random sender.",
        "Bonjour, c'est le livreur votre colis ne rentrait pas dans la boite aux lettres merci de choisir un point relais sur : https://mondiaIrelais-expedition.com",
    )
    for message in sample_messages:
        brand = find_confusable_brand(message)
        if not brand:
            print(f"[OK] Message: '{message}' => No confusable brand detected.")
            continue
        print(f"[ALERT] Message: '{message}' => Confusable brand detected: {brand}")
# Run the sample-message demo only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test_find_confusable_brand()