Spaces:
Running
Running
File size: 2,366 Bytes
05026d9 504cee7 05026d9 504cee7 05026d9 504cee7 05026d9 504cee7 05026d9 504cee7 05026d9 504cee7 05026d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import os
import re
from confusables import is_confusable, confusable_regex
# Path of the brand list: 'scam_brands.txt' in the same directory as this module.
SCAM_BRANDS_FILE = os.path.join(os.path.dirname(__file__), 'scam_brands.txt')
def load_scam_brands(path=None):
    """Load the list of scam brands from a text file.

    Every non-blank line of the file is one brand.  Leading/trailing
    whitespace is removed from the line, then any double quotes wrapping
    the entry are removed.  Entries that are blank once the quotes are gone
    (for example a line holding only ``""``) are skipped, since they name
    no brand.

    Args:
        path: File to read.  When omitted, ``SCAM_BRANDS_FILE`` is used.

    Returns:
        The brands as a list of strings, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    if path is None:
        path = SCAM_BRANDS_FILE
    brands = []
    with open(path, encoding='utf-8') as f:
        for line in f:
            brand = line.strip().strip('"')
            # The value itself is kept as-is; only entries with nothing
            # but whitespace left are dropped.
            if brand.strip():
                brands.append(brand)
    return brands
# Brand list, read from disk once when this module is imported.
SCAM_BRANDS = load_scam_brands()
def _differs_only_by_case(candidate, brand):
    """Return True when *candidate* is *brand* up to plain letter case.

    The comparison is made character by character: a character of
    *candidate* is accepted only if it is the brand's character itself or
    that character's own lower/upper form.  Lower-casing the whole candidate
    instead would fold some look-alike code points onto ASCII (U+212A KELVIN
    SIGN lower-cases to 'k'), making a homoglyph spelling look legitimate.
    """
    if len(candidate) != len(brand):
        return False
    return all(
        seen == expected or seen in (expected.lower(), expected.upper())
        for seen, expected in zip(candidate, brand)
    )


def find_confusable_brand(message):
    """
    Check if the message contains a confusable/homoglyph variant of any scam brand.

    Brands are tried in ``SCAM_BRANDS`` order and, for each brand, matches are
    examined left to right; the first brand with a qualifying match wins.

    A match that spells the brand with the brand's own characters, ignoring
    letter case and surrounding whitespace, is treated as a legitimate mention
    and skipped.  Any other match is reported.

    Args:
        message: Text to scan.

    Returns:
        The matched brand (as listed in ``SCAM_BRANDS``) if a look-alike
        spelling is found, otherwise None.
    """
    for brand in SCAM_BRANDS:
        # Pattern matching the brand or any confusable variant of it, as
        # produced by the confusables package.
        pattern = re.compile(confusable_regex(brand, include_character_padding=False))
        plain_brand = brand.strip()
        for match in pattern.finditer(message):
            matched_text = match.group(0)
            # A plain spelling of the brand is not an alert.
            if _differs_only_by_case(matched_text.strip(), plain_brand):
                continue
            # Diagnostic output for the detected look-alike.
            print(f"matched_text: {matched_text.lower().strip()} brand: {brand.lower().strip()}")
            return brand
    return None
def test_find_confusable_brand():
    """
    Run find_confusable_brand over a fixed set of sample messages and print
    one "[ALERT]" or "[OK]" line per message.
    """
    sample_messages = (
        "This is a message from Amazοn support.",  # o replaced by Greek omicron
        "Your Αpple account has been locked.",  # A replaced by Greek capital alpha
        "Contact S0ciété Générale for more info.",  # o replaced by the digit zero
        "Welcome to Netflix!",
        "This is a message from a random sender.",
        "Bonjour, c'est le livreur votre colis ne rentrait pas dans la boite aux lettres merci de choisir un point relais sur : https://mondiaIrelais-expedition.com",
        "599915 est votre code de vérification Leboncoin.",
    )
    for text in sample_messages:
        detected = find_confusable_brand(text)
        # Nothing (or a falsy value) returned: report the message as clean.
        if not detected:
            print(f"[OK] Message: '{text}' => No confusable brand detected.")
            continue
        print(f"[ALERT] Message: '{text}' => Confusable brand detected: {detected}")
# Run the demo over the sample messages when executed as a script.
if __name__ == "__main__":
    test_find_confusable_brand()