Spaces:

kokluch
/

phishing-detector-api

Sleeping

App Files Files Community

phishing-detector-api / app.py

kokluch

Add mnemonic detection

05026d9 2 months ago

raw

history blame

6.78 kB

	from time import sleep
	import logging
	import sys
	import re

	import httpx
	from fastapi import FastAPI
	from fastapi.responses import JSONResponse, FileResponse
	from pydantic import BaseModel
	from enum import Enum
	from transformers import pipeline
	from phishing_datasets import submit_entry
	from url_tools import extract_urls, resolve_short_url, extract_domain_from_url
	from urlscan_client import UrlscanClient
	import requests
	from mnemonic_attack import find_confusable_brand



	# FastAPI application object; the route decorators further down register their handlers on it.
	app = FastAPI()
	# Shared urlscan client, used by the /predict handler to search, fetch and launch URL scans.
	urlscan = UrlscanClient()

	# Detach every handler currently attached to the root logger, so the
	# configuration below starts from a clean slate.
	for handler in list(logging.root.handlers):
	    logging.root.removeHandler(handler)

	# Send every record (DEBUG and above) to stdout with a timestamped format.
	logging.basicConfig(
	    handlers=[logging.StreamHandler(sys.stdout)],
	    format='%(asctime)s [%(levelname)s] %(message)s',
	    level=logging.DEBUG,
	)

	# Request sub-model: the message part of a query.
	class MessageModel(BaseModel):
	    # Raw message text; /predict logs it and runs every check on it.
	    text: str

	# Request sub-model: who sent the message and what it contains.
	class QueryModel(BaseModel):
	    # Sender identifier; /predict matches it against the short-code and
	    # alphanumeric SenderID patterns.
	    sender: str
	    # The message itself.
	    message: MessageModel

	# Request sub-model describing the calling application.
	class AppModel(BaseModel):
	    # Version string reported by the caller (not read anywhere in this file).
	    version: str

	# Top-level request body accepted by POST /predict.
	class InputModel(BaseModel):
	    # NOTE(review): the name starts with an underscore; pydantic is documented
	    # to treat such names as private attributes rather than fields, so this is
	    # presumably not populated from the request JSON. Confirm, and use an
	    # aliased field if the value is ever needed.
	    _version: int
	    # Sender and message to classify.
	    query: QueryModel
	    # Information about the calling application.
	    app: AppModel

	# Filtering decision returned to the caller for a message.
	class ActionModel(Enum):
	    # Not enough information to decide; in a query response the message is
	    # shown normally.
	    NONE = 0
	    # Show the message normally.
	    ALLOW = 1
	    # Hide the message from the normal view and file it as Junk.
	    JUNK = 2
	    # Hide the message from the normal view and file it as Promotional.
	    PROMOTION = 3
	    # Hide the message from the normal view and file it as Transactional.
	    TRANSACTION = 4

	# Optional refinement of an ActionModel; only the "no sub-action" value is defined here.
	class SubActionModel(Enum):
	    NONE = 0

	# Response body returned by POST /predict.
	class OutputModel(BaseModel):
	    # Filtering decision for the message.
	    action: ActionModel
	    # Refinement of the decision; every code path in this file sets SubActionModel.NONE.
	    sub_action: SubActionModel

	# Text-classification pipeline, built once at import time and reused by predict() for every request.
	pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")

	@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
	def get_well_known_aasa():
	    """Serve the apple-app-site-association JSON document for this host."""
	    # Application identifiers listed under the "messagefilter" key.
	    filter_apps = [
	        "X9NN3FSS3T.com.lela.Serenity.SerenityMessageFilterExtension",
	        "X9NN3FSS3T.com.lela.Serenity",
	    ]
	    association = {"messagefilter": {"apps": filter_apps}}
	    return JSONResponse(content=association, media_type="application/json")

	@app.get("/robots.txt", include_in_schema=False)
	def get_robots_txt():
	    """Serve the robots.txt file, addressed by a relative path."""
	    robots_path = "robots.txt"
	    return FileResponse(robots_path)

	@app.post("/predict")
	def predict(model: InputModel) -> OutputModel:
	    # Classify an incoming message and return the filtering action.
	    # (Documented with comments rather than a docstring so the published
	    # OpenAPI description of this route is unchanged.)
	    #
	    # Steps, in order:
	    #   1. Brand usurpation check (confusable characters) -> JUNK.
	    #   2. Debug hooks triggered by special "Twilio trial account" messages.
	    #   3. Text classifier score, adjusted by sender type and urlscan verdicts.
	    #   4. Threshold the final score into an ActionModel.
	    #
	    # Args:
	    #   model: request body carrying the sender and the message text.
	    # Returns:
	    #   OutputModel with the chosen action; sub_action is always NONE.
	    sender = model.query.sender
	    text = model.query.message.text

	    logging.info(f"[{sender}] {text}")

	    # Brand usurpation detection using confusables
	    confusable_brand = find_confusable_brand(text)
	    if confusable_brand:
	        logging.warning(f"[BRAND USURPATION] Confusable/homoglyph variant of brand '{confusable_brand}' detected in message. Classified as JUNK.")
	        return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)

	    # Debug sleep
	    # NOTE(security): the duration comes straight from the request text and is
	    # not capped, so any caller can block a worker for an arbitrary time.
	    # Left as-is because it is a deliberate debug hook; consider bounding it.
	    sleep_match = re.search(r"^Sent from your Twilio trial account - sleep (\d+)$", text)
	    if sleep_match:
	        sleep_duration = int(sleep_match.group(1))
	        logging.debug(f"[DEBUG SLEEP] Sleeping for {sleep_duration} seconds for sender {sender}")
	        sleep(sleep_duration)
	        return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)

	    # Debug category
	    # The alternation must use a bare "|": an escaped "\|" matches a literal
	    # pipe character, which made this hook unreachable.
	    category_match = re.search(
	        r"^Sent from your Twilio trial account - (junk|transaction|promotion)$", text
	    )
	    if category_match:
	        category_str = category_match.group(1)
	        logging.info(f"[DEBUG CATEGORY] Forced category: {category_str} for sender {sender}")
	        forced_actions = {
	            'junk': ActionModel.JUNK,
	            'transaction': ActionModel.TRANSACTION,
	            'promotion': ActionModel.PROMOTION,
	        }
	        return OutputModel(action=forced_actions[category_str], sub_action=SubActionModel.NONE)

	    classification = pipe(text)

	    label = classification[0]['label']
	    score = classification[0]['score']

	    logging.info(f"[CLASSIFICATION] label={label} score={score}")

	    # The classifier reports the confidence of the predicted label; flip it
	    # for LABEL_0 so that `score` always refers to the other label
	    # (presumably "spam" -- confirm against the model card).
	    if label == 'LABEL_0':
	        score = 1 - score

	    # Pattern for detecting an alphanumeric SenderID
	    alphanumeric_sender_pattern = r'^[A-Za-z][A-Za-z0-9\-\.]{2,14}$'
	    # Pattern for detecting a short code (bare "|" for alternation, see above)
	    shorten_sender_pattern = r'^(?:3\d{4}|[4-8]\d{4})$'

	    # Never set to True in this function, so the PROMOTION branch below is
	    # currently unreachable; kept so the intended hook stays visible.
	    commercial_stop = False

	    # Detection of commercial senders (short code or alphanumeric)
	    if re.search(shorten_sender_pattern, sender):
	        logging.info("[COMMERCIAL] Commercial sender detected (short code)")
	        score = score * 0.7
	    elif re.match(alphanumeric_sender_pattern, sender):
	        logging.info("[COMMERCIAL] Alphanumeric SenderID detected")
	        score = score * 0.7

	    urls = extract_urls(text)

	    if urls:
	        logging.info(f"[URL] URLs found: {urls}")
	        logging.info("[URL] Searching for previous scans")
	        search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]

	        scan_results = []
	        for search_result in search_results:
	            for hit in search_result.get('results', []):
	                result_uuid = hit.get('_id')
	                if not result_uuid:
	                    # Without an identifier there is nothing to fetch.
	                    continue
	                scan_results.append(urlscan.get_result(result_uuid))

	        if not scan_results:
	            logging.info("[URL] No previous scan found, launching a new scan...")
	            scan_results = [urlscan.scan(url) for url in urls]

	        for scan_result in scan_results:
	            overall = scan_result.get('verdicts', {}).get('overall', {})
	            logging.info(f"[URLSCAN] Overall verdict: {overall}")
	            if not overall.get('hasVerdicts'):
	                continue

	            # Keep the urlscan verdict in its own variable: it must not
	            # overwrite the classifier score that is thresholded below.
	            # A missing/null verdict score is treated as neutral (0).
	            verdict_score = overall.get('score') or 0
	            logging.info(f"[URLSCAN] Verdict score: {verdict_score}")

	            if verdict_score > 0:
	                # A positive verdict on any URL forces the maximum score.
	                score = 1.0
	                break
	            if verdict_score < 0:
	                # A negative verdict slightly lowers the classifier score.
	                score = score * 0.9
	    else:
	        logging.info("[URL] No URL found")
	        score = score * 0.9

	    logging.info(f"[FINAL SCORE] {score}")
	    action = ActionModel.NONE
	    if score > 0.7:
	        action = ActionModel.JUNK
	    elif score > 0.5:
	        action = ActionModel.PROMOTION if commercial_stop else ActionModel.JUNK

	    logging.info(f"[FINAL ACTION] {action}")
	    return OutputModel(action=action, sub_action=SubActionModel.NONE)

	# Request body accepted by POST /report.
	class ReportModel(BaseModel):
	    # Sender of the reported message.
	    sender: str
	    # Text of the reported message.
	    message: str

	@app.post("/report")
	def report(model: ReportModel):
	    # Hand the reported sender/message pair to submit_entry(); nothing is
	    # returned to the caller.
	    reported_sender, reported_message = model.sender, model.message
	    submit_entry(reported_sender, reported_message)