Spaces:

kokluch
/

phishing-detector-api

Running

App Files Community

kokluch committed on Jul 1

Commit

664d4ee

1 Parent(s): 26278b4

Add logging

Browse files

Files changed (1) hide show

app.py +24 -13

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from time import sleep
 import httpx
 from fastapi import FastAPI
@@ -12,9 +13,17 @@ from urlscan_client import UrlscanClient
 import requests
 import re
 app = FastAPI()
 urlscan = UrlscanClient()
 class MessageModel(BaseModel):
     text: str
@@ -74,7 +83,7 @@ def predict(model: InputModel) -> OutputModel:
     sender = model.query.sender
     text = model.query.message.text
-    print(f"[{sender}] {text}")
     # Debug sleep
     pattern = r"^Sent from your Twilio trial account - sleep (\d+)$"
@@ -83,6 +92,7 @@ def predict(model: InputModel) -> OutputModel:
     if match:
         number_str = match.group(1)
         sleep_duration = int(number_str)
         sleep(sleep_duration)
         return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
@@ -92,6 +102,7 @@ def predict(model: InputModel) -> OutputModel:
     if match:
         category_str = match.group(1)
         match category_str:
             case 'junk':
                 return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
@@ -104,7 +115,7 @@ def predict(model: InputModel) -> OutputModel:
     label = result[0]['label']
     score = result[0]['score']
-    print(f"classification {label} score {score}")
     if label == 'LABEL_0':
         score = 1 - score
@@ -114,20 +125,20 @@ def predict(model: InputModel) -> OutputModel:
     commercial_stop = False
     if re.search(commercial_sender_pattern, sender):
-        print("commercial sender")
         score = score * 0.9
         if re.search(commercial_stop_pattern, text):
-            print("STOP founded")
             score = score * 0.9
             commercial_stop = True
         else:
-            print("STOP missing")
     urls = extract_urls(text)
     if urls:
-        print(f"found URLs: {urls}")
-        print("searching for past scans")
         search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]
         scan_results = []
@@ -139,15 +150,15 @@ def predict(model: InputModel) -> OutputModel:
                 scan_results.append(scan_result)
         if not scan_results:
-            print("scanning...")
             scan_results = [urlscan.scan(url) for url in urls]
         for result in scan_results:
             overall = result.get('verdicts', {}).get('overall', {})
-            print(f"overall verdict: {overall}")
             if overall.get('hasVerdicts'):
                 score = overall.get('score')
-                print(f"verdict score {score}")
                 if 0 < overall.get('score'):
                     score = 1.0
@@ -155,10 +166,10 @@ def predict(model: InputModel) -> OutputModel:
                 elif overall.get('score') < 0:
                     score = score * 0.9
     else:
-        print(f"no URL found")
         score = score * 0.9
-    print(f"final score {score}")
     action = ActionModel.NONE
     if score > 0.7:
         action=ActionModel.JUNK
@@ -168,7 +179,7 @@ def predict(model: InputModel) -> OutputModel:
         else:
             action=ActionModel.JUNK
-    print(f"final action {action}")
     return OutputModel(action=action, sub_action=SubActionModel.NONE)
 class ReportModel(BaseModel):

 from time import sleep
+import logging
 import httpx
 from fastapi import FastAPI
 import requests
 import re
 app = FastAPI()
 urlscan = UrlscanClient()
+# Basic logging configuration
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s [%(levelname)s] %(message)s'
+)
 class MessageModel(BaseModel):
     text: str
     sender = model.query.sender
     text = model.query.message.text
+    logging.info(f"[{sender}] {text}")
     # Debug sleep
     pattern = r"^Sent from your Twilio trial account - sleep (\d+)$"
     if match:
         number_str = match.group(1)
         sleep_duration = int(number_str)
+        logging.debug(f"[DEBUG SLEEP] Sleeping for {sleep_duration} seconds for sender {sender}")
         sleep(sleep_duration)
         return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
     if match:
         category_str = match.group(1)
+        logging.info(f"[DEBUG CATEGORY] Forced category: {category_str} for sender {sender}")
         match category_str:
             case 'junk':
                 return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
     label = result[0]['label']
     score = result[0]['score']
+    logging.info(f"[CLASSIFICATION] label={label} score={score}")
     if label == 'LABEL_0':
         score = 1 - score
     commercial_stop = False
     if re.search(commercial_sender_pattern, sender):
+        logging.info("[COMMERCIAL] Commercial sender detected")
         score = score * 0.9
         if re.search(commercial_stop_pattern, text):
+            logging.info("[COMMERCIAL] STOP keyword detected")
             score = score * 0.9
             commercial_stop = True
         else:
+            logging.info("[COMMERCIAL] STOP keyword missing")
     urls = extract_urls(text)
     if urls:
+        logging.info(f"[URL] URLs found: {urls}")
+        logging.info("[URL] Searching for previous scans")
         search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]
         scan_results = []
                 scan_results.append(scan_result)
         if not scan_results:
+            logging.info("[URL] No previous scan found, launching a new scan...")
             scan_results = [urlscan.scan(url) for url in urls]
         for result in scan_results:
             overall = result.get('verdicts', {}).get('overall', {})
+            logging.info(f"[URLSCAN] Overall verdict: {overall}")
             if overall.get('hasVerdicts'):
                 score = overall.get('score')
+                logging.info(f"[URLSCAN] Verdict score: {score}")
                 if 0 < overall.get('score'):
                     score = 1.0
                 elif overall.get('score') < 0:
                     score = score * 0.9
     else:
+        logging.info(f"[URL] No URL found")
         score = score * 0.9
+    logging.info(f"[FINAL SCORE] {score}")
     action = ActionModel.NONE
     if score > 0.7:
         action=ActionModel.JUNK
         else:
             action=ActionModel.JUNK
+    logging.info(f"[FINAL ACTION] {action}")
     return OutputModel(action=action, sub_action=SubActionModel.NONE)
 class ReportModel(BaseModel):