kokluch committed on
Commit
000642c
·
1 Parent(s): 2fb3bf7

Add reported phishing to dataset

Browse files
Files changed (3) hide show
  1. app.py +15 -3
  2. phishing_datasets.py +28 -0
  3. requirements.txt +3 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import JSONResponse
3
  from pydantic import BaseModel
4
  from enum import Enum
5
  from transformers import pipeline
 
6
 
7
  app = FastAPI()
8
 
@@ -42,6 +43,7 @@ class OutputModel(BaseModel):
42
 
43
  pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
44
 
 
45
  @app.get("/.well-known/apple-app-site-association", include_in_schema=False)
46
  def get_well_known_aasa():
47
  return JSONResponse(
@@ -57,9 +59,19 @@ def get_well_known_aasa():
57
  )
58
 
59
  @app.post("/predict")
60
- def predict(input: InputModel) -> OutputModel:
61
- label = pipe(input.query.message.text)
 
62
  if label[0]['label'] == 'LABEL_1':
 
63
  return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
64
  else:
65
- return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)
 
 
 
 
 
 
 
 
 
3
  from pydantic import BaseModel
4
  from enum import Enum
5
  from transformers import pipeline
6
+ from phishing_datasets import submit_entry
7
 
8
  app = FastAPI()
9
 
 
43
 
44
  pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
45
 
46
+
47
  @app.get("/.well-known/apple-app-site-association", include_in_schema=False)
48
  def get_well_known_aasa():
49
  return JSONResponse(
 
59
  )
60
 
61
  @app.post("/predict")
62
def predict(model: InputModel) -> OutputModel:
    """Classify the incoming message and record it when flagged as junk.

    The message text is run through the module-level text-classification
    pipeline. When the top result is labelled ``LABEL_1`` the sender and
    text are passed to ``submit_entry`` and a JUNK action is returned;
    any other label yields a NONE action.
    """
    query = model.query
    text = query.message.text
    top_result = pipe(text)[0]

    # Guard clause: anything other than LABEL_1 is passed through untouched.
    if top_result['label'] != 'LABEL_1':
        return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)

    submit_entry(query.sender, text)
    return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
70
+
71
class ReportModel(BaseModel):
    """Request body accepted by the ``/report`` endpoint."""

    # Sender of the reported message, as supplied by the client.
    sender: str
    # Text of the reported message.
    message: str
74
+
75
+ @app.post("/report")
76
def report(model: ReportModel):
    """Forward a reported sender/message pair to ``submit_entry``."""
    reported_sender, reported_text = model.sender, model.message
    submit_entry(reported_sender, reported_text)
phishing_datasets.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datasets import load_dataset, Dataset
3
+ import os
4
+
5
+ DATASET_NAME = os.getenv("DATASET_NAME")
6
+
7
+ dataset = load_dataset(DATASET_NAME, split="train")
8
+ df = pd.DataFrame(dataset)
9
+
10
def submit_entry(sender, message):
    """Add a new SMS phishing report unless it is already in the dataset.

    The sender is stripped and has every space removed, and the message is
    stripped, before the pair is compared against the module-level ``df``.

    Args:
        sender: Sender string of the reported message.
        message: Text of the reported message.

    Returns:
        A status string: the "already exists" warning when the normalized
        pair is already present (nothing is appended or pushed), otherwise
        the success message after the updated dataset has been pushed.
    """
    global df

    sender = sender.strip().replace(" ", "")  # Remove all spaces inside sender
    message = message.strip()

    # Stop on duplicates. Previously this branch was a bare string expression
    # with no return, so duplicates were appended and re-pushed every time.
    if ((df["sender"] == sender) & (df["message"] == message)).any():
        return "⚠️ This entry already exists in the dataset!"

    # Build the updated frame without touching the shared one yet.
    new_entry = pd.DataFrame([[sender, message]], columns=["sender", "message"])
    updated = pd.concat([df, new_entry], ignore_index=True)

    # Push first and commit to the in-memory frame only afterwards: if the
    # push raises, ``df`` still matches the hub and a retry is not mistaken
    # for a duplicate.
    Dataset.from_pandas(updated).push_to_hub(DATASET_NAME)
    df = updated

    return "✅ Submission saved successfully!"
requirements.txt CHANGED
@@ -2,4 +2,6 @@ fastapi
2
  uvicorn[standard]
3
  pydantic
4
  transformers
5
- torch
 
 
 
2
  uvicorn[standard]
3
  pydantic
4
  transformers
5
+ torch
6
+ datasets
7
+ pandas