Spaces:
Running
Running
Add reported phishing to dataset
Browse files- app.py +15 -3
- phishing_datasets.py +28 -0
- requirements.txt +3 -1
app.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import JSONResponse
|
|
3 |
from pydantic import BaseModel
|
4 |
from enum import Enum
|
5 |
from transformers import pipeline
|
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
@@ -42,6 +43,7 @@ class OutputModel(BaseModel):
|
|
42 |
|
43 |
pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
|
44 |
|
|
|
45 |
@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
|
46 |
def get_well_known_aasa():
|
47 |
return JSONResponse(
|
@@ -57,9 +59,19 @@ def get_well_known_aasa():
|
|
57 |
)
|
58 |
|
59 |
@app.post("/predict")
|
60 |
-
def predict(
|
61 |
-
|
|
|
62 |
if label[0]['label'] == 'LABEL_1':
|
|
|
63 |
return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
|
64 |
else:
|
65 |
-
return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from pydantic import BaseModel
|
4 |
from enum import Enum
|
5 |
from transformers import pipeline
|
6 |
+
from phishing_datasets import submit_entry
|
7 |
|
8 |
# Application instance; the @app.get / @app.post decorators in this file register their routes on it.
app = FastAPI()
|
9 |
|
|
|
43 |
|
44 |
# Text-classification pipeline, built once at import time; predict() runs each incoming message through it.
pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
|
45 |
|
46 |
+
|
47 |
@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
|
48 |
def get_well_known_aasa():
|
49 |
return JSONResponse(
|
|
|
59 |
)
|
60 |
|
61 |
@app.post("/predict")
def predict(model: InputModel) -> OutputModel:
    """Classify the message carried by ``model`` and choose a filter action.

    The message text is run through the module-level ``pipe`` classifier.
    When the top prediction is ``'LABEL_1'`` the sender and text are handed
    to ``submit_entry`` and the JUNK action is returned; any other label
    yields the NONE action. The sub-action is always NONE.
    """
    query = model.query
    prediction = pipe(query.message.text)

    # Guard clause: everything the classifier does not tag 'LABEL_1' passes through.
    if prediction[0]['label'] != 'LABEL_1':
        return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)

    # Flagged messages are recorded before the response is produced.
    submit_entry(query.sender, query.message.text)
    return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
|
70 |
+
|
71 |
+
class ReportModel(BaseModel):
    """Request body accepted by the ``/report`` endpoint."""

    # Sender of the reported message; forwarded to submit_entry as-is.
    sender: str
    # Text of the reported message; forwarded to submit_entry as-is.
    message: str
|
74 |
+
|
75 |
+
@app.post("/report")
def report(model: ReportModel):
    """Forward a user-reported message to ``submit_entry``.

    The handler has no explicit return value.
    """
    reported_sender, reported_text = model.sender, model.message
    submit_entry(reported_sender, reported_text)
|
phishing_datasets.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from datasets import load_dataset, Dataset
|
3 |
+
import os
|
4 |
+
|
5 |
+
# Name of the dataset, read from the environment; it is passed to both
# load_dataset() here and push_to_hub() in submit_entry().
DATASET_NAME = os.getenv("DATASET_NAME")
if not DATASET_NAME:
    # os.getenv returns None when the variable is unset; fail here with a
    # clear message instead of an opaque error from inside load_dataset.
    raise RuntimeError(
        "The DATASET_NAME environment variable must be set to the name of "
        "the phishing dataset."
    )

# Load the "train" split once at import time and keep an in-memory copy as a
# DataFrame; submit_entry() checks duplicates against it and appends to it.
dataset = load_dataset(DATASET_NAME, split="train")
df = pd.DataFrame(dataset)
|
9 |
+
|
10 |
+
def submit_entry(sender: str, message: str) -> str:
    """Add a new SMS phishing report unless it is already in the dataset.

    Args:
        sender: Sender identifier. Surrounding whitespace and every inner
            space are removed before comparison and storage.
        message: Message text. Surrounding whitespace is stripped.

    Returns:
        A status string: a warning when the normalized (sender, message)
        pair is already present, otherwise a success confirmation.

    Raises:
        Any exception raised while converting or pushing the dataset. In
        that case the module-level ``df`` is left unchanged, so the same
        report can be submitted again.
    """
    global df

    sender = sender.strip().replace(" ", "")  # Remove all spaces inside sender
    message = message.strip()

    # Stop here on duplicates. The status string must be returned: as a bare
    # expression it is a no-op and the duplicate would be appended anyway.
    is_duplicate = ((df["sender"] == sender) & (df["message"] == message)).any()
    if is_duplicate:
        return "⚠️ This entry already exists in the dataset!"

    new_entry = pd.DataFrame([[sender, message]], columns=["sender", "message"])
    updated = pd.concat([df, new_entry], ignore_index=True)

    # Publish first and only then replace the in-memory frame, so a failed
    # push does not leave an unpublished row that blocks a later retry.
    Dataset.from_pandas(updated).push_to_hub(DATASET_NAME)
    df = updated

    return "✅ Submission saved successfully!"
|
requirements.txt
CHANGED
@@ -2,4 +2,6 @@ fastapi
|
|
2 |
uvicorn[standard]
|
3 |
pydantic
|
4 |
transformers
|
5 |
-
torch
|
|
|
|
|
|
2 |
uvicorn[standard]
|
3 |
pydantic
|
4 |
transformers
|
5 |
+
torch
|
6 |
+
datasets
|
7 |
+
pandas
|