import pandas as pd | |
from transformers import pipeline | |
import logging | |
logger = logging.getLogger(__name__)


def detect_anomalies(df):
    """Detect anomalies in log data using a Hugging Face model.

    One text per row is built from the ``status`` and ``usage_count``
    columns, every text is classified with a ``text-classification``
    pipeline, and the rows whose predicted label is ``"POSITIVE"`` are
    returned.

    Note that *df* is modified in place: a ``text`` column (the model input)
    and an ``anomaly`` column (the predicted label) are added or overwritten.

    Args:
        df: DataFrame providing ``status`` and ``usage_count`` columns.
            ``status`` is concatenated with a string, so it is presumably a
            string column -- TODO confirm against callers.

    Returns:
        The subset of *df* whose ``anomaly`` label equals ``"POSITIVE"``.
        For a frame with no rows the model is never loaded and an empty
        subset is returned.

    Raises:
        Exception: Any failure (missing column, model loading, inference) is
            logged with its traceback and re-raised unchanged.
    """
    logger.info("Detecting anomalies...")
    try:
        # Build the model input before touching the model: this is cheap and
        # makes a missing column fail fast instead of after a model load.
        df["text"] = df["status"] + " Usage:" + df["usage_count"].astype(str)
        texts = df["text"].tolist()

        if texts:
            detector = pipeline(
                "text-classification",
                model="prajjwal1/bert-tiny",
                tokenizer="prajjwal1/bert-tiny",
                clean_up_tokenization_spaces=True,
            )
            labels = [result["label"] for result in detector(texts)]
        else:
            # Nothing to classify: skip loading the model and running it.
            labels = []

        df["anomaly"] = labels
        # NOTE(review): this filter only matches if the checkpoint emits the
        # label "POSITIVE"; confirm that "prajjwal1/bert-tiny" is configured
        # with that label name, otherwise no row is ever reported.
        anomalies = df[df["anomaly"] == "POSITIVE"]
        logger.info("Detected %d anomalies.", len(anomalies))
        return anomalies
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback; the bare raise preserves the original exception.
        logger.exception("Failed to detect anomalies: %s", e)
        raise