File size: 1,251 Bytes
cfdd494
1f542f4
cfdd494
 
 
 
 
1f542f4
cfdd494
 
1f542f4
 
 
 
 
cfdd494
 
 
1f542f4
cfdd494
1f542f4
 
 
 
 
 
 
 
 
 
 
cfdd494
 
 
4e9d1bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from transformers import pipeline
import pandas as pd
import logging

logger = logging.getLogger(__name__)

def detect_anomalies(df: pd.DataFrame) -> pd.DataFrame:
    """Classify each device-log row and return the rows labelled "POSITIVE".

    A short text of the form ``"<status> Usage:<usage_count>"`` is built for
    every row and passed to a Hugging Face ``text-classification`` pipeline
    backed by ``prajjwal1/bert-tiny``.

    Side effects:
        Two columns are added to (or overwritten on) the caller's ``df``:
        ``text`` (the string sent to the model) and ``anomaly`` (the label
        the model returned).

    Args:
        df: Frame with at least the columns ``status`` and ``usage_count``.
            An empty frame is accepted and yields an empty result without
            loading the model.

    Returns:
        A new DataFrame (a copy, safe to modify) holding the rows of ``df``
        whose ``anomaly`` label equals ``"POSITIVE"``.

    Raises:
        KeyError: If ``status`` or ``usage_count`` is missing from ``df``.
        Exception: Any error raised while loading or running the pipeline is
            logged with its traceback and re-raised unchanged.
    """
    logger.info("Detecting anomalies...")
    try:
        # Build the model input straight from the two columns. Unlike a
        # row-wise ``df.apply(..., axis=1)`` this works on an empty frame and
        # does not upcast integer counts to floats when every column is
        # numeric.
        texts = [
            f"{status} Usage:{usage}"
            for status, usage in zip(df['status'], df['usage_count'])
        ]
        df['text'] = texts

        if texts:
            # Tokenizer is named explicitly; clean_up_tokenization_spaces is
            # set to silence the related warning.
            classifier = pipeline(
                "text-classification",
                model="prajjwal1/bert-tiny",
                tokenizer="prajjwal1/bert-tiny",
                clean_up_tokenization_spaces=False,
            )
            results = classifier(texts)
        else:
            # Nothing to classify: skip the (expensive) model load entirely.
            results = []

        # One label per input row, in the same order as ``texts``.
        df['anomaly'] = [result['label'] for result in results]

        # NOTE(review): this checkpoint does not appear to be a fine-tuned
        # classifier, so it may emit generic labels (e.g. "LABEL_0") rather
        # than "POSITIVE" -- confirm the label set, otherwise this filter
        # never matches.
        anomalies = df[df['anomaly'] == "POSITIVE"].copy()

        logger.info("Detected %d anomalies...", len(anomalies))
        return anomalies
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Failed to detect anomalies: %s", e)
        raise