LPX committed on
Commit
58f23d5
·
1 Parent(s): 8f7f87a

feat: add logging

Browse files
Files changed (2) hide show
  1. requirements.txt +4 -2
  2. utils/hf_logger.py +87 -0
requirements.txt CHANGED
@@ -5,11 +5,13 @@ huggingface_hub[xet]
5
  torchvision
6
  torch
7
  spaces
8
- pillow
9
  opencv-python
10
  modelscope_studio
11
  pydantic==2.10.6
12
  tf-keras
13
  PyWavelets
14
  pyexiftool
15
- psutil
 
 
 
5
  torchvision
6
  torch
7
  spaces
8
+ # pillow
9
  opencv-python
10
  modelscope_studio
11
  pydantic==2.10.6
12
  tf-keras
13
  PyWavelets
14
  pyexiftool
15
+ psutil
16
+ datasets
17
+ Pillow
utils/hf_logger.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import datetime
import io
import json
import logging
import os
from typing import Optional

from datasets import Dataset, load_dataset
from PIL import Image
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name
13
+
14
+ def _pil_to_base64(image: Image.Image) -> str:
15
+ """Converts a PIL Image to a base64 string."""
16
+ buffered = io.BytesIO()
17
+ # Ensure image is in RGB mode before saving as JPEG
18
+ if image.mode != 'RGB':
19
+ image = image.convert('RGB')
20
+ image.save(buffered, format="JPEG", quality=85)
21
+ return base64.b64encode(buffered.getvalue()).decode('utf-8')
22
+
23
def initialize_dataset():
    """Load the logging dataset from the Hub, or build an empty one.

    Returns:
        datasets.Dataset: the existing "train" split of ``HF_DATASET_NAME``
        when it can be loaded, otherwise a new in-memory dataset with the
        empty logging schema.
    """
    try:
        # Try to load the existing dataset (Hub or local cache).
        dataset = load_dataset(HF_DATASET_NAME, split="train")
        logger.info("Loaded existing Hugging Face dataset: %s", HF_DATASET_NAME)
    except Exception as e:
        # The dataset may simply not exist yet, but the failure could also be
        # an auth or network problem -- record the cause instead of silently
        # discarding it, then fall back to a fresh, empty dataset.
        logger.info(
            "Could not load Hugging Face dataset %s (%s); creating a new one.",
            HF_DATASET_NAME, e,
        )
        dataset = Dataset.from_dict({
            "timestamp": [],
            "image": [],  # base64 JPEG string of the original image
            "inference_request": [],
            "model_predictions": [],
            "ensemble_output": [],
            "forensic_outputs": [],  # list of base64 image strings
            "agent_monitoring_data": [],
            "human_feedback": [],
        })
    return dataset
43
+
44
def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: Optional[dict] = None,
):
    """Append a single inference event to the local dataset cache.

    Best-effort logging: any failure is caught and logged so that logging can
    never break the inference path. Returns None.

    Args:
        original_image: Input image of the inference request.
        inference_params: Request parameters as passed to the models.
        model_predictions: Per-model prediction dicts.
        ensemble_output: Aggregated ensemble result.
        forensic_images: Forensic visualization images; None entries are skipped.
        agent_monitoring_data: Agent/monitoring metadata for this event.
        human_feedback: Optional feedback dict; stored as {} when absent.
    """
    try:
        dataset = initialize_dataset()

        # Store images as base64 JPEG strings so each record is
        # self-contained and serializable.
        original_image_b64 = _pil_to_base64(original_image)
        forensic_images_b64 = [
            _pil_to_base64(img) for img in forensic_images if img is not None
        ]

        new_entry = {
            # Timezone-aware UTC timestamp so records from different hosts
            # sort and compare consistently.
            "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {},
        }

        # Note: reloading and rewriting the whole dataset per event is
        # O(dataset size) per call. Fine for low volume; use batched writes
        # or a proper data pipeline for production traffic.
        updated_dataset = dataset.add_item(new_entry)

        # Save locally for now. Pushing to the Hub can be enabled below once
        # HF_DATASET_NAME is configured and credentials are available.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")
        logger.info("Inference data logged successfully to local cache.")

        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")

    except Exception:
        # Deliberate broad catch: logging must never crash the caller.
        # logger.exception keeps the full traceback, unlike logger.error(f"..{e}").
        logger.exception("Failed to log inference data to Hugging Face dataset")