LPX
committed on
Commit
·
58f23d5
1
Parent(s):
8f7f87a
feat: add logging
Browse files- requirements.txt +4 -2
- utils/hf_logger.py +87 -0
requirements.txt
CHANGED
@@ -5,11 +5,13 @@ huggingface_hub[xet]
|
|
5 |
torchvision
|
6 |
torch
|
7 |
spaces
|
8 |
-
pillow
|
9 |
opencv-python
|
10 |
modelscope_studio
|
11 |
pydantic==2.10.6
|
12 |
tf-keras
|
13 |
PyWavelets
|
14 |
pyexiftool
|
15 |
-
psutil
|
|
|
|
|
|
5 |
torchvision
|
6 |
torch
|
7 |
spaces
|
8 |
+
# pillow
|
9 |
opencv-python
|
10 |
modelscope_studio
|
11 |
pydantic==2.10.6
|
12 |
tf-keras
|
13 |
PyWavelets
|
14 |
pyexiftool
|
15 |
+
psutil
|
16 |
+
datasets
|
17 |
+
Pillow
|
utils/hf_logger.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import json
|
4 |
+
import io
|
5 |
+
import datetime
|
6 |
+
from PIL import Image
|
7 |
+
import logging
|
8 |
+
from datasets import Dataset, load_dataset
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name
|
13 |
+
|
14 |
+
def _pil_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 JPEG string.

    The image is converted to RGB first because JPEG cannot store
    alpha channels or palette/other modes; quality 85 keeps the
    payload reasonably small for dataset storage.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG", quality=85)
    raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode('utf-8')
|
22 |
+
|
23 |
+
def initialize_dataset():
    """Load the inference-log dataset from the Hub, or create an empty one.

    Returns a ``datasets.Dataset`` with the logging schema. Any failure to
    load (repo missing, no network, auth error) falls through to building a
    fresh, empty in-memory dataset with the expected columns.
    """
    try:
        existing = load_dataset(HF_DATASET_NAME, split="train")
    except Exception:
        # Dataset not reachable or does not exist yet: start from scratch.
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        empty_schema = {
            column: []
            for column in (
                "timestamp",
                "image",                  # base64 JPEG string of the input image
                "inference_request",
                "model_predictions",
                "ensemble_output",
                "forensic_outputs",       # list of base64 image strings
                "agent_monitoring_data",
                "human_feedback",
            )
        }
        return Dataset.from_dict(empty_schema)
    logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    return existing
|
43 |
+
|
44 |
+
def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Log a single inference event to the Hugging Face dataset.

    Args:
        original_image: Input image of the inference run (stored as base64 JPEG).
        inference_params: Request parameters as passed to the models.
        model_predictions: Per-model prediction dicts.
        ensemble_output: Combined ensemble result.
        forensic_images: Forensic visualization images; ``None`` entries are
            skipped, and ``None`` for the whole list is treated as empty.
        agent_monitoring_data: Agent telemetry captured during the run.
        human_feedback: Optional reviewer feedback; defaults to an empty dict.

    Never raises: any failure is logged (with traceback) and swallowed so
    that logging problems cannot break the inference path.
    """
    try:
        dataset = initialize_dataset()

        # Convert PIL Images to base64 strings for storage.
        original_image_b64 = _pil_to_base64(original_image)
        forensic_images_b64 = [
            _pil_to_base64(img) for img in (forensic_images or []) if img is not None
        ]

        new_entry = {
            # NOTE(review): naive local time, kept for compatibility with
            # existing rows — consider datetime.now(timezone.utc) going forward.
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {}
        }

        # Append the new entry.
        # Note: Directly appending might not be efficient for large datasets or
        # frequent logging. For a production system, consider batched writes or
        # more robust data pipelines.
        updated_dataset = dataset.add_item(new_entry)

        # This will push to the Hugging Face Hub if you are logged in and the
        # dataset is configured; otherwise save locally.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")  # Save locally for now
        logger.info("Inference data logged successfully to local cache.")

        # To push to hub, uncomment the line below and ensure HF_DATASET_NAME
        # is set correctly and you are logged in.
        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")

    except Exception:
        # logger.exception records the full traceback; the previous
        # logger.error(f"... {e}") dropped it, hiding the failure cause.
        logger.exception("Failed to log inference data to Hugging Face dataset")
|