|
import gradio as gr |
|
import os |
|
import random |
|
import csv |
|
from pathlib import Path |
|
from datetime import datetime, timedelta |
|
import tempfile |
|
from huggingface_hub import HfApi, hf_hub_download, login |
|
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
import atexit |
|
import threading |
|
import time |
|
import shutil |
|
|
|
|
|
# Hugging Face dataset repo where collected preferences are persisted.
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "matsant01/user-study-collected-preferences")

# Write token from the environment; when absent, Hub uploads are disabled.
HF_TOKEN = os.getenv("HF_TOKEN")

RESULTS_FILENAME_IN_REPO = "preferences.csv"

# Scratch directory that holds the local copy of the results CSV between uploads.
TEMP_DIR = tempfile.mkdtemp()

LOCAL_RESULTS_FILE = Path(TEMP_DIR) / RESULTS_FILENAME_IN_REPO

# How often the scheduler pushes the CSV to the Hub (0.1 h = every 6 minutes).
UPLOAD_INTERVAL_HOURS = 0.1

# Local directory containing one subfolder per study sample.
DATA_DIR = Path("data")

# Accepted image file extensions, tried in this order by find_image().
IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".webp"]

# Set by initialize_hub_and_results(); None means Hub saving is disabled.
hf_api = None

scheduler = BackgroundScheduler(daemon=True)

# Serializes access to LOCAL_RESULTS_FILE between request handlers and the
# background upload job.
upload_lock = threading.Lock()

# Set when a new row has been written since the last successful upload, so
# idle periods skip the upload entirely.
new_preferences_recorded_since_last_upload = threading.Event()
|
|
|
|
|
def initialize_hub_and_results():
    """Log into the HF Hub and fetch any existing results CSV into TEMP_DIR.

    Sets the module-global ``hf_api`` to an HfApi instance on success, or to
    None when the token is missing or the dataset repo is unreachable (which
    disables result uploads).
    """
    global hf_api

    if not HF_TOKEN:
        print("Warning: HF_TOKEN secret not found. Results will not be saved to the Hub.")
        hf_api = None
        return

    print("Logging into Hugging Face Hub...")
    try:
        login(token=HF_TOKEN)
        hf_api = HfApi()
        print(f"Attempting initial download of {RESULTS_FILENAME_IN_REPO} from {DATASET_REPO_ID}")
        # Pull the current preferences file so new rows are appended to it
        # rather than overwriting previously collected data on upload.
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=RESULTS_FILENAME_IN_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir=TEMP_DIR,
            local_dir_use_symlinks=False,
        )
        print(f"Successfully downloaded existing {RESULTS_FILENAME_IN_REPO} to {LOCAL_RESULTS_FILE}")
    except EntryNotFoundError:
        # Repo exists but has no results file yet: start fresh locally.
        print(f"{RESULTS_FILENAME_IN_REPO} not found in repo. Will create locally.")
    except RepositoryNotFoundError:
        print(f"Error: Dataset repository {DATASET_REPO_ID} not found or token lacks permissions.")
        print("Results saving will be disabled.")
        hf_api = None
    except Exception as e:
        print(f"Error during initial download/login: {e}")
        print("Proceeding without initial download. File will be created locally.")
|
|
|
|
|
|
|
def find_image(folder_path: Path, base_name: str) -> Path | None: |
|
for ext in IMAGE_EXTENSIONS: |
|
file_path = folder_path / f"{base_name}{ext}" |
|
if file_path.exists(): |
|
return file_path |
|
return None |
|
|
|
def get_sample_ids() -> list[str]:
    """Return names of all sample subdirectories under DATA_DIR that contain
    a prompt file plus the four required images (any supported extension)."""
    if not DATA_DIR.is_dir():
        return []

    def _is_complete(folder: Path) -> bool:
        # A usable sample needs prompt.txt and all four images.
        return (
            (folder / "prompt.txt").exists()
            and find_image(folder, "input_bg") is not None
            and find_image(folder, "input_fg") is not None
            and find_image(folder, "baseline") is not None
            and find_image(folder, "tf-icon") is not None
        )

    return [entry.name for entry in DATA_DIR.iterdir() if entry.is_dir() and _is_complete(entry)]
|
|
|
def load_sample_data(sample_id: str) -> dict | None:
    """Load one sample's prompt and image paths from ``DATA_DIR/<sample_id>``.

    Args:
        sample_id: Name of the sample subdirectory.

    Returns:
        A dict with keys ``id``, ``prompt``, ``input_bg``, ``input_fg``,
        ``output_baseline`` and ``output_tficon`` (image values are string
        paths), or None when the directory, any required file, or the prompt
        is missing/unreadable.
    """
    sample_path = DATA_DIR / sample_id
    if not sample_path.is_dir():
        return None

    prompt_file = sample_path / "prompt.txt"
    input_bg_path = find_image(sample_path, "input_bg")
    input_fg_path = find_image(sample_path, "input_fg")
    output_baseline_path = find_image(sample_path, "baseline")
    output_tficon_path = find_image(sample_path, "tf-icon")

    if not all([prompt_file.exists(), input_bg_path, input_fg_path, output_baseline_path, output_tficon_path]):
        print(f"Warning: Missing files in sample {sample_id}")
        return None

    try:
        # Fix: read explicitly as UTF-8 so prompts decode identically on
        # every host instead of depending on the platform locale encoding.
        prompt = prompt_file.read_text(encoding="utf-8").strip()
    except Exception as e:
        print(f"Error reading prompt for {sample_id}: {e}")
        return None

    return {
        "id": sample_id,
        "prompt": prompt,
        "input_bg": str(input_bg_path),
        "input_fg": str(input_fg_path),
        "output_baseline": str(output_baseline_path),
        "output_tficon": str(output_tficon_path),
    }
|
|
|
|
|
|
|
# Snapshot of valid sample ids taken once at import time; each browser
# session starts from this full list via available_samples_state.
INITIAL_SAMPLE_IDS = get_sample_ids()
|
|
|
def get_next_sample(available_ids: list[str]) -> tuple[dict | None, list[str]]: |
|
if not available_ids: |
|
return None, [] |
|
chosen_id = random.choice(available_ids) |
|
remaining_ids = [id for id in available_ids if id != chosen_id] |
|
sample_data = load_sample_data(chosen_id) |
|
return sample_data, remaining_ids |
|
|
|
def display_new_sample(state: dict, available_ids: list[str]):
    """Select the next sample and return gr.update values for every component.

    Returns a dict keyed by the module-level Gradio components (defined in the
    gr.Blocks section below), also refreshing app_state and
    available_samples_state. Which model shows as Output A vs B is randomized
    per sample.
    """
    sample_data, remaining_ids = get_next_sample(available_ids)

    if not sample_data:
        # Pool exhausted (or the chosen sample failed to load): show the
        # completion message and hide all study controls.
        return {
            prompt_display: gr.update(value="**Prompt:** No more samples available. Thank you!"),
            input_bg_display: gr.update(value=None, visible=False),
            input_fg_display: gr.update(value=None, visible=False),
            output_a_display: gr.update(value=None, visible=False),
            output_b_display: gr.update(value=None, visible=False),
            choice_button_a: gr.update(visible=False),
            choice_button_b: gr.update(visible=False),
            next_button: gr.update(visible=False),
            status_display: gr.update(value="**Status:** Completed!"),
            app_state: state,
            available_samples_state: remaining_ids
        }

    # Shuffle which model appears in the A and B slots so raters cannot
    # learn a fixed position for either model.
    outputs = [
        {"model_name": "baseline", "path": sample_data["output_baseline"]},
        {"model_name": "tf-icon", "path": sample_data["output_tficon"]},
    ]
    random.shuffle(outputs)
    output_a = outputs[0]
    output_b = outputs[1]

    # Remember the slot->model assignment so record_preference() can map the
    # clicked button back to the underlying model name.
    state = {
        "current_sample_id": sample_data["id"],
        "output_a_model_name": output_a["model_name"],
        "output_b_model_name": output_b["model_name"],
    }

    return {
        prompt_display: gr.update(value=f"**Prompt:** {sample_data['prompt']}"),
        input_bg_display: gr.update(value=sample_data["input_bg"], visible=True),
        input_fg_display: gr.update(value=sample_data["input_fg"], visible=True),
        output_a_display: gr.update(value=output_a["path"], visible=True),
        output_b_display: gr.update(value=output_b["path"], visible=True),
        choice_button_a: gr.update(visible=True, interactive=True),
        choice_button_b: gr.update(visible=True, interactive=True),
        next_button: gr.update(visible=False),
        status_display: gr.update(value="**Status:** Please choose the image you prefer."),
        app_state: state,
        available_samples_state: remaining_ids
    }
|
|
|
def record_preference(choice: str, state: dict, request: gr.Request):
    """Append the user's A/B choice to the local CSV and update the UI.

    `choice` is the display slot the user clicked ("A" or "B"); the slot ->
    model mapping stored in `state` by display_new_sample() resolves the
    actual model name. Returns gr.update values for the two choice buttons,
    the next button, the status line, and app_state.
    """
    # NOTE(review): "session_id" is really the client host/IP taken from the
    # request, not a per-browser session identifier — confirm this is the
    # intended granularity for the study.
    if not request:
        print("Error: Request object is None. Cannot get session ID.")
        session_id = "unknown_session"
    else:
        try:
            session_id = request.client.host
        except AttributeError:
            print("Error: request.client is None or has no 'host' attribute.")
            session_id = "unknown_client"

    if not state or "current_sample_id" not in state:
        # State can be lost (e.g. page reload); ask the user to move on.
        print("Warning: State missing, cannot record preference.")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value="**Status:** Error: Session state lost. Click Next Sample."),
            app_state: state
        }

    # Resolve which model the clicked slot corresponds to, plus where each
    # model was displayed, so the CSV row is self-describing.
    chosen_model_name = state["output_a_model_name"] if choice == "A" else state["output_b_model_name"]
    baseline_display = "A" if state["output_a_model_name"] == "baseline" else "B"
    tficon_display = "B" if state["output_a_model_name"] == "baseline" else "A"

    new_row = {
        "timestamp": datetime.now().isoformat(),
        "session_id": session_id,
        "sample_id": state["current_sample_id"],
        "baseline_displayed_as": baseline_display,
        "tficon_displayed_as": tficon_display,
        "chosen_display": choice,
        "chosen_model_name": chosen_model_name
    }
    # CSV column order follows the dict's insertion order above.
    header = list(new_row.keys())

    try:
        # The lock serializes writes against the periodic upload job, which
        # reads the same file.
        with upload_lock:
            file_exists = LOCAL_RESULTS_FILE.exists()
            mode = 'a' if file_exists else 'w'
            with open(LOCAL_RESULTS_FILE, mode, newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=header)
                # Write the header only for a brand-new or empty file.
                if not file_exists or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
                    writer.writeheader()
                    print(f"Created or wrote header to {LOCAL_RESULTS_FILE}")
                writer.writerow(new_row)
            print(f"Appended preference for {state['current_sample_id']} to local file.")
            # Tell the background uploader there is fresh data worth pushing.
            new_preferences_recorded_since_last_upload.set()

    except Exception as e:
        print(f"Error writing local results file {LOCAL_RESULTS_FILE}: {e}")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value=f"**Status:** Error saving preference locally: {e}. Click Next."),
            app_state: state
        }

    # Success: freeze the choice buttons and reveal "Next Sample".
    return {
        choice_button_a: gr.update(interactive=False),
        choice_button_b: gr.update(interactive=False),
        next_button: gr.update(visible=True, interactive=True),
        status_display: gr.update(value=f"**Status:** Preference recorded (Chose {choice}). Click Next Sample."),
        app_state: state
    }
|
|
|
def upload_preferences_to_hub():
    """Push the local results CSV to the Hub if new rows were recorded.

    Runs on the background scheduler and once more at shutdown. No-ops when
    the Hub API is unavailable or nothing changed since the last successful
    upload.
    """
    print("Periodic upload check triggered.")
    if not hf_api:
        print("Upload check skipped: Hugging Face API not available.")
        return

    # Cheap unlocked check first so the common idle case never takes the lock.
    if not new_preferences_recorded_since_last_upload.is_set():
        print("Upload check skipped: No new preferences recorded since last upload.")
        return

    with upload_lock:
        # Re-check under the lock in case another upload ran in between
        # (double-checked pattern on the Event flag).
        if not new_preferences_recorded_since_last_upload.is_set():
            print("Upload check skipped (race condition avoided): No new preferences.")
            return

        if not LOCAL_RESULTS_FILE.exists() or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
            # Flag was set but there is nothing usable on disk; reset it.
            print("Upload check skipped: Local results file is missing or empty.")
            new_preferences_recorded_since_last_upload.clear()
            return

        try:
            print(f"Attempting to upload {LOCAL_RESULTS_FILE} to {DATASET_REPO_ID}/{RESULTS_FILENAME_IN_REPO}")
            start_time = time.time()
            hf_api.upload_file(
                path_or_fileobj=str(LOCAL_RESULTS_FILE),
                path_in_repo=RESULTS_FILENAME_IN_REPO,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                commit_message=f"Periodic upload of preferences - {datetime.now().isoformat()}"
            )
            end_time = time.time()
            print(f"Successfully uploaded preferences. Took {end_time - start_time:.2f} seconds.")
            # Clear only after a confirmed upload so failures retry next run.
            new_preferences_recorded_since_last_upload.clear()
        except Exception as e:
            print(f"Error uploading results file: {e}")
|
|
|
def handle_choice_a(state: dict, request: gr.Request):
    """Click handler for the 'Choose Output A' button."""
    return record_preference(choice="A", state=state, request=request)
|
|
|
def handle_choice_b(state: dict, request: gr.Request):
    """Click handler for the 'Choose Output B' button."""
    return record_preference(choice="B", state=state, request=request)
|
|
|
# --- Gradio UI definition: layout plus event wiring ---
with gr.Blocks(title="Image Composition User Study") as demo:
    gr.Markdown("# Image Composition User Study")
    gr.Markdown(
        "> Please look at the input images and the prompt below. "
        "Then, compare the two output images (Output A and Output B) and click the button below the one you prefer."
    )

    # Per-session state: the A/B -> model mapping for the current sample, and
    # the pool of sample ids this session has not yet seen.
    app_state = gr.State({})
    available_samples_state = gr.State(INITIAL_SAMPLE_IDS)

    status_display = gr.Markdown("**Status:** Loading first sample...")

    # Inputs section: the prompt plus background/foreground source images.
    gr.Markdown("## Inputs")
    with gr.Row():
        prompt_display = gr.Markdown("**Prompt:** Loading...")
    with gr.Row():
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Background</div>")
            input_bg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Foreground</div>")
            input_fg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)

    gr.Markdown("---")
    gr.Markdown("## Choose your preferred output")

    # The two anonymized model outputs, each with its own choice button.
    with gr.Row():
        with gr.Column():
            output_a_display = gr.Image(label="Output A", type="filepath", height=400, width=400, interactive=False)
            choice_button_a = gr.Button("Choose Output A", variant="primary")
        with gr.Column():
            output_b_display = gr.Image(label="Output B", type="filepath", height=400, width=400, interactive=False)
            choice_button_b = gr.Button("Choose Output B", variant="primary")

    next_button = gr.Button("🔁 Next Sample 🔁", visible=False)

    # Show the first sample as soon as the page loads.
    demo.load(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ]
    )

    # Record the preference for whichever side was clicked. The handlers'
    # gr.Request parameter is injected by Gradio, so it is not listed in
    # `inputs`.
    choice_button_a.click(
        fn=handle_choice_a,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )

    choice_button_b.click(
        fn=handle_choice_b,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )

    # Advance to the next randomly chosen sample.
    next_button.click(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ],
        api_name=False,
    )
|
|
|
def cleanup_temp_dir():
    """Best-effort removal of the scratch directory holding the local CSV."""
    temp_path = Path(TEMP_DIR)
    if not temp_path.exists():
        return
    print(f"Cleaning up temporary directory: {TEMP_DIR}")
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
|
|
|
def shutdown_hook():
    """atexit hook: flush results to the Hub, stop the scheduler, clean up.

    Order matters: upload first (while the temp file still exists), then stop
    the scheduler, then remove the temp directory.
    """
    print("Application shutting down. Performing final upload check...")
    upload_preferences_to_hub()
    if scheduler.running:
        print("Shutting down scheduler...")
        scheduler.shutdown(wait=False)
    cleanup_temp_dir()
    print("Shutdown complete.")


# Run the hook on normal interpreter exit.
atexit.register(shutdown_hook)
|
|
|
if __name__ == "__main__":
    # Log into the Hub and pull any previously collected results first.
    initialize_hub_and_results()

    if not INITIAL_SAMPLE_IDS:
        # Without data there is nothing to rate; print setup instructions
        # and exit without launching the app.
        print("Error: No valid samples found in the 'data' directory.")
        print("Please ensure the 'data' directory exists and contains subdirectories")
        print("named like 'sample_id', each with 'prompt.txt', 'input_bg.*',")
        print("'input_fg.*', 'baseline.*', and 'tf-icon.*' files.")
    elif not DATASET_REPO_ID:
        # NOTE(review): DATASET_REPO_ID is read via os.getenv(...) with a
        # non-empty default, so this branch only triggers if the env var is
        # set to an empty string; no "default placeholder" comparison is
        # actually performed — confirm the intended check.
        print("Error: DATASET_REPO_ID environment variable is not set or is set to the default placeholder.")
        print("Please set the DATASET_REPO_ID environment variable or update the script.")
    elif hf_api:
        # Hub integration active: start the periodic uploader, then serve.
        print(f"Starting periodic upload scheduler (every {UPLOAD_INTERVAL_HOURS} hours)...")
        scheduler.add_job(upload_preferences_to_hub, 'interval', hours=UPLOAD_INTERVAL_HOURS)
        scheduler.start()
        print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
        print(f"Configured to save results periodically to Hugging Face Dataset: {DATASET_REPO_ID}")
        print("Starting Gradio app...")
        demo.launch(server_name="0.0.0.0")
    else:
        # No token / repo access: run the study without persisting results.
        print("Warning: Running without Hugging Face Hub integration (HF_TOKEN or DATASET_REPO_ID missing/invalid).")
        print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
        print("Starting Gradio app...")
        demo.launch(server_name="0.0.0.0")