Spaces:

zsyJosh
/

stark

Sleeping

App Files Files Community

Shiyu Zhao committed on Oct 22, 2024

Commit

d9a1db1

1 Parent(s): 2c8dbc2

Update space

Browse files

Files changed (1) hide show

app.py +186 -1

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import gradio as gr
 import pandas as pd
-import numpy as np
 # Sample data based on your table (you'll need to update this with the full dataset)
 data_synthesized_full = {
@@ -55,6 +59,187 @@ df_synthesized_full = pd.DataFrame(data_synthesized_full)
 df_synthesized_10 = pd.DataFrame(data_synthesized_10)
 df_human_generated = pd.DataFrame(data_human_generated)
 def format_dataframe(df, dataset):
     # Filter the dataframe for the selected dataset
     columns = ['Method'] + [col for col in df.columns if dataset in col]

 import gradio as gr
 import pandas as pd
+import os
+import re
+from datetime import datetime
+import json
 # Sample data based on your table (you'll need to update this with the full dataset)
 data_synthesized_full = {
 df_synthesized_10 = pd.DataFrame(data_synthesized_10)
 df_human_generated = pd.DataFrame(data_human_generated)
def validate_email(email_str):
    """Check that ``email_str`` holds one or more well-formed email addresses.

    Multiple addresses are separated by semicolons. Whitespace around each
    address is ignored, as are empty segments left by a trailing or doubled
    semicolon (for example ``"a@b.com;"``).

    Args:
        email_str: Raw contents of the contact-email field.

    Returns:
        True if at least one address is present and every address matches a
        simple ``local@domain.tld`` pattern; False otherwise, including for
        ``None``, non-string, or blank input.
    """
    # A missing or non-text value can never be a valid address list.
    if not isinstance(email_str, str):
        return False

    email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    # Drop blank segments so "a@b.com;" is not rejected for its trailing ';'.
    emails = [part.strip() for part in email_str.split(';')]
    emails = [email for email in emails if email]

    # all() is vacuously True on an empty list, so require at least one entry.
    return bool(emails) and all(
        email_pattern.fullmatch(email) is not None for email in emails
    )
def validate_github_url(url):
    """Check that ``url`` looks like a GitHub repository URL.

    Accepted shape: ``http(s)://[www.]github.com/<owner>/<repo>`` with an
    optional trailing slash and nothing else before or after it.

    Args:
        url: Raw contents of the code-repository field.

    Returns:
        True if the whole string matches the repository URL pattern; False
        otherwise, including for ``None`` or non-string input.
    """
    if not isinstance(url, str):
        return False

    # re.ASCII keeps \w to [A-Za-z0-9_] so non-ASCII letters are rejected.
    github_pattern = re.compile(
        r'https?://(?:www\.)?github\.com/[\w-]+/[\w.-]+/?',
        re.ASCII,
    )
    # fullmatch (unlike match + '$') does not tolerate a trailing newline.
    return github_pattern.fullmatch(url) is not None
def validate_csv(file_obj):
    """Validate the format and content of an uploaded prediction CSV.

    The file must contain ``query_id`` and ``pred_rank`` columns, and the
    ``pred_rank`` value of the first row must be a list (or the string form
    of a list literal) with at least 20 entries. Only the first row is
    inspected.

    Args:
        file_obj: The uploaded file: either an object whose ``name``
            attribute is the path on disk, or the path itself.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is a bool and
        ``message`` is a human-readable explanation.
    """
    # Local import keeps this block self-contained.
    import ast

    if file_obj is None:
        return False, "No CSV file provided"

    # Accept both a temp-file wrapper exposing ``.name`` and a plain path.
    csv_path = getattr(file_obj, "name", file_obj)

    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        # Validation boundary: report any read/parse failure to the user.
        return False, f"Error processing CSV: {str(e)}"

    # Check columns
    required_cols = ['query_id', 'pred_rank']
    if not all(col in df.columns for col in required_cols):
        return False, "CSV must contain 'query_id' and 'pred_rank' columns"

    if df.empty:
        return False, "CSV file contains no prediction rows"

    # Check pred_rank format and length
    first_rank = df['pred_rank'].iloc[0]
    if isinstance(first_rank, str):
        try:
            # SECURITY: the cell comes from an untrusted upload, so parse it
            # as a literal only; eval() would execute arbitrary code.
            first_rank = ast.literal_eval(first_rank)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return False, "Invalid pred_rank format"

    if not isinstance(first_rank, list) or len(first_rank) < 20:
        return False, "pred_rank must be a list with at least 20 candidates"

    return True, "Valid CSV file"
def save_submission(submission_data):
    """Save submission data to a JSON file under ``submissions/``.

    The file is named ``<team>_<YYYYmmdd_HHMMSS>.json``, where ``<team>`` is
    the submission's ``team_name`` reduced to filename-safe characters.

    Args:
        submission_data: JSON-serialisable dict describing the submission;
            must contain a ``'team_name'`` key.

    Returns:
        The submission ID, i.e. the file name without the ``.json`` suffix.

    Raises:
        KeyError: If ``submission_data`` has no ``'team_name'`` entry.
        OSError: If the directory or file cannot be written.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # team_name is user-supplied: keep only filename-safe characters so it
    # cannot add path separators or '..' segments and escape submissions/.
    raw_team = str(submission_data['team_name'])
    safe_team = re.sub(r'[^A-Za-z0-9._-]+', '_', raw_team).strip('._') or "team"
    submission_id = f"{safe_team}_{timestamp}"

    # Create submissions directory if it doesn't exist
    os.makedirs("submissions", exist_ok=True)

    # NOTE: two submissions from the same team within one second share an ID
    # and the later one overwrites the earlier file.
    submission_path = os.path.join("submissions", f"{submission_id}.json")
    with open(submission_path, 'w', encoding='utf-8') as f:
        json.dump(submission_data, f, indent=4)

    return submission_id
def process_submission(
    method_name, team_name, dataset, split, contact_email,
    code_repo, csv_file, model_description, hardware, paper_link
):
    """Validate a leaderboard submission, evaluate it, and save the result.

    Each check returns early with an ``"Error: ..."`` message on failure.
    Checks run in this order: required fields, name lengths, email format,
    repository URL, CSV content. The CSV is then passed to
    ``compute_metrics`` and, on success, the submission is written with
    ``save_submission``.

    Args:
        method_name: Method name, at most 25 characters.
        team_name: Team name, at most 25 characters.
        dataset: Selected dataset name; lower-cased before evaluation.
        split: Selected evaluation split.
        contact_email: One or more semicolon-separated email addresses.
        code_repo: GitHub repository URL.
        csv_file: Uploaded prediction CSV (object exposing ``.name``).
        model_description: Free-text description of the model.
        hardware: Free-text hardware description.
        paper_link: Optional paper URL.

    Returns:
        A status message: an error description or a success summary with the
        submission ID and evaluation metrics.
    """
    # Normalise the names so whitespace-only input counts as missing.
    method_name = (method_name or "").strip()
    team_name = (team_name or "").strip()

    # Required-field checks
    if not method_name:
        return "Error: Method name is required"
    if len(method_name) > 25:
        return "Error: Method name must be 25 characters or less"
    if not team_name:
        return "Error: Team name is required"
    if len(team_name) > 25:
        return "Error: Team name must be 25 characters or less"
    if not dataset:
        return "Error: Please select a dataset"
    if csv_file is None:
        return "Error: Please upload a prediction CSV file"

    # Format checks
    if not validate_email(contact_email):
        return "Error: Invalid email format"
    if not validate_github_url(code_repo):
        return "Error: Invalid GitHub repository URL"

    # Validate CSV file
    csv_valid, csv_message = validate_csv(csv_file)
    if not csv_valid:
        return f"Error with CSV file: {csv_message}"

    # Process CSV file through evaluation pipeline
    try:
        # NOTE(review): compute_metrics is not defined or imported in the
        # visible part of this file; confirm it is in scope at runtime.
        results = compute_metrics(
            csv_file.name,
            dataset=dataset.lower(),
            split=split,
            num_workers=4
        )
        if isinstance(results, str) and results.startswith("Error"):
            return f"Evaluation error: {results}"

        # Verify the metrics reported below exist before saving, so a
        # malformed result is not saved and then reported as a failure.
        required_metrics = ('hit@1', 'hit@5', 'recall@20', 'mrr')
        missing = [metric for metric in required_metrics if metric not in results]
        if missing:
            return f"Evaluation error: missing metrics {', '.join(missing)}"

        # Prepare submission data
        submission_data = {
            "method_name": method_name,
            "team_name": team_name,
            "dataset": dataset,
            "split": split,
            "contact_email": contact_email,
            "code_repo": code_repo,
            "model_description": model_description,
            "hardware": hardware,
            "paper_link": paper_link,
            "results": results,
            "status": "pending_review",
            "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        # Save submission
        submission_id = save_submission(submission_data)
        return f"""
        Submission successful! Your submission ID is: {submission_id}
        Evaluation Results:
        Hit@1: {results['hit@1']:.2f}
        Hit@5: {results['hit@5']:.2f}
        Recall@20: {results['recall@20']:.2f}
        MRR: {results['mrr']:.2f}
        Your submission is pending review. You will receive an email notification once the review is complete.
        """
    except Exception as e:
        # Top-level UI boundary: surface any failure as a status message.
        return f"Error processing submission: {str(e)}"
+# Add this to your existing Gradio interface
def add_submission_form(demo):
    """Append the "Submit Your Results" form to an existing Gradio app.

    Inside ``demo`` this renders two Markdown sections and a form holding the
    submission inputs, a submit button, and a read-only status box. Clicking
    the button calls ``process_submission`` with the input values and shows
    its return value in the status box.

    Args:
        demo: The Gradio container to extend; used as a context manager.

    Returns:
        None.
    """
    with demo:
        gr.Markdown("## Submit Your Results")
        gr.Markdown("""
        Submit your results to be included in the leaderboard. Please ensure your submission meets all requirements.
        For questions, contact stark-qa@cs.stanford.edu
        """)

        with gr.Form(elem_id="submission_form"):
            # Components are created in display order.
            method_box = gr.Textbox(
                placeholder="e.g., MyRetrievalModel-v1",
                label="Method Name (max 25 chars)",
            )
            team_box = gr.Textbox(
                placeholder="e.g., Stanford NLP",
                label="Team Name (max 25 chars)",
            )
            dataset_choice = gr.Dropdown(
                label="Dataset",
                choices=["amazon", "mag", "prime"],
            )
            split_choice = gr.Dropdown(
                label="Split",
                choices=["test", "test-0.1", "human_generated_eval"],
                value="test",
            )
            email_box = gr.Textbox(
                placeholder="email@example.com; another@example.com",
                label="Contact Email(s)",
            )
            repo_box = gr.Textbox(
                placeholder="https://github.com/username/repository",
                label="Code Repository",
            )
            predictions_upload = gr.File(
                file_types=[".csv"],
                label="Prediction CSV",
            )
            description_box = gr.Textbox(
                placeholder="Briefly describe how your retriever model works...",
                label="Model Description",
                lines=3,
            )
            hardware_box = gr.Textbox(
                placeholder="e.g., 4x NVIDIA A100 80GB",
                label="Hardware Specifications",
            )
            paper_box = gr.Textbox(
                placeholder="https://arxiv.org/abs/...",
                label="Paper Link (Optional)",
            )

            # Order must match the positional parameters of process_submission.
            form_inputs = [
                method_box,
                team_box,
                dataset_choice,
                split_choice,
                email_box,
                repo_box,
                predictions_upload,
                description_box,
                hardware_box,
                paper_box,
            ]

            submit_button = gr.Button("Submit", variant="primary")
            status_box = gr.Textbox(label="Submission Status", interactive=False)

            submit_button.click(
                process_submission,
                inputs=form_inputs,
                outputs=status_box,
            )
 def format_dataframe(df, dataset):
     # Filter the dataframe for the selected dataset
     columns = ['Method'] + [col for col in df.columns if dataset in col]