# Hugging Face Space: FelixPhilip / DeepFundingOracle (status: Sleeping)
# File size: 6,690 Bytes

import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
import io
from PIL import Image
import logging

# Import the functions from deepfundingoracle
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv, \
    normalize_and_clip_weights

# Configure logging
logging.basicConfig(level=logging.INFO)


def _figure_to_image(fig):
    """
    Renders a Matplotlib figure to an in-memory PNG and returns it as a PIL image.

    The figure is closed afterwards, even when saving fails, so repeated
    requests do not accumulate open figures.
    """
    png_buf = io.BytesIO()
    try:
        fig.savefig(png_buf, format='png', dpi=100, bbox_inches='tight')
    finally:
        plt.close(fig)
    png_buf.seek(0)
    return Image.open(png_buf)


def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
    """
    Analyzes the uploaded file and generates results.

    Runs prepare_dataset -> train_predict_weight -> normalize_and_clip_weights,
    writes the result with create_submission_csv, then renders a histogram
    grid and a correlation heatmap for the known metric columns.

    Args:
        file: Value of the Gradio file input. Either an object exposing the
            path as ``.name`` or a plain path string is accepted.
        progress: Gradio progress tracker, updated before each pipeline step.

    Returns:
        A 5-tuple in the order the interface expects: preview text, path of
        the results CSV (None on failure), feature distribution image,
        correlation matrix image, status message. Exceptions are not
        propagated; they are logged and reported through the text fields.
    """
    start_time = time.time()

    try:
        # Fail with a readable message instead of an AttributeError on None.
        if file is None:
            raise ValueError("No file uploaded. Please upload a CSV file first.")
        # Use ``.name`` when present; otherwise treat the value as the path itself.
        file_path = getattr(file, "name", file)

        # Step 1: Prepare dataset
        progress(0, desc="Preparing dataset...")
        df = prepare_dataset(file_path)

        # Step 2: Train model and predict weights
        progress(0.3, desc="Training model and predicting weights...")
        df = train_predict_weight(df)

        # Step 3: Normalize weights
        progress(0.5, desc="Normalizing weights...")
        df = normalize_and_clip_weights(df)

        # Step 4: Save results
        progress(0.6, desc="Saving results to CSV...")
        # NOTE(review): fixed name in the working directory, so concurrent
        # requests would overwrite each other's file — confirm this is acceptable.
        output_filename = "submission.csv"
        create_submission_csv(df, output_filename)

        # Step 5: Generate visualizations
        progress(0.8, desc="Generating graphs...")

        # Only numeric columns from this fixed set of metrics are plotted.
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        plot_cols = [col for col in numeric_cols if
                     col in ['stars', 'forks', 'watchers', 'contributors', 'pulls', 'final_weight']]

        # Feature distribution plot
        if plot_cols:
            # DataFrame.hist draws on a figure of its own when no ``ax`` is
            # given, so take that figure (now the current one) rather than
            # pre-creating another that would stay empty.
            df[plot_cols].hist(bins=20, figsize=(15, 10), color="skyblue", edgecolor="black")
            dist_fig = plt.gcf()
            dist_fig.suptitle("Feature Distributions", fontsize=16)
            dist_fig.tight_layout()
        else:
            # Nothing to plot: fall back to a blank figure of the same size.
            dist_fig = plt.figure(figsize=(15, 10))
        dist_img = _figure_to_image(dist_fig)

        # Correlation matrix plot (needs at least two columns to be meaningful)
        corr_fig = plt.figure(figsize=(12, 8))
        if len(plot_cols) > 1:
            corr_ax = corr_fig.gca()
            correlation_matrix = df[plot_cols].corr()
            sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f",
                        linewidths=0.5, ax=corr_ax)
            corr_ax.set_title("Feature Correlation Matrix", fontsize=16)
        corr_img = _figure_to_image(corr_fig)

        # Prepare preview
        progress(1, desc="Done!")
        elapsed = time.time() - start_time

        # Create a summary preview
        summary_df = df[['repo', 'parent', 'final_weight']].head(10)
        preview = f"Top 10 Results:\n{summary_df.to_string(index=False)}\n\nTotal repositories analyzed: {len(df)}"

        # Return the path to the generated file for automatic download
        return (
            preview,
            output_filename,  # This will trigger automatic download
            dist_img,
            corr_img,
            f"✅ Analysis completed successfully in {elapsed:.2f} seconds.\n📥 Results file ready for download!"
        )

    except Exception as e:
        # Top-level UI boundary: log with traceback and report the failure in
        # the interface instead of raising.
        logging.exception("Error during analysis: %s", e)
        elapsed = time.time() - start_time
        error_msg = f"❌ Error: {str(e)}\nTime elapsed: {elapsed:.2f} seconds"

        # Return empty images and error message
        empty_img = Image.new('RGB', (800, 600), color='white')
        return error_msg, None, empty_img, empty_img, error_msg


# Custom CSS for better styling, passed to gr.Blocks via its `css` argument.
# `.download-button` is the class given to the results download component
# (and is the selector the auto-download JavaScript looks for);
# `.status-box` is the class given to the status textbox.
custom_css = """
    .download-button {
        background-color: #4CAF50 !important;
        color: white !important;
        font-weight: bold !important;
    }
    .status-box {
        font-family: monospace;
        padding: 10px;
        border-radius: 5px;
    }
"""

# Create Gradio interface with automatic download.
# Layout, top to bottom: intro text, upload + status row, results preview,
# download component, and the two plot images side by side.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
    gr.Markdown("""
    # 🚀 DeepFunding Oracle

    Upload a CSV file containing repository dependencies with 'repo' and 'parent' columns.
    The system will:
    1. **Fetch** GitHub metrics for each repository
    2. **Generate** importance weights using AI
    3. **Train** a model to predict final contribution weights
    4. **Normalize** weights so they sum to 1 per parent

    ⚠️ **Note**: Set `GITHUB_API_TOKEN` environment variable for better API rate limits.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="file-upload"
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

        with gr.Column(scale=2):
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                elem_classes="status-box"
            )

    with gr.Row():
        preview_output = gr.Textbox(
            label="Preview of Results",
            lines=15,
            show_copy_button=True
        )

    with gr.Row():
        download_output = gr.File(
            label="📥 Download Results CSV",
            visible=True,
            elem_classes="download-button"
        )

    with gr.Row():
        with gr.Column():
            dist_plot = gr.Image(label="Feature Distributions")
        with gr.Column():
            corr_plot = gr.Image(label="Feature Correlation Matrix")

    # JavaScript for automatic download: after a 500 ms delay, click the first
    # link found inside the element carrying the `download-button` class.
    download_js = """
    () => {
        setTimeout(() => {
            const downloadButton = document.querySelector('.download-button a');
            if (downloadButton) {
                downloadButton.click();
            }
        }, 500);
    }
    """

    # Set up the event handler. The output order must match the 5-tuple
    # returned by analyze_file.
    # NOTE(review): `_js` is the Gradio 3.x keyword; newer major versions are
    # believed to spell it `js` — confirm against the pinned Gradio version.
    analyze_btn.click(
        fn=analyze_file,
        inputs=[file_input],
        outputs=[preview_output, download_output, dist_plot, corr_plot, status_output]
    ).then(
        fn=None,
        inputs=None,
        outputs=None,
        _js=download_js  # This triggers automatic download
    )

    # Add example usage. The example file is optional, so only register it
    # when it is actually present; otherwise building the interface could fail
    # while Gradio tries to process a non-existent file.
    example_csv = "example_dependencies.csv"
    if os.path.isfile(example_csv):
        gr.Examples(
            examples=[[example_csv]],
            inputs=file_input,
            outputs=[preview_output, download_output, dist_plot, corr_plot, status_output],
            fn=analyze_file,
            cache_examples=False,
        )

if __name__ == "__main__":
    # Serve on all interfaces; the port comes from the PORT environment
    # variable when set, otherwise 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": False,
        "show_error": True,
    }
    iface.launch(**launch_options)