File size: 5,803 Bytes
513a1f2
3410966
274fa3d
 
3388ab8
 
4cdc4e2
 
a1599a6
bf9b592
 
 
99de051
bf9b592
 
 
 
4cdc4e2
bf9b592
 
 
4cdc4e2
bf9b592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99de051
bf9b592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99de051
bf9b592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99de051
bf9b592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99de051
bf9b592
 
 
 
 
3410966
 
513a1f2
bf9b592
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
import io
from PIL import Image
import logging

# Import the functions from deepfundingoracle
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv, normalize_and_clip_weights

# Configure logging
logging.basicConfig(level=logging.INFO)

def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
    """
    Analyzes the uploaded file and generates results.
    """
    start_time = time.time()

    try:
        # Step 1: Prepare dataset
        progress(0, desc="Preparing dataset...")
        df = prepare_dataset(file.name)

        # Step 2: Train model and predict weights
        progress(0.3, desc="Training model and predicting weights...")
        df = train_predict_weight(df)

        # Step 3: Normalize weights
        progress(0.5, desc="Normalizing weights...")
        df = normalize_and_clip_weights(df)

        # Step 4: Save results
        progress(0.6, desc="Saving results to CSV...")
        output_filename = "submission.csv"
        create_submission_csv(df, output_filename)

        # Step 5: Generate visualizations
        progress(0.8, desc="Generating graphs...")

        # Feature distribution plot
        dist_fig = plt.figure(figsize=(15, 10))
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        plot_cols = [col for col in numeric_cols if col in ['stars', 'forks', 'watchers', 'contributors', 'pulls', 'final_weight']]

        if plot_cols:
            df[plot_cols].hist(bins=20, figsize=(15, 10), color="skyblue", edgecolor="black")
            plt.suptitle("Feature Distributions", fontsize=16)
            plt.tight_layout()

        dist_buf = io.BytesIO()
        plt.savefig(dist_buf, format='png', dpi=100, bbox_inches='tight')
        dist_buf.seek(0)
        plt.close(dist_fig)
        dist_img = Image.open(dist_buf)

        # Correlation matrix plot
        corr_fig = plt.figure(figsize=(12, 8))
        if len(plot_cols) > 1:
            correlation_matrix = df[plot_cols].corr()
            sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
            plt.title("Feature Correlation Matrix", fontsize=16)

        corr_buf = io.BytesIO()
        plt.savefig(corr_buf, format='png', dpi=100, bbox_inches='tight')
        corr_buf.seek(0)
        plt.close(corr_fig)
        corr_img = Image.open(corr_buf)

        # Prepare preview
        progress(1, desc="Done!")
        elapsed = time.time() - start_time

        # Create a summary preview
        summary_df = df[['repo', 'parent', 'final_weight']].head(10)
        preview = f"Top 10 Results:\n{summary_df.to_string(index=False)}\n\nTotal repositories analyzed: {len(df)}"

        return (
            preview,
            output_filename,
            dist_img,
            corr_img,
            f"βœ… Analysis completed successfully in {elapsed:.2f} seconds.\nπŸ“₯ Results file ready for download!"
        )

    except Exception as e:
        logging.error(f"Error during analysis: {str(e)}")
        elapsed = time.time() - start_time
        error_msg = f"❌ Error: {str(e)}\nTime elapsed: {elapsed:.2f} seconds"

        # Return empty images and error message
        empty_img = Image.new('RGB', (800, 600), color='white')
        return error_msg, None, empty_img, empty_img, error_msg

# Custom CSS for better styling
custom_css = """
    .download-button {
        background-color: #4CAF50 !important;
        color: white !important;
        font-weight: bold !important;
    }
    .status-box {
        font-family: monospace;
        padding: 10px;
        border-radius: 5px;
    }
"""

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
    gr.Markdown("""
    # πŸš€ DeepFunding Oracle

    Upload a CSV file containing repository dependencies with 'repo' and 'parent' columns.
    The system will:
    1. **Fetch** GitHub metrics for each repository
    2. **Generate** importance weights using AI
    3. **Train** a model to predict final contribution weights
    4. **Normalize** weights so they sum to 1 per parent

    ⚠️ **Note**: Set `GITHUB_API_TOKEN` environment variable for better API rate limits.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="file-upload"
            )
            analyze_btn = gr.Button("πŸ” Analyze", variant="primary", size="lg")

        with gr.Column(scale=2):
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                elem_classes="status-box"
            )

    with gr.Row():
        preview_output = gr.Textbox(
            label="Preview of Results",
            lines=15,
            show_copy_button=True
        )

    with gr.Row():
        download_output = gr.File(
            label="πŸ“₯ Download Results CSV",
            visible=True,
            elem_classes="download-button"
        )

    with gr.Row():
        with gr.Column():
            dist_plot = gr.Image(label="Feature Distributions")
        with gr.Column():
            corr_plot = gr.Image(label="Feature Correlation Matrix")

    # Set up the event handler (without _js parameter)
    analyze_btn.click(
        fn=analyze_file,
        inputs=[file_input],
        outputs=[preview_output, download_output, dist_plot, corr_plot, status_output]
    )

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    iface.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True
    )