"""Gradio front end for the DeepFunding Oracle pipeline.

Lets a user upload a CSV of repository dependencies, runs the oracle
pipeline on it, and shows a text preview, a downloadable results file and
two diagnostic plots.
"""

import io
import logging
import os
import time

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

# Import the functions from deepfundingoracle
from Oracle.deepfundingoracle import (
    create_submission_csv,
    normalize_and_clip_weights,
    prepare_dataset,
    train_predict_weight,
)

# Configure logging
logging.basicConfig(level=logging.INFO)

# Columns that are plotted when they are present in the result and numeric.
PLOT_COLUMNS = ['stars', 'forks', 'watchers', 'contributors', 'pulls', 'final_weight']

# Name of the CSV written by every analysis run (relative to the working dir).
OUTPUT_FILENAME = "submission.csv"

# Optional sample input offered through gr.Examples.
EXAMPLE_FILE = "example_dependencies.csv"


def _figure_to_image(fig):
    """Render *fig* to an in-memory PNG and return it as a PIL image.

    The figure is closed even when rendering fails, so pyplot does not
    accumulate open figures across requests.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
    finally:
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def _plot_distributions(df, plot_cols):
    """Return a histogram grid of *plot_cols* (or a blank figure) as an image."""
    if plot_cols:
        # DataFrame.hist creates its own figure; grab that one from the axes
        # it returns so that exactly the figure we drew on is saved and closed.
        axes = df[plot_cols].hist(bins=20, figsize=(15, 10), color="skyblue", edgecolor="black")
        fig = np.ravel(axes)[0].figure
        fig.suptitle("Feature Distributions", fontsize=16)
        fig.tight_layout()
    else:
        fig = plt.figure(figsize=(15, 10))
    return _figure_to_image(fig)


def _plot_correlation(df, plot_cols):
    """Return a correlation heatmap of *plot_cols* (or a blank figure) as an image."""
    fig = plt.figure(figsize=(12, 8))
    # A correlation matrix needs at least two columns to be meaningful.
    if len(plot_cols) > 1:
        ax = fig.gca()
        sns.heatmap(df[plot_cols].corr(), annot=True, cmap="coolwarm", fmt=".2f",
                    linewidths=0.5, ax=ax)
        ax.set_title("Feature Correlation Matrix", fontsize=16)
    return _figure_to_image(fig)


def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
    """Analyze the uploaded file and generate results.

    Args:
        file: The upload handed over by the ``gr.File`` input. Either a path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. ``None`` (nothing uploaded) is reported as an error.
        progress: Gradio progress tracker. The ``gr.Progress`` default is the
            pattern Gradio uses to inject the tracker, so it is kept as a
            default argument on purpose.

    Returns:
        A 5-tuple ``(preview, results_path, dist_img, corr_img, status)``.
        On failure ``preview`` and ``status`` both carry the error message,
        ``results_path`` is ``None`` and both images are blank placeholders.
    """
    start_time = time.time()

    try:
        if file is None:
            raise ValueError("No file uploaded. Please upload a CSV file first.")
        input_path = file if isinstance(file, (str, os.PathLike)) else file.name

        # Step 1: Prepare dataset
        progress(0, desc="Preparing dataset...")
        df = prepare_dataset(input_path)

        # Step 2: Train model and predict weights
        progress(0.3, desc="Training model and predicting weights...")
        df = train_predict_weight(df)

        # Step 3: Normalize weights
        progress(0.5, desc="Normalizing weights...")
        df = normalize_and_clip_weights(df)

        # Step 4: Save results
        progress(0.6, desc="Saving results to CSV...")
        output_filename = OUTPUT_FILENAME
        create_submission_csv(df, output_filename)

        # Step 5: Generate visualizations
        progress(0.8, desc="Generating graphs...")
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        # Keep the DataFrame's own column order for the plots.
        plot_cols = [col for col in numeric_cols if col in PLOT_COLUMNS]
        dist_img = _plot_distributions(df, plot_cols)
        corr_img = _plot_correlation(df, plot_cols)

        # Prepare preview
        progress(1, desc="Done!")
        elapsed = time.time() - start_time

        # Create a summary preview
        summary_df = df[['repo', 'parent', 'final_weight']].head(10)
        preview = (
            f"Top 10 Results:\n{summary_df.to_string(index=False)}"
            f"\n\nTotal repositories analyzed: {len(df)}"
        )

        # Return the path to the generated file for automatic download
        return (
            preview,
            output_filename,  # This will trigger automatic download
            dist_img,
            corr_img,
            f"✅ Analysis completed successfully in {elapsed:.2f} seconds.\n📄 Results file ready for download!"
        )

    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of
        # letting it propagate, but keep the traceback in the log.
        logging.exception("Error during analysis: %s", e)
        elapsed = time.time() - start_time
        error_msg = f"❌ Error: {str(e)}\nTime elapsed: {elapsed:.2f} seconds"
        # Return empty images and error message
        empty_img = Image.new('RGB', (800, 600), color='white')
        return error_msg, None, empty_img, empty_img, error_msg


# Custom CSS for better styling
custom_css = """
.download-button {
    background-color: #4CAF50 !important;
    color: white !important;
    font-weight: bold !important;
}
.status-box {
    font-family: monospace;
    padding: 10px;
    border-radius: 5px;
}
"""

# Create Gradio interface with automatic download
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
    gr.Markdown("""
    # 🚀 DeepFunding Oracle

    Upload a CSV file containing repository dependencies with 'repo' and 'parent' columns.

    The system will:
    1. **Fetch** GitHub metrics for each repository
    2. **Generate** importance weights using AI
    3. **Train** a model to predict final contribution weights
    4. **Normalize** weights so they sum to 1 per parent

    ⚠️ **Note**: Set `GITHUB_API_TOKEN` environment variable for better API rate limits.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="file-upload"
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

        with gr.Column(scale=2):
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                elem_classes="status-box"
            )

    with gr.Row():
        preview_output = gr.Textbox(
            label="Preview of Results",
            lines=15,
            show_copy_button=True
        )

    with gr.Row():
        download_output = gr.File(
            label="📄 Download Results CSV",
            visible=True,
            elem_classes="download-button"
        )

    with gr.Row():
        with gr.Column():
            dist_plot = gr.Image(label="Feature Distributions")
        with gr.Column():
            corr_plot = gr.Image(label="Feature Correlation Matrix")

    # JavaScript for automatic download
    download_js = """
    () => {
        setTimeout(() => {
            const downloadButton = document.querySelector('.download-button a');
            if (downloadButton) {
                downloadButton.click();
            }
        }, 500);
    }
    """

    # Set up the event handler
    # NOTE(review): `_js` is the keyword this file was written against; newer
    # Gradio releases appear to name it `js` - confirm against the installed
    # version before upgrading.
    analyze_btn.click(
        fn=analyze_file,
        inputs=[file_input],
        outputs=[preview_output, download_output, dist_plot, corr_plot, status_output]
    ).then(
        fn=None,
        inputs=None,
        outputs=None,
        _js=download_js  # This triggers automatic download
    )

    # Add example usage. The sample file is optional, so the example is only
    # registered when the file is actually present.
    if os.path.exists(EXAMPLE_FILE):
        gr.Examples(
            examples=[[EXAMPLE_FILE]],
            inputs=file_input,
            outputs=[preview_output, download_output, dist_plot, corr_plot, status_output],
            fn=analyze_file,
            cache_examples=False,
        )

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    iface.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True
    )