Spaces:
Running
Running
import os | |
import gradio as gr | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
import time | |
import io | |
from PIL import Image | |
import logging | |
# Import the functions from deepfundingoracle | |
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv, \ | |
normalize_and_clip_weights | |
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(
    level=logging.INFO,
)
# Metric columns that are charted when present in the results and numeric.
_PLOT_COLUMNS = ('stars', 'forks', 'watchers', 'contributors', 'pulls', 'final_weight')


def _figure_to_image(fig):
    """Render ``fig`` to an in-memory PNG and return it as a PIL image.

    The figure is closed afterwards even if saving fails, so repeated
    requests do not accumulate open matplotlib figures.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
    finally:
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def _render_distribution_image(df, plot_cols):
    """Return the histogram grid for ``plot_cols`` (a titled blank figure if empty)."""
    if plot_cols:
        # DataFrame.hist creates its own figure when no ``ax`` is passed, so
        # take the figure from the returned axes instead of pre-creating one
        # (the pre-created figure would stay empty and the real one would leak).
        axes = df[plot_cols].hist(bins=20, figsize=(15, 10), color="skyblue", edgecolor="black")
        fig = np.ravel(axes)[0].get_figure()
    else:
        fig = plt.figure(figsize=(15, 10))
    fig.suptitle("Feature Distributions", fontsize=16)
    fig.tight_layout()
    return _figure_to_image(fig)


def _render_correlation_image(df, plot_cols):
    """Return the correlation heatmap for ``plot_cols`` (blank if fewer than two columns)."""
    fig = plt.figure(figsize=(12, 8))
    if len(plot_cols) > 1:
        ax = fig.add_subplot(111)
        # Draw on an explicit axes rather than relying on pyplot's current figure.
        sns.heatmap(df[plot_cols].corr(), annot=True, cmap="coolwarm", fmt=".2f",
                    linewidths=0.5, ax=ax)
        ax.set_title("Feature Correlation Matrix", fontsize=16)
    return _figure_to_image(fig)


def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
    """
    Analyzes the uploaded file and generates results.

    Runs the imported pipeline helpers in order (``prepare_dataset``,
    ``train_predict_weight``, ``normalize_and_clip_weights``,
    ``create_submission_csv``), then renders two charts and a text preview.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string; ``None`` means nothing was
            uploaded and is reported as an error.
        progress: Gradio progress tracker, called with a fraction and a
            description before each step.

    Returns:
        A 5-tuple ``(preview, output_path, dist_img, corr_img, status)``.
        On failure ``preview`` and ``status`` both hold the error message,
        ``output_path`` is ``None`` and both images are blank white
        placeholders. Exceptions are caught and reported, never raised.
    """
    # NOTE(review): the leading glyphs in the status strings below look like
    # mis-decoded emoji; confirm against the original file encoding.
    start_time = time.time()
    try:
        if file is None:
            raise ValueError("No file uploaded. Please upload a CSV file.")
        # Accept both an object carrying the path in ``.name`` and a bare path.
        input_path = getattr(file, "name", file)

        # Step 1: Prepare dataset
        progress(0, desc="Preparing dataset...")
        df = prepare_dataset(input_path)

        # Step 2: Train model and predict weights
        progress(0.3, desc="Training model and predicting weights...")
        df = train_predict_weight(df)

        # Step 3: Normalize weights
        progress(0.5, desc="Normalizing weights...")
        df = normalize_and_clip_weights(df)

        # Step 4: Save results
        progress(0.6, desc="Saving results to CSV...")
        output_filename = "submission.csv"
        create_submission_csv(df, output_filename)

        # Step 5: Generate visualizations
        progress(0.8, desc="Generating graphs...")
        plot_cols = [
            col for col in df.select_dtypes(include=[np.number]).columns
            if col in _PLOT_COLUMNS
        ]
        dist_img = _render_distribution_image(df, plot_cols)
        corr_img = _render_correlation_image(df, plot_cols)

        # Prepare preview
        progress(1, desc="Done!")
        elapsed = time.time() - start_time
        summary_df = df[['repo', 'parent', 'final_weight']].head(10)
        preview = f"Top 10 Results:\n{summary_df.to_string(index=False)}\n\nTotal repositories analyzed: {len(df)}"

        # The returned path feeds the download component.
        return (
            preview,
            output_filename,
            dist_img,
            corr_img,
            f"β Analysis completed successfully in {elapsed:.2f} seconds.\nπ₯ Results file ready for download!"
        )
    except Exception as e:
        # Top-level UI boundary: log with traceback and report in the interface
        # instead of letting the request crash.
        logging.exception("Error during analysis: %s", e)
        elapsed = time.time() - start_time
        error_msg = f"β Error: {e}\nTime elapsed: {elapsed:.2f} seconds"
        # Return empty images and error message
        empty_img = Image.new('RGB', (800, 600), color='white')
        return error_msg, None, empty_img, empty_img, error_msg
# Stylesheet handed to gr.Blocks(css=...). The two class names are attached to
# components through ``elem_classes``: ``download-button`` on the results file
# component and ``status-box`` on the status textbox.
custom_css = """
.download-button {
background-color: #4CAF50 !important;
color: white !important;
font-weight: bold !important;
}
.status-box {
font-family: monospace;
padding: 10px;
border-radius: 5px;
}
"""
# Create Gradio interface with automatic download
# NOTE(review): several labels below start with glyphs that look like
# mis-decoded emoji; confirm against the original file encoding.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
    gr.Markdown("""
# π DeepFunding Oracle
Upload a CSV file containing repository dependencies with 'repo' and 'parent' columns.
The system will:
1. **Fetch** GitHub metrics for each repository
2. **Generate** importance weights using AI
3. **Train** a model to predict final contribution weights
4. **Normalize** weights so they sum to 1 per parent
β οΈ **Note**: Set `GITHUB_API_TOKEN` environment variable for better API rate limits.
""")

    # Top row: upload + trigger on the left, status on the right.
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="file-upload"
            )
            analyze_btn = gr.Button("π Analyze", variant="primary", size="lg")
        with gr.Column(scale=2):
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                elem_classes="status-box"
            )

    with gr.Row():
        preview_output = gr.Textbox(
            label="Preview of Results",
            lines=15,
            show_copy_button=True
        )

    with gr.Row():
        download_output = gr.File(
            label="π₯ Download Results CSV",
            visible=True,
            elem_classes="download-button"
        )

    with gr.Row():
        with gr.Column():
            dist_plot = gr.Image(label="Feature Distributions")
        with gr.Column():
            corr_plot = gr.Image(label="Feature Correlation Matrix")

    # JavaScript for automatic download: after a short delay, click the link
    # inside the element carrying the ``download-button`` class, if one exists.
    download_js = """
() => {
setTimeout(() => {
const downloadButton = document.querySelector('.download-button a');
if (downloadButton) {
downloadButton.click();
}
}, 500);
}
"""

    # Output order must match the 5-tuple returned by analyze_file.
    analysis_outputs = [preview_output, download_output, dist_plot, corr_plot, status_output]

    # Set up the event handler: run the analysis, then the client-side snippet.
    analyze_btn.click(
        fn=analyze_file,
        inputs=[file_input],
        outputs=analysis_outputs
    ).then(
        fn=None,
        inputs=None,
        outputs=None,
        _js=download_js  # This triggers automatic download
    )

    # Add example usage. The example file is optional, so only register it when
    # it is actually present instead of pointing the UI at a missing path.
    example_csv = "example_dependencies.csv"
    if os.path.exists(example_csv):
        gr.Examples(
            examples=[[example_csv]],
            inputs=file_input,
            outputs=analysis_outputs,
            fn=analyze_file,
            cache_examples=False,
        )
if __name__ == "__main__":
    # Take the port from the PORT environment variable, falling back to 7860,
    # and listen on all network interfaces.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": False,
        "show_error": True,
    }
    iface.launch(**launch_options)