FelixPhilip's picture
Oracle
bf9b592
raw
history blame
6.69 kB
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
import io
from PIL import Image
import logging
# Import the functions from deepfundingoracle
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv, \
normalize_and_clip_weights
# Configure logging
logging.basicConfig(level=logging.INFO)
def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
"""
Analyzes the uploaded file and generates results.
"""
start_time = time.time()
try:
# Step 1: Prepare dataset
progress(0, desc="Preparing dataset...")
df = prepare_dataset(file.name)
# Step 2: Train model and predict weights
progress(0.3, desc="Training model and predicting weights...")
df = train_predict_weight(df)
# Step 3: Normalize weights
progress(0.5, desc="Normalizing weights...")
df = normalize_and_clip_weights(df)
# Step 4: Save results
progress(0.6, desc="Saving results to CSV...")
output_filename = "submission.csv"
create_submission_csv(df, output_filename)
# Step 5: Generate visualizations
progress(0.8, desc="Generating graphs...")
# Feature distribution plot
dist_fig = plt.figure(figsize=(15, 10))
numeric_cols = df.select_dtypes(include=[np.number]).columns
plot_cols = [col for col in numeric_cols if
col in ['stars', 'forks', 'watchers', 'contributors', 'pulls', 'final_weight']]
if plot_cols:
df[plot_cols].hist(bins=20, figsize=(15, 10), color="skyblue", edgecolor="black")
plt.suptitle("Feature Distributions", fontsize=16)
plt.tight_layout()
dist_buf = io.BytesIO()
plt.savefig(dist_buf, format='png', dpi=100, bbox_inches='tight')
dist_buf.seek(0)
plt.close(dist_fig)
dist_img = Image.open(dist_buf)
# Correlation matrix plot
corr_fig = plt.figure(figsize=(12, 8))
if len(plot_cols) > 1:
correlation_matrix = df[plot_cols].corr()
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
plt.title("Feature Correlation Matrix", fontsize=16)
corr_buf = io.BytesIO()
plt.savefig(corr_buf, format='png', dpi=100, bbox_inches='tight')
corr_buf.seek(0)
plt.close(corr_fig)
corr_img = Image.open(corr_buf)
# Prepare preview
progress(1, desc="Done!")
elapsed = time.time() - start_time
# Create a summary preview
summary_df = df[['repo', 'parent', 'final_weight']].head(10)
preview = f"Top 10 Results:\n{summary_df.to_string(index=False)}\n\nTotal repositories analyzed: {len(df)}"
# Return the path to the generated file for automatic download
return (
preview,
output_filename, # This will trigger automatic download
dist_img,
corr_img,
f"βœ… Analysis completed successfully in {elapsed:.2f} seconds.\nπŸ“₯ Results file ready for download!"
)
except Exception as e:
logging.error(f"Error during analysis: {str(e)}")
elapsed = time.time() - start_time
error_msg = f"❌ Error: {str(e)}\nTime elapsed: {elapsed:.2f} seconds"
# Return empty images and error message
empty_img = Image.new('RGB', (800, 600), color='white')
return error_msg, None, empty_img, empty_img, error_msg
# Custom CSS for better styling
custom_css = """
.download-button {
background-color: #4CAF50 !important;
color: white !important;
font-weight: bold !important;
}
.status-box {
font-family: monospace;
padding: 10px;
border-radius: 5px;
}
"""
# Create Gradio interface with automatic download
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
gr.Markdown("""
# πŸš€ DeepFunding Oracle
Upload a CSV file containing repository dependencies with 'repo' and 'parent' columns.
The system will:
1. **Fetch** GitHub metrics for each repository
2. **Generate** importance weights using AI
3. **Train** a model to predict final contribution weights
4. **Normalize** weights so they sum to 1 per parent
⚠️ **Note**: Set `GITHUB_API_TOKEN` environment variable for better API rate limits.
""")
with gr.Row():
with gr.Column(scale=1):
file_input = gr.File(
label="Upload CSV File",
file_types=[".csv"],
elem_id="file-upload"
)
analyze_btn = gr.Button("πŸ” Analyze", variant="primary", size="lg")
with gr.Column(scale=2):
status_output = gr.Textbox(
label="Status",
lines=3,
elem_classes="status-box"
)
with gr.Row():
preview_output = gr.Textbox(
label="Preview of Results",
lines=15,
show_copy_button=True
)
with gr.Row():
download_output = gr.File(
label="πŸ“₯ Download Results CSV",
visible=True,
elem_classes="download-button"
)
with gr.Row():
with gr.Column():
dist_plot = gr.Image(label="Feature Distributions")
with gr.Column():
corr_plot = gr.Image(label="Feature Correlation Matrix")
# JavaScript for automatic download
download_js = """
() => {
setTimeout(() => {
const downloadButton = document.querySelector('.download-button a');
if (downloadButton) {
downloadButton.click();
}
}, 500);
}
"""
# Set up the event handler
analyze_btn.click(
fn=analyze_file,
inputs=[file_input],
outputs=[preview_output, download_output, dist_plot, corr_plot, status_output]
).then(
fn=None,
inputs=None,
outputs=None,
_js=download_js # This triggers automatic download
)
# Add example usage
gr.Examples(
examples=[["example_dependencies.csv"]], # Add your example file here if you have one
inputs=file_input,
outputs=[preview_output, download_output, dist_plot, corr_plot, status_output],
fn=analyze_file,
cache_examples=False,
)
if __name__ == "__main__":
port = int(os.environ.get("PORT", 7860))
iface.launch(
server_name="0.0.0.0",
server_port=port,
share=False,
show_error=True
)