pors committed on
Commit
10f9258
·
1 Parent(s): 21891ff

set default benchmark file

Browse files
Files changed (1) hide show
  1. app.py +109 -102
app.py CHANGED
@@ -6,191 +6,201 @@ import re
6
  import os
7
  import glob
8
 
 
9
  # Download the main results file
10
  def download_main_results():
11
  url = "https://github.com/huggingface/pytorch-image-models/raw/main/results/results-imagenet.csv"
12
- if not os.path.exists('results-imagenet.csv'):
13
  response = requests.get(url)
14
- with open('results-imagenet.csv', 'wb') as f:
15
  f.write(response.content)
16
 
 
17
  def download_github_csvs_api(
18
  repo="huggingface/pytorch-image-models",
19
  folder="results",
20
  filename_pattern=r"benchmark-.*\.csv",
21
- output_dir="benchmarks"
22
  ):
23
  """Download benchmark CSV files from GitHub API."""
24
  api_url = f"https://api.github.com/repos/{repo}/contents/{folder}"
25
  r = requests.get(api_url)
26
  if r.status_code != 200:
27
  return []
28
-
29
  files = r.json()
30
- matched_files = [f['name'] for f in files if re.match(filename_pattern, f['name'])]
31
-
32
  if not matched_files:
33
  return []
34
-
35
  raw_base = f"https://raw.githubusercontent.com/{repo}/main/{folder}/"
36
  os.makedirs(output_dir, exist_ok=True)
37
-
38
  for fname in matched_files:
39
  raw_url = raw_base + fname
40
  out_path = os.path.join(output_dir, fname)
41
-
42
  if not os.path.exists(out_path): # Only download if not exists
43
  resp = requests.get(raw_url)
44
  if resp.ok:
45
  with open(out_path, "wb") as f:
46
  f.write(resp.content)
47
-
48
  return matched_files
49
 
 
50
  def load_main_data():
51
  """Load the main ImageNet results."""
52
  download_main_results()
53
- df_results = pd.read_csv('results-imagenet.csv')
54
- df_results['model_org'] = df_results['model']
55
- df_results['model'] = df_results['model'].str.split('.').str[0]
56
  return df_results
57
 
 
58
  def get_data(benchmark_file, df_results):
59
  """Process benchmark data and merge with main results."""
60
  pattern = (
61
- r'^(?:'
62
- r'eva|'
63
- r'maxx?vit(?:v2)?|'
64
- r'coatnet|coatnext|'
65
- r'convnext(?:v2)?|'
66
- r'beit(?:v2)?|'
67
- r'efficient(?:net(?:v2)?|former(?:v2)?|vit)|'
68
- r'regnet[xyvz]?|'
69
- r'levit|'
70
- r'vitd?|'
71
- r'swin(?:v2)?'
72
- r')$'
73
  )
74
-
75
  if not os.path.exists(benchmark_file):
76
  return pd.DataFrame()
77
-
78
- df = pd.read_csv(benchmark_file).merge(df_results, on='model')
79
- df['secs'] = 1. / df['infer_samples_per_sec']
80
- df['family'] = df.model.str.extract('^([a-z]+?(?:v2)?)(?:\d|_|$)')
81
- df = df[~df.model.str.endswith('gn')]
82
- df.loc[df.model.str.contains('resnet.*d'),'family'] = df.loc[df.model.str.contains('resnet.*d'),'family'] + 'd'
 
 
83
  return df[df.family.str.contains(pattern)]
84
 
 
85
  def create_plot(benchmark_file, x_axis, y_axis, selected_families, log_x, log_y):
86
  """Create the scatter plot based on user selections."""
87
  df_results = load_main_data()
88
  df = get_data(benchmark_file, df_results)
89
-
90
  if df.empty:
91
  return None
92
-
93
  # Filter by selected families
94
  if selected_families:
95
- df = df[df['family'].isin(selected_families)]
96
-
97
  if df.empty:
98
  return None
99
-
100
  # Create the plot
101
  fig = px.scatter(
102
- df,
103
- width=1000,
104
  height=800,
105
- x=x_axis,
106
- y=y_axis,
107
- log_x=log_x,
108
  log_y=log_y,
109
- color='family',
110
- hover_name='model_org',
111
- hover_data=['infer_samples_per_sec', 'infer_img_size'],
112
- title=f'Model Performance: {y_axis} vs {x_axis}'
113
  )
114
-
115
  return fig
116
 
 
117
  def setup_interface():
118
  """Set up the Gradio interface."""
119
  # Download benchmark files
120
  downloaded_files = download_github_csvs_api()
121
-
122
  # Get available benchmark files
123
  benchmark_files = glob.glob("benchmarks/benchmark-*.csv")
124
  if not benchmark_files:
125
  benchmark_files = ["No benchmark files found"]
126
-
127
  # Load sample data to get families and columns
128
  df_results = load_main_data()
129
-
130
  # Relevant columns for plotting
131
  plot_columns = [
132
- 'top1', 'top5', 'infer_samples_per_sec',
133
- 'secs', 'param_count_x', 'infer_img_size'
 
 
 
 
134
  ]
135
-
136
  # Get families from a sample file (if available)
137
  families = []
138
  if benchmark_files and benchmark_files[0] != "No benchmark files found":
139
  sample_df = get_data(benchmark_files[0], df_results)
140
  if not sample_df.empty:
141
- families = sorted(sample_df['family'].unique().tolist())
142
-
143
  return benchmark_files, plot_columns, families
144
 
 
145
  # Initialize the interface
146
  benchmark_files, plot_columns, families = setup_interface()
147
 
148
  # Create the Gradio interface
149
  with gr.Blocks(title="Image Model Performance Analysis") as demo:
150
  gr.Markdown("# Image Model Performance Analysis")
151
- gr.Markdown("Analyze and visualize performance metrics of different image models based on benchmark data.")
152
-
 
 
153
  with gr.Row():
154
  with gr.Column(scale=1):
155
- benchmark_dropdown = gr.Dropdown(
156
- choices=benchmark_files,
157
- value=benchmark_files[0] if benchmark_files else None,
158
- label="Select Benchmark File"
159
  )
160
-
161
- x_axis_radio = gr.Radio(
162
- choices=plot_columns,
163
- value="secs",
164
- label="X-axis"
165
  )
166
-
167
- y_axis_radio = gr.Radio(
168
- choices=plot_columns,
169
- value="top1",
170
- label="Y-axis"
171
  )
172
-
 
 
 
 
173
  family_checkboxes = gr.CheckboxGroup(
174
- choices=families,
175
- value=families,
176
- label="Select Model Families"
177
- )
178
-
179
- log_x_checkbox = gr.Checkbox(
180
- value=True,
181
- label="Log scale X-axis"
182
  )
183
-
184
- log_y_checkbox = gr.Checkbox(
185
- value=False,
186
- label="Log scale Y-axis"
187
- )
188
-
189
  update_button = gr.Button("Update Plot", variant="primary")
190
-
191
  with gr.Column(scale=2):
192
  plot_output = gr.Plot()
193
-
194
  # Update plot when button is clicked
195
  update_button.click(
196
  fn=create_plot,
@@ -200,30 +210,28 @@ with gr.Blocks(title="Image Model Performance Analysis") as demo:
200
  y_axis_radio,
201
  family_checkboxes,
202
  log_x_checkbox,
203
- log_y_checkbox
204
  ],
205
- outputs=plot_output
206
  )
207
-
208
  # Auto-update when benchmark file changes
209
  def update_families(benchmark_file):
210
  if not benchmark_file or benchmark_file == "No benchmark files found":
211
  return gr.CheckboxGroup(choices=[], value=[])
212
-
213
  df_results = load_main_data()
214
  df = get_data(benchmark_file, df_results)
215
  if df.empty:
216
  return gr.CheckboxGroup(choices=[], value=[])
217
-
218
- new_families = sorted(df['family'].unique().tolist())
219
  return gr.CheckboxGroup(choices=new_families, value=new_families)
220
-
221
  benchmark_dropdown.change(
222
- fn=update_families,
223
- inputs=benchmark_dropdown,
224
- outputs=family_checkboxes
225
  )
226
-
227
  # Load initial plot
228
  demo.load(
229
  fn=create_plot,
@@ -233,11 +241,10 @@ with gr.Blocks(title="Image Model Performance Analysis") as demo:
233
  y_axis_radio,
234
  family_checkboxes,
235
  log_x_checkbox,
236
- log_y_checkbox
237
  ],
238
- outputs=plot_output
239
  )
240
 
241
  if __name__ == "__main__":
242
  demo.launch()
243
-
 
6
  import os
7
  import glob
8
 
9
+
10
# Download the main results file
def download_main_results():
    """Fetch the main ImageNet results CSV from the timm repo (cached on disk).

    Downloads ``results-imagenet.csv`` into the working directory only when a
    file with that name does not already exist. Raises ``requests.HTTPError``
    on a failed download instead of silently caching an error page.
    """
    url = "https://github.com/huggingface/pytorch-image-models/raw/main/results/results-imagenet.csv"
    if not os.path.exists("results-imagenet.csv"):
        # timeout: never hang app start-up on a dead connection;
        # raise_for_status: never write a 404/500 body into the cache file
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open("results-imagenet.csv", "wb") as f:
            f.write(response.content)
17
 
18
+
19
def download_github_csvs_api(
    repo="huggingface/pytorch-image-models",
    folder="results",
    filename_pattern=r"benchmark-.*\.csv",
    output_dir="benchmarks",
):
    """Download benchmark CSV files from a GitHub repo folder via the contents API.

    Args:
        repo: ``owner/name`` of the GitHub repository.
        folder: Folder inside the repo to list.
        filename_pattern: Regex applied to each file name with ``re.match``.
        output_dir: Local directory the CSVs are written to (created if absent).

    Returns:
        The list of matched file names (already-present files are not
        re-downloaded; individual download failures are skipped). Empty list
        when the API call fails or nothing matches.
    """
    api_url = f"https://api.github.com/repos/{repo}/contents/{folder}"
    # timeout keeps a dead network from blocking app start-up indefinitely
    r = requests.get(api_url, timeout=30)
    if r.status_code != 200:
        return []

    files = r.json()
    matched_files = [f["name"] for f in files if re.match(filename_pattern, f["name"])]

    if not matched_files:
        return []

    raw_base = f"https://raw.githubusercontent.com/{repo}/main/{folder}/"
    os.makedirs(output_dir, exist_ok=True)

    for fname in matched_files:
        raw_url = raw_base + fname
        out_path = os.path.join(output_dir, fname)

        if not os.path.exists(out_path):  # Only download if not exists
            resp = requests.get(raw_url, timeout=30)
            if resp.ok:
                with open(out_path, "wb") as f:
                    f.write(resp.content)

    return matched_files
51
 
52
+
53
def load_main_data():
    """Return the main ImageNet results table, downloading it if needed.

    The full model identifier is preserved in ``model_org``; the ``model``
    column is stripped to everything before the first '.' so it lines up
    with the model names used in the benchmark files.
    """
    download_main_results()
    table = pd.read_csv("results-imagenet.csv")
    table["model_org"] = table["model"]
    # e.g. "convnext_base.fb_in22k" -> "convnext_base"
    table["model"] = table["model"].str.partition(".")[0]
    return table
60
 
61
+
62
def get_data(benchmark_file, df_results):
    """Load a benchmark CSV, merge it with the main results, derive columns.

    Args:
        benchmark_file: Path to a timm ``benchmark-*.csv`` file.
        df_results: Main ImageNet results with ``model``/``model_org`` columns.

    Returns:
        Merged DataFrame restricted to the whitelisted model families, with
        added ``secs`` (seconds per sample) and ``family`` columns; an empty
        DataFrame when ``benchmark_file`` does not exist.
    """
    # Anchored whitelist of model-family names worth plotting.
    pattern = (
        r"^(?:"
        r"eva|"
        r"maxx?vit(?:v2)?|"
        r"coatnet|coatnext|"
        r"convnext(?:v2)?|"
        r"beit(?:v2)?|"
        r"efficient(?:net(?:v2)?|former(?:v2)?|vit)|"
        r"regnet[xyvz]?|"
        r"levit|"
        r"vitd?|"
        r"swin(?:v2)?"
        r")$"
    )

    if not os.path.exists(benchmark_file):
        return pd.DataFrame()

    df = pd.read_csv(benchmark_file).merge(df_results, on="model")
    df["secs"] = 1.0 / df["infer_samples_per_sec"]
    # family = leading lowercase run (plus optional "v2") ending at a
    # digit, underscore, or end of name. Raw string: "\d" in a plain
    # string literal is an invalid escape on modern Python.
    df["family"] = df.model.str.extract(r"^([a-z]+?(?:v2)?)(?:\d|_|$)")
    df = df[~df.model.str.endswith("gn")]
    # distinguish the ResNet-D variants from plain ResNets
    is_resnet_d = df.model.str.contains(r"resnet.*d")
    df.loc[is_resnet_d, "family"] = df.loc[is_resnet_d, "family"] + "d"
    # na=False: rows whose family could not be extracted are dropped
    # instead of raising ValueError on the NaN mask
    return df[df.family.str.contains(pattern, na=False)]
90
 
91
+
92
def create_plot(benchmark_file, x_axis, y_axis, selected_families, log_x, log_y):
    """Build the scatter plot for the chosen benchmark file and axes.

    Returns a plotly figure, or ``None`` when no rows survive loading
    or the family filter.
    """
    merged = get_data(benchmark_file, load_main_data())
    if merged.empty:
        return None

    # Restrict to the families ticked in the UI (empty selection keeps all)
    if selected_families:
        merged = merged[merged["family"].isin(selected_families)]
        if merged.empty:
            return None

    return px.scatter(
        merged,
        width=1000,
        height=800,
        x=x_axis,
        y=y_axis,
        log_x=log_x,
        log_y=log_y,
        color="family",
        hover_name="model_org",
        hover_data=["infer_samples_per_sec", "infer_img_size"],
        title=f"Model Performance: {y_axis} vs {x_axis}",
    )
123
 
124
+
125
def setup_interface():
    """Collect the choices the Gradio UI needs.

    Returns:
        Tuple ``(benchmark_files, plot_columns, families)``: local benchmark
        CSV paths (or a one-element placeholder list when none exist), the
        plottable column names, and the model families found in the first
        benchmark file.
    """
    # Called for its side effect of populating benchmarks/;
    # the returned file names are not needed here.
    download_github_csvs_api()

    # Get available benchmark files
    benchmark_files = glob.glob("benchmarks/benchmark-*.csv")
    if not benchmark_files:
        benchmark_files = ["No benchmark files found"]

    # Load sample data to get families and columns
    df_results = load_main_data()

    # Relevant columns for plotting
    plot_columns = [
        "top1",
        "top5",
        "infer_samples_per_sec",
        "secs",
        "param_count_x",
        "infer_img_size",
    ]

    # Get families from a sample file (if available).
    # benchmark_files is never empty here, so only the placeholder
    # sentinel needs checking.
    families = []
    if benchmark_files[0] != "No benchmark files found":
        sample_df = get_data(benchmark_files[0], df_results)
        if not sample_df.empty:
            families = sorted(sample_df["family"].unique().tolist())

    return benchmark_files, plot_columns, families
156
 
157
+
158
# Initialize the interface
benchmark_files, plot_columns, families = setup_interface()

# Create the Gradio interface.
# NOTE(review): component creation order inside the Blocks context defines
# the rendered layout, so statement order here is significant.
with gr.Blocks(title="Image Model Performance Analysis") as demo:
    gr.Markdown("# Image Model Performance Analysis")
    gr.Markdown(
        "Analyze and visualize performance metrics of different image models based on benchmark data."
    )

    with gr.Row():
        with gr.Column(scale=1):

            # Set preferred default file; fall back to the first available
            # file when the preferred one was not downloaded.
            preferred_file = (
                "benchmarks/benchmark-infer-amp-nhwc-pt240-cu124-rtx3090.csv"
            )
            default_file = (
                preferred_file
                if preferred_file in benchmark_files
                else (benchmark_files[0] if benchmark_files else None)
            )

            benchmark_dropdown = gr.Dropdown(
                choices=benchmark_files,
                value=default_file,
                label="Select Benchmark File",
            )

            x_axis_radio = gr.Radio(choices=plot_columns, value="secs", label="X-axis")

            y_axis_radio = gr.Radio(choices=plot_columns, value="top1", label="Y-axis")

            family_checkboxes = gr.CheckboxGroup(
                choices=families, value=families, label="Select Model Families"
            )

            log_x_checkbox = gr.Checkbox(value=True, label="Log scale X-axis")

            log_y_checkbox = gr.Checkbox(value=False, label="Log scale Y-axis")

            update_button = gr.Button("Update Plot", variant="primary")

        with gr.Column(scale=2):
            plot_output = gr.Plot()

    # Update plot when button is clicked
    update_button.click(
        fn=create_plot,
        inputs=[
            benchmark_dropdown,
            x_axis_radio,
            y_axis_radio,
            family_checkboxes,
            log_x_checkbox,
            log_y_checkbox,
        ],
        outputs=plot_output,
    )

    # Auto-update when benchmark file changes
    def update_families(benchmark_file):
        """Return a refreshed family CheckboxGroup for the chosen file.

        Empty group when no file is selected, the placeholder sentinel is
        chosen, or the file yields no rows; otherwise all families of the
        new file, all pre-selected.
        """
        if not benchmark_file or benchmark_file == "No benchmark files found":
            return gr.CheckboxGroup(choices=[], value=[])

        df_results = load_main_data()
        df = get_data(benchmark_file, df_results)
        if df.empty:
            return gr.CheckboxGroup(choices=[], value=[])

        new_families = sorted(df["family"].unique().tolist())
        return gr.CheckboxGroup(choices=new_families, value=new_families)

    benchmark_dropdown.change(
        fn=update_families, inputs=benchmark_dropdown, outputs=family_checkboxes
    )

    # Load initial plot (same inputs as the button handler, run once on page load)
    demo.load(
        fn=create_plot,
        inputs=[
            benchmark_dropdown,
            x_axis_radio,
            y_axis_radio,
            family_checkboxes,
            log_x_checkbox,
            log_y_checkbox,
        ],
        outputs=plot_output,
    )

if __name__ == "__main__":
    demo.launch()