Update app.py

app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 from typing import List, Dict, Tuple, Any
 import logging
 import os
+import time

 # Import core logic from other modules, as in app_old.py
 from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
@@ -32,9 +33,11 @@ def write_repos_to_csv(repo_ids: List[str]) -> None:
     try:
         with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
             writer = csv.writer(csvfile)
-            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
+            writer.writerow(["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
             for repo_id in repo_ids:
-                writer.writerow([repo_id, "", "", "", ""])
+                # Create Hugging Face Spaces link
+                hf_link = f"https://huggingface.co/spaces/{repo_id}"
+                writer.writerow([repo_id, hf_link, "", "", "", ""])
         logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
     except Exception as e:
         logger.error(f"Error writing to CSV: {e}")
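For reference, a minimal standalone sketch of the six-column row layout this hunk writes and then reads back. The file name and repo id below are illustrative only; app.py itself uses CSV_FILE and real repo ids, and csv is the standard-library module the app already imports.

import csv

demo_file = "repos_demo.csv"          # illustrative path, not the app's CSV_FILE
repo_id = "user/space-name"           # illustrative repo id

with open(demo_file, mode="w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    # New header: "link" sits between "repo id" and the analysis columns.
    writer.writerow(["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
    writer.writerow([repo_id, f"https://huggingface.co/spaces/{repo_id}", "", "", "", ""])

with open(demo_file, newline='', encoding="utf-8") as f:
    for row in csv.reader(f):
        print(row)  # header row, then the data row with the generated Spaces link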
@@ -63,15 +66,16 @@ def read_csv_to_dataframe() -> pd.DataFrame:

         # Format text columns for better display
         if not df.empty:
+            df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
+            # Keep link as is since it's a URL
             df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
-            df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
             # Keep relevance rating as is since it should be short

         return df
     except FileNotFoundError:
-        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
+        return pd.DataFrame(columns=["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
     except Exception as e:
         logger.error(f"Error reading CSV: {e}")
         return pd.DataFrame()
@@ -117,6 +121,9 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
                 df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                 df.at[idx, "speciality"] = llm_json.get("speciality", "")
                 df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
+                # Ensure link is present (in case it was added later)
+                if "link" in df.columns and (pd.isna(df.at[idx, "link"]) or df.at[idx, "link"] == ""):
+                    df.at[idx, "link"] = f"https://huggingface.co/spaces/{repo_id}"
                 repo_found_in_df = True
                 break

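The backfill in this hunk only touches the row currently being analyzed. A small sketch of the same idea applied to a whole DataFrame loaded from an older five-column CSV; the column names mirror app.py, but the rows are made up for illustration.

import pandas as pd

# Made-up rows in the old five-column layout (no "link" column yet).
df = pd.DataFrame({
    "repo id": ["user/space-a", "user/space-b"],
    "strength": ["", ""],
    "weaknesses": ["", ""],
    "speciality": ["", ""],
    "relevance rating": ["", ""],
})

# Add the column if missing, then fill empty/NaN links from the repo id.
if "link" not in df.columns:
    df["link"] = ""
needs_link = df["link"].isna() | (df["link"] == "")
df.loc[needs_link, "link"] = "https://huggingface.co/spaces/" + df.loc[needs_link, "repo id"]
print(df[["repo id", "link"]])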
@@ -277,13 +284,15 @@ def create_ui() -> gr.Blocks:
     .gr-dataframe th:nth-child(1),
     .gr-dataframe td:nth-child(1) { width: 15%; }
     .gr-dataframe th:nth-child(2),
-    .gr-dataframe td:nth-child(2) { width:
+    .gr-dataframe td:nth-child(2) { width: 15%; }
     .gr-dataframe th:nth-child(3),
-    .gr-dataframe td:nth-child(3) { width:
+    .gr-dataframe td:nth-child(3) { width: 20%; }
     .gr-dataframe th:nth-child(4),
     .gr-dataframe td:nth-child(4) { width: 20%; }
     .gr-dataframe th:nth-child(5),
     .gr-dataframe td:nth-child(5) { width: 15%; }
+    .gr-dataframe th:nth-child(6),
+    .gr-dataframe td:nth-child(6) { width: 15%; }

     /* Make repository names clickable */
     .gr-dataframe td:nth-child(1) {
@@ -299,6 +308,20 @@ def create_ui() -> gr.Blocks:
         transform: scale(1.02);
     }

+    /* Make links clickable and styled */
+    .gr-dataframe td:nth-child(2) {
+        cursor: pointer;
+        color: #667eea;
+        text-decoration: underline;
+        font-size: 0.9rem;
+        transition: all 0.3s ease;
+    }
+
+    .gr-dataframe td:nth-child(2):hover {
+        background-color: rgba(102, 126, 234, 0.1);
+        color: #764ba2;
+    }
+
     .gr-dataframe tbody tr:hover {
         background-color: rgba(102, 126, 234, 0.05);
     }
@@ -377,10 +400,22 @@ def create_ui() -> gr.Blocks:
             )

             with gr.Row():
-                analyze_next_btn = gr.Button("β‘ Analyze Next Repository", variant="primary", size="lg", scale=
-
+                analyze_next_btn = gr.Button("β‘ Analyze Next Repository", variant="primary", size="lg", scale=1)
+                analyze_all_btn = gr.Button("π Analyze All Repositories", variant="secondary", size="lg", scale=1)
+                with gr.Column(scale=2):
                     status_box_analysis = gr.Textbox(label="π Analysis Status", interactive=False, lines=2)

+            # Progress bar for batch analysis
+            with gr.Row():
+                analysis_progress = gr.Progress()
+                progress_display = gr.Textbox(
+                    label="π Batch Analysis Progress",
+                    interactive=False,
+                    lines=2,
+                    visible=False,
+                    info="Shows progress when analyzing all repositories"
+                )
+
             with gr.Row(equal_height=True):
                 # with gr.Column():
                 #     content_output = gr.Textbox(
@@ -400,7 +435,7 @@ def create_ui() -> gr.Blocks:
             gr.Markdown("### π Results Dashboard")
             gr.Markdown("π‘ **Tip:** Click on any repository name to explore it in detail!")
             df_output = gr.Dataframe(
-                headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
+                headers=["Repository", "Link", "Strengths", "Weaknesses", "Speciality", "Relevance"],
                 wrap=True,
                 interactive=False  # Prevent editing but allow selection
             )
@@ -603,7 +638,17 @@ def create_ui() -> gr.Blocks:

             # Handle pandas DataFrame
             if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
-
+
+                # If link column (column 1) is clicked, open the URL
+                if col_idx == 1 and "link" in df_data.columns:
+                    link_url = df_data.iloc[row_idx, 1]  # Second column contains link
+                    print(f"DEBUG: Link clicked: {link_url}")
+                    if link_url and str(link_url).strip() and str(link_url).startswith('http'):
+                        # Return JavaScript to open link in new tab
+                        js_code = f"window.open('{link_url}', '_blank');"
+                        return "", gr.update()
+
+                # For other columns, get the repository ID from the first column (repo id)
                 repo_id = df_data.iloc[row_idx, 0]  # First column contains repo id
                 print(f"DEBUG: Extracted repo_id = '{repo_id}'")

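The row_idx and col_idx values used in this handler are presumably supplied by the dataframe's select event, which is wired outside this diff. A minimal, self-contained sketch of that pattern, with component and handler names that are illustrative rather than the ones in app.py:

import gradio as gr
import pandas as pd

def on_select(evt: gr.SelectData, df: pd.DataFrame) -> str:
    # For a Dataframe selection, evt.index holds [row, col].
    row_idx, col_idx = evt.index
    value = df.iloc[row_idx, col_idx]
    return f"Clicked row {row_idx}, column {col_idx}: {value}"

with gr.Blocks() as demo:
    table = gr.Dataframe(
        value=pd.DataFrame({
            "repo id": ["user/space-a"],
            "link": ["https://huggingface.co/spaces/user/space-a"],
        }),
        interactive=False,
    )
    out = gr.Textbox(label="Selection")
    table.select(fn=on_select, inputs=[table], outputs=[out])

# demo.launch()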
@@ -621,6 +666,65 @@ def create_ui() -> gr.Blocks:

             return "", gr.update()

+        def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, str]:
+            """Analyzes all repositories in the CSV file with progress tracking."""
+            if not repo_ids:
+                return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first.", ""
+
+            total_repos = len(repo_ids)
+            progress_text = f"Starting batch analysis of {total_repos} repositories..."
+
+            try:
+                # Start the progress tracking
+                progress(0, desc="Initializing batch analysis...")
+
+                all_summaries = []
+                successful_analyses = 0
+                failed_analyses = 0
+
+                for i, repo_id in enumerate(repo_ids):
+                    # Update progress
+                    progress_percent = (i / total_repos)
+                    progress(progress_percent, desc=f"Analyzing {repo_id} ({i+1}/{total_repos})")
+
+                    try:
+                        logger.info(f"Batch analysis: Processing {repo_id} ({i+1}/{total_repos})")
+
+                        # Analyze the repository
+                        content, summary, df = analyze_and_update_single_repo(repo_id, user_requirements)
+                        all_summaries.append(f"β {repo_id}: Analysis completed")
+                        successful_analyses += 1
+
+                        # Small delay to show progress (optional)
+                        time.sleep(0.1)
+
+                    except Exception as e:
+                        logger.error(f"Error analyzing {repo_id}: {e}")
+                        all_summaries.append(f"β {repo_id}: Error - {str(e)[:100]}...")
+                        failed_analyses += 1
+
+                # Complete the progress
+                progress(1.0, desc="Batch analysis completed!")
+
+                # Final status
+                final_status = f"π Batch Analysis Complete!\nβ Successful: {successful_analyses}/{total_repos}\nβ Failed: {failed_analyses}/{total_repos}"
+
+                # Create progress summary
+                progress_summary = "\n".join(all_summaries[-10:])  # Show last 10 entries
+                if len(all_summaries) > 10:
+                    progress_summary = f"... (showing last 10 of {len(all_summaries)} repositories)\n" + progress_summary
+
+                # Get updated dataframe
+                updated_df = read_csv_to_dataframe()
+
+                logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed")
+                return updated_df, final_status, progress_summary
+
+            except Exception as e:
+                logger.error(f"Error in batch analysis: {e}")
+                error_status = f"β Batch analysis failed: {e}"
+                return read_csv_to_dataframe(), error_status, ""
+
         # --- Component Event Wiring ---

         # Initialize chatbot with welcome message on app load
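handle_analyze_all_repos relies on Gradio's progress-tracker injection: declaring progress=gr.Progress() as a default argument makes Gradio pass a live tracker when the function runs as an event handler, and calling it with a fraction plus desc= updates the UI. A stripped-down sketch of just that mechanism, where the work loop is a stand-in for the real per-repository analysis and the component names are hypothetical:

import time
import gradio as gr

def run_batch(items: list, progress=gr.Progress()) -> str:
    total = len(items)
    for i, item in enumerate(items):
        # Report the current fraction and a short description to the UI.
        progress(i / total, desc=f"Processing {item} ({i+1}/{total})")
        time.sleep(0.2)  # stand-in for the per-repository analysis
    progress(1.0, desc="Done")
    return f"Processed {total} items"

with gr.Blocks() as demo:
    start = gr.Button("Run")
    status = gr.Textbox(label="Status")
    items_state = gr.State(["user/space-a", "user/space-b", "user/space-c"])
    start.click(fn=run_batch, inputs=[items_state], outputs=[status])

# demo.launch()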
@@ -647,6 +751,17 @@ def create_ui() -> gr.Blocks:
             inputs=[repo_ids_state, current_repo_idx_state, user_requirements_state],
             outputs=[summary_output, df_output, current_repo_idx_state, status_box_analysis]
         )
+        analyze_all_btn.click(
+            fn=lambda: gr.update(visible=True),  # Show progress display
+            outputs=[progress_display]
+        ).then(
+            fn=handle_analyze_all_repos,
+            inputs=[repo_ids_state, user_requirements_state],
+            outputs=[df_output, status_box_analysis, progress_display]
+        ).then(
+            fn=lambda: gr.update(visible=True),  # Keep progress display visible with results
+            outputs=[progress_display]
+        )

         # Chatbot Tab
         msg_input.submit(
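The analyze_all_btn wiring above chains listeners with .then(), so the visibility toggle runs first and the long batch job only starts after it completes. A minimal sketch of that chaining pattern with hypothetical component names:

import gradio as gr

def long_job() -> str:
    return "finished"

with gr.Blocks() as demo:
    run_btn = gr.Button("Run")
    detail_box = gr.Textbox(label="Details", visible=False)

    run_btn.click(
        fn=lambda: gr.update(visible=True),  # step 1: reveal the detail box
        outputs=[detail_box]
    ).then(
        fn=long_job,                         # step 2: runs only after step 1 completes
        outputs=[detail_box]
    )

# demo.launch()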