naman1102 committed on
Commit
ae53812
·
1 Parent(s): 953203e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -10
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
  from typing import List, Dict, Tuple, Any
6
  import logging
7
  import os
 
8
 
9
  # Import core logic from other modules, as in app_old.py
10
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
@@ -32,9 +33,11 @@ def write_repos_to_csv(repo_ids: List[str]) -> None:
32
  try:
33
  with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
34
  writer = csv.writer(csvfile)
35
- writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
36
  for repo_id in repo_ids:
37
- writer.writerow([repo_id, "", "", "", ""])
 
 
38
  logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
39
  except Exception as e:
40
  logger.error(f"Error writing to CSV: {e}")
@@ -63,15 +66,16 @@ def read_csv_to_dataframe() -> pd.DataFrame:
63
 
64
  # Format text columns for better display
65
  if not df.empty:
 
 
66
  df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
67
  df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
68
  df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
69
- df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
70
  # Keep relevance rating as is since it should be short
71
 
72
  return df
73
  except FileNotFoundError:
74
- return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
75
  except Exception as e:
76
  logger.error(f"Error reading CSV: {e}")
77
  return pd.DataFrame()
@@ -117,6 +121,9 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
117
  df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
118
  df.at[idx, "speciality"] = llm_json.get("speciality", "")
119
  df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
 
 
 
120
  repo_found_in_df = True
121
  break
122
 
@@ -277,13 +284,15 @@ def create_ui() -> gr.Blocks:
277
  .gr-dataframe th:nth-child(1),
278
  .gr-dataframe td:nth-child(1) { width: 15%; }
279
  .gr-dataframe th:nth-child(2),
280
- .gr-dataframe td:nth-child(2) { width: 25%; }
281
  .gr-dataframe th:nth-child(3),
282
- .gr-dataframe td:nth-child(3) { width: 25%; }
283
  .gr-dataframe th:nth-child(4),
284
  .gr-dataframe td:nth-child(4) { width: 20%; }
285
  .gr-dataframe th:nth-child(5),
286
  .gr-dataframe td:nth-child(5) { width: 15%; }
 
 
287
 
288
  /* Make repository names clickable */
289
  .gr-dataframe td:nth-child(1) {
@@ -299,6 +308,20 @@ def create_ui() -> gr.Blocks:
299
  transform: scale(1.02);
300
  }
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  .gr-dataframe tbody tr:hover {
303
  background-color: rgba(102, 126, 234, 0.05);
304
  }
@@ -377,10 +400,22 @@ def create_ui() -> gr.Blocks:
377
  )
378
 
379
  with gr.Row():
380
- analyze_next_btn = gr.Button("⚑ Analyze Next Repository", variant="primary", size="lg", scale=2)
381
- with gr.Column(scale=3):
 
382
  status_box_analysis = gr.Textbox(label="πŸ“ˆ Analysis Status", interactive=False, lines=2)
383
 
 
 
 
 
 
 
 
 
 
 
 
384
  with gr.Row(equal_height=True):
385
  # with gr.Column():
386
  # content_output = gr.Textbox(
@@ -400,7 +435,7 @@ def create_ui() -> gr.Blocks:
400
  gr.Markdown("### πŸ“Š Results Dashboard")
401
  gr.Markdown("πŸ’‘ **Tip:** Click on any repository name to explore it in detail!")
402
  df_output = gr.Dataframe(
403
- headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
404
  wrap=True,
405
  interactive=False # Prevent editing but allow selection
406
  )
@@ -603,7 +638,17 @@ def create_ui() -> gr.Blocks:
603
 
604
  # Handle pandas DataFrame
605
  if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
606
- # Get the repository ID from the first column (repo id)
 
 
 
 
 
 
 
 
 
 
607
  repo_id = df_data.iloc[row_idx, 0] # First column contains repo id
608
  print(f"DEBUG: Extracted repo_id = '{repo_id}'")
609
 
@@ -621,6 +666,65 @@ def create_ui() -> gr.Blocks:
621
 
622
  return "", gr.update()
623
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
  # --- Component Event Wiring ---
625
 
626
  # Initialize chatbot with welcome message on app load
@@ -647,6 +751,17 @@ def create_ui() -> gr.Blocks:
647
  inputs=[repo_ids_state, current_repo_idx_state, user_requirements_state],
648
  outputs=[summary_output, df_output, current_repo_idx_state, status_box_analysis]
649
  )
 
 
 
 
 
 
 
 
 
 
 
650
 
651
  # Chatbot Tab
652
  msg_input.submit(
 
5
  from typing import List, Dict, Tuple, Any
6
  import logging
7
  import os
8
+ import time
9
 
10
  # Import core logic from other modules, as in app_old.py
11
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
 
33
  try:
34
  with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
35
  writer = csv.writer(csvfile)
36
+ writer.writerow(["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
37
  for repo_id in repo_ids:
38
+ # Create Hugging Face Spaces link
39
+ hf_link = f"https://huggingface.co/spaces/{repo_id}"
40
+ writer.writerow([repo_id, hf_link, "", "", "", ""])
41
  logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
42
  except Exception as e:
43
  logger.error(f"Error writing to CSV: {e}")
 
66
 
67
  # Format text columns for better display
68
  if not df.empty:
69
+ df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
70
+ # Keep link as is since it's a URL
71
  df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
72
  df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
73
  df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
 
74
  # Keep relevance rating as is since it should be short
75
 
76
  return df
77
  except FileNotFoundError:
78
+ return pd.DataFrame(columns=["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
79
  except Exception as e:
80
  logger.error(f"Error reading CSV: {e}")
81
  return pd.DataFrame()
 
121
  df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
122
  df.at[idx, "speciality"] = llm_json.get("speciality", "")
123
  df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
124
+ # Ensure link is present (in case it was added later)
125
+ if "link" in df.columns and (pd.isna(df.at[idx, "link"]) or df.at[idx, "link"] == ""):
126
+ df.at[idx, "link"] = f"https://huggingface.co/spaces/{repo_id}"
127
  repo_found_in_df = True
128
  break
129
 
 
284
  .gr-dataframe th:nth-child(1),
285
  .gr-dataframe td:nth-child(1) { width: 15%; }
286
  .gr-dataframe th:nth-child(2),
287
+ .gr-dataframe td:nth-child(2) { width: 15%; }
288
  .gr-dataframe th:nth-child(3),
289
+ .gr-dataframe td:nth-child(3) { width: 20%; }
290
  .gr-dataframe th:nth-child(4),
291
  .gr-dataframe td:nth-child(4) { width: 20%; }
292
  .gr-dataframe th:nth-child(5),
293
  .gr-dataframe td:nth-child(5) { width: 15%; }
294
+ .gr-dataframe th:nth-child(6),
295
+ .gr-dataframe td:nth-child(6) { width: 15%; }
296
 
297
  /* Make repository names clickable */
298
  .gr-dataframe td:nth-child(1) {
 
308
  transform: scale(1.02);
309
  }
310
 
311
+ /* Make links clickable and styled */
312
+ .gr-dataframe td:nth-child(2) {
313
+ cursor: pointer;
314
+ color: #667eea;
315
+ text-decoration: underline;
316
+ font-size: 0.9rem;
317
+ transition: all 0.3s ease;
318
+ }
319
+
320
+ .gr-dataframe td:nth-child(2):hover {
321
+ background-color: rgba(102, 126, 234, 0.1);
322
+ color: #764ba2;
323
+ }
324
+
325
  .gr-dataframe tbody tr:hover {
326
  background-color: rgba(102, 126, 234, 0.05);
327
  }
 
400
  )
401
 
402
  with gr.Row():
403
+ analyze_next_btn = gr.Button("⚑ Analyze Next Repository", variant="primary", size="lg", scale=1)
404
+ analyze_all_btn = gr.Button("πŸš€ Analyze All Repositories", variant="secondary", size="lg", scale=1)
405
+ with gr.Column(scale=2):
406
  status_box_analysis = gr.Textbox(label="πŸ“ˆ Analysis Status", interactive=False, lines=2)
407
 
408
+ # Progress bar for batch analysis
409
+ with gr.Row():
410
+ analysis_progress = gr.Progress()
411
+ progress_display = gr.Textbox(
412
+ label="πŸ“Š Batch Analysis Progress",
413
+ interactive=False,
414
+ lines=2,
415
+ visible=False,
416
+ info="Shows progress when analyzing all repositories"
417
+ )
418
+
419
  with gr.Row(equal_height=True):
420
  # with gr.Column():
421
  # content_output = gr.Textbox(
 
435
  gr.Markdown("### πŸ“Š Results Dashboard")
436
  gr.Markdown("πŸ’‘ **Tip:** Click on any repository name to explore it in detail!")
437
  df_output = gr.Dataframe(
438
+ headers=["Repository", "Link", "Strengths", "Weaknesses", "Speciality", "Relevance"],
439
  wrap=True,
440
  interactive=False # Prevent editing but allow selection
441
  )
 
638
 
639
  # Handle pandas DataFrame
640
  if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
641
+
642
+ # If link column (column 1) is clicked, open the URL
643
+ if col_idx == 1 and "link" in df_data.columns:
644
+ link_url = df_data.iloc[row_idx, 1] # Second column contains link
645
+ print(f"DEBUG: Link clicked: {link_url}")
646
+ if link_url and str(link_url).strip() and str(link_url).startswith('http'):
647
+ # Return JavaScript to open link in new tab
648
+ js_code = f"window.open('{link_url}', '_blank');"
649
+ return "", gr.update()
650
+
651
+ # For other columns, get the repository ID from the first column (repo id)
652
  repo_id = df_data.iloc[row_idx, 0] # First column contains repo id
653
  print(f"DEBUG: Extracted repo_id = '{repo_id}'")
654
 
 
666
 
667
  return "", gr.update()
668
 
669
def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, str]:
    """Analyze every repository in ``repo_ids`` and report batch progress.

    Each repository is passed to ``analyze_and_update_single_repo``; a failure
    on one repository is logged and counted, and the batch continues with the
    next one.

    Args:
        repo_ids: Repository IDs to analyze, processed in the given order.
        user_requirements: Requirements text forwarded unchanged to
            ``analyze_and_update_single_repo`` for every repository.
        progress: Gradio progress tracker. It is called with a completion
            fraction between 0 and 1 and a short description.

    Returns:
        A ``(dataframe, status, progress_summary)`` tuple:

        * ``dataframe`` - the result of ``read_csv_to_dataframe()`` after the
          batch (an empty ``DataFrame`` when ``repo_ids`` is empty).
        * ``status`` - a human-readable success/failure count, or an error
          message if the batch itself failed.
        * ``progress_summary`` - one line per repository, limited to the last
          ten entries; empty on early exit or batch failure.
    """
    if not repo_ids:
        return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first.", ""

    total_repos = len(repo_ids)
    max_error_chars = 100   # cap on the error text shown per repository
    max_summary_lines = 10  # only the most recent entries are displayed

    try:
        progress(0, desc="Initializing batch analysis...")

        all_summaries = []
        successful_analyses = 0
        failed_analyses = 0

        for i, repo_id in enumerate(repo_ids):
            # Fraction reflects repositories finished *before* this one starts.
            progress(i / total_repos, desc=f"Analyzing {repo_id} ({i+1}/{total_repos})")

            try:
                logger.info(f"Batch analysis: Processing {repo_id} ({i+1}/{total_repos})")

                # Only the side effects matter here; the refreshed table is
                # re-read once after the loop. NOTE(review): presumably this
                # call persists its results to the CSV - confirm against
                # analyze_and_update_single_repo.
                analyze_and_update_single_repo(repo_id, user_requirements)
            except Exception as e:
                # Best-effort batch: record the failure and keep going.
                logger.exception(f"Error analyzing {repo_id}: {e}")
                error_text = str(e)
                if len(error_text) > max_error_chars:
                    # Add the ellipsis only when text was actually cut off.
                    error_text = error_text[:max_error_chars] + "..."
                all_summaries.append(f"❌ {repo_id}: Error - {error_text}")
                failed_analyses += 1
            else:
                all_summaries.append(f"✅ {repo_id}: Analysis completed")
                successful_analyses += 1

        progress(1.0, desc="Batch analysis completed!")

        final_status = (
            "🎉 Batch Analysis Complete!\n"
            f"✅ Successful: {successful_analyses}/{total_repos}\n"
            f"❌ Failed: {failed_analyses}/{total_repos}"
        )

        progress_summary = "\n".join(all_summaries[-max_summary_lines:])
        if len(all_summaries) > max_summary_lines:
            progress_summary = (
                f"... (showing last {max_summary_lines} of {len(all_summaries)} repositories)\n"
                + progress_summary
            )

        updated_df = read_csv_to_dataframe()

        logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed")
        return updated_df, final_status, progress_summary

    except Exception as e:
        # Anything outside the per-repo handler (progress callback, CSV read)
        # lands here; still return whatever table can be read for the UI.
        logger.exception(f"Error in batch analysis: {e}")
        error_status = f"❌ Batch analysis failed: {e}"
        return read_csv_to_dataframe(), error_status, ""
727
+
728
  # --- Component Event Wiring ---
729
 
730
  # Initialize chatbot with welcome message on app load
 
751
  inputs=[repo_ids_state, current_repo_idx_state, user_requirements_state],
752
  outputs=[summary_output, df_output, current_repo_idx_state, status_box_analysis]
753
  )
754
+ analyze_all_btn.click(
755
+ fn=lambda: gr.update(visible=True), # Show progress display
756
+ outputs=[progress_display]
757
+ ).then(
758
+ fn=handle_analyze_all_repos,
759
+ inputs=[repo_ids_state, user_requirements_state],
760
+ outputs=[df_output, status_box_analysis, progress_display]
761
+ ).then(
762
+ fn=lambda: gr.update(visible=True), # Keep progress display visible with results
763
+ outputs=[progress_display]
764
+ )
765
 
766
  # Chatbot Tab
767
  msg_input.submit(