# Source: Hugging Face Spaces page (Space status at capture time: Sleeping)
import gradio as gr | |
import google.generativeai as genai | |
import os | |
from dotenv import load_dotenv | |
from github import Github | |
import json | |
from pathlib import Path | |
from datetime import datetime | |
from collections import defaultdict | |
import base64 | |
from typing import Dict, List, Any, Optional, Tuple | |
import tempfile | |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type | |
import time | |
import os | |
# Load environment variables (python-dotenv; typically reads a local .env file)
load_dotenv()

# Configure API keys.
# NOTE: the environment variables that are read are the lowercase names
# "github_api" and "gemini_api", not the Python constant names below.
GITHUB_TOKEN = os.getenv("github_api")
GEMINI_API_KEY = os.getenv("gemini_api")

if not GITHUB_TOKEN or not GEMINI_API_KEY:
    # Name the variables that are actually looked up so the error is actionable.
    raise ValueError(
        "Both the 'github_api' (GitHub token) and 'gemini_api' (Gemini API key) "
        "environment variables must be set"
    )

# Initialize APIs
gh = Github(GITHUB_TOKEN)
genai.configure(api_key=GEMINI_API_KEY)

# Model used to generate the main repository summary.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
    ],
)

# File extensions (lowercase, with leading dot) that count as source code
# when the repository tree is scanned.
RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt",
}
class RepositoryAnalyzer:
    """Handles GitHub repository analysis.

    Resolves a repository through the module-level ``gh`` client and collects
    basic metadata, a file-type breakdown, a few code samples, recent commits
    and contributor statistics into ``analysis_data``.
    """

    # Upper bound on source files sampled by ``_analyze_code_patterns``.
    MAX_SAMPLE_FILES = 5
    # Upper bound on commits fetched by ``_analyze_commits``.
    MAX_COMMITS = 100

    def __init__(self, repo_url: str):
        """Resolve ``repo_url`` to a repository object.

        Args:
            repo_url: Either a full URL (``https://github.com/owner/repo``) or
                a bare ``owner/repo`` slug.

        Raises:
            ValueError: If no owner/repository pair can be extracted.
        """
        self.owner, self.repo_name = self._parse_repo_url(repo_url)
        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    @staticmethod
    def _parse_repo_url(repo_url: str) -> Tuple[str, str]:
        """Extract ``(owner, repo_name)`` from a URL or ``owner/repo`` slug.

        Surrounding whitespace, trailing slashes and a trailing ``.git`` are
        ignored. For full URLs (containing ``://``) the first two path
        segments after the host are used, so links such as
        ``.../owner/repo/tree/main`` still resolve to ``owner/repo``.

        Raises:
            ValueError: If either the owner or the repository name is missing.
        """
        cleaned = (repo_url or "").strip().rstrip("/")
        if cleaned.endswith(".git"):
            cleaned = cleaned[: -len(".git")]

        if "://" in cleaned:
            # Drop the scheme, then the host; what remains is the URL path.
            path_segments = cleaned.split("://", 1)[1].split("/")[1:]
            # Pad so that a host-only or owner-only URL fails validation
            # below instead of raising IndexError here.
            owner, name = (path_segments + ["", ""])[:2]
        else:
            segments = cleaned.split("/")
            if len(segments) < 2:
                raise ValueError("Invalid repository URL format")
            owner, name = segments[-2], segments[-1]

        if not owner or not name:
            raise ValueError("Invalid repository URL format")
        return owner, name

    def analyze(self) -> Dict[str, Any]:
        """Perform complete repository analysis.

        Returns:
            ``analysis_data`` with the keys ``basic_info``, ``structure``,
            ``code_patterns``, ``commit_history`` and ``contributors``.

        Raises:
            Exception: Wraps any failure; the original error is chained as
                the cause.
        """
        try:
            # Basic repository information
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }
            # Analyze repository structure
            self.analysis_data["structure"] = self._analyze_structure()
            # Analyze code patterns
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()
            # Analyze commit history
            self.analysis_data["commit_history"] = self._analyze_commits()
            # Get contributor statistics
            self.analysis_data["contributors"] = self._analyze_contributors()
            return self.analysis_data
        except Exception as e:
            # Chain explicitly so the original traceback is preserved.
            raise Exception(f"Error analyzing repository: {str(e)}") from e

    def _analyze_structure(self) -> Dict[str, Any]:
        """Analyze repository structure and organization.

        Walks the tree breadth-first from the root. Best effort: on any error
        the message is printed and whatever was counted so far is returned.

        Returns:
            Dict with ``file_types`` (extension -> count), ``directory_count``,
            ``total_size`` and ``file_count``. Only files whose extension is
            in ``RELEVANT_EXTENSIONS`` are counted.
        """
        structure = {
            "files": defaultdict(int),
            "directories": set(),
            "total_size": 0,
        }
        try:
            contents = self.repo.get_contents("")
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    structure["directories"].add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    if ext in RELEVANT_EXTENSIONS:
                        structure["files"][ext] += 1
                        # NOTE(review): size is accumulated for relevant files
                        # only, matching file_count — confirm this is intended.
                        structure["total_size"] += content.size
        except Exception as e:
            print(f"Error analyzing structure: {str(e)}")
        return {
            "file_types": dict(structure["files"]),
            "directory_count": len(structure["directories"]),
            "total_size": structure["total_size"],
            "file_count": sum(structure["files"].values()),
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Analyze code patterns and style on a small sample of source files.

        Takes the first ``MAX_SAMPLE_FILES`` non-empty, decodable source files
        in breadth-first order and records simple size metrics for each.

        Returns:
            Dict with ``samples`` (per-file metrics), ``languages``
            (extension without dot -> non-blank line count) and
            ``complexity_metrics`` (lists of ``loc`` and ``avg_line_length``).
        """
        patterns = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list),
        }
        try:
            files = self.repo.get_contents("")
            analyzed = 0
            while files and analyzed < self.MAX_SAMPLE_FILES:
                entry = files.pop(0)
                if entry.type == "dir":
                    files.extend(self.repo.get_contents(entry.path))
                    continue

                ext = Path(entry.path).suffix.lower()
                if ext not in RELEVANT_EXTENSIONS:
                    continue

                try:
                    content = base64.b64decode(entry.content).decode('utf-8')
                    lines = content.splitlines()
                    if not lines:
                        # Empty files carry no signal and do not use up a sample slot.
                        continue
                    loc = len([line for line in lines if line.strip()])
                    avg_line_length = sum(len(line) for line in lines) / len(lines)
                    # Lower-cased so "A.PY" and "a.py" land in the same bucket,
                    # consistent with the extension filter above.
                    language = ext[1:]
                    patterns["samples"].append({
                        "path": entry.path,
                        "language": language,
                        "loc": loc,
                        "avg_line_length": round(avg_line_length, 2),
                    })
                    patterns["languages"][language] += loc
                    patterns["complexity_metrics"]["loc"].append(loc)
                    patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)
                    analyzed += 1
                except Exception as e:
                    # A single undecodable file must not abort the sampling.
                    print(f"Error analyzing file {entry.path}: {str(e)}")
                    continue
        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")
        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Analyze commit history and patterns.

        Looks at up to ``MAX_COMMITS`` commits. Commits that fail to process
        are reported and skipped.

        Returns:
            Dict with ``commits``, ``total_commits`` (number of commits that
            were processed, not the repository's full history),
            ``commit_hours`` (hour of author date -> count), ``avg_additions``
            and ``avg_deletions``. All-empty values if the listing itself fails.
        """
        commit_data = []
        commit_times = []
        try:
            commits = list(self.repo.get_commits()[:self.MAX_COMMITS])
            for commit in commits:
                try:
                    # NOTE(review): presumably `commit.stats` costs one extra
                    # API request per commit — confirm against rate limits.
                    commit_info = {
                        "sha": commit.sha,
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        },
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue

            # Analyze commit patterns
            commit_hours = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1

            total_commits = len(commit_data)
            total_additions = sum(c["changes"]["additions"] for c in commit_data)
            total_deletions = sum(c["changes"]["deletions"] for c in commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                "avg_additions": total_additions / total_commits if total_commits else 0,
                "avg_deletions": total_deletions / total_commits if total_commits else 0,
            }
        except Exception as e:
            print(f"Error in commit analysis: {str(e)}")
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0,
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """Analyze contributor statistics.

        Best effort: on error the message is printed and the contributors
        gathered so far are returned.

        Returns:
            Dict with ``total_contributors`` and ``contributors`` (a list of
            ``login`` / ``contributions`` / ``type`` records).
        """
        contributor_data = []
        try:
            for contributor in self.repo.get_contributors():
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")
        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data,
        }
# Instruction text sent to the model ahead of the JSON analysis data.
# NOTE(review): the emoji in the section headings were restored from
# mis-decoded bytes; 📊, 📈 and 🌟 are best guesses — confirm against the
# intended design.
_ANALYSIS_PROMPT_TEMPLATE = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:
# Repository Analysis
## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]
## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]
## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]
## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]
## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]
## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]
## 🌟 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]
## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]
Please provide detailed analysis for each section while maintaining the formatting and emojis. Support insights with specific metrics and examples from the repository data where possible."""


def analyze_repository(repo_url: str, progress=gr.Progress()) -> Tuple[str, str, str]:
    """Analyze a repository and generate an LLM-written summary.

    No retry or rate-limit handling is performed here; any failure is turned
    into an error status instead of being raised.

    Args:
        repo_url: Repository URL or ``owner/repo`` slug.
        progress: Gradio progress tracker, called with a fraction and ``desc``.

    Returns:
        ``(summary_markdown, analysis_file_path, status_message)``. On failure
        the first two items are empty strings and the status carries the error.
    """
    try:
        # Initialize analyzer
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)

        # Perform analysis
        progress(0.3, desc="Analyzing repository structure and patterns...")
        analysis_data = analyzer.analyze()

        # Generate LLM summary
        progress(0.7, desc="Generating analysis summary...")
        chat = model.start_chat(history=[])
        response = chat.send_message(
            f"{_ANALYSIS_PROMPT_TEMPLATE}\n\nRepository Analysis Data:\n"
            f"{json.dumps(analysis_data, indent=2)}"
        )
        # Read the text before touching the filesystem: if the response has no
        # usable text and this raises, no orphaned temp file is left behind.
        summary_text = response.text

        # Save analysis data; the file is kept (delete=False) so follow-up
        # questions can reload it by path.
        progress(0.9, desc="Saving analysis results...")
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, suffix='.json', encoding='utf-8'
        ) as f:
            json.dump(analysis_data, f, indent=2)
            analysis_file = f.name

        progress(1.0, desc="Analysis complete!")
        return summary_text, analysis_file, "✅ Analysis completed successfully!"
    except Exception as e:
        error_message = f"❌ Error analyzing repository: {str(e)}"
        return "", "", error_message
def create_chat_session() -> Any:
    """Build the Gemini model object used for follow-up questions.

    Despite the name, this returns a freshly configured
    ``genai.GenerativeModel`` rather than a started chat.
    """
    # NOTE(review): this uses "gemini-pro" while the summary model is
    # "gemini-1.5-pro-latest" — confirm the difference is intentional.
    follow_up_config = {
        'temperature': 0.7,
        'top_p': 0.8,
        'top_k': 40,
        'max_output_tokens': 2048,
    }
    follow_up_model = genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=follow_up_config,
    )
    return follow_up_model
def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Process a follow-up question about the analysis.

    Args:
        question: The user's question.
        analysis_file: Path to the JSON file holding the analysis data; an
            empty value means no repository has been analyzed yet.
        chat_history: Previous ``(user, assistant)`` pairs; ``None`` is
            treated as an empty history.

    Returns:
        A new history list with the question and its answer (or an error
        message) appended. A blank question leaves the history unchanged.
    """
    # Work on a copy so the caller's list is never mutated and None is safe.
    history = list(chat_history) if chat_history else []

    if not analysis_file:
        return history + [(question, "Please analyze a repository first before asking questions.")]

    if not question or not question.strip():
        # Nothing to ask: avoid a pointless model call.
        return history

    try:
        # Load analysis data
        with open(analysis_file, 'r', encoding='utf-8') as f:
            analysis_data = json.load(f)

        # Named chat_model so it does not shadow the module-level `model`.
        chat_model = create_chat_session()

        # Build the prompt from the analysis data, prior turns and the question.
        prompt_parts = [
            "You are an expert code analyst helping users understand repository analysis results.\n\n",
            f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n",
        ]
        if history:
            prompt_parts.append("Previous conversation:\n")
            prompt_parts.extend(
                f"User: {user_msg}\nAssistant: {assistant_msg}\n"
                for user_msg, assistant_msg in history
            )
        prompt_parts.append(f"\nUser: {question}\nPlease provide your analysis:")

        response = chat_model.generate_content("".join(prompt_parts))
        # Tuple format expected by the Gradio chatbot component.
        return history + [(question, response.text)]
    except Exception as e:
        error_message = f"Error processing question: {str(e)}"
        return history + [(question, error_message)]
# Intro text shown at the top of the page. Blank lines separate the heading,
# the numbered list and the closing paragraph so each renders as its own block.
# NOTE(review): emoji were restored from mis-decoded bytes; 🔍, 📊 and 💭 are
# best guesses — confirm against the intended design.
_APP_INTRO_MARKDOWN = """
# 🔍 GitHub Repository Analyzer

Analyze any public GitHub repository using AI. The tool will:

1. 📊 Analyze repository structure and patterns
2. 💡 Generate insights about development practices
3. 💭 Allow you to ask follow-up questions about the analysis

Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
"""

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown(_APP_INTRO_MARKDOWN)

    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Status line updated before and after each analysis run
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        # Markdown instead of Textbox so the generated summary is rendered
        summary = gr.Markdown(
            label="Analysis Summary",
        )

    with gr.Row():
        chatbot = gr.Chatbot(
            label="Ask Questions",
            height=400,
            show_label=True
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4
        )
        ask_btn = gr.Button("💭 Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # Hidden state: path of the JSON file written by analyze_repository
    analysis_file = gr.State("")

    def clear_outputs():
        """Return blank values for summary, chatbot, question and status."""
        # NOTE(review): analysis_file is not reset here, so questions asked
        # after clearing still use the previous analysis — confirm intended.
        return "", [], "", ""

    # Set up event handlers
    analyze_btn.click(
        # Show an immediate status before the long-running analysis starts.
        fn=lambda: "⏳ Analysis in progress...",
        inputs=None,
        outputs=status_msg,
        queue=False
    ).then(
        analyze_repository,
        inputs=[repo_url],
        outputs=[summary, analysis_file, status_msg],
    )

    ask_btn.click(
        ask_question,
        inputs=[question, analysis_file, chatbot],
        outputs=[chatbot],
    ).then(
        lambda: "",  # Clear the question input
        None,
        question,
        queue=False
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[summary, chatbot, question, status_msg],
        queue=False
    )

# Launch the app
if __name__ == "__main__":
    # NOTE(review): share=True and debug=True look like development settings;
    # confirm they are wanted for the hosted deployment.
    app.launch(
        share=True,
        debug=True
    )
# import gradio as gr | |
# import google.generativeai as genai | |
# import os | |
# from dotenv import load_dotenv | |
# from github import Github | |
# import json | |
# from pathlib import Path | |
# from datetime import datetime | |
# from collections import defaultdict | |
# import base64 | |
# from typing import Dict, List, Any, Optional, Tuple | |
# import tempfile | |
# from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type | |
# import time | |
# import os | |
# import re | |
# from fpdf import FPDF | |
# import requests | |
# # Load environment variables | |
# load_dotenv() | |
# # Configure API keys | |
# GITHUB_TOKEN = os.getenv("github_api") | |
# GEMINI_API_KEY = os.getenv("gemini_api") | |
# if not GITHUB_TOKEN or not GEMINI_API_KEY: | |
# raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment") | |
# # Initialize APIs | |
# gh = Github(GITHUB_TOKEN) | |
# genai.configure(api_key=GEMINI_API_KEY) | |
# model = genai.GenerativeModel( | |
# model_name="gemini-1.5-pro-latest", | |
# generation_config = { | |
# "temperature": 1, | |
# "top_p": 0.95, | |
# "top_k": 40, | |
# "max_output_tokens": 8192, | |
# "response_mime_type": "text/plain", | |
# }, | |
# safety_settings=[ | |
# { | |
# "category": "HARM_CATEGORY_HARASSMENT", | |
# "threshold": "BLOCK_MEDIUM_AND_ABOVE" | |
# }, | |
# { | |
# "category": "HARM_CATEGORY_HATE_SPEECH", | |
# "threshold": "BLOCK_MEDIUM_AND_ABOVE" | |
# }, | |
# { | |
# "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", | |
# "threshold": "BLOCK_MEDIUM_AND_ABOVE" | |
# }, | |
# { | |
# "category": "HARM_CATEGORY_DANGEROUS_CONTENT", | |
# "threshold": "BLOCK_MEDIUM_AND_ABOVE" | |
# }, | |
# ] | |
# ) | |
# RELEVANT_EXTENSIONS = { | |
# ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h", | |
# ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt" | |
# } | |
# class RepositoryAnalyzer: | |
# """Handles GitHub repository analysis""" | |
# def __init__(self, repo_url: str): | |
# # Extract owner and repo name from URL | |
# parts = repo_url.rstrip('/').split('/') | |
# if len(parts) < 2: | |
# raise ValueError("Invalid repository URL format") | |
# self.repo_name = parts[-1] | |
# self.owner = parts[-2] | |
# self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}") | |
# self.analysis_data: Dict[str, Any] = {} | |
# def analyze(self) -> Dict[str, Any]: | |
# """Perform complete repository analysis""" | |
# try: | |
# # Basic repository information | |
# self.analysis_data["basic_info"] = { | |
# "name": self.repo.name, | |
# "owner": self.repo.owner.login, | |
# "description": self.repo.description or "No description available", | |
# "stars": self.repo.stargazers_count, | |
# "forks": self.repo.forks_count, | |
# "created_at": self.repo.created_at.isoformat(), | |
# "last_updated": self.repo.updated_at.isoformat(), | |
# "primary_language": self.repo.language or "Not specified", | |
# } | |
# # Analyze repository structure | |
# self.analysis_data["structure"] = self._analyze_structure() | |
# # Analyze code patterns | |
# self.analysis_data["code_patterns"] = self._analyze_code_patterns() | |
# # Analyze commit history | |
# self.analysis_data["commit_history"] = self._analyze_commits() | |
# # Get contributor statistics | |
# self.analysis_data["contributors"] = self._analyze_contributors() | |
# return self.analysis_data | |
# except Exception as e: | |
# raise Exception(f"Error analyzing repository: {str(e)}") | |
# def _analyze_structure(self) -> Dict[str, Any]: | |
# """Analyze repository structure and organization""" | |
# structure = { | |
# "files": defaultdict(int), | |
# "directories": set(), | |
# "total_size": 0, | |
# } | |
# try: | |
# contents = self.repo.get_contents("") | |
# while contents: | |
# content = contents.pop(0) | |
# if content.type == "dir": | |
# structure["directories"].add(content.path) | |
# contents.extend(self.repo.get_contents(content.path)) | |
# else: | |
# ext = Path(content.path).suffix.lower() | |
# if ext in RELEVANT_EXTENSIONS: | |
# structure["files"][ext] += 1 | |
# structure["total_size"] += content.size | |
# except Exception as e: | |
# print(f"Error analyzing structure: {str(e)}") | |
# return { | |
# "file_types": dict(structure["files"]), | |
# "directory_count": len(structure["directories"]), | |
# "total_size": structure["total_size"], | |
# "file_count": sum(structure["files"].values()) | |
# } | |
# def _analyze_code_patterns(self) -> Dict[str, Any]: | |
# """Analyze code patterns and style""" | |
# patterns = { | |
# "samples": [], | |
# "languages": defaultdict(int), | |
# "complexity_metrics": defaultdict(list) | |
# } | |
# try: | |
# files = self.repo.get_contents("") | |
# analyzed = 0 | |
# while files and analyzed < 5: | |
# file = files.pop(0) | |
# if file.type == "dir": | |
# files.extend(self.repo.get_contents(file.path)) | |
# elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS: | |
# try: | |
# content = base64.b64decode(file.content).decode('utf-8') | |
# lines = content.splitlines() | |
# if not lines: | |
# continue | |
# loc = len([line for line in lines if line.strip()]) | |
# avg_line_length = sum(len(line) for line in lines) / len(lines) | |
# patterns["samples"].append({ | |
# "path": file.path, | |
# "language": Path(file.path).suffix[1:], | |
# "loc": loc, | |
# "avg_line_length": round(avg_line_length, 2) | |
# }) | |
# patterns["languages"][Path(file.path).suffix[1:]] += loc | |
# patterns["complexity_metrics"]["loc"].append(loc) | |
# patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length) | |
# analyzed += 1 | |
# except Exception as e: | |
# print(f"Error analyzing file {file.path}: {str(e)}") | |
# continue | |
# except Exception as e: | |
# print(f"Error in code pattern analysis: {str(e)}") | |
# return patterns | |
# def _analyze_commits(self) -> Dict[str, Any]: | |
# """Analyze commit history and patterns""" | |
# commit_data = [] | |
# commit_times = [] | |
# try: | |
# commits = list(self.repo.get_commits()[:100]) # Get last 100 commits | |
# for commit in commits: | |
# try: | |
# commit_info = { | |
# "sha": commit.sha, | |
# "author": commit.author.login if commit.author else "Unknown", | |
# "date": commit.commit.author.date.isoformat(), | |
# "message": commit.commit.message, | |
# "changes": { | |
# "additions": commit.stats.additions, | |
# "deletions": commit.stats.deletions, | |
# } | |
# } | |
# commit_data.append(commit_info) | |
# commit_times.append(commit.commit.author.date.hour) | |
# except Exception as e: | |
# print(f"Error processing commit {commit.sha}: {str(e)}") | |
# continue | |
# # Analyze commit patterns | |
# commit_hours = defaultdict(int) | |
# for hour in commit_times: | |
# commit_hours[hour] += 1 | |
# total_commits = len(commit_data) | |
# return { | |
# "commits": commit_data, | |
# "total_commits": total_commits, | |
# "commit_hours": dict(commit_hours), | |
# "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0, | |
# "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0, | |
# } | |
# except Exception as e: | |
# print(f"Error in commit analysis: {str(e)}") | |
# return { | |
# "commits": [], | |
# "total_commits": 0, | |
# "commit_hours": {}, | |
# "avg_additions": 0, | |
# "avg_deletions": 0 | |
# } | |
# def _analyze_contributors(self) -> Dict[str, Any]: | |
# """Analyze contributor statistics""" | |
# contributor_data = [] | |
# try: | |
# contributors = list(self.repo.get_contributors()) | |
# for contributor in contributors: | |
# contributor_data.append({ | |
# "login": contributor.login, | |
# "contributions": contributor.contributions, | |
# "type": contributor.type, | |
# }) | |
# except Exception as e: | |
# print(f"Error analyzing contributors: {str(e)}") | |
# return { | |
# "total_contributors": len(contributor_data), | |
# "contributors": contributor_data | |
# } | |
# def create_pdf_from_markdown(markdown_text: str, filename: str) -> str: | |
# """Convert markdown text to PDF""" | |
# class PDF(FPDF): | |
# def header(self): | |
# self.set_font('Arial', 'B', 12) | |
# self.cell(0, 10, 'Repository Analysis Report', 0, 1, 'C') | |
# self.ln(10) | |
# def footer(self): | |
# self.set_y(-15) | |
# self.set_font('Arial', 'I', 8) | |
# self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C') | |
# pdf = PDF() | |
# pdf.add_page() | |
# pdf.set_auto_page_break(auto=True, margin=15) | |
# # Process markdown sections | |
# sections = markdown_text.split('\n## ') | |
# # Handle main title | |
# if sections[0].startswith('# '): | |
# title = sections[0].split('\n')[0].replace('# ', '') | |
# pdf.set_font('Arial', 'B', 16) | |
# pdf.cell(0, 10, title, 0, 1, 'C') | |
# pdf.ln(5) | |
# content = '\n'.join(sections[0].split('\n')[1:]) | |
# sections[0] = content | |
# for section in sections: | |
# if section: | |
# # Extract section title and content | |
# lines = section.split('\n') | |
# if section == sections[0]: # First section (after main title) | |
# section_title = '' | |
# content = lines | |
# else: | |
# section_title = lines[0] | |
# content = lines[1:] | |
# # Add section title | |
# if section_title: | |
# pdf.set_font('Arial', 'B', 14) | |
# # Remove emojis from section titles | |
# clean_title = re.sub(r'[^\x00-\x7F]+', '', section_title) | |
# pdf.cell(0, 10, clean_title.strip(), 0, 1, 'L') | |
# pdf.ln(5) | |
# # Add content | |
# pdf.set_font('Arial', '', 11) | |
# for line in content: | |
# if line.strip(): | |
# # Remove markdown formatting and emojis | |
# clean_line = re.sub(r'[\*\[\]]', '', line) | |
# clean_line = re.sub(r'[^\x00-\x7F]+', '', clean_line) | |
# if line.startswith('- '): | |
# pdf.cell(10, 5, '', 0, 0) | |
# pdf.multi_cell(0, 5, clean_line[2:]) | |
# else: | |
# pdf.multi_cell(0, 5, clean_line) | |
# pdf.ln(5) | |
# # Save PDF | |
# pdf_path = f"{filename}.pdf" | |
# pdf.output(pdf_path) | |
# return pdf_path | |
# def download_noto_font(): | |
# """Download Google's Noto Color Emoji font if not already present""" | |
# font_path = "NotoColorEmoji.ttf" | |
# if not os.path.exists(font_path): | |
# url = "https://github.com/googlefonts/noto-emoji/raw/main/fonts/NotoColorEmoji.ttf" | |
# response = requests.get(url) | |
# with open(font_path, "wb") as f: | |
# f.write(response.content) | |
# return font_path | |
# class PDFWithEmoji(FPDF): | |
# def __init__(self): | |
# super().__init__() | |
# self.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True) | |
# self.add_font('Noto', '', 'NotoColorEmoji.ttf', uni=True) | |
# def header(self): | |
# self.set_font('DejaVu', '', 12) | |
# self.cell(0, 10, 'π Repository Analysis Report', 0, 1, 'C') | |
# self.ln(10) | |
# def footer(self): | |
# self.set_y(-15) | |
# self.set_font('DejaVu', '', 8) | |
# self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C') | |
# def write_with_emoji(self, text): | |
# """Write text with proper emoji support""" | |
# self.set_font('DejaVu', '', 11) | |
# self.multi_cell(0, 5, text) | |
# def create_pdf_report(markdown_text: str) -> str: | |
# """Create a PDF report with full emoji and Unicode support""" | |
# # Ensure required fonts are available | |
# if not os.path.exists("DejaVuSansCondensed.ttf"): | |
# url = "https://github.com/dejavu-fonts/dejavu-fonts/raw/master/ttf/DejaVuSansCondensed.ttf" | |
# response = requests.get(url) | |
# with open("DejaVuSansCondensed.ttf", "wb") as f: | |
# f.write(response.content) | |
# download_noto_font() | |
# # Create PDF | |
# pdf = PDFWithEmoji() | |
# pdf.add_page() | |
# pdf.set_auto_page_break(auto=True, margin=15) | |
# # Process markdown text | |
# sections = markdown_text.split('\n## ') | |
# # Handle main title | |
# if sections[0].startswith('# '): | |
# title = sections[0].split('\n')[0].replace('# ', '') | |
# pdf.set_font('DejaVu', '', 16) | |
# pdf.cell(0, 10, title, 0, 1, 'C') | |
# pdf.ln(5) | |
# content = '\n'.join(sections[0].split('\n')[1:]) | |
# sections[0] = content | |
# # Process each section | |
# for section in sections: | |
# if section: | |
# lines = section.split('\n') | |
# if section == sections[0]: # First section | |
# section_title = '' | |
# content = lines | |
# else: | |
# section_title = lines[0] | |
# content = lines[1:] | |
# # Add section title | |
# if section_title: | |
# pdf.set_font('DejaVu', '', 14) | |
# pdf.cell(0, 10, section_title, 0, 1, 'L') | |
# pdf.ln(5) | |
# # Add content | |
# pdf.set_font('DejaVu', '', 11) | |
# for line in content: | |
# if line.strip(): | |
# if line.strip().startswith('- '): | |
# pdf.cell(10, 5, 'β’', 0, 0) | |
# pdf.write_with_emoji(line.strip()[2:]) | |
# pdf.ln() | |
# else: | |
# pdf.write_with_emoji(line.strip()) | |
# pdf.ln() | |
# # Save PDF | |
# timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') | |
# pdf_path = f"repo_analysis_{timestamp}.pdf" | |
# pdf.output(pdf_path) | |
# return pdf_path | |
# @retry( | |
# retry=retry_if_exception_type(Exception), | |
# stop=stop_after_attempt(3), | |
# wait=wait_exponential(multiplier=1, min=4, max=10) | |
# ) | |
# def analyze_repository(repo_url: str, progress=gr.Progress()) -> Tuple[str, str, str]: | |
# """Analyze repository and generate LLM summary with rate limit handling""" | |
# try: | |
# # Initialize analyzer | |
# progress(0, desc="Initializing repository analysis...") | |
# analyzer = RepositoryAnalyzer(repo_url) | |
# # Perform analysis | |
# progress(0.3, desc="Analyzing repository structure and patterns...") | |
# analysis_data = analyzer.analyze() | |
# # Generate LLM summary | |
# progress(0.7, desc="Generating analysis summary...") | |
# system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template: | |
# # Repository Analysis | |
# ## π Project Overview | |
# [Provide a comprehensive overview including: | |
# - Project purpose and scope | |
# - Age and maturity of the project | |
# - Current activity level and maintenance status | |
# - Key metrics (stars, forks, etc.) | |
# - Primary technologies and languages used] | |
# ## ποΈ Architecture and Code Organization | |
# [Analyze in detail: | |
# - Repository structure and organization | |
# - Code distribution across different technologies | |
# - File and directory organization patterns | |
# - Project size and complexity metrics | |
# - Code modularity and component structure | |
# - Presence of key architectural patterns] | |
# ## π» Development Practices & Code Quality | |
# [Evaluate: | |
# - Coding standards and consistency | |
# - Code complexity and maintainability metrics | |
# - Documentation practices | |
# - Testing approach and coverage (if visible) | |
# - Error handling and logging practices | |
# - Use of design patterns and best practices] | |
# ## π Development Workflow & History | |
# [Analyze: | |
# - Commit patterns and frequency | |
# - Release cycles and versioning | |
# - Branch management strategy | |
# - Code review practices | |
# - Continuous integration/deployment indicators | |
# - Peak development periods and cycles] | |
# ## π₯ Team Dynamics & Collaboration | |
# [Examine: | |
# - Team size and composition | |
# - Contribution patterns | |
# - Core maintainer identification | |
# - Community engagement level | |
# - Communication patterns | |
# - Collaboration efficiency] | |
# ## π§ Technical Depth & Innovation | |
# [Assess: | |
# - Technical sophistication level | |
# - Innovative approaches or solutions | |
# - Complex problem-solving examples | |
# - Performance optimization efforts | |
# - Security considerations | |
# - Scalability approach] | |
# ## π Project Health & Sustainability | |
# [Evaluate: | |
# - Project momentum and growth trends | |
# - Maintenance patterns | |
# - Community health indicators | |
# - Documentation completeness | |
# - Onboarding friendliness | |
# - Long-term viability indicators] | |
# ## π‘ Key Insights & Recommendations | |
# [Provide: | |
# - 3-5 key strengths identified | |
# - 3-5 potential improvement areas | |
# - Notable patterns or practices | |
# - Unique characteristics | |
# - Strategic recommendations] | |
# Please provide detailed analysis for each section while maintaining the formatting and emojis. Support insights with specific metrics and examples from the repository data where possible.""" | |
# chat = model.start_chat(history=[]) | |
# response = chat.send_message(f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}") | |
# # Save analysis data | |
# progress(0.9, desc="Saving analysis results...") | |
# with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f: | |
# json.dump(analysis_data, f, indent=2) | |
# analysis_file = f.name | |
# progress(0.9, desc="Generating PDF report...") | |
# try: | |
# pdf_path = create_pdf_report(response.text) | |
# except Exception as pdf_error: | |
# print(f"PDF generation error: {str(pdf_error)}") | |
# pdf_path = "" | |
# # Generate PDF | |
# pdf_path = create_pdf_from_markdown(response.text, f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}") | |
# with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f: | |
# json.dump(analysis_data, f, indent=2) | |
# analysis_file = f.name | |
# progress(1.0, desc="✨ Analysis complete!") | |
# return response.text, analysis_file, pdf_path, "✅ Analysis completed successfully!" | |
# except Exception as e: | |
# error_message = f"❌ Error analyzing repository: {str(e)}" | |
# return "", "", "", error_message # Return 4 empty values when there's an error | |
# def create_chat_session() -> Any: | |
# """Create a new chat session for follow-up questions""" | |
# return genai.GenerativeModel( | |
# model_name="gemini-pro", | |
# generation_config={ | |
# 'temperature': 0.7, | |
# 'top_p': 0.8, | |
# 'top_k': 40, | |
# 'max_output_tokens': 2048, | |
# } | |
# ) | |
# @retry( | |
# retry=retry_if_exception_type(Exception), | |
# stop=stop_after_attempt(3), | |
# wait=wait_exponential(multiplier=1, min=4, max=10) | |
# ) | |
# def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]: | |
# """Process a follow-up question about the analysis""" | |
# if not analysis_file: | |
# return chat_history + [(question, "Please analyze a repository first before asking questions.")] | |
# try: | |
# # Load analysis data | |
# with open(analysis_file, 'r') as f: | |
# analysis_data = json.load(f) | |
# # Initialize chat model | |
# model = create_chat_session() | |
# # Build context from chat history and current question | |
# context = "You are an expert code analyst helping users understand repository analysis results.\n\n" | |
# context += f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n" | |
# # Add chat history context | |
# if chat_history: | |
# context += "Previous conversation:\n" | |
# for user_msg, assistant_msg in chat_history: | |
# context += f"User: {user_msg}\nAssistant: {assistant_msg}\n" | |
# # Add current question | |
# prompt = context + f"\nUser: {question}\nPlease provide your analysis:" | |
# # Get response | |
# response = model.generate_content(prompt) | |
# # Return in the correct tuple format for Gradio chatbot | |
# return chat_history + [(question, response.text)] | |
# except Exception as e: | |
# error_message = f"Error processing question: {str(e)}" | |
# return chat_history + [(question, error_message)] | |
# # Create Gradio interface | |
# with gr.Blocks(theme=gr.themes.Soft()) as app: | |
# gr.Markdown(""" | |
# # π GitHub Repository Analyzer | |
# Analyze any public GitHub repository using AI. The tool will: | |
# 1. π Analyze repository structure and patterns | |
# 2. 💡 Generate insights about development practices | |
# 3. π Allow you to ask follow-up questions about the analysis | |
# 4. π Generate a downloadable PDF report | |
# """) | |
# with gr.Row(): | |
# repo_url = gr.Textbox( | |
# label="GitHub Repository URL", | |
# placeholder="https://github.com/owner/repo", | |
# scale=4 | |
# ) | |
# with gr.Row(): | |
# with gr.Column(scale=1): | |
# analyze_btn = gr.Button("π Analyze", variant="primary") | |
# download_pdf_btn = gr.Button("π Download PDF", variant="secondary") | |
# # Status message | |
# status_msg = gr.Markdown("", elem_id="status_message") | |
# # File output for PDF | |
# pdf_output = gr.File( | |
# label="Analysis Report", | |
# visible=False, | |
# interactive=True | |
# ) | |
# with gr.Row(): | |
# summary = gr.Markdown(label="Analysis Summary") | |
# with gr.Row(): | |
# chatbot = gr.Chatbot( | |
# label="Ask Questions", | |
# height=400, | |
# show_label=True | |
# ) | |
# with gr.Row(): | |
# question = gr.Textbox( | |
# label="Your Question", | |
# placeholder="Ask about the analysis...", | |
# scale=4 | |
# ) | |
# ask_btn = gr.Button("π Ask", variant="primary", scale=1) | |
# clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1) | |
# # Hidden states | |
# analysis_file = gr.State("") | |
# current_pdf_path = gr.State("") | |
# def handle_pdf_download(pdf_path): | |
# """Handle PDF download when button is clicked""" | |
# if pdf_path and os.path.exists(pdf_path): | |
# return { | |
# pdf_output: pdf_path | |
# } | |
# return { | |
# pdf_output: None | |
# } | |
# def clear_outputs(): | |
# """Clear all outputs""" | |
# return { | |
# summary: "", | |
# chatbot: [], | |
# question: "", | |
# status_msg: "", | |
# pdf_output: None, | |
# current_pdf_path: "" | |
# } | |
# # Event handlers | |
# analyze_btn.click( | |
# fn=lambda: "⏳ Analysis in progress...", | |
# inputs=None, | |
# outputs=status_msg, | |
# queue=False | |
# ).then( | |
# analyze_repository, | |
# inputs=[repo_url], | |
# outputs=[summary, analysis_file, current_pdf_path, status_msg] | |
# ).then( | |
# lambda: gr.update(visible=True), | |
# None, | |
# download_pdf_btn | |
# ) | |
# # PDF download handler | |
# download_pdf_btn.click( | |
# handle_pdf_download, | |
# inputs=[current_pdf_path], | |
# outputs=pdf_output | |
# ).then( | |
# lambda: gr.update(visible=True), | |
# None, | |
# pdf_output | |
# ) | |
# ask_btn.click( | |
# ask_question, | |
# inputs=[question, analysis_file, chatbot], | |
# outputs=[chatbot] | |
# ).then( | |
# lambda: "", | |
# None, | |
# question, | |
# queue=False | |
# ) | |
# clear_btn.click( | |
# clear_outputs, | |
# inputs=None, | |
# outputs=[summary, chatbot, question, status_msg, pdf_output, current_pdf_path] | |
# ) | |
# # Launch the app | |
# if __name__ == "__main__": | |
# app.launch( | |
# share=True, | |
# debug=True | |
# ) |