# GitHub Repository Analyzer — Gradio app (Hugging Face Space)
import gradio as gr | |
import google.generativeai as genai | |
import os | |
from dotenv import load_dotenv | |
from github import Github | |
import json | |
from pathlib import Path | |
from datetime import datetime | |
from collections import defaultdict | |
import base64 | |
from typing import Dict, List, Any, Optional, Tuple | |
import tempfile | |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type | |
import time | |
# Load environment variables from a local .env file, if present.
load_dotenv()

# Configure API keys.
# SECURITY: credentials must come from the environment, never be hardcoded
# in source control. Any token previously committed here must be rotated.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GITHUB_TOKEN or not GEMINI_API_KEY:
    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")

# Initialize API clients.
gh = Github(GITHUB_TOKEN)
genai.configure(api_key=GEMINI_API_KEY)

# Primary model used for the one-shot repository summary.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
    ]
)

# File extensions considered "source code" during structure/pattern analysis.
RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt"
}
class RepositoryAnalyzer:
    """Gathers analysis data for a public GitHub repository.

    Collects basic metadata, file-structure statistics, shallow code-pattern
    metrics, recent commit history, and contributor counts via the GitHub API.

    Args:
        repo_url: Repository URL such as ``https://github.com/owner/repo``;
            a trailing slash or a clone-style ``.git`` suffix is tolerated.

    Raises:
        ValueError: If the URL does not contain an ``owner/repo`` pair.
    """

    def __init__(self, repo_url: str):
        # The last two path segments of the URL are "<owner>/<repo>".
        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 2:
            raise ValueError("Invalid repository URL format")
        repo_name = parts[-1]
        # Tolerate clone URLs that end in ".git" (the API wants the bare name).
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]
        self.repo_name = repo_name
        self.owner = parts[-2]
        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    def analyze(self) -> Dict[str, Any]:
        """Run the complete analysis and return the aggregated data.

        Returns:
            Dict with keys ``basic_info``, ``structure``, ``code_patterns``,
            ``commit_history`` and ``contributors``.

        Raises:
            Exception: Wraps any underlying API or processing error.
        """
        try:
            # Basic repository information.
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }
            # Repository structure, code patterns, commit history, contributors.
            self.analysis_data["structure"] = self._analyze_structure()
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()
            self.analysis_data["commit_history"] = self._analyze_commits()
            self.analysis_data["contributors"] = self._analyze_contributors()
            return self.analysis_data
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise Exception(f"Error analyzing repository: {str(e)}") from e

    def _analyze_structure(self) -> Dict[str, Any]:
        """Walk the repository tree and tally relevant files, dirs and size."""
        file_counts: Dict[str, int] = defaultdict(int)
        directories: set = set()
        total_size = 0
        try:
            # Breadth-first walk of the repo contents via the GitHub API.
            contents = self.repo.get_contents("")
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    directories.add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    # Only count files with recognized source-code extensions.
                    if ext in RELEVANT_EXTENSIONS:
                        file_counts[ext] += 1
                        total_size += content.size
        except Exception as e:
            # Best-effort: report partial results rather than failing outright.
            print(f"Error analyzing structure: {str(e)}")
        return {
            "file_types": dict(file_counts),
            "directory_count": len(directories),
            "total_size": total_size,
            "file_count": sum(file_counts.values()),
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Sample up to five source files and record size/style metrics."""
        patterns: Dict[str, Any] = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list),
        }
        try:
            files = self.repo.get_contents("")
            analyzed = 0
            # Cap at five successfully analyzed files to bound API usage.
            while files and analyzed < 5:
                file = files.pop(0)
                if file.type == "dir":
                    files.extend(self.repo.get_contents(file.path))
                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
                    try:
                        # File content arrives base64-encoded from the API.
                        content = base64.b64decode(file.content).decode('utf-8')
                        lines = content.splitlines()
                        if not lines:
                            continue
                        loc = len([line for line in lines if line.strip()])
                        avg_line_length = sum(len(line) for line in lines) / len(lines)
                        language = Path(file.path).suffix[1:]
                        patterns["samples"].append({
                            "path": file.path,
                            "language": language,
                            "loc": loc,
                            "avg_line_length": round(avg_line_length, 2),
                        })
                        patterns["languages"][language] += loc
                        patterns["complexity_metrics"]["loc"].append(loc)
                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)
                        analyzed += 1
                    except Exception as e:
                        # Skip undecodable/binary files and keep sampling.
                        print(f"Error analyzing file {file.path}: {str(e)}")
                        continue
        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")
        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Analyze up to the last 100 commits for authorship and churn."""
        commit_data = []
        commit_times = []
        try:
            commits = list(self.repo.get_commits()[:100])  # Get last 100 commits
            for commit in commits:
                try:
                    commit_info = {
                        "sha": commit.sha,
                        # commit.author can be None for non-GitHub authors.
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        }
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue
            # Histogram of commit hour-of-day.
            commit_hours: Dict[int, int] = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1
            total_commits = len(commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
            }
        except Exception as e:
            # Fall back to an empty-but-well-formed result on API failure.
            print(f"Error in commit analysis: {str(e)}")
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """Collect per-contributor login, contribution count and account type."""
        contributor_data = []
        try:
            for contributor in self.repo.get_contributors():
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")
        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data
        }
def analyze_repository(repo_url: str, progress=gr.Progress()) -> Tuple[str, str, str]:
    """Analyze a repository and generate an LLM-written markdown summary.

    Args:
        repo_url: Public GitHub repository URL.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of (markdown summary, path to the saved analysis JSON file,
        status message). On failure the first two elements are empty strings.
    """
    try:
        # Initialize analyzer.
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)

        # Perform analysis.
        progress(0.3, desc="Analyzing repository structure and patterns...")
        analysis_data = analyzer.analyze()

        # Generate LLM summary.
        progress(0.7, desc="Generating analysis summary...")
        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:

# Repository Analysis

## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]

## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]

## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]

## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]

## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]

## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]

## 🌱 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]

## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]

Please provide detailed analysis for each section while maintaining the formatting and emojis. Support insights with specific metrics and examples from the repository data where possible."""

        chat = model.start_chat(history=[])
        response = chat.send_message(
            f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}"
        )

        # Persist the raw analysis so follow-up questions can reuse it.
        # delete=False: the file must outlive this function (read by ask_question).
        progress(0.9, desc="Saving analysis results...")
        with tempfile.NamedTemporaryFile(
            mode='w', encoding='utf-8', delete=False, suffix='.json'
        ) as f:
            json.dump(analysis_data, f, indent=2)
            analysis_file = f.name

        progress(1.0, desc="Analysis complete!")
        return response.text, analysis_file, "✅ Analysis completed successfully!"
    except Exception as e:
        error_message = f"❌ Error analyzing repository: {str(e)}"
        return "", "", error_message
def create_chat_session() -> Any:
    """Build a fresh Gemini model instance for follow-up Q&A.

    Uses a lighter model and tighter generation settings than the
    main summary model.
    """
    qa_generation_config = {
        'temperature': 0.7,
        'top_p': 0.8,
        'top_k': 40,
        'max_output_tokens': 2048,
    }
    return genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=qa_generation_config,
    )
def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Answer a follow-up question using the previously saved analysis JSON.

    Args:
        question: The user's question text.
        analysis_file: Path to the JSON file written by `analyze_repository`
            (empty string if no analysis has been run yet).
        chat_history: Existing (user, assistant) message pairs.

    Returns:
        The chat history with one new (question, answer) pair appended;
        errors are reported as the answer text rather than raised.
    """
    if not analysis_file:
        return chat_history + [(question, "Please analyze a repository first before asking questions.")]
    try:
        # Load analysis data (written as UTF-8 JSON by analyze_repository).
        with open(analysis_file, 'r', encoding='utf-8') as f:
            analysis_data = json.load(f)

        # Fresh chat model per question; named so it does not shadow the
        # module-level `model` used for the main summary.
        chat_model = create_chat_session()

        # Build context from the analysis data and prior conversation.
        context = "You are an expert code analyst helping users understand repository analysis results.\n\n"
        context += f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n"
        if chat_history:
            context += "Previous conversation:\n"
            for user_msg, assistant_msg in chat_history:
                context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"

        # Add the current question and query the model.
        prompt = context + f"\nUser: {question}\nPlease provide your analysis:"
        response = chat_model.generate_content(prompt)

        # Return in the (user, assistant) tuple format Gradio's Chatbot expects.
        return chat_history + [(question, response.text)]
    except Exception as e:
        error_message = f"Error processing question: {str(e)}"
        return chat_history + [(question, error_message)]
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Header / usage instructions shown at the top of the page.
    gr.Markdown("""
    # 🔍 GitHub Repository Analyzer

    Analyze any public GitHub repository using AI. The tool will:
    1. 📊 Analyze repository structure and patterns
    2. 💡 Generate insights about development practices
    3. ❓ Allow you to ask follow-up questions about the analysis

    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
    """)

    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4,
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Status message area, updated before and after each analysis run.
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        # Markdown (instead of a Textbox) renders the summary's formatting.
        summary = gr.Markdown(
            label="Analysis Summary",
        )

    with gr.Row():
        chatbot = gr.Chatbot(
            label="Ask Questions",
            height=400,
            show_label=True,
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4,
        )
        ask_btn = gr.Button("❓ Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # Hidden state holding the path of the saved analysis JSON file.
    analysis_file = gr.State("")

    def clear_outputs():
        # Order matches the `outputs` list: summary, chatbot, question, status.
        return "", [], "", ""

    # Show an in-progress message immediately, then run the analysis.
    analyze_btn.click(
        fn=lambda: "⏳ Analysis in progress...",
        inputs=None,
        outputs=status_msg,
        queue=False,
    ).then(
        analyze_repository,
        inputs=[repo_url],
        outputs=[summary, analysis_file, status_msg],
    )

    # Answer the question, then clear the question input box.
    ask_btn.click(
        ask_question,
        inputs=[question, analysis_file, chatbot],
        outputs=[chatbot],
    ).then(
        lambda: "",  # Clear the question input
        None,
        question,
        queue=False,
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[summary, chatbot, question, status_msg],
        queue=False,
    )
# Launch the app when run as a script.
if __name__ == "__main__":
    # share=True exposes a temporary public URL; debug=True surfaces tracebacks.
    app.launch(share=True, debug=True)