nihalaninihal committed on
Commit
689257f
Β·
verified Β·
1 Parent(s): 40df0c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +510 -59
app.py CHANGED
@@ -1,64 +1,515 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
 
 
 
 
 
62
 
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
1
import gradio as gr
import google.generativeai as genai
import os
from dotenv import load_dotenv
from github import Github
import json
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import base64
from typing import Dict, List, Any, Optional, Tuple
import tempfile
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import time

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

# Configure API keys.
# SECURITY FIX: credentials were previously hardcoded in source. Secrets
# committed to a repository must be considered leaked and revoked; they are
# now read from the environment (or .env) only.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GITHUB_TOKEN or not GEMINI_API_KEY:
    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")

# Initialize API clients (shared, module-level).
gh = Github(GITHUB_TOKEN)
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
    ]
)

# File extensions treated as source code by the structure/pattern analyses.
RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt"
}
63
+
64
class RepositoryAnalyzer:
    """Handles GitHub repository analysis.

    Gathers basic metadata, file/directory structure, lightweight code
    metrics, commit history, and contributor statistics through the
    module-level PyGithub client ``gh``. Failures inside individual
    sub-analyses are printed and degrade to partial/empty results instead
    of aborting the whole run; only a failure in :meth:`analyze` itself
    propagates.
    """

    def __init__(self, repo_url: str):
        """Extract ``owner/repo`` from a GitHub URL and fetch the repo handle.

        Args:
            repo_url: Web or clone URL, e.g. ``https://github.com/owner/repo``.

        Raises:
            ValueError: if the URL has fewer than two path components.
        """
        # Extract owner and repo name from URL
        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 2:
            raise ValueError("Invalid repository URL format")

        # FIX: also accept clone-style URLs that end in ".git".
        self.repo_name = parts[-1].removesuffix('.git')
        self.owner = parts[-2]
        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    def analyze(self) -> Dict[str, Any]:
        """Perform complete repository analysis and return the data dict."""
        try:
            # Basic repository information
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }

            # Analyze repository structure
            self.analysis_data["structure"] = self._analyze_structure()

            # Analyze code patterns
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()

            # Analyze commit history
            self.analysis_data["commit_history"] = self._analyze_commits()

            # Get contributor statistics
            self.analysis_data["contributors"] = self._analyze_contributors()

            return self.analysis_data

        except Exception as e:
            # FIX: chain the cause so the original traceback is not lost.
            raise Exception(f"Error analyzing repository: {str(e)}") from e

    def _analyze_structure(self) -> Dict[str, Any]:
        """Walk the repo tree; tally relevant files, directories, and size."""
        structure = {
            "files": defaultdict(int),
            "directories": set(),
            "total_size": 0,
        }

        try:
            contents = self.repo.get_contents("")
            # Breadth-first walk: pop a content item, enqueue subdirectories.
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    structure["directories"].add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    # Only recognized source-code extensions are counted.
                    if ext in RELEVANT_EXTENSIONS:
                        structure["files"][ext] += 1
                        structure["total_size"] += content.size
        except Exception as e:
            print(f"Error analyzing structure: {str(e)}")

        return {
            "file_types": dict(structure["files"]),
            "directory_count": len(structure["directories"]),
            "total_size": structure["total_size"],
            "file_count": sum(structure["files"].values())
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Sample up to 5 source files and record simple size/style metrics."""
        patterns = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list)
        }

        try:
            files = self.repo.get_contents("")
            analyzed = 0

            # Cap at 5 analyzed files to bound API calls and latency.
            while files and analyzed < 5:
                file = files.pop(0)
                if file.type == "dir":
                    files.extend(self.repo.get_contents(file.path))
                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
                    try:
                        # GitHub API returns file content base64-encoded.
                        content = base64.b64decode(file.content).decode('utf-8')
                        lines = content.splitlines()

                        if not lines:
                            continue

                        # Non-blank line count and mean raw line length.
                        loc = len([line for line in lines if line.strip()])
                        avg_line_length = sum(len(line) for line in lines) / len(lines)

                        patterns["samples"].append({
                            "path": file.path,
                            "language": Path(file.path).suffix[1:],
                            "loc": loc,
                            "avg_line_length": round(avg_line_length, 2)
                        })

                        patterns["languages"][Path(file.path).suffix[1:]] += loc
                        patterns["complexity_metrics"]["loc"].append(loc)
                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)

                        analyzed += 1

                    except Exception as e:
                        print(f"Error analyzing file {file.path}: {str(e)}")
                        continue

        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")

        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Summarize recent commits: authors, change sizes, hour-of-day spread."""
        commit_data = []
        commit_times = []

        try:
            commits = list(self.repo.get_commits()[:100])  # Get last 100 commits

            for commit in commits:
                try:
                    commit_info = {
                        "sha": commit.sha,
                        # commit.author can be None for unmapped emails.
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        }
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue

            # Analyze commit patterns
            commit_hours = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1

            total_commits = len(commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                # Guard against division by zero on empty histories.
                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
            }

        except Exception as e:
            print(f"Error in commit analysis: {str(e)}")
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """List contributors with their contribution counts."""
        contributor_data = []

        try:
            contributors = list(self.repo.get_contributors())
            for contributor in contributors:
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")

        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data
        }
258
+
259
# NOTE(review): the body below catches every Exception and returns an error
# tuple instead of raising, so this @retry wrapper can never actually fire —
# confirm whether retries were intended to cover rate-limit errors.
@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)
def analyze_repository(repo_url: str, progress=gr.Progress()) -> Tuple[str, str, str]:
    """Analyze repository and generate LLM summary with rate limit handling.

    Args:
        repo_url: GitHub repository URL entered by the user.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        (markdown_summary, path_to_saved_json, status_message); on failure,
        two empty strings plus an error status message.
    """
    try:
        # Initialize analyzer
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)

        # Perform analysis
        progress(0.3, desc="Analyzing repository structure and patterns...")
        analysis_data = analyzer.analyze()

        # Generate LLM summary
        progress(0.7, desc="Generating analysis summary...")

        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:

# Repository Analysis

## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]

## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]

## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]

## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]

## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]

## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]

## 🚀 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]

## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]

Please provide detailed analysis for each section while maintaining the formatting and emojis. Support insights with specific metrics and examples from the repository data where possible."""

        # Send the template plus the raw analysis JSON in a single turn.
        chat = model.start_chat(history=[])
        response = chat.send_message(f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}")

        # Save analysis data
        # delete=False: the file must outlive this call — its path is handed
        # to the UI state so ask_question() can reload it later.
        progress(0.9, desc="Saving analysis results...")
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
            json.dump(analysis_data, f, indent=2)
            analysis_file = f.name

        progress(1.0, desc="Analysis complete!")
        return response.text, analysis_file, "✅ Analysis completed successfully!"

    except Exception as e:
        # Errors are surfaced through the status message, not raised.
        error_message = f"❌ Error analyzing repository: {str(e)}"
        return "", "", error_message
369
+
370
def create_chat_session() -> Any:
    """Build a fresh Gemini model instance used for follow-up Q&A.

    Uses a smaller, lower-temperature configuration than the module-level
    summary model.
    """
    follow_up_config = {
        'temperature': 0.7,
        'top_p': 0.8,
        'top_k': 40,
        'max_output_tokens': 2048,
    }
    return genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=follow_up_config,
    )
381
+
382
@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)
def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Answer a follow-up question about a previously generated analysis.

    Returns the chat history extended with one (question, answer) tuple;
    failures are reported as the answer text rather than raised.
    """
    # Guard: nothing to answer against until an analysis has been run.
    if not analysis_file:
        return chat_history + [(question, "Please analyze a repository first before asking questions.")]

    try:
        # Reload the saved analysis so the prompt reflects the file on disk.
        with open(analysis_file, 'r') as f:
            analysis_data = json.load(f)

        # Fresh model per question; named so it does not shadow the
        # module-level `model`.
        qa_model = create_chat_session()

        # Assemble the prompt from ordered segments, joined once at the end.
        segments = [
            "You are an expert code analyst helping users understand repository analysis results.\n\n",
            f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n",
        ]
        if chat_history:
            segments.append("Previous conversation:\n")
            for user_msg, assistant_msg in chat_history:
                segments.append(f"User: {user_msg}\nAssistant: {assistant_msg}\n")
        segments.append(f"\nUser: {question}\nPlease provide your analysis:")
        prompt = "".join(segments)

        response = qa_model.generate_content(prompt)

        # Tuple format expected by the Gradio chatbot component.
        return chat_history + [(question, response.text)]

    except Exception as e:
        return chat_history + [(question, f"Error processing question: {str(e)}")]
422
+
423
+
424
+
425
+
426
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Intro / usage instructions shown at the top of the page.
    gr.Markdown("""
    # 🔍 GitHub Repository Analyzer

    Analyze any public GitHub repository using AI. The tool will:
    1. 📊 Analyze repository structure and patterns
    2. 💡 Generate insights about development practices
    3. 💭 Allow you to ask follow-up questions about the analysis

    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
    """)

    # Input row: URL textbox plus the analyze trigger button.
    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Add status message
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        # Use Markdown instead of Textbox for better formatting
        summary = gr.Markdown(
            label="Analysis Summary",
        )

    # Follow-up Q&A chat display.
    with gr.Row():
        chatbot = gr.Chatbot(
            label="Ask Questions",
            height=400,
            show_label=True
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4
        )
        ask_btn = gr.Button("💭 Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # Hidden state for analysis file
    # Holds the temp-file path returned by analyze_repository so
    # ask_question can reload the JSON later.
    analysis_file = gr.State("")

    def clear_outputs():
        """Reset summary, chat history, question box, and status message."""
        return "", [], "", ""

    # Set up event handlers
    # Two-step chain: show an immediate status, then run the (slow) analysis.
    analyze_btn.click(
        fn=lambda: "⏳ Analysis in progress...",
        inputs=None,
        outputs=status_msg,
        queue=False
    ).then(
        analyze_repository,
        inputs=[repo_url],
        outputs=[summary, analysis_file, status_msg],
    )

    # Ask a question, then clear the input box once answered.
    ask_btn.click(
        ask_question,
        inputs=[question, analysis_file, chatbot],
        outputs=[chatbot],
    ).then(
        lambda: "",  # Clear the question input
        None,
        question,
        queue=False
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[summary, chatbot, question, status_msg],
        queue=False
    )
507
 
508
# Launch the app
if __name__ == "__main__":
    # BUG FIX: the Blocks instance above is bound to the name `app`, not
    # `demo` — calling `demo.launch(...)` raised NameError at startup.
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space)
        server_port=7860,
        share=True,
        debug=True
    )