nihalaninihal committed on
Commit
689257f
Β·
verified Β·
1 Parent(s): 40df0c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +510 -59
app.py CHANGED
@@ -1,64 +1,515 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
 
 
 
 
 
62
 
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
1
import gradio as gr
import google.generativeai as genai
import os
from dotenv import load_dotenv
from github import Github
import json
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import base64
from typing import Dict, List, Any, Optional, Tuple
import tempfile
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import time

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

# Configure API keys.
# SECURITY FIX: credentials were previously hardcoded in source. Secrets
# committed to a repository must be considered leaked and revoked; they are
# now read from the environment (or .env) only.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GITHUB_TOKEN or not GEMINI_API_KEY:
    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")

# Initialize API clients (shared, module-level).
gh = Github(GITHUB_TOKEN)
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
    ]
)

# File extensions treated as source code by the structure/pattern analyses.
RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt"
}
63
+
64
class RepositoryAnalyzer:
    """Handles GitHub repository analysis.

    Gathers basic metadata, file/directory structure, lightweight code
    metrics, commit history, and contributor statistics through the
    module-level PyGithub client ``gh``. Failures inside individual
    sub-analyses are printed and degrade to partial/empty results instead
    of aborting the whole run; only a failure in :meth:`analyze` itself
    propagates.
    """

    def __init__(self, repo_url: str):
        """Extract ``owner/repo`` from a GitHub URL and fetch the repo handle.

        Args:
            repo_url: Web or clone URL, e.g. ``https://github.com/owner/repo``.

        Raises:
            ValueError: if the URL has fewer than two path components.
        """
        # Extract owner and repo name from URL
        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 2:
            raise ValueError("Invalid repository URL format")

        # FIX: also accept clone-style URLs that end in ".git".
        self.repo_name = parts[-1].removesuffix('.git')
        self.owner = parts[-2]
        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    def analyze(self) -> Dict[str, Any]:
        """Perform complete repository analysis and return the data dict."""
        try:
            # Basic repository information
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }

            # Analyze repository structure
            self.analysis_data["structure"] = self._analyze_structure()

            # Analyze code patterns
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()

            # Analyze commit history
            self.analysis_data["commit_history"] = self._analyze_commits()

            # Get contributor statistics
            self.analysis_data["contributors"] = self._analyze_contributors()

            return self.analysis_data

        except Exception as e:
            # FIX: chain the cause so the original traceback is not lost.
            raise Exception(f"Error analyzing repository: {str(e)}") from e

    def _analyze_structure(self) -> Dict[str, Any]:
        """Walk the repo tree; tally relevant files, directories, and size."""
        structure = {
            "files": defaultdict(int),
            "directories": set(),
            "total_size": 0,
        }

        try:
            contents = self.repo.get_contents("")
            # Breadth-first walk: pop a content item, enqueue subdirectories.
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    structure["directories"].add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    # Only recognized source-code extensions are counted.
                    if ext in RELEVANT_EXTENSIONS:
                        structure["files"][ext] += 1
                        structure["total_size"] += content.size
        except Exception as e:
            print(f"Error analyzing structure: {str(e)}")

        return {
            "file_types": dict(structure["files"]),
            "directory_count": len(structure["directories"]),
            "total_size": structure["total_size"],
            "file_count": sum(structure["files"].values())
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Sample up to 5 source files and record simple size/style metrics."""
        patterns = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list)
        }

        try:
            files = self.repo.get_contents("")
            analyzed = 0

            # Cap at 5 analyzed files to bound API calls and latency.
            while files and analyzed < 5:
                file = files.pop(0)
                if file.type == "dir":
                    files.extend(self.repo.get_contents(file.path))
                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
                    try:
                        # GitHub API returns file content base64-encoded.
                        content = base64.b64decode(file.content).decode('utf-8')
                        lines = content.splitlines()

                        if not lines:
                            continue

                        # Non-blank line count and mean raw line length.
                        loc = len([line for line in lines if line.strip()])
                        avg_line_length = sum(len(line) for line in lines) / len(lines)

                        patterns["samples"].append({
                            "path": file.path,
                            "language": Path(file.path).suffix[1:],
                            "loc": loc,
                            "avg_line_length": round(avg_line_length, 2)
                        })

                        patterns["languages"][Path(file.path).suffix[1:]] += loc
                        patterns["complexity_metrics"]["loc"].append(loc)
                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)

                        analyzed += 1

                    except Exception as e:
                        print(f"Error analyzing file {file.path}: {str(e)}")
                        continue

        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")

        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Summarize recent commits: authors, change sizes, hour-of-day spread."""
        commit_data = []
        commit_times = []

        try:
            commits = list(self.repo.get_commits()[:100])  # Get last 100 commits

            for commit in commits:
                try:
                    commit_info = {
                        "sha": commit.sha,
                        # commit.author can be None for unmapped emails.
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        }
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue

            # Analyze commit patterns
            commit_hours = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1

            total_commits = len(commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                # Guard against division by zero on empty histories.
                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
            }

        except Exception as e:
            print(f"Error in commit analysis: {str(e)}")
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """List contributors with their contribution counts."""
        contributor_data = []

        try:
            contributors = list(self.repo.get_contributors())
            for contributor in contributors:
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")

        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data
        }
258
+
259
# NOTE(review): the body below catches every Exception and returns an error
# tuple instead of raising, so this @retry wrapper can never actually fire —
# confirm whether retries were intended to cover rate-limit errors.
@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)
def analyze_repository(repo_url: str, progress=gr.Progress()) -> Tuple[str, str, str]:
    """Analyze repository and generate LLM summary with rate limit handling.

    Args:
        repo_url: GitHub repository URL entered by the user.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        (markdown_summary, path_to_saved_json, status_message); on failure,
        two empty strings plus an error status message.
    """
    try:
        # Initialize analyzer
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)

        # Perform analysis
        progress(0.3, desc="Analyzing repository structure and patterns...")
        analysis_data = analyzer.analyze()

        # Generate LLM summary
        progress(0.7, desc="Generating analysis summary...")

        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:

# Repository Analysis

## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]

## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]

## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]

## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]

## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]

## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]

## 🚀 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]

## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]

Please provide detailed analysis for each section while maintaining the formatting and emojis. Support insights with specific metrics and examples from the repository data where possible."""

        # Send the template plus the raw analysis JSON in a single turn.
        chat = model.start_chat(history=[])
        response = chat.send_message(f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}")

        # Save analysis data
        # delete=False: the file must outlive this call — its path is handed
        # to the UI state so ask_question() can reload it later.
        progress(0.9, desc="Saving analysis results...")
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
            json.dump(analysis_data, f, indent=2)
            analysis_file = f.name

        progress(1.0, desc="Analysis complete!")
        return response.text, analysis_file, "✅ Analysis completed successfully!"

    except Exception as e:
        # Errors are surfaced through the status message, not raised.
        error_message = f"❌ Error analyzing repository: {str(e)}"
        return "", "", error_message
369
+
370
def create_chat_session() -> Any:
    """Build a fresh Gemini model instance used for follow-up Q&A.

    Uses a smaller, lower-temperature configuration than the module-level
    summary model.
    """
    follow_up_config = {
        'temperature': 0.7,
        'top_p': 0.8,
        'top_k': 40,
        'max_output_tokens': 2048,
    }
    return genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=follow_up_config,
    )
381
+
382
@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)
def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Answer a follow-up question about a previously generated analysis.

    Returns the chat history extended with one (question, answer) tuple;
    failures are reported as the answer text rather than raised.
    """
    # Guard: nothing to answer against until an analysis has been run.
    if not analysis_file:
        return chat_history + [(question, "Please analyze a repository first before asking questions.")]

    try:
        # Reload the saved analysis so the prompt reflects the file on disk.
        with open(analysis_file, 'r') as f:
            analysis_data = json.load(f)

        # Fresh model per question; named so it does not shadow the
        # module-level `model`.
        qa_model = create_chat_session()

        # Assemble the prompt from ordered segments, joined once at the end.
        segments = [
            "You are an expert code analyst helping users understand repository analysis results.\n\n",
            f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n",
        ]
        if chat_history:
            segments.append("Previous conversation:\n")
            for user_msg, assistant_msg in chat_history:
                segments.append(f"User: {user_msg}\nAssistant: {assistant_msg}\n")
        segments.append(f"\nUser: {question}\nPlease provide your analysis:")
        prompt = "".join(segments)

        response = qa_model.generate_content(prompt)

        # Tuple format expected by the Gradio chatbot component.
        return chat_history + [(question, response.text)]

    except Exception as e:
        return chat_history + [(question, f"Error processing question: {str(e)}")]
422
+
423
+
424
+
425
+
426
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Intro / usage instructions shown at the top of the page.
    gr.Markdown("""
    # 🔍 GitHub Repository Analyzer

    Analyze any public GitHub repository using AI. The tool will:
    1. 📊 Analyze repository structure and patterns
    2. 💡 Generate insights about development practices
    3. 💭 Allow you to ask follow-up questions about the analysis

    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
    """)

    # Input row: URL textbox plus the analyze trigger button.
    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Add status message
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        # Use Markdown instead of Textbox for better formatting
        summary = gr.Markdown(
            label="Analysis Summary",
        )

    # Follow-up Q&A chat display.
    with gr.Row():
        chatbot = gr.Chatbot(
            label="Ask Questions",
            height=400,
            show_label=True
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4
        )
        ask_btn = gr.Button("💭 Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # Hidden state for analysis file
    # Holds the temp-file path returned by analyze_repository so
    # ask_question can reload the JSON later.
    analysis_file = gr.State("")

    def clear_outputs():
        """Reset summary, chat history, question box, and status message."""
        return "", [], "", ""

    # Set up event handlers
    # Two-step chain: show an immediate status, then run the (slow) analysis.
    analyze_btn.click(
        fn=lambda: "⏳ Analysis in progress...",
        inputs=None,
        outputs=status_msg,
        queue=False
    ).then(
        analyze_repository,
        inputs=[repo_url],
        outputs=[summary, analysis_file, status_msg],
    )

    # Ask a question, then clear the input box once answered.
    ask_btn.click(
        ask_question,
        inputs=[question, analysis_file, chatbot],
        outputs=[chatbot],
    ).then(
        lambda: "",  # Clear the question input
        None,
        question,
        queue=False
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[summary, chatbot, question, status_msg],
        queue=False
    )
507
 
508
# Launch the app
if __name__ == "__main__":
    # BUG FIX: the Blocks instance above is bound to the name `app`, not
    # `demo` — calling `demo.launch(...)` raised NameError at startup.
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space)
        server_port=7860,
        share=True,
        debug=True
    )