Merge pull request #3 from Jwaminju/update-translator
Browse files- .gitignore +2 -1
- agent/handler.py +0 -6
- agent/toctree_handler.py +0 -1
- agent/workflow.py +20 -29
- example.env +12 -1
- logger/github_logger.py +71 -0
- pr_success.log +0 -2
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
.env
|
2 |
-
*/__pycache__/
|
|
|
|
1 |
.env
|
2 |
+
*/__pycache__/
|
3 |
+
pr_success.log
|
agent/handler.py
CHANGED
@@ -150,12 +150,6 @@ def start_translation_process():
|
|
150 |
""
|
151 |
f"{original_file_link}\n"
|
152 |
"**🌐 Translated Content:**\n"
|
153 |
-
# f"\n```\n\n{_extract_content_for_display(translated)}\n```"
|
154 |
-
# "\n```\n\n"
|
155 |
-
# f"\n{translated}\n"
|
156 |
-
# f"```"
|
157 |
-
# f"{status}\n"
|
158 |
-
# "✅ Translation completed. The code block will be added when generating PR."
|
159 |
)
|
160 |
return response, translated
|
161 |
|
|
|
150 |
""
|
151 |
f"{original_file_link}\n"
|
152 |
"**🌐 Translated Content:**\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
)
|
154 |
return response, translated
|
155 |
|
agent/toctree_handler.py
CHANGED
@@ -265,7 +265,6 @@ Korean title:"""
|
|
265 |
# Process toctree update with LLM
|
266 |
self.process_pr_commit(filepath)
|
267 |
# Commit toctree as separate commit
|
268 |
-
print("self.updated_ko_toctree:", self.updated_ko_toctree)
|
269 |
if self.updated_ko_toctree:
|
270 |
return self.commit_and_push_toctree(
|
271 |
pr_agent=pr_agent,
|
|
|
265 |
# Process toctree update with LLM
|
266 |
self.process_pr_commit(filepath)
|
267 |
# Commit toctree as separate commit
|
|
|
268 |
if self.updated_ko_toctree:
|
269 |
return self.commit_and_push_toctree(
|
270 |
pr_agent=pr_agent,
|
agent/workflow.py
CHANGED
@@ -12,7 +12,6 @@ from translator.content import (
|
|
12 |
preprocess_content,
|
13 |
)
|
14 |
from translator.retriever import report, get_github_issue_open_pr
|
15 |
-
|
16 |
# GitHub PR Agent import
|
17 |
try:
|
18 |
from pr_generator.agent import GitHubPRAgent
|
@@ -22,7 +21,8 @@ except ImportError as e:
|
|
22 |
print(f"⚠️ GitHub PR Agent is not available: {e}")
|
23 |
GITHUB_PR_AVAILABLE = False
|
24 |
|
25 |
-
|
|
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
@@ -71,7 +71,7 @@ def translate_docs(lang: str, file_path: str, additional_instruction: str = "")
|
|
71 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
72 |
existing_content = f.read()
|
73 |
if existing_content.strip():
|
74 |
-
return "Existing translation loaded (no tokens used)", existing_content
|
75 |
|
76 |
# step 1. Get content from file path
|
77 |
content = get_content(file_path)
|
@@ -115,6 +115,7 @@ def translate_docs_interactive(
|
|
115 |
callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction)
|
116 |
status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
|
117 |
|
|
|
118 |
print(status)
|
119 |
|
120 |
return translated_content
|
@@ -190,10 +191,11 @@ def generate_github_pr(
|
|
190 |
repo_name=github_config["repo_name"],
|
191 |
base_branch=github_config.get("base_branch", "main"),
|
192 |
)
|
|
|
193 |
# result = {
|
194 |
-
# 'status': 'partial_success',
|
195 |
-
# 'branch': 'ko-attention_interface',
|
196 |
-
# 'file_path': 'docs/source/ko/attention_interface.md',
|
197 |
# 'message': 'File was saved and commit was successful.\nPR creation failed: ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1', 'error_details': 'ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1'
|
198 |
# }
|
199 |
# Process toctree update after successful translation PR
|
@@ -213,19 +215,19 @@ def generate_github_pr(
|
|
213 |
toctree_status = f"\n📋 **Toctree Updated:** ✅ {toctree_result['message']}"
|
214 |
else:
|
215 |
toctree_status = f"\n📋 **Toctree Update Failed:** ❌ {toctree_result['message']}"
|
216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
if result["status"] == "success":
|
218 |
-
# Log successful PR to file
|
219 |
-
import datetime
|
220 |
-
pr_url = result.get('pr_url', 'NO_PR_URL')
|
221 |
-
log_entry = f"[{datetime.datetime.now().isoformat()}] {result['file_path']} -> {pr_url} ({result['status']})\n"
|
222 |
-
try:
|
223 |
-
with open("pr_success.log", "a", encoding="utf-8") as f:
|
224 |
-
f.write(log_entry)
|
225 |
-
print(f"✅ Logged PR result: {log_entry.strip()}")
|
226 |
-
except Exception as e:
|
227 |
-
print(f"❌ Failed to log PR result: {e}")
|
228 |
-
|
229 |
return f"""✅ **GitHub PR Creation Successful!**
|
230 |
|
231 |
🔗 **PR URL:** {result.get('pr_url', 'NO_PR_URL')}
|
@@ -235,17 +237,6 @@ def generate_github_pr(
|
|
235 |
{result["message"]}"""
|
236 |
|
237 |
elif result["status"] == "partial_success":
|
238 |
-
# Log partial success to file
|
239 |
-
import datetime
|
240 |
-
pr_url = result.get('pr_url', 'NO_PR_URL')
|
241 |
-
log_entry = f"[{datetime.datetime.now().isoformat()}] {result['file_path']} -> {pr_url} ({result['status']})\n"
|
242 |
-
try:
|
243 |
-
with open("pr_success.log", "a", encoding="utf-8") as f:
|
244 |
-
f.write(log_entry)
|
245 |
-
print(f"✅ Logged PR result: {log_entry.strip()}")
|
246 |
-
except Exception as e:
|
247 |
-
print(f"❌ Failed to log PR result: {e}")
|
248 |
-
|
249 |
return f"""⚠️ **Partial Success**
|
250 |
|
251 |
🌿 **Branch:** {result["branch"]}
|
|
|
12 |
preprocess_content,
|
13 |
)
|
14 |
from translator.retriever import report, get_github_issue_open_pr
|
|
|
15 |
# GitHub PR Agent import
|
16 |
try:
|
17 |
from pr_generator.agent import GitHubPRAgent
|
|
|
21 |
print(f"⚠️ GitHub PR Agent is not available: {e}")
|
22 |
GITHUB_PR_AVAILABLE = False
|
23 |
|
24 |
+
import json
|
25 |
+
from logger.github_logger import GitHubLogger
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
|
|
71 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
72 |
existing_content = f.read()
|
73 |
if existing_content.strip():
|
74 |
+
return "Existing translation loaded (no tokens used). If you want to translate again, please restart the gradio app.", existing_content
|
75 |
|
76 |
# step 1. Get content from file path
|
77 |
content = get_content(file_path)
|
|
|
115 |
callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction)
|
116 |
status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
|
117 |
|
118 |
+
print(callback_result)
|
119 |
print(status)
|
120 |
|
121 |
return translated_content
|
|
|
191 |
repo_name=github_config["repo_name"],
|
192 |
base_branch=github_config.get("base_branch", "main"),
|
193 |
)
|
194 |
+
# TEST CODE
|
195 |
# result = {
|
196 |
+
# 'status': 'partial_success',
|
197 |
+
# 'branch': 'ko-attention_interface',
|
198 |
+
# 'file_path': 'docs/source/ko/attention_interface.md',
|
199 |
# 'message': 'File was saved and commit was successful.\nPR creation failed: ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1', 'error_details': 'ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1'
|
200 |
# }
|
201 |
# Process toctree update after successful translation PR
|
|
|
215 |
toctree_status = f"\n📋 **Toctree Updated:** ✅ {toctree_result['message']}"
|
216 |
else:
|
217 |
toctree_status = f"\n📋 **Toctree Update Failed:** ❌ {toctree_result['message']}"
|
218 |
+
|
219 |
+
# Append full result JSON to dedicated GitHub logging repository (always)
|
220 |
+
try:
|
221 |
+
log_data = result.copy()
|
222 |
+
if toctree_result:
|
223 |
+
log_data["toctree_result"] = toctree_result
|
224 |
+
log_entry = json.dumps(log_data, ensure_ascii=False) + "\n"
|
225 |
+
log_res = GitHubLogger().append_jsonl(log_entry)
|
226 |
+
print(f"📝 Log append result: {log_res}")
|
227 |
+
except Exception as e:
|
228 |
+
print(f"❌ Failed to append PR log via GitHub API: {e}")
|
229 |
+
|
230 |
if result["status"] == "success":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
return f"""✅ **GitHub PR Creation Successful!**
|
232 |
|
233 |
🔗 **PR URL:** {result.get('pr_url', 'NO_PR_URL')}
|
|
|
237 |
{result["message"]}"""
|
238 |
|
239 |
elif result["status"] == "partial_success":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
return f"""⚠️ **Partial Success**
|
241 |
|
242 |
🌿 **Branch:** {result["branch"]}
|
example.env
CHANGED
@@ -4,4 +4,15 @@ ANTHROPIC_API_KEY=<your api key>
|
|
4 |
GITHUB_TOKEN=<your github token>
|
5 |
GITHUB_OWNER=<your github username>
|
6 |
GITHUB_REPO=<your repository name>
|
7 |
-
REFERENCE_PR_URL=<reference pr url for style analysis>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
GITHUB_TOKEN=<your github token>
|
5 |
GITHUB_OWNER=<your github username>
|
6 |
GITHUB_REPO=<your repository name>
|
7 |
+
REFERENCE_PR_URL=<reference pr url for style analysis>
|
8 |
+
|
9 |
+
# Secrets for deployment to HF space
|
10 |
+
HF_TOKEN=
|
11 |
+
HF_USERNAME=
|
12 |
+
HF_SPACE_NAME=
|
13 |
+
|
14 |
+
# Secrets for logging to Github
|
15 |
+
LOG_REPO=
|
16 |
+
LOG_GITHUB_TOKEN=
|
17 |
+
LOG_BRANCH=
|
18 |
+
LOG_FILE_PATH=
|
logger/github_logger.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
try:
|
6 |
+
from github import Github, GithubException
|
7 |
+
LIBS_OK = True
|
8 |
+
except ImportError:
|
9 |
+
LIBS_OK = False
|
10 |
+
|
11 |
+
class GitHubLogger:
    """Dedicated logger that appends JSONL entries to a GitHub repo/branch/file.

    Env vars:
    - LOG_GITHUB_TOKEN (fallback: GITHUB_TOKEN)
    - LOG_REPO (format: owner/repo)
    - LOG_BRANCH (default: 'log_event')
    - LOG_FILE_PATH (default: 'pr_success.log')

    A variable that is set but empty (e.g. ``LOG_BRANCH=`` copied from
    ``example.env``) is treated the same as an unset one, so the documented
    defaults still apply.
    """

    def __init__(self):
        """Read the logging configuration from the environment.

        Raises:
            ImportError: PyGithub could not be imported.
            ValueError: no token is configured, or LOG_REPO is missing or not
                in ``owner/repo`` form.
        """
        if not LIBS_OK:
            raise ImportError("PyGithub not installed. Please install PyGithub.")
        token = os.environ.get("LOG_GITHUB_TOKEN") or os.environ.get("GITHUB_TOKEN")
        if not token:
            raise ValueError("Missing LOG_GITHUB_TOKEN or GITHUB_TOKEN for logging")
        self._client = Github(token)

        repo_spec = (os.environ.get("LOG_REPO") or "").strip()
        # partition() splits on the first "/" only, like split("/", 1), and
        # lets us reject specs with an empty half such as "owner/" or "/repo".
        owner, _, repo_name = repo_spec.partition("/")
        if not owner or not repo_name:
            raise ValueError("Missing or invalid LOG_REPO. Expected 'owner/repo'.")
        self.owner, self.repo_name = owner, repo_name

        self.branch = self._env_or_default("LOG_BRANCH", "log_event")
        self.path = self._env_or_default("LOG_FILE_PATH", "pr_success.log")

    @staticmethod
    def _env_or_default(name, default, environ=None):
        """Return the value of env var *name*, or *default* if unset or blank.

        Args:
            name: environment variable to look up.
            default: value returned when the variable is missing or contains
                only whitespace.
            environ: mapping to read from; defaults to ``os.environ``.
        """
        source = os.environ if environ is None else environ
        value = (source.get(name) or "").strip()
        return value or default

    def _ensure_branch(self, repo):
        """Create ``self.branch`` from the repo's default branch if it is missing.

        Any GitHub error other than a 404 on the branch lookup is re-raised.
        """
        try:
            repo.get_branch(self.branch)
        except GithubException as e:
            if e.status != 404:
                raise
            base = repo.get_branch(repo.default_branch)
            repo.create_git_ref(ref=f"refs/heads/{self.branch}", sha=base.commit.sha)

    def append_jsonl(self, jsonl_line: str, commit_message: str = "chore(log): append entry") -> str:
        """Append one JSONL record to the log file, creating the file if needed.

        Args:
            jsonl_line: the serialized record; a trailing newline is added when
                the caller did not supply one.
            commit_message: commit message used for the create/update commit.

        Returns:
            A short ``"SUCCESS: ..."`` status string.

        Raises:
            GithubException: any GitHub API failure other than the log file
                not existing yet.
        """
        repo = self._client.get_repo(f"{self.owner}/{self.repo_name}")
        self._ensure_branch(repo)

        # Every record must end its own line, otherwise the next append would
        # fuse two JSON objects into one invalid JSONL line.
        if not jsonl_line.endswith("\n"):
            jsonl_line += "\n"

        # Only the lookup is guarded: a 404 here means "no log file yet". A 404
        # raised later by update_file is a different failure and must not be
        # mistaken for a missing file.
        try:
            existing = repo.get_contents(self.path, ref=self.branch)
        except GithubException as e:
            if e.status != 404:
                raise
            repo.create_file(
                path=self.path,
                message=commit_message,
                content=jsonl_line,
                branch=self.branch,
            )
            return "SUCCESS: Log file created and first entry appended"

        # NOTE(review): for very large files the contents API may return an
        # empty payload; confirm the log stays small enough for this read.
        existing_content = base64.b64decode(existing.content).decode("utf-8")
        if existing_content and not existing_content.endswith("\n"):
            # Repair a log whose last record was written without a newline.
            existing_content += "\n"

        repo.update_file(
            path=self.path,
            message=commit_message,
            content=existing_content + jsonl_line,
            sha=existing.sha,
            branch=self.branch,
        )
        return "SUCCESS: Log appended"
|
pr_success.log
CHANGED
@@ -1,2 +0,0 @@
|
|
1 |
-
# PR Success Log
|
2 |
-
# Format: [timestamp] file_path -> pr_url
|
|
|
|
|
|