wony617
committed on
Commit
·
7e4dd83
1
Parent(s):
fe6c90f
Initial for supporting smolagent translation
Browse files
- agent/handler.py +24 -15
- agent/workflow.py +5 -4
- app.py +7 -1
- example.env +0 -1
- logger/github_logger.py +3 -3
- translator/project_config.py +48 -0
- translator/retriever.py +39 -23
agent/handler.py
CHANGED
@@ -13,12 +13,14 @@ from agent.workflow import (
|
|
13 |
)
|
14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
|
|
16 |
|
17 |
|
18 |
# State management
|
19 |
class ChatState:
|
20 |
def __init__(self):
|
21 |
-
self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
|
|
|
22 |
self.target_language = "ko"
|
23 |
self.k_files = 10
|
24 |
self.files_to_translate = []
|
@@ -53,25 +55,26 @@ def _extract_content_for_display(content: str) -> str:
|
|
53 |
|
54 |
|
55 |
def get_welcome_message():
|
56 |
-
"""Initial welcome message with
|
57 |
return """**π Welcome to π Hugging Face i18n Translation Agent!**
|
58 |
|
59 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
60 |
|
61 |
-
|
62 |
|
63 |
-
Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.
|
64 |
"""
|
65 |
|
66 |
|
67 |
-
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
68 |
"""Process file search request and update Gradio UI components."""
|
69 |
global state
|
|
|
70 |
state.target_language = lang
|
71 |
state.k_files = k
|
72 |
state.step = "find_files"
|
73 |
|
74 |
-
status_report, files_list = report_translation_target_files(lang, k)
|
75 |
state.files_to_translate = (
|
76 |
[file[0] for file in files_list]
|
77 |
if files_list
|
@@ -87,8 +90,10 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
87 |
"""
|
88 |
|
89 |
if state.files_to_translate:
|
|
|
90 |
for i, file in enumerate(state.files_to_translate, 1):
|
91 |
-
|
|
|
92 |
|
93 |
# if len(state.files_to_translate) > 5:
|
94 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
@@ -138,9 +143,8 @@ def start_translation_process():
|
|
138 |
p.parent.mkdir(parents=True, exist_ok=True)
|
139 |
p.write_text(translated, encoding="utf-8")
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
)
|
144 |
print("Compeleted translation:\n")
|
145 |
print(translated)
|
146 |
print("----------------------------")
|
@@ -226,12 +230,12 @@ def handle_user_message(message, history):
|
|
226 |
|
227 |
def update_status():
|
228 |
if state.step == "welcome":
|
229 |
-
return """
|
230 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
231 |
<div><strong>π Step:</strong> Welcome</div>
|
|
|
232 |
<div><strong>π Files:</strong> 0</div>
|
233 |
-
<div><strong>π Language:</strong>
|
234 |
-
<div><strong>⏳ Progress:</strong> Ready</div>
|
235 |
</div>
|
236 |
"""
|
237 |
|
@@ -267,6 +271,7 @@ def update_status():
|
|
267 |
status_html = f"""
|
268 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
269 |
<div><strong>π Step:</strong> {step_map.get(state.step, state.step)}</div>
|
|
|
270 |
<div><strong>π Files:</strong> {len(state.files_to_translate)}</div>
|
271 |
<div><strong>π Language:</strong> {state.target_language}</div>
|
272 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
@@ -292,14 +297,18 @@ def update_github_config(token, owner, repo, reference_pr_url):
|
|
292 |
if token:
|
293 |
os.environ["GITHUB_TOKEN"] = token
|
294 |
|
|
|
|
|
|
|
|
|
|
|
295 |
# Save GitHub configuration to state
|
296 |
state.github_config.update(
|
297 |
{
|
298 |
"token": token,
|
299 |
"owner": owner,
|
300 |
"repo_name": repo,
|
301 |
-
"reference_pr_url": reference_pr_url
|
302 |
-
or state.github_config["reference_pr_url"],
|
303 |
}
|
304 |
)
|
305 |
|
|
|
13 |
)
|
14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
16 |
+
from translator.project_config import get_available_projects, get_project_config
|
17 |
|
18 |
|
19 |
# State management
|
20 |
class ChatState:
|
21 |
def __init__(self):
|
22 |
+
self.step = "welcome" # welcome -> select_project -> find_files -> translate -> create_github_pr
|
23 |
+
self.selected_project = "transformers" # Default project
|
24 |
self.target_language = "ko"
|
25 |
self.k_files = 10
|
26 |
self.files_to_translate = []
|
|
|
55 |
|
56 |
|
57 |
def get_welcome_message():
|
58 |
+
"""Initial welcome message with project selection"""
|
59 |
return """**π Welcome to π Hugging Face i18n Translation Agent!**
|
60 |
|
61 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
62 |
|
63 |
+
**🎯 First, select which project you want to translate:**
|
64 |
|
65 |
+
Use the **`Quick Controls`** on the right to select a project, or **ask me `what`, `how`, or `help`** to get started.
|
66 |
"""
|
67 |
|
68 |
|
69 |
+
def process_file_search_handler(project: str, lang: str, k: int, history: list) -> tuple:
|
70 |
"""Process file search request and update Gradio UI components."""
|
71 |
global state
|
72 |
+
state.selected_project = project
|
73 |
state.target_language = lang
|
74 |
state.k_files = k
|
75 |
state.step = "find_files"
|
76 |
|
77 |
+
status_report, files_list = report_translation_target_files(project, lang, k)
|
78 |
state.files_to_translate = (
|
79 |
[file[0] for file in files_list]
|
80 |
if files_list
|
|
|
90 |
"""
|
91 |
|
92 |
if state.files_to_translate:
|
93 |
+
config = get_project_config(state.selected_project)
|
94 |
for i, file in enumerate(state.files_to_translate, 1):
|
95 |
+
file_link = f"{config.repo_url}/blob/main/{file}"
|
96 |
+
response += f"\n{i}. [`{file}`]({file_link})"
|
97 |
|
98 |
# if len(state.files_to_translate) > 5:
|
99 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
|
143 |
p.parent.mkdir(parents=True, exist_ok=True)
|
144 |
p.write_text(translated, encoding="utf-8")
|
145 |
|
146 |
+
config = get_project_config(state.selected_project)
|
147 |
+
original_file_link = f"{config.repo_url}/blob/main/{current_file}"
|
|
|
148 |
print("Compeleted translation:\n")
|
149 |
print(translated)
|
150 |
print("----------------------------")
|
|
|
230 |
|
231 |
def update_status():
|
232 |
if state.step == "welcome":
|
233 |
+
return f"""
|
234 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
235 |
<div><strong>π Step:</strong> Welcome</div>
|
236 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
237 |
<div><strong>π Files:</strong> 0</div>
|
238 |
+
<div><strong>π Language:</strong> {state.target_language}</div>
|
|
|
239 |
</div>
|
240 |
"""
|
241 |
|
|
|
271 |
status_html = f"""
|
272 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
273 |
<div><strong>π Step:</strong> {step_map.get(state.step, state.step)}</div>
|
274 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
275 |
<div><strong>π Files:</strong> {len(state.files_to_translate)}</div>
|
276 |
<div><strong>π Language:</strong> {state.target_language}</div>
|
277 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
|
297 |
if token:
|
298 |
os.environ["GITHUB_TOKEN"] = token
|
299 |
|
300 |
+
# Get default reference PR URL from project config if not provided
|
301 |
+
if not reference_pr_url:
|
302 |
+
config = get_project_config(state.selected_project)
|
303 |
+
reference_pr_url = config.reference_pr_url
|
304 |
+
|
305 |
# Save GitHub configuration to state
|
306 |
state.github_config.update(
|
307 |
{
|
308 |
"token": token,
|
309 |
"owner": owner,
|
310 |
"repo_name": repo,
|
311 |
+
"reference_pr_url": reference_pr_url,
|
|
|
312 |
}
|
313 |
)
|
314 |
|
agent/workflow.py
CHANGED
@@ -26,19 +26,20 @@ from logger.github_logger import GitHubLogger
|
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
29 |
-
translate_lang: str, top_k: int = 1
|
30 |
) -> tuple[str, list[list[str]]]:
|
31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
32 |
|
33 |
Args:
|
|
|
34 |
translate_lang: Target language to translate
|
35 |
top_k: Number of top-first files to return for translation. (Default 1)
|
36 |
"""
|
37 |
# Get files in progress
|
38 |
-
docs_in_progress, pr_info_list = get_github_issue_open_pr(translate_lang)
|
39 |
|
40 |
# Get all available files for translation
|
41 |
-
all_status_report, all_filepath_list = report(translate_lang, top_k * 2) # Get more to account for filtering
|
42 |
|
43 |
# Filter out files that are already in progress
|
44 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
@@ -52,7 +53,7 @@ def report_translation_target_files(
|
|
52 |
if docs_in_progress:
|
53 |
status_report += f"\n\nπ€ Found {len(docs_in_progress)} files in progress for translation:"
|
54 |
for i, file in enumerate(docs_in_progress):
|
55 |
-
status_report += f"\n{i+1}. `{file}
|
56 |
status_report += f"\n\nπ Showing {len(filepath_list)} available files (excluding in-progress):"
|
57 |
|
58 |
return status_report, [[file] for file in filepath_list]
|
|
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
29 |
+
project: str, translate_lang: str, top_k: int = 1
|
30 |
) -> tuple[str, list[list[str]]]:
|
31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
32 |
|
33 |
Args:
|
34 |
+
project: Project to translate (e.g., "transformers", "smolagents")
|
35 |
translate_lang: Target language to translate
|
36 |
top_k: Number of top-first files to return for translation. (Default 1)
|
37 |
"""
|
38 |
# Get files in progress
|
39 |
+
docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang)
|
40 |
|
41 |
# Get all available files for translation
|
42 |
+
all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2) # Get more to account for filtering
|
43 |
|
44 |
# Filter out files that are already in progress
|
45 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
|
53 |
if docs_in_progress:
|
54 |
status_report += f"\n\nπ€ Found {len(docs_in_progress)} files in progress for translation:"
|
55 |
for i, file in enumerate(docs_in_progress):
|
56 |
+
status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
|
57 |
status_report += f"\n\nπ Showing {len(filepath_list)} available files (excluding in-progress):"
|
58 |
|
59 |
return status_report, [[file] for file in filepath_list]
|
app.py
CHANGED
@@ -19,6 +19,7 @@ from agent.handler import (
|
|
19 |
update_github_config,
|
20 |
)
|
21 |
from translator.model import Languages
|
|
|
22 |
|
23 |
load_dotenv()
|
24 |
|
@@ -125,6 +126,11 @@ with gr.Blocks(
|
|
125 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
126 |
with gr.TabItem("1. Find Files", id=0):
|
127 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
128 |
lang_dropdown = gr.Radio(
|
129 |
choices=[language.value for language in Languages],
|
130 |
label="π Translate To",
|
@@ -226,7 +232,7 @@ with gr.Blocks(
|
|
226 |
|
227 |
find_btn.click(
|
228 |
fn=process_file_search_handler,
|
229 |
-
inputs=[lang_dropdown, k_input, chatbot],
|
230 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
231 |
)
|
232 |
|
|
|
19 |
update_github_config,
|
20 |
)
|
21 |
from translator.model import Languages
|
22 |
+
from translator.project_config import get_available_projects
|
23 |
|
24 |
load_dotenv()
|
25 |
|
|
|
126 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
127 |
with gr.TabItem("1. Find Files", id=0):
|
128 |
with gr.Group():
|
129 |
+
project_dropdown = gr.Radio(
|
130 |
+
choices=get_available_projects(),
|
131 |
+
label="🎯 Select Project",
|
132 |
+
value="transformers",
|
133 |
+
)
|
134 |
lang_dropdown = gr.Radio(
|
135 |
choices=[language.value for language in Languages],
|
136 |
label="π Translate To",
|
|
|
232 |
|
233 |
find_btn.click(
|
234 |
fn=process_file_search_handler,
|
235 |
+
inputs=[project_dropdown, lang_dropdown, k_input, chatbot],
|
236 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
237 |
)
|
238 |
|
example.env
CHANGED
@@ -13,6 +13,5 @@ HF_SPACE_NAME=
|
|
13 |
|
14 |
# Secrets for logging to Github
|
15 |
LOG_REPO=
|
16 |
-
LOG_GITHUB_TOKEN=
|
17 |
LOG_BRANCH=
|
18 |
LOG_FILE_PATH=
|
|
|
13 |
|
14 |
# Secrets for logging to Github
|
15 |
LOG_REPO=
|
|
|
16 |
LOG_BRANCH=
|
17 |
LOG_FILE_PATH=
|
logger/github_logger.py
CHANGED
@@ -12,7 +12,7 @@ class GitHubLogger:
|
|
12 |
"""Dedicated logger that appends JSONL entries to a GitHub repo/branch/file.
|
13 |
|
14 |
Env vars:
|
15 |
-
-
|
16 |
- LOG_REPO (format: owner/repo)
|
17 |
- LOG_BRANCH (default: 'log_event')
|
18 |
- LOG_FILE_PATH (default: 'pr_success.log')
|
@@ -21,9 +21,9 @@ class GitHubLogger:
|
|
21 |
def __init__(self):
|
22 |
if not LIBS_OK:
|
23 |
raise ImportError("PyGithub not installed. Please install PyGithub.")
|
24 |
-
token = os.environ.get("
|
25 |
if not token:
|
26 |
-
raise ValueError("Missing
|
27 |
self._client = Github(token)
|
28 |
|
29 |
repo_spec = os.environ.get("LOG_REPO")
|
|
|
12 |
"""Dedicated logger that appends JSONL entries to a GitHub repo/branch/file.
|
13 |
|
14 |
Env vars:
|
15 |
+
- GITHUB_TOKEN
|
16 |
- LOG_REPO (format: owner/repo)
|
17 |
- LOG_BRANCH (default: 'log_event')
|
18 |
- LOG_FILE_PATH (default: 'pr_success.log')
|
|
|
21 |
def __init__(self):
|
22 |
if not LIBS_OK:
|
23 |
raise ImportError("PyGithub not installed. Please install PyGithub.")
|
24 |
+
token = os.environ.get("GITHUB_TOKEN")
|
25 |
if not token:
|
26 |
+
raise ValueError("Missing GITHUB_TOKEN for logging")
|
27 |
self._client = Github(token)
|
28 |
|
29 |
repo_spec = os.environ.get("LOG_REPO")
|
translator/project_config.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Project configuration for different HuggingFace repositories."""
|
2 |
+
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from typing import Dict
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
|
8 |
+
class ProjectConfig:
|
9 |
+
"""Configuration for a specific HuggingFace project."""
|
10 |
+
name: str
|
11 |
+
repo_url: str
|
12 |
+
api_url: str
|
13 |
+
docs_path: str
|
14 |
+
github_issues: Dict[str, str] # language -> issue_id
|
15 |
+
reference_pr_url: str
|
16 |
+
|
17 |
+
|
18 |
+
# Project configurations
|
19 |
+
PROJECTS = {
|
20 |
+
"transformers": ProjectConfig(
|
21 |
+
name="Transformers",
|
22 |
+
repo_url="https://github.com/huggingface/transformers",
|
23 |
+
api_url="https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1",
|
24 |
+
docs_path="docs/source",
|
25 |
+
github_issues={"ko": "20179"},
|
26 |
+
reference_pr_url="https://github.com/huggingface/transformers/pull/24968"
|
27 |
+
),
|
28 |
+
"smolagents": ProjectConfig(
|
29 |
+
name="SmolAgents",
|
30 |
+
repo_url="https://github.com/huggingface/smolagents",
|
31 |
+
api_url="https://api.github.com/repos/huggingface/smolagents/git/trees/main?recursive=1",
|
32 |
+
docs_path="docs/source",
|
33 |
+
github_issues={"ko": "20179"}, # To be filled when issue is created
|
34 |
+
reference_pr_url="https://github.com/huggingface/smolagents/pull/1581" # To be filled with actual PR URL
|
35 |
+
)
|
36 |
+
}
|
37 |
+
|
38 |
+
|
39 |
+
def get_project_config(project_key: str) -> ProjectConfig:
|
40 |
+
"""Get project configuration by key."""
|
41 |
+
if project_key not in PROJECTS:
|
42 |
+
raise ValueError(f"Unknown project: {project_key}. Available: {list(PROJECTS.keys())}")
|
43 |
+
return PROJECTS[project_key]
|
44 |
+
|
45 |
+
|
46 |
+
def get_available_projects() -> list[str]:
|
47 |
+
"""Get list of available project keys."""
|
48 |
+
return list(PROJECTS.keys())
|
translator/retriever.py
CHANGED
@@ -5,15 +5,22 @@ from pathlib import Path
|
|
5 |
import requests
|
6 |
|
7 |
from .model import Languages, Summary, TranslationDoc
|
|
|
8 |
|
9 |
-
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
|
10 |
|
11 |
-
|
12 |
-
def get_github_repo_files():
|
13 |
"""
|
14 |
Get github repo files
|
15 |
"""
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
data = response.json()
|
19 |
all_items = data.get("tree", [])
|
@@ -26,27 +33,33 @@ def get_github_repo_files():
|
|
26 |
return file_paths
|
27 |
|
28 |
|
29 |
-
def get_github_issue_open_pr(lang: str = "ko"):
|
30 |
"""
|
31 |
-
Get open PR in the github issue, filtered by title
|
32 |
"""
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
)
|
39 |
|
40 |
headers = {
|
41 |
"Accept": "application/vnd.github+json",
|
42 |
}
|
43 |
|
|
|
|
|
|
|
|
|
|
|
44 |
all_open_prs = []
|
45 |
page = 1
|
46 |
per_page = 100 # Maximum allowed by GitHub API
|
47 |
|
48 |
while True:
|
49 |
-
|
|
|
50 |
response = requests.get(url, headers=headers)
|
51 |
|
52 |
if response.status_code != 200:
|
@@ -63,17 +76,20 @@ def get_github_issue_open_pr(lang: str = "ko"):
|
|
63 |
if len(page_prs) < per_page:
|
64 |
break
|
65 |
|
66 |
-
filtered_prs = [pr for pr in all_open_prs if
|
67 |
|
68 |
-
|
|
|
69 |
|
70 |
-
filenames = [
|
71 |
-
|
72 |
-
|
73 |
-
if
|
74 |
-
|
|
|
|
|
75 |
pr_info_list = [
|
76 |
-
f"
|
77 |
for pr in filtered_prs
|
78 |
]
|
79 |
return filenames, pr_info_list
|
@@ -99,11 +115,11 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
|
|
99 |
return report, first_missing_docs
|
100 |
|
101 |
|
102 |
-
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
103 |
"""
|
104 |
Generate a report for the translated docs
|
105 |
"""
|
106 |
-
docs_file = get_github_repo_files()
|
107 |
|
108 |
base_docs_path = Path("docs/source")
|
109 |
en_docs_path = Path("docs/source/en")
|
|
|
5 |
import requests
|
6 |
|
7 |
from .model import Languages, Summary, TranslationDoc
|
8 |
+
from .project_config import get_project_config
|
9 |
|
|
|
10 |
|
11 |
+
def get_github_repo_files(project: str = "transformers"):
|
|
|
12 |
"""
|
13 |
Get github repo files
|
14 |
"""
|
15 |
+
config = get_project_config(project)
|
16 |
+
|
17 |
+
# Add GitHub token if available to avoid rate limiting
|
18 |
+
headers = {}
|
19 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
20 |
+
if github_token:
|
21 |
+
headers["Authorization"] = f"token {github_token}"
|
22 |
+
|
23 |
+
response = requests.get(config.api_url, headers=headers)
|
24 |
|
25 |
data = response.json()
|
26 |
all_items = data.get("tree", [])
|
|
|
33 |
return file_paths
|
34 |
|
35 |
|
36 |
+
def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko"):
|
37 |
"""
|
38 |
+
Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
|
39 |
"""
|
40 |
+
config = get_project_config(project)
|
41 |
+
issue_id = config.github_issues.get(lang)
|
42 |
+
|
43 |
+
# For projects without GitHub issue tracking, still search for PRs
|
44 |
+
if not issue_id:
|
45 |
+
raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
|
46 |
|
47 |
headers = {
|
48 |
"Accept": "application/vnd.github+json",
|
49 |
}
|
50 |
|
51 |
+
# Add GitHub token if available to avoid rate limiting
|
52 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
53 |
+
if github_token:
|
54 |
+
headers["Authorization"] = f"token {github_token}"
|
55 |
+
|
56 |
all_open_prs = []
|
57 |
page = 1
|
58 |
per_page = 100 # Maximum allowed by GitHub API
|
59 |
|
60 |
while True:
|
61 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
62 |
+
url = f"https://api.github.com/repos/{repo_path}/pulls?state=open&page={page}&per_page={per_page}"
|
63 |
response = requests.get(url, headers=headers)
|
64 |
|
65 |
if response.status_code != 200:
|
|
|
76 |
if len(page_prs) < per_page:
|
77 |
break
|
78 |
|
79 |
+
filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
|
80 |
|
81 |
+
# Pattern to match both `filename.md` and filename.md formats
|
82 |
+
pattern = re.compile(r"(?:`([^`]+\.md)`|(\w+\.md))")
|
83 |
|
84 |
+
filenames = []
|
85 |
+
for pr in filtered_prs:
|
86 |
+
match = pattern.search(pr["title"])
|
87 |
+
if match:
|
88 |
+
# Use group 1 (with backticks) or group 2 (without backticks)
|
89 |
+
filename = match.group(1) or match.group(2)
|
90 |
+
filenames.append("docs/source/en/" + filename)
|
91 |
pr_info_list = [
|
92 |
+
f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}"
|
93 |
for pr in filtered_prs
|
94 |
]
|
95 |
return filenames, pr_info_list
|
|
|
115 |
return report, first_missing_docs
|
116 |
|
117 |
|
118 |
+
def report(project: str, target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
119 |
"""
|
120 |
Generate a report for the translated docs
|
121 |
"""
|
122 |
+
docs_file = get_github_repo_files(project)
|
123 |
|
124 |
base_docs_path = Path("docs/source")
|
125 |
en_docs_path = Path("docs/source/en")
|