Merge pull request #4 from Jwaminju/part-of-support-smolagent
Browse files- agent/handler.py +236 -65
- agent/toctree_handler.py +13 -4
- agent/workflow.py +71 -31
- app.py +109 -47
- pr_generator/agent.py +17 -12
- translator/content.py +8 -5
- translator/project_config.py +48 -0
- translator/retriever.py +93 -27
agent/handler.py
CHANGED
@@ -13,25 +13,48 @@ from agent.workflow import (
|
|
13 |
)
|
14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
|
|
16 |
|
17 |
|
18 |
# State management
|
19 |
class ChatState:
|
20 |
def __init__(self):
|
21 |
self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
|
|
|
|
|
|
|
22 |
self.target_language = "ko"
|
23 |
self.k_files = 10
|
24 |
self.files_to_translate = []
|
25 |
self.additional_instruction = ""
|
26 |
self.current_file_content = {"translated": ""}
|
27 |
-
self.pr_result = None
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
"
|
32 |
-
"
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
|
37 |
state = ChatState()
|
@@ -53,25 +76,41 @@ def _extract_content_for_display(content: str) -> str:
|
|
53 |
|
54 |
|
55 |
def get_welcome_message():
|
56 |
-
"""Initial welcome message with
|
57 |
return """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**
|
58 |
|
59 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
60 |
|
61 |
-
|
62 |
|
63 |
-
Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.
|
64 |
"""
|
65 |
|
66 |
|
67 |
-
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
68 |
"""Process file search request and update Gradio UI components."""
|
69 |
global state
|
|
|
70 |
state.target_language = lang
|
71 |
state.k_files = k
|
72 |
state.step = "find_files"
|
73 |
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
state.files_to_translate = (
|
76 |
[file[0] for file in files_list]
|
77 |
if files_list
|
@@ -87,8 +126,10 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
87 |
"""
|
88 |
|
89 |
if state.files_to_translate:
|
|
|
90 |
for i, file in enumerate(state.files_to_translate, 1):
|
91 |
-
|
|
|
92 |
|
93 |
# if len(state.files_to_translate) > 5:
|
94 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
@@ -100,14 +141,13 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
100 |
# Add to history
|
101 |
history.append(["Please find files that need translation", response])
|
102 |
cleared_input = ""
|
103 |
-
selected_tab = 1 if state.files_to_translate else 0
|
104 |
|
105 |
# 드롭다운 choices로 쓸 파일 리스트 반환 추가
|
106 |
return (
|
107 |
history,
|
108 |
cleared_input,
|
109 |
update_status(),
|
110 |
-
gr.Tabs(
|
111 |
update_dropdown_choices(state.files_to_translate),
|
112 |
)
|
113 |
|
@@ -116,7 +156,30 @@ def update_dropdown_choices(file_list):
|
|
116 |
return gr.update(choices=file_list, value=None)
|
117 |
|
118 |
|
119 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
"""Start the translation process for the first file"""
|
121 |
if not state.files_to_translate:
|
122 |
return "❌ No files available for translation.", ""
|
@@ -125,8 +188,8 @@ def start_translation_process():
|
|
125 |
|
126 |
# Call translation function (simplified for demo)
|
127 |
try:
|
128 |
-
translated = translate_docs_interactive(
|
129 |
-
state.target_language, [[current_file]], state.additional_instruction
|
130 |
)
|
131 |
|
132 |
state.current_file_content = {"translated": translated}
|
@@ -138,19 +201,22 @@ def start_translation_process():
|
|
138 |
p.parent.mkdir(parents=True, exist_ok=True)
|
139 |
p.write_text(translated, encoding="utf-8")
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
)
|
144 |
print("Compeleted translation:\n")
|
145 |
print(translated)
|
146 |
print("----------------------------")
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
""
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
154 |
return response, translated
|
155 |
|
156 |
|
@@ -188,7 +254,12 @@ Currently available actions with quick controls:
|
|
188 |
else:
|
189 |
return """I understand you want to work on translations!
|
190 |
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
192 |
"""
|
193 |
|
194 |
|
@@ -226,12 +297,12 @@ def handle_user_message(message, history):
|
|
226 |
|
227 |
def update_status():
|
228 |
if state.step == "welcome":
|
229 |
-
return """
|
230 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
231 |
<div><strong>🔄 Step:</strong> Welcome</div>
|
|
|
232 |
<div><strong>📁 Files:</strong> 0</div>
|
233 |
-
<div><strong>🌍 Language:</strong>
|
234 |
-
<div><strong>⏳ Progress:</strong> Ready</div>
|
235 |
</div>
|
236 |
"""
|
237 |
|
@@ -267,6 +338,7 @@ def update_status():
|
|
267 |
status_html = f"""
|
268 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
269 |
<div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
|
|
|
270 |
<div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
|
271 |
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
272 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
@@ -284,26 +356,66 @@ def sync_language_displays(lang):
|
|
284 |
return lang
|
285 |
|
286 |
|
287 |
-
def
|
288 |
-
"""Update
|
289 |
global state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
|
291 |
-
# Set GitHub token in environment variables
|
292 |
-
if token:
|
293 |
-
os.environ["GITHUB_TOKEN"] = token
|
294 |
-
|
295 |
-
# Save GitHub configuration to state
|
296 |
-
state.github_config.update(
|
297 |
-
{
|
298 |
-
"token": token,
|
299 |
-
"owner": owner,
|
300 |
-
"repo_name": repo,
|
301 |
-
"reference_pr_url": reference_pr_url
|
302 |
-
or state.github_config["reference_pr_url"],
|
303 |
-
}
|
304 |
-
)
|
305 |
|
306 |
-
|
|
|
|
|
307 |
|
308 |
|
309 |
def update_prompt_preview(language, file_path, additional_instruction):
|
@@ -319,7 +431,7 @@ def update_prompt_preview(language, file_path, additional_instruction):
|
|
319 |
translation_lang = language
|
320 |
|
321 |
# Get sample content (first 500 characters)
|
322 |
-
content = get_content(file_path)
|
323 |
to_translate = preprocess_content(content)
|
324 |
|
325 |
# Truncate for preview
|
@@ -330,7 +442,10 @@ def update_prompt_preview(language, file_path, additional_instruction):
|
|
330 |
|
331 |
return prompt
|
332 |
except Exception as e:
|
333 |
-
|
|
|
|
|
|
|
334 |
|
335 |
|
336 |
def send_message(message, history):
|
@@ -339,14 +454,39 @@ def send_message(message, history):
|
|
339 |
|
340 |
|
341 |
# Button handlers with tab switching
|
342 |
-
def start_translate_handler(history,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
state.additional_instruction = additional_instruction
|
346 |
state.files_to_translate = [file_to_translate]
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
|
352 |
def approve_handler(history, owner, repo, reference_pr_url):
|
@@ -354,18 +494,34 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
354 |
global state
|
355 |
state.step = "create_github_pr"
|
356 |
|
357 |
-
#
|
358 |
-
state.
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
history.append(["GitHub PR creation request", response])
|
367 |
return history, "", update_status()
|
368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
# If reference PR is not provided, use the agent to find one
|
370 |
if not github_config.get("reference_pr_url"):
|
371 |
response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
|
@@ -421,6 +577,7 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
421 |
translated_content=translated_content,
|
422 |
github_config=state.github_config,
|
423 |
en_title=file_name,
|
|
|
424 |
)
|
425 |
response += f"\n{pr_response}"
|
426 |
else:
|
@@ -431,9 +588,23 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
431 |
|
432 |
|
433 |
def restart_handler(history):
|
434 |
-
"""Resets the state
|
435 |
global state
|
|
|
|
|
|
|
|
|
436 |
state = ChatState()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
welcome_msg = get_welcome_message()
|
438 |
new_hist = [[None, welcome_msg]]
|
439 |
return new_hist, "", update_status(), gr.Tabs(selected=0)
|
|
|
13 |
)
|
14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
16 |
+
from translator.project_config import get_available_projects, get_project_config
|
17 |
|
18 |
|
19 |
# State management
|
20 |
class ChatState:
|
21 |
def __init__(self):
|
22 |
self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
|
23 |
+
|
24 |
+
# Transient state (reset on restart)
|
25 |
+
self.selected_project = "transformers"
|
26 |
self.target_language = "ko"
|
27 |
self.k_files = 10
|
28 |
self.files_to_translate = []
|
29 |
self.additional_instruction = ""
|
30 |
self.current_file_content = {"translated": ""}
|
31 |
+
self.pr_result = None
|
32 |
+
|
33 |
+
# Persistent settings (preserved across restarts)
|
34 |
+
self.persistent_settings = {
|
35 |
+
"anthropic_api_key": "",
|
36 |
+
"github_config": {
|
37 |
+
"token": "",
|
38 |
+
"owner": "",
|
39 |
+
"repo_name": "",
|
40 |
+
"reference_pr_url": "",
|
41 |
+
}
|
42 |
}
|
43 |
+
|
44 |
+
def reset_transient_state(self):
|
45 |
+
"""Reset only the workflow state, keep persistent settings"""
|
46 |
+
self.step = "welcome"
|
47 |
+
self.selected_project = "transformers"
|
48 |
+
self.target_language = "ko"
|
49 |
+
self.k_files = 10
|
50 |
+
self.files_to_translate = []
|
51 |
+
self.additional_instruction = ""
|
52 |
+
self.current_file_content = {"translated": ""}
|
53 |
+
self.pr_result = None
|
54 |
+
|
55 |
+
@property
|
56 |
+
def github_config(self):
|
57 |
+
return self.persistent_settings["github_config"]
|
58 |
|
59 |
|
60 |
state = ChatState()
|
|
|
76 |
|
77 |
|
78 |
def get_welcome_message():
|
79 |
+
"""Initial welcome message with project selection"""
|
80 |
return """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**
|
81 |
|
82 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
83 |
|
84 |
+
**🎯 First, select which project you want to translate:**
|
85 |
|
86 |
+
Use the **`Quick Controls`** on the right to select a project, or **ask me `what`, `how`, or `help`** to get started.
|
87 |
"""
|
88 |
|
89 |
|
90 |
+
def process_file_search_handler(project: str, lang: str, k: int, history: list) -> tuple:
|
91 |
"""Process file search request and update Gradio UI components."""
|
92 |
global state
|
93 |
+
state.selected_project = project
|
94 |
state.target_language = lang
|
95 |
state.k_files = k
|
96 |
state.step = "find_files"
|
97 |
|
98 |
+
try:
|
99 |
+
status_report, files_list = report_translation_target_files(project, lang, k)
|
100 |
+
except Exception as e:
|
101 |
+
if "rate limit" in str(e).lower():
|
102 |
+
response = f"""❌ **GitHub API Rate Limit Exceeded**
|
103 |
+
|
104 |
+
{str(e)}
|
105 |
+
|
106 |
+
**💡 To fix this:**
|
107 |
+
1. Set GitHub Token in Configuration panel above
|
108 |
+
2. Click "💾 Save Configuration"
|
109 |
+
3. Try "Find Files" again"""
|
110 |
+
history.append(["File search request", response])
|
111 |
+
return history, "", update_status(), gr.Tabs(selected=0), gr.update(choices=[]), gr.update(visible=False)
|
112 |
+
else:
|
113 |
+
raise # Re-raise non-rate-limit errors
|
114 |
state.files_to_translate = (
|
115 |
[file[0] for file in files_list]
|
116 |
if files_list
|
|
|
126 |
"""
|
127 |
|
128 |
if state.files_to_translate:
|
129 |
+
config = get_project_config(state.selected_project)
|
130 |
for i, file in enumerate(state.files_to_translate, 1):
|
131 |
+
file_link = f"{config.repo_url}/blob/main/{file}"
|
132 |
+
response += f"\n{i}. [`{file}`]({file_link})"
|
133 |
|
134 |
# if len(state.files_to_translate) > 5:
|
135 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
|
141 |
# Add to history
|
142 |
history.append(["Please find files that need translation", response])
|
143 |
cleared_input = ""
|
|
|
144 |
|
145 |
# 드롭다운 choices로 쓸 파일 리스트 반환 추가
|
146 |
return (
|
147 |
history,
|
148 |
cleared_input,
|
149 |
update_status(),
|
150 |
+
gr.Tabs(), # Don't change tab
|
151 |
update_dropdown_choices(state.files_to_translate),
|
152 |
)
|
153 |
|
|
|
156 |
return gr.update(choices=file_list, value=None)
|
157 |
|
158 |
|
159 |
+
def confirm_and_go_translate_handler(history):
|
160 |
+
"""Confirm selection and go to translate tab"""
|
161 |
+
global state
|
162 |
+
|
163 |
+
response = f"✅ **Selection confirmed!**\n\n🎯 **Project:** {state.selected_project}\n🌍 **Language:** {state.target_language}\n\n**➡️ Go to Tab 2 to start translation.**"
|
164 |
+
history.append(["Confirm selection", response])
|
165 |
+
return history, "", update_status(), gr.Tabs(selected=1)
|
166 |
+
|
167 |
+
|
168 |
+
def confirm_translation_and_go_upload_handler(history):
|
169 |
+
"""Confirm translation and go to upload PR tab"""
|
170 |
+
global state
|
171 |
+
|
172 |
+
if not state.current_file_content.get("translated"):
|
173 |
+
response = "❌ No translation available. Please complete translation first."
|
174 |
+
history.append(["Upload PR request", response])
|
175 |
+
return history, "", update_status(), gr.Tabs()
|
176 |
+
|
177 |
+
response = f"✅ **Translation confirmed!**\n\n📄 **File:** `{state.files_to_translate[0] if state.files_to_translate else 'Unknown'}`\n\n**➡️ Go to Tab 3 to upload PR.**"
|
178 |
+
history.append(["Upload PR request", response])
|
179 |
+
return history, "", update_status(), gr.Tabs(selected=2)
|
180 |
+
|
181 |
+
|
182 |
+
def start_translation_process(force_retranslate=False):
|
183 |
"""Start the translation process for the first file"""
|
184 |
if not state.files_to_translate:
|
185 |
return "❌ No files available for translation.", ""
|
|
|
188 |
|
189 |
# Call translation function (simplified for demo)
|
190 |
try:
|
191 |
+
status, translated = translate_docs_interactive(
|
192 |
+
state.target_language, [[current_file]], state.additional_instruction, state.selected_project, force_retranslate
|
193 |
)
|
194 |
|
195 |
state.current_file_content = {"translated": translated}
|
|
|
201 |
p.parent.mkdir(parents=True, exist_ok=True)
|
202 |
p.write_text(translated, encoding="utf-8")
|
203 |
|
204 |
+
config = get_project_config(state.selected_project)
|
205 |
+
original_file_link = f"{config.repo_url}/blob/main/{current_file}"
|
|
|
206 |
print("Compeleted translation:\n")
|
207 |
print(translated)
|
208 |
print("----------------------------")
|
209 |
+
|
210 |
+
# Different response format for existing vs new translation
|
211 |
+
if isinstance(status, str) and "Existing translation loaded" in status:
|
212 |
+
response = f"{status}\n**📄 Original Content Link:** {original_file_link}\n\n**🌐 Translated Content:**"
|
213 |
+
else:
|
214 |
+
response = (
|
215 |
+
f"""🔄 Translation for: `{current_file}`\n"""
|
216 |
+
f"**📄 Original Content Link:** {original_file_link}\n\n"
|
217 |
+
f"{status}\n\n"
|
218 |
+
"**🌐 Translated Content:**"
|
219 |
+
)
|
220 |
return response, translated
|
221 |
|
222 |
|
|
|
254 |
else:
|
255 |
return """I understand you want to work on translations!
|
256 |
|
257 |
+
**Two ways to get started:**
|
258 |
+
|
259 |
+
1. **🔍 Find Files first** - Use Tab 1 to discover files that need translation
|
260 |
+
2. **🚀 Direct Translation** - Go to Tab 2 and enter a file path directly (e.g., `docs/source/en/model_doc/bert.md`)
|
261 |
+
|
262 |
+
Make sure to configure your API keys in the Configuration panel above.
|
263 |
"""
|
264 |
|
265 |
|
|
|
297 |
|
298 |
def update_status():
|
299 |
if state.step == "welcome":
|
300 |
+
return f"""
|
301 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
302 |
<div><strong>🔄 Step:</strong> Welcome</div>
|
303 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
304 |
<div><strong>📁 Files:</strong> 0</div>
|
305 |
+
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
|
|
306 |
</div>
|
307 |
"""
|
308 |
|
|
|
338 |
status_html = f"""
|
339 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
340 |
<div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
|
341 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
342 |
<div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
|
343 |
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
344 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
|
356 |
return lang
|
357 |
|
358 |
|
359 |
+
def update_project_selection(project, history):
|
360 |
+
"""Update state when project is selected"""
|
361 |
global state
|
362 |
+
state.selected_project = project
|
363 |
+
response = f"Selection confirmed: 🎯 Project → **{project}**"
|
364 |
+
history.append(["Project selection", response])
|
365 |
+
return history, "", update_status()
|
366 |
+
|
367 |
+
|
368 |
+
def update_language_selection(lang, history):
|
369 |
+
"""Update state when language is selected"""
|
370 |
+
global state
|
371 |
+
state.target_language = lang
|
372 |
+
response = f"Selection confirmed: 🌍 Language → **{lang}**"
|
373 |
+
history.append(["Language selection", response])
|
374 |
+
return history, "", update_status(), lang
|
375 |
+
|
376 |
+
|
377 |
+
def update_persistent_config(anthropic_key, github_token, github_owner, github_repo, reference_pr_url, history):
|
378 |
+
"""Update persistent configuration settings."""
|
379 |
+
global state
|
380 |
+
|
381 |
+
# Update API keys
|
382 |
+
if anthropic_key:
|
383 |
+
state.persistent_settings["anthropic_api_key"] = anthropic_key
|
384 |
+
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
385 |
+
|
386 |
+
if github_token:
|
387 |
+
os.environ["GITHUB_TOKEN"] = github_token
|
388 |
+
|
389 |
+
# Get default reference PR URL from project config if not provided
|
390 |
+
if not reference_pr_url and state.selected_project:
|
391 |
+
try:
|
392 |
+
config = get_project_config(state.selected_project)
|
393 |
+
reference_pr_url = config.reference_pr_url
|
394 |
+
except:
|
395 |
+
pass
|
396 |
+
|
397 |
+
# Save GitHub configuration to persistent settings
|
398 |
+
state.persistent_settings["github_config"].update({
|
399 |
+
"token": github_token or "",
|
400 |
+
"owner": github_owner or "",
|
401 |
+
"repo_name": github_repo or "",
|
402 |
+
"reference_pr_url": reference_pr_url or "",
|
403 |
+
})
|
404 |
+
|
405 |
+
# Build response message based on what was configured
|
406 |
+
response = "✅ Configuration saved!"
|
407 |
+
if github_owner and github_repo:
|
408 |
+
response += f" GitHub: {github_owner}/{github_repo}"
|
409 |
+
elif anthropic_key:
|
410 |
+
response += " Anthropic API key updated."
|
411 |
+
|
412 |
+
history.append(["Configuration update", response])
|
413 |
+
return history, "", update_status()
|
414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
+
def update_github_config(token, owner, repo, reference_pr_url):
|
417 |
+
"""Legacy function for backward compatibility."""
|
418 |
+
return update_persistent_config("", token, owner, repo, reference_pr_url)
|
419 |
|
420 |
|
421 |
def update_prompt_preview(language, file_path, additional_instruction):
|
|
|
431 |
translation_lang = language
|
432 |
|
433 |
# Get sample content (first 500 characters)
|
434 |
+
content = get_content(file_path, state.selected_project)
|
435 |
to_translate = preprocess_content(content)
|
436 |
|
437 |
# Truncate for preview
|
|
|
442 |
|
443 |
return prompt
|
444 |
except Exception as e:
|
445 |
+
error_str = str(e)
|
446 |
+
if "Failed to retrieve content from the URL" in error_str:
|
447 |
+
return f"❌ **File not found:** `{file_path}`\n\n💡 **Please check:**\n1. Is this file in the **{state.selected_project}** project?\n2. Use \"🔍 Find Files to Translate\" to see available files\n3. Verify the file path is correct"
|
448 |
+
return f"Error generating prompt preview: {error_str}"
|
449 |
|
450 |
|
451 |
def send_message(message, history):
|
|
|
454 |
|
455 |
|
456 |
# Button handlers with tab switching
|
457 |
+
def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
|
458 |
+
# Use persistent anthropic key
|
459 |
+
anthropic_key = state.persistent_settings["anthropic_api_key"]
|
460 |
+
if not anthropic_key:
|
461 |
+
response = "❌ Please set Anthropic API key in Configuration panel first."
|
462 |
+
history.append(["Translation request", response])
|
463 |
+
return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
|
464 |
+
|
465 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
466 |
|
467 |
+
# Check if file path is provided
|
468 |
+
if not file_to_translate or not file_to_translate.strip():
|
469 |
+
response = "❌ Please select a file from the dropdown or enter a file path to translate."
|
470 |
+
history.append(["Translation request", response])
|
471 |
+
return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
|
472 |
+
|
473 |
state.additional_instruction = additional_instruction
|
474 |
state.files_to_translate = [file_to_translate]
|
475 |
+
state.step = "translate"
|
476 |
+
|
477 |
+
# Start translation directly
|
478 |
+
if force_retranslate:
|
479 |
+
history.append(["Translation request", "🔄 **Force retranslation started...**"])
|
480 |
+
response, translated = start_translation_process(force_retranslate)
|
481 |
+
history.append(["", response])
|
482 |
+
if translated:
|
483 |
+
history.append(["", translated])
|
484 |
+
|
485 |
+
# Update button text and show confirm button after translation
|
486 |
+
start_btn_text = "🔄 Retranslation" if state.current_file_content["translated"] else "🚀 Start Translation"
|
487 |
+
confirm_btn_visible = bool(state.current_file_content["translated"])
|
488 |
+
|
489 |
+
return history, "", update_status(), gr.Tabs(), gr.update(value=start_btn_text), gr.update(visible=confirm_btn_visible)
|
490 |
|
491 |
|
492 |
def approve_handler(history, owner, repo, reference_pr_url):
|
|
|
494 |
global state
|
495 |
state.step = "create_github_pr"
|
496 |
|
497 |
+
# Check all required GitHub configuration at once
|
498 |
+
github_config = state.persistent_settings["github_config"]
|
499 |
+
missing_config = []
|
500 |
+
|
501 |
+
if not github_config.get("token"):
|
502 |
+
missing_config.append("GitHub Token")
|
503 |
+
if not owner:
|
504 |
+
missing_config.append("GitHub Owner")
|
505 |
+
if not repo:
|
506 |
+
missing_config.append("Repository Name")
|
507 |
+
|
508 |
+
if missing_config:
|
509 |
+
config = get_project_config(state.selected_project)
|
510 |
+
repo_name = config.repo_url.split('/')[-1] # Extract repo name from URL
|
511 |
+
response = f"❌ Please set the following in Configuration panel first: {', '.join(missing_config)}\n\n💡 **Note:** GitHub Owner/Repository should be your fork of [`{repo_name}`]({config.repo_url}) (e.g., Owner: `your-username`, Repository: `{repo_name}`)"
|
512 |
history.append(["GitHub PR creation request", response])
|
513 |
return history, "", update_status()
|
514 |
|
515 |
+
# Update reference PR URL (can be set per PR)
|
516 |
+
if reference_pr_url:
|
517 |
+
state.persistent_settings["github_config"]["reference_pr_url"] = reference_pr_url
|
518 |
+
|
519 |
+
# Use persistent settings
|
520 |
+
github_config = state.persistent_settings["github_config"]
|
521 |
+
|
522 |
+
# Initialize response variable
|
523 |
+
response = ""
|
524 |
+
|
525 |
# If reference PR is not provided, use the agent to find one
|
526 |
if not github_config.get("reference_pr_url"):
|
527 |
response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
|
|
|
577 |
translated_content=translated_content,
|
578 |
github_config=state.github_config,
|
579 |
en_title=file_name,
|
580 |
+
project=state.selected_project,
|
581 |
)
|
582 |
response += f"\n{pr_response}"
|
583 |
else:
|
|
|
588 |
|
589 |
|
590 |
def restart_handler(history):
|
591 |
+
"""Resets the workflow state but preserves persistent settings."""
|
592 |
global state
|
593 |
+
# Backup persistent settings
|
594 |
+
backup_settings = state.persistent_settings.copy()
|
595 |
+
|
596 |
+
# Reset state
|
597 |
state = ChatState()
|
598 |
+
|
599 |
+
# Restore persistent settings
|
600 |
+
state.persistent_settings = backup_settings
|
601 |
+
|
602 |
+
# Restore environment variables
|
603 |
+
if backup_settings["anthropic_api_key"]:
|
604 |
+
os.environ["ANTHROPIC_API_KEY"] = backup_settings["anthropic_api_key"]
|
605 |
+
if backup_settings["github_config"]["token"]:
|
606 |
+
os.environ["GITHUB_TOKEN"] = backup_settings["github_config"]["token"]
|
607 |
+
|
608 |
welcome_msg = get_welcome_message()
|
609 |
new_hist = [[None, welcome_msg]]
|
610 |
return new_hist, "", update_status(), gr.Tabs(selected=0)
|
agent/toctree_handler.py
CHANGED
@@ -4,9 +4,17 @@ from typing import Dict, List, Any
|
|
4 |
import os
|
5 |
|
6 |
class TocTreeHandler:
|
7 |
-
def __init__(self):
|
8 |
-
|
9 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
self.local_docs_path = "docs/source/ko"
|
11 |
|
12 |
def fetch_toctree(self, url: str) -> Dict[str, Any]:
|
@@ -245,7 +253,8 @@ Korean title:"""
|
|
245 |
translation_result: dict,
|
246 |
filepath: str,
|
247 |
pr_agent,
|
248 |
-
github_config: dict
|
|
|
249 |
) -> dict:
|
250 |
"""Update toctree after successful translation PR.
|
251 |
|
|
|
4 |
import os
|
5 |
|
6 |
class TocTreeHandler:
|
7 |
+
def __init__(self, project: str = "transformers"):
|
8 |
+
from translator.project_config import get_project_config
|
9 |
+
self.project = project
|
10 |
+
self.project_config = get_project_config(project)
|
11 |
+
|
12 |
+
# Extract repository path from config
|
13 |
+
repo_path = self.project_config.repo_url.replace("https://github.com/", "")
|
14 |
+
|
15 |
+
# Build project-specific URLs
|
16 |
+
self.en_toctree_url = f"https://raw.githubusercontent.com/{repo_path}/main/docs/source/en/_toctree.yml"
|
17 |
+
self.ko_toctree_url = f"https://raw.githubusercontent.com/{repo_path}/main/docs/source/ko/_toctree.yml"
|
18 |
self.local_docs_path = "docs/source/ko"
|
19 |
|
20 |
def fetch_toctree(self, url: str) -> Dict[str, Any]:
|
|
|
253 |
translation_result: dict,
|
254 |
filepath: str,
|
255 |
pr_agent,
|
256 |
+
github_config: dict,
|
257 |
+
project: str = "transformers"
|
258 |
) -> dict:
|
259 |
"""Update toctree after successful translation PR.
|
260 |
|
agent/workflow.py
CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
|
|
11 |
llm_translate,
|
12 |
preprocess_content,
|
13 |
)
|
14 |
-
from translator.retriever import report, get_github_issue_open_pr
|
15 |
# GitHub PR Agent import
|
16 |
try:
|
17 |
from pr_generator.agent import GitHubPRAgent
|
@@ -26,19 +26,23 @@ from logger.github_logger import GitHubLogger
|
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
29 |
-
translate_lang: str, top_k: int = 1
|
30 |
) -> tuple[str, list[list[str]]]:
|
31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
32 |
|
33 |
Args:
|
|
|
34 |
translate_lang: Target language to translate
|
35 |
top_k: Number of top-first files to return for translation. (Default 1)
|
36 |
"""
|
37 |
-
# Get files
|
38 |
-
|
39 |
-
|
40 |
-
# Get all available files for translation
|
41 |
-
all_status_report, all_filepath_list = report(translate_lang, top_k * 2) # Get more to account for filtering
|
|
|
|
|
|
|
42 |
|
43 |
# Filter out files that are already in progress
|
44 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
@@ -52,29 +56,30 @@ def report_translation_target_files(
|
|
52 |
if docs_in_progress:
|
53 |
status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
|
54 |
for i, file in enumerate(docs_in_progress):
|
55 |
-
status_report += f"\n{i+1}. `{file}
|
56 |
status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
|
57 |
|
58 |
return status_report, [[file] for file in filepath_list]
|
59 |
|
60 |
|
61 |
-
def translate_docs(lang: str, file_path: str, additional_instruction: str = "") -> tuple[str, str]:
|
62 |
"""Translate documentation."""
|
63 |
-
# Check if translation already exists
|
64 |
translation_file_path = (
|
65 |
Path(__file__).resolve().parent.parent
|
66 |
/ f"translation_result/{file_path}"
|
67 |
)
|
68 |
|
69 |
-
if translation_file_path.exists():
|
70 |
print(f"📄 Found existing translation: {translation_file_path}")
|
71 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
72 |
existing_content = f.read()
|
73 |
if existing_content.strip():
|
74 |
-
|
|
|
75 |
|
76 |
# step 1. Get content from file path
|
77 |
-
content = get_content(file_path)
|
78 |
to_translate = preprocess_content(content)
|
79 |
|
80 |
# step 2. Prepare prompt with docs content
|
@@ -97,7 +102,7 @@ def translate_docs(lang: str, file_path: str, additional_instruction: str = "")
|
|
97 |
|
98 |
|
99 |
def translate_docs_interactive(
|
100 |
-
translate_lang: str, selected_files: list[list[str]], additional_instruction: str = ""
|
101 |
) -> tuple[str, str]:
|
102 |
"""Interactive translation function that processes files one by one.
|
103 |
|
@@ -111,14 +116,22 @@ def translate_docs_interactive(
|
|
111 |
# Start with the first file
|
112 |
current_file = file_paths[0]
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
print(callback_result)
|
119 |
print(status)
|
120 |
|
121 |
-
return translated_content
|
122 |
|
123 |
|
124 |
def generate_github_pr(
|
@@ -127,6 +140,7 @@ def generate_github_pr(
|
|
127 |
translated_content: str = None,
|
128 |
github_config: dict = None,
|
129 |
en_title: str = None,
|
|
|
130 |
) -> str:
|
131 |
"""Generate a GitHub PR for translated documentation.
|
132 |
|
@@ -144,7 +158,7 @@ def generate_github_pr(
|
|
144 |
return "❌ GitHub PR Agent is not available. Please install required libraries."
|
145 |
|
146 |
if not github_config:
|
147 |
-
return "❌ GitHub configuration not provided."
|
148 |
|
149 |
# Validate required configuration
|
150 |
required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
|
@@ -153,7 +167,7 @@ def generate_github_pr(
|
|
153 |
]
|
154 |
|
155 |
if missing_fields:
|
156 |
-
return f"❌ Missing required configuration: {', '.join(missing_fields)}
|
157 |
|
158 |
# Set token in environment for the agent.
|
159 |
os.environ["GITHUB_TOKEN"] = github_config["token"]
|
@@ -166,29 +180,39 @@ def generate_github_pr(
|
|
166 |
/ f"translation_result/{filepath}"
|
167 |
)
|
168 |
if not translation_file_path.exists():
|
169 |
-
return f"❌ Translation file not found: {translation_file_path}"
|
170 |
|
171 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
172 |
translated_content = f.read()
|
173 |
|
174 |
if not translated_content or not translated_content.strip():
|
175 |
-
return "❌ Translated content is empty."
|
176 |
|
177 |
# Execute GitHub PR Agent
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
print(f"🚀 Starting GitHub PR creation...")
|
179 |
print(f" 📁 File: {filepath}")
|
180 |
print(f" 🌍 Language: {target_language}")
|
181 |
print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
|
182 |
-
print(f" 🏠
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
result = agent.run_translation_pr_workflow(
|
186 |
reference_pr_url=github_config["reference_pr_url"],
|
187 |
target_language=target_language,
|
188 |
filepath=filepath,
|
189 |
translated_doc=translated_content,
|
190 |
-
owner=github_config["owner"],
|
191 |
-
repo_name=github_config["repo_name"],
|
192 |
base_branch=github_config.get("base_branch", "main"),
|
193 |
)
|
194 |
# TEST CODE
|
@@ -202,9 +226,9 @@ def generate_github_pr(
|
|
202 |
toctree_result = None
|
203 |
if en_title:
|
204 |
from agent.toctree_handler import TocTreeHandler
|
205 |
-
toctree_handler = TocTreeHandler()
|
206 |
toctree_result = toctree_handler.update_toctree_after_translation(
|
207 |
-
result, filepath, agent, github_config
|
208 |
)
|
209 |
|
210 |
# Process result
|
@@ -248,13 +272,29 @@ def generate_github_pr(
|
|
248 |
{result.get("error_details", "Unknown error")}"""
|
249 |
|
250 |
else:
|
|
|
251 |
return f"""❌ **GitHub PR Creation Failed**
|
252 |
|
253 |
**Error Message:**
|
254 |
-
{result["message"]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
except Exception as e:
|
257 |
-
error_msg = f"❌ Unexpected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
print(error_msg)
|
259 |
return error_msg
|
260 |
|
|
|
11 |
llm_translate,
|
12 |
preprocess_content,
|
13 |
)
|
14 |
+
from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files
|
15 |
# GitHub PR Agent import
|
16 |
try:
|
17 |
from pr_generator.agent import GitHubPRAgent
|
|
|
26 |
|
27 |
|
28 |
def report_translation_target_files(
|
29 |
+
project: str, translate_lang: str, top_k: int = 1
|
30 |
) -> tuple[str, list[list[str]]]:
|
31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
32 |
|
33 |
Args:
|
34 |
+
project: Project to translate (e.g., "transformers", "smolagents")
|
35 |
translate_lang: Target language to translate
|
36 |
top_k: Number of top-first files to return for translation. (Default 1)
|
37 |
"""
|
38 |
+
# Get repo files once to avoid duplicate API calls
|
39 |
+
all_repo_files = get_github_repo_files(project)
|
40 |
+
|
41 |
+
# Get all available files for translation using the file list
|
42 |
+
all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2, all_repo_files) # Get more to account for filtering
|
43 |
+
|
44 |
+
# Get files in progress using the same file list
|
45 |
+
docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, all_repo_files)
|
46 |
|
47 |
# Filter out files that are already in progress
|
48 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
|
56 |
if docs_in_progress:
|
57 |
status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
|
58 |
for i, file in enumerate(docs_in_progress):
|
59 |
+
status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
|
60 |
status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
|
61 |
|
62 |
return status_report, [[file] for file in filepath_list]
|
63 |
|
64 |
|
65 |
+
def translate_docs(lang: str, file_path: str, additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False) -> tuple[str, str]:
|
66 |
"""Translate documentation."""
|
67 |
+
# Check if translation already exists (unless force retranslate is enabled)
|
68 |
translation_file_path = (
|
69 |
Path(__file__).resolve().parent.parent
|
70 |
/ f"translation_result/{file_path}"
|
71 |
)
|
72 |
|
73 |
+
if not force_retranslate and translation_file_path.exists():
|
74 |
print(f"📄 Found existing translation: {translation_file_path}")
|
75 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
76 |
existing_content = f.read()
|
77 |
if existing_content.strip():
|
78 |
+
existing_msg = f"♻️ **Existing translation loaded** (no tokens used)\n📁 **File:** `{file_path}`\n📅 **Loaded from:** `{translation_file_path}`\n💡 **To retranslate:** Check 'Force Retranslate' option."
|
79 |
+
return existing_msg, existing_content
|
80 |
|
81 |
# step 1. Get content from file path
|
82 |
+
content = get_content(file_path, project)
|
83 |
to_translate = preprocess_content(content)
|
84 |
|
85 |
# step 2. Prepare prompt with docs content
|
|
|
102 |
|
103 |
|
104 |
def translate_docs_interactive(
|
105 |
+
translate_lang: str, selected_files: list[list[str]], additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False
|
106 |
) -> tuple[str, str]:
|
107 |
"""Interactive translation function that processes files one by one.
|
108 |
|
|
|
116 |
# Start with the first file
|
117 |
current_file = file_paths[0]
|
118 |
|
119 |
+
callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction, project, force_retranslate)
|
120 |
+
|
121 |
+
# Check if existing translation was loaded
|
122 |
+
if isinstance(callback_result, str) and "Existing translation loaded" in callback_result:
|
123 |
+
status = callback_result # Use the existing translation message
|
124 |
+
else:
|
125 |
+
if force_retranslate:
|
126 |
+
status = f"🔄 **Force Retranslation completed**: `{current_file}` → `{translate_lang}`\n\n"
|
127 |
+
else:
|
128 |
+
status = f"✅ Translation completed: `{current_file}` → `{translate_lang}`\n\n"
|
129 |
+
status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
|
130 |
|
131 |
print(callback_result)
|
132 |
print(status)
|
133 |
|
134 |
+
return status, translated_content
|
135 |
|
136 |
|
137 |
def generate_github_pr(
|
|
|
140 |
translated_content: str = None,
|
141 |
github_config: dict = None,
|
142 |
en_title: str = None,
|
143 |
+
project: str = "transformers",
|
144 |
) -> str:
|
145 |
"""Generate a GitHub PR for translated documentation.
|
146 |
|
|
|
158 |
return "❌ GitHub PR Agent is not available. Please install required libraries."
|
159 |
|
160 |
if not github_config:
|
161 |
+
return "❌ GitHub configuration not provided. Please set up GitHub token, owner, and repository in Configuration panel."
|
162 |
|
163 |
# Validate required configuration
|
164 |
required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
|
|
|
167 |
]
|
168 |
|
169 |
if missing_fields:
|
170 |
+
return f"❌ Missing required GitHub configuration: {', '.join(missing_fields)}\n\n💡 Go to Configuration panel and set:\n" + "\n".join([f" • {field}" for field in missing_fields])
|
171 |
|
172 |
# Set token in environment for the agent.
|
173 |
os.environ["GITHUB_TOKEN"] = github_config["token"]
|
|
|
180 |
/ f"translation_result/{filepath}"
|
181 |
)
|
182 |
if not translation_file_path.exists():
|
183 |
+
return f"❌ Translation file not found: {translation_file_path}\n\n💡 Please complete translation first in Tab 2 for file: {filepath}"
|
184 |
|
185 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
186 |
translated_content = f.read()
|
187 |
|
188 |
if not translated_content or not translated_content.strip():
|
189 |
+
return f"❌ Translated content is empty for file: {filepath}\n\n💡 Please complete translation first in Tab 2."
|
190 |
|
191 |
# Execute GitHub PR Agent
|
192 |
+
# Get base repository from project config
|
193 |
+
from translator.project_config import get_project_config
|
194 |
+
project_config = get_project_config(project)
|
195 |
+
base_repo_path = project_config.repo_url.replace("https://github.com/", "")
|
196 |
+
base_owner, base_repo = base_repo_path.split("/")
|
197 |
+
|
198 |
print(f"🚀 Starting GitHub PR creation...")
|
199 |
print(f" 📁 File: {filepath}")
|
200 |
print(f" 🌍 Language: {target_language}")
|
201 |
print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
|
202 |
+
print(f" 🏠 User Fork: {github_config['owner']}/{github_config['repo_name']}")
|
203 |
+
print(f" 🎯 Base Repository: {base_owner}/{base_repo}")
|
204 |
+
|
205 |
+
agent = GitHubPRAgent(
|
206 |
+
user_owner=github_config["owner"],
|
207 |
+
user_repo=github_config["repo_name"],
|
208 |
+
base_owner=base_owner,
|
209 |
+
base_repo=base_repo,
|
210 |
+
)
|
211 |
result = agent.run_translation_pr_workflow(
|
212 |
reference_pr_url=github_config["reference_pr_url"],
|
213 |
target_language=target_language,
|
214 |
filepath=filepath,
|
215 |
translated_doc=translated_content,
|
|
|
|
|
216 |
base_branch=github_config.get("base_branch", "main"),
|
217 |
)
|
218 |
# TEST CODE
|
|
|
226 |
toctree_result = None
|
227 |
if en_title:
|
228 |
from agent.toctree_handler import TocTreeHandler
|
229 |
+
toctree_handler = TocTreeHandler(project)
|
230 |
toctree_result = toctree_handler.update_toctree_after_translation(
|
231 |
+
result, filepath, agent, github_config, project
|
232 |
)
|
233 |
|
234 |
# Process result
|
|
|
272 |
{result.get("error_details", "Unknown error")}"""
|
273 |
|
274 |
else:
|
275 |
+
error_details = result.get("error_details", "No additional details")
|
276 |
return f"""❌ **GitHub PR Creation Failed**
|
277 |
|
278 |
**Error Message:**
|
279 |
+
{result["message"]}
|
280 |
+
|
281 |
+
**Error Details:**
|
282 |
+
{error_details}
|
283 |
+
|
284 |
+
💡 **Common Solutions:**
|
285 |
+
1. **Project Mismatch**: Selected project '{project}' but fork is '{github_config.get('repo_name', 'REPO')}' - ensure they match
|
286 |
+
2. Check if your GitHub fork exists: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
|
287 |
+
3. Verify GitHub token has write access to your fork"""
|
288 |
|
289 |
except Exception as e:
|
290 |
+
error_msg = f"""❌ **Unexpected Error During PR Creation**
|
291 |
+
|
292 |
+
**Error:** {str(e)}
|
293 |
+
|
294 |
+
**Configuration:**
|
295 |
+
• Project: {project}
|
296 |
+
• File: {filepath}
|
297 |
+
• Target: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')} → {base_owner if 'base_owner' in locals() else 'BASE'}/{base_repo if 'base_repo' in locals() else 'REPO'}"""
|
298 |
print(error_msg)
|
299 |
return error_msg
|
300 |
|
app.py
CHANGED
@@ -8,17 +8,23 @@ from dotenv import load_dotenv
|
|
8 |
|
9 |
from agent.handler import (
|
10 |
approve_handler,
|
|
|
|
|
11 |
get_welcome_message,
|
12 |
process_file_search_handler,
|
13 |
restart_handler,
|
14 |
send_message,
|
15 |
start_translate_handler,
|
16 |
sync_language_displays,
|
|
|
|
|
17 |
update_prompt_preview,
|
18 |
update_status,
|
19 |
update_github_config,
|
|
|
20 |
)
|
21 |
from translator.model import Languages
|
|
|
22 |
|
23 |
load_dotenv()
|
24 |
|
@@ -111,12 +117,54 @@ with gr.Blocks(
|
|
111 |
gr.Markdown("### 🌐 Hugging Face i18n Agent")
|
112 |
|
113 |
chatbot = gr.Chatbot(
|
114 |
-
value=[[None, get_welcome_message()]], scale=1, height=
|
115 |
show_copy_button=True
|
116 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
# Controller interface
|
119 |
with gr.Column(scale=2):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
# Quick Controller
|
121 |
with gr.Column(elem_classes=["control-panel"]):
|
122 |
gr.Markdown("### 🛠️ Quick Controls")
|
@@ -125,6 +173,11 @@ with gr.Blocks(
|
|
125 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
126 |
with gr.TabItem("1. Find Files", id=0):
|
127 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
128 |
lang_dropdown = gr.Radio(
|
129 |
choices=[language.value for language in Languages],
|
130 |
label="🌍 Translate To",
|
@@ -139,6 +192,11 @@ with gr.Blocks(
|
|
139 |
"🔍 Find Files to Translate",
|
140 |
elem_classes="action-button",
|
141 |
)
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
with gr.TabItem("2. Translate", id=1):
|
144 |
with gr.Group():
|
@@ -159,19 +217,19 @@ with gr.Blocks(
|
|
159 |
value="ko",
|
160 |
interactive=False,
|
161 |
)
|
162 |
-
anthropic_key = gr.Textbox(
|
163 |
-
label="🔑 Anthropic API key for translation generation",
|
164 |
-
type="password",
|
165 |
-
)
|
166 |
additional_instruction = gr.Textbox(
|
167 |
label="📝 Additional instructions (Optional - e.g., custom glossary)",
|
168 |
placeholder="Example: Translate 'model' as '모델' consistently",
|
169 |
lines=2,
|
170 |
)
|
171 |
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
173 |
prompt_preview = gr.Textbox(
|
174 |
-
label="Current Translation Prompt",
|
175 |
lines=8,
|
176 |
interactive=False,
|
177 |
placeholder="Select a file and language to see the prompt preview...",
|
@@ -181,29 +239,18 @@ with gr.Blocks(
|
|
181 |
start_translate_btn = gr.Button(
|
182 |
"🚀 Start Translation", elem_classes="action-button"
|
183 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
with gr.TabItem("3. Upload PR", id=2):
|
186 |
with gr.Group():
|
187 |
-
github_token = gr.Textbox(
|
188 |
-
label="🔑 GitHub Token",
|
189 |
-
type="password",
|
190 |
-
placeholder="ghp_xxxxxxxxxxxxxxxxxxxx",
|
191 |
-
)
|
192 |
-
github_owner = gr.Textbox(
|
193 |
-
label="👤 GitHub Owner/Username",
|
194 |
-
placeholder="your-username",
|
195 |
-
)
|
196 |
-
github_repo = gr.Textbox(
|
197 |
-
label="📁 Repository Name",
|
198 |
-
placeholder="your-repository",
|
199 |
-
)
|
200 |
reference_pr_url = gr.Textbox(
|
201 |
-
label="🔗 Reference PR URL (Optional
|
202 |
-
placeholder="
|
203 |
-
)
|
204 |
-
|
205 |
-
save_config_btn = gr.Button(
|
206 |
-
"💾 Save GitHub Config", elem_classes="action-button"
|
207 |
)
|
208 |
approve_btn = gr.Button(
|
209 |
"✅ Generate GitHub PR", elem_classes="action-button"
|
@@ -212,29 +259,38 @@ with gr.Blocks(
|
|
212 |
"🔄 Restart Translation", elem_classes="action-button"
|
213 |
)
|
214 |
|
215 |
-
# Chat Controller
|
216 |
-
with gr.Column(elem_classes=["control-panel"]):
|
217 |
-
gr.Markdown("### 💬 Chat with agent (Only simple chat is available)")
|
218 |
-
msg_input = gr.Textbox(
|
219 |
-
placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
|
220 |
-
container=False,
|
221 |
-
scale=4,
|
222 |
-
)
|
223 |
-
send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
|
224 |
-
|
225 |
# Event Handlers
|
226 |
|
227 |
find_btn.click(
|
228 |
fn=process_file_search_handler,
|
229 |
-
inputs=[lang_dropdown, k_input, chatbot],
|
230 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
231 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
lang_dropdown.change(
|
235 |
-
fn=
|
236 |
-
inputs=[lang_dropdown],
|
237 |
-
outputs=[translate_lang_display],
|
238 |
)
|
239 |
|
240 |
#
|
@@ -247,20 +303,26 @@ with gr.Blocks(
|
|
247 |
# Button event handlers
|
248 |
start_translate_btn.click(
|
249 |
fn=start_translate_handler,
|
250 |
-
inputs=[chatbot,
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
outputs=[chatbot, msg_input, status_display, control_tabs],
|
252 |
)
|
253 |
|
254 |
-
#
|
255 |
save_config_btn.click(
|
256 |
-
fn=
|
257 |
-
inputs=[
|
258 |
-
outputs=[msg_input],
|
259 |
)
|
260 |
|
261 |
approve_btn.click(
|
262 |
fn=approve_handler,
|
263 |
-
inputs=[chatbot,
|
264 |
outputs=[chatbot, msg_input, status_display],
|
265 |
)
|
266 |
|
|
|
8 |
|
9 |
from agent.handler import (
|
10 |
approve_handler,
|
11 |
+
confirm_and_go_translate_handler,
|
12 |
+
confirm_translation_and_go_upload_handler,
|
13 |
get_welcome_message,
|
14 |
process_file_search_handler,
|
15 |
restart_handler,
|
16 |
send_message,
|
17 |
start_translate_handler,
|
18 |
sync_language_displays,
|
19 |
+
update_language_selection,
|
20 |
+
update_project_selection,
|
21 |
update_prompt_preview,
|
22 |
update_status,
|
23 |
update_github_config,
|
24 |
+
update_persistent_config,
|
25 |
)
|
26 |
from translator.model import Languages
|
27 |
+
from translator.project_config import get_available_projects
|
28 |
|
29 |
load_dotenv()
|
30 |
|
|
|
117 |
gr.Markdown("### 🌐 Hugging Face i18n Agent")
|
118 |
|
119 |
chatbot = gr.Chatbot(
|
120 |
+
value=[[None, get_welcome_message()]], scale=1, height=525,
|
121 |
show_copy_button=True
|
122 |
)
|
123 |
+
|
124 |
+
# Chat input directly under main chat
|
125 |
+
gr.Markdown("### 💬 Chat with agent")
|
126 |
+
with gr.Row():
|
127 |
+
msg_input = gr.Textbox(
|
128 |
+
placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
|
129 |
+
container=False,
|
130 |
+
scale=4,
|
131 |
+
)
|
132 |
+
send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
|
133 |
|
134 |
# Controller interface
|
135 |
with gr.Column(scale=2):
|
136 |
+
# Configuration Panel
|
137 |
+
with gr.Column(elem_classes=["control-panel"]):
|
138 |
+
gr.Markdown("### ⚙️ Configuration")
|
139 |
+
|
140 |
+
with gr.Accordion("🔧 API & GitHub Settings", open=True):
|
141 |
+
config_anthropic_key = gr.Textbox(
|
142 |
+
label="🔑 Anthropic API Key",
|
143 |
+
type="password",
|
144 |
+
placeholder="sk-ant-...",
|
145 |
+
)
|
146 |
+
config_github_token = gr.Textbox(
|
147 |
+
label="🔑 GitHub Token (Required for PR, Optional for file search)",
|
148 |
+
type="password",
|
149 |
+
placeholder="ghp_...",
|
150 |
+
)
|
151 |
+
|
152 |
+
with gr.Row():
|
153 |
+
config_github_owner = gr.Textbox(
|
154 |
+
label="👤 GitHub Owner",
|
155 |
+
placeholder="your-username",
|
156 |
+
scale=1,
|
157 |
+
)
|
158 |
+
config_github_repo = gr.Textbox(
|
159 |
+
label="📁 Repository Name",
|
160 |
+
placeholder="your-repository",
|
161 |
+
scale=1,
|
162 |
+
)
|
163 |
+
|
164 |
+
save_config_btn = gr.Button(
|
165 |
+
"💾 Save Configuration", elem_classes="action-button"
|
166 |
+
)
|
167 |
+
|
168 |
# Quick Controller
|
169 |
with gr.Column(elem_classes=["control-panel"]):
|
170 |
gr.Markdown("### 🛠️ Quick Controls")
|
|
|
173 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
174 |
with gr.TabItem("1. Find Files", id=0):
|
175 |
with gr.Group():
|
176 |
+
project_dropdown = gr.Radio(
|
177 |
+
choices=get_available_projects(),
|
178 |
+
label="🎯 Select Project",
|
179 |
+
value="transformers",
|
180 |
+
)
|
181 |
lang_dropdown = gr.Radio(
|
182 |
choices=[language.value for language in Languages],
|
183 |
label="🌍 Translate To",
|
|
|
192 |
"🔍 Find Files to Translate",
|
193 |
elem_classes="action-button",
|
194 |
)
|
195 |
+
|
196 |
+
confirm_go_btn = gr.Button(
|
197 |
+
"✅ Confirm Selection & Go to Translate",
|
198 |
+
elem_classes="action-button",
|
199 |
+
)
|
200 |
|
201 |
with gr.TabItem("2. Translate", id=1):
|
202 |
with gr.Group():
|
|
|
217 |
value="ko",
|
218 |
interactive=False,
|
219 |
)
|
|
|
|
|
|
|
|
|
220 |
additional_instruction = gr.Textbox(
|
221 |
label="📝 Additional instructions (Optional - e.g., custom glossary)",
|
222 |
placeholder="Example: Translate 'model' as '모델' consistently",
|
223 |
lines=2,
|
224 |
)
|
225 |
|
226 |
+
force_retranslate = gr.Checkbox(
|
227 |
+
label="🔄 Force Retranslate (ignore existing translations)",
|
228 |
+
value=False,
|
229 |
+
)
|
230 |
+
|
231 |
+
with gr.Accordion("🔍 Preview Translation Prompt", open=False):
|
232 |
prompt_preview = gr.Textbox(
|
|
|
233 |
lines=8,
|
234 |
interactive=False,
|
235 |
placeholder="Select a file and language to see the prompt preview...",
|
|
|
239 |
start_translate_btn = gr.Button(
|
240 |
"🚀 Start Translation", elem_classes="action-button"
|
241 |
)
|
242 |
+
|
243 |
+
confirm_upload_btn = gr.Button(
|
244 |
+
"✅ Confirm Translation & Upload PR",
|
245 |
+
elem_classes="action-button",
|
246 |
+
visible=False,
|
247 |
+
)
|
248 |
|
249 |
with gr.TabItem("3. Upload PR", id=2):
|
250 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
reference_pr_url = gr.Textbox(
|
252 |
+
label="🔗 Reference PR URL (Optional)",
|
253 |
+
placeholder="Auto-filled based on project selection",
|
|
|
|
|
|
|
|
|
254 |
)
|
255 |
approve_btn = gr.Button(
|
256 |
"✅ Generate GitHub PR", elem_classes="action-button"
|
|
|
259 |
"🔄 Restart Translation", elem_classes="action-button"
|
260 |
)
|
261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
# Event Handlers
|
263 |
|
264 |
find_btn.click(
|
265 |
fn=process_file_search_handler,
|
266 |
+
inputs=[project_dropdown, lang_dropdown, k_input, chatbot],
|
267 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
268 |
)
|
269 |
+
|
270 |
+
confirm_go_btn.click(
|
271 |
+
fn=confirm_and_go_translate_handler,
|
272 |
+
inputs=[chatbot],
|
273 |
+
outputs=[chatbot, msg_input, status_display, control_tabs],
|
274 |
+
)
|
275 |
|
276 |
+
# Auto-save selections to state and update prompt preview
|
277 |
+
project_dropdown.change(
|
278 |
+
fn=update_project_selection,
|
279 |
+
inputs=[project_dropdown, chatbot],
|
280 |
+
outputs=[chatbot, msg_input, status_display],
|
281 |
+
)
|
282 |
+
|
283 |
+
# Update prompt preview when project changes
|
284 |
+
project_dropdown.change(
|
285 |
+
fn=update_prompt_preview,
|
286 |
+
inputs=[translate_lang_display, file_to_translate_input, additional_instruction],
|
287 |
+
outputs=[prompt_preview],
|
288 |
+
)
|
289 |
+
|
290 |
lang_dropdown.change(
|
291 |
+
fn=update_language_selection,
|
292 |
+
inputs=[lang_dropdown, chatbot],
|
293 |
+
outputs=[chatbot, msg_input, status_display, translate_lang_display],
|
294 |
)
|
295 |
|
296 |
#
|
|
|
303 |
# Button event handlers
|
304 |
start_translate_btn.click(
|
305 |
fn=start_translate_handler,
|
306 |
+
inputs=[chatbot, file_to_translate_input, additional_instruction, force_retranslate],
|
307 |
+
outputs=[chatbot, msg_input, status_display, control_tabs, start_translate_btn, confirm_upload_btn],
|
308 |
+
)
|
309 |
+
|
310 |
+
confirm_upload_btn.click(
|
311 |
+
fn=confirm_translation_and_go_upload_handler,
|
312 |
+
inputs=[chatbot],
|
313 |
outputs=[chatbot, msg_input, status_display, control_tabs],
|
314 |
)
|
315 |
|
316 |
+
# Configuration Save
|
317 |
save_config_btn.click(
|
318 |
+
fn=update_persistent_config,
|
319 |
+
inputs=[config_anthropic_key, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
|
320 |
+
outputs=[chatbot, msg_input, status_display],
|
321 |
)
|
322 |
|
323 |
approve_btn.click(
|
324 |
fn=approve_handler,
|
325 |
+
inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
|
326 |
outputs=[chatbot, msg_input, status_display],
|
327 |
)
|
328 |
|
pr_generator/agent.py
CHANGED
@@ -34,9 +34,13 @@ except ImportError as e:
|
|
34 |
class GitHubPRAgent:
|
35 |
"""Agent class for GitHub PR creation"""
|
36 |
|
37 |
-
def __init__(self):
|
38 |
self._github_client = None
|
39 |
self._llm = None
|
|
|
|
|
|
|
|
|
40 |
|
41 |
@property
|
42 |
def github_client(self) -> Optional[Github]:
|
@@ -433,8 +437,6 @@ Please return only the commit message. No other explanation is needed."""
|
|
433 |
target_language: str,
|
434 |
filepath: str,
|
435 |
translated_doc: str,
|
436 |
-
owner: str,
|
437 |
-
repo_name: str,
|
438 |
base_branch: str = "main",
|
439 |
) -> Dict[str, Any]:
|
440 |
"""Execute translation document PR creation workflow."""
|
@@ -458,19 +460,20 @@ Please return only the commit message. No other explanation is needed."""
|
|
458 |
)
|
459 |
|
460 |
# 3. Get main branch SHA from upstream and create branch in fork
|
461 |
-
upstream_repo = self.github_client.get_repo(f"
|
462 |
main_branch = upstream_repo.get_branch(base_branch)
|
463 |
main_sha = main_branch.commit.sha
|
464 |
|
465 |
print(f"🌿 Creating branch: {branch_name} in fork repository")
|
466 |
-
branch_result = self.create_branch(
|
467 |
|
468 |
# Check branch creation result
|
469 |
if branch_result.startswith("ERROR"):
|
470 |
return {
|
471 |
"status": "error",
|
472 |
-
"message": f"Branch creation failed: {branch_result}",
|
473 |
"branch": branch_name,
|
|
|
474 |
}
|
475 |
elif branch_result.startswith("WARNING"):
|
476 |
print(f"⚠️ {branch_result}")
|
@@ -489,8 +492,8 @@ Please return only the commit message. No other explanation is needed."""
|
|
489 |
|
490 |
print(f"📄 Saving file: {target_filepath}")
|
491 |
file_result = self.create_or_update_file(
|
492 |
-
|
493 |
-
|
494 |
target_filepath,
|
495 |
commit_message,
|
496 |
translated_doc,
|
@@ -500,9 +503,10 @@ Please return only the commit message. No other explanation is needed."""
|
|
500 |
if not file_result.startswith("SUCCESS"):
|
501 |
return {
|
502 |
"status": "error",
|
503 |
-
"message": "
|
504 |
"branch": branch_name,
|
505 |
"file_path": target_filepath,
|
|
|
506 |
}
|
507 |
|
508 |
print(f"{file_result}")
|
@@ -518,11 +522,11 @@ Please return only the commit message. No other explanation is needed."""
|
|
518 |
)
|
519 |
|
520 |
print(f"🔄 Creating PR: {pr_title}")
|
521 |
-
print(f" Head: {
|
522 |
|
523 |
# Create PR from fork to upstream repository
|
524 |
pr_result = self.create_pull_request(
|
525 |
-
|
526 |
)
|
527 |
|
528 |
if pr_result.startswith("ERROR"):
|
@@ -554,7 +558,8 @@ Please return only the commit message. No other explanation is needed."""
|
|
554 |
except Exception as e:
|
555 |
return {
|
556 |
"status": "error",
|
557 |
-
"message": f"
|
|
|
558 |
}
|
559 |
|
560 |
|
|
|
34 |
class GitHubPRAgent:
|
35 |
"""Agent class for GitHub PR creation"""
|
36 |
|
37 |
+
def __init__(self, user_owner: str = None, user_repo: str = None, base_owner: str = None, base_repo: str = None):
|
38 |
self._github_client = None
|
39 |
self._llm = None
|
40 |
+
self.user_owner = user_owner
|
41 |
+
self.user_repo = user_repo
|
42 |
+
self.base_owner = base_owner
|
43 |
+
self.base_repo = base_repo
|
44 |
|
45 |
@property
|
46 |
def github_client(self) -> Optional[Github]:
|
|
|
437 |
target_language: str,
|
438 |
filepath: str,
|
439 |
translated_doc: str,
|
|
|
|
|
440 |
base_branch: str = "main",
|
441 |
) -> Dict[str, Any]:
|
442 |
"""Execute translation document PR creation workflow."""
|
|
|
460 |
)
|
461 |
|
462 |
# 3. Get main branch SHA from upstream and create branch in fork
|
463 |
+
upstream_repo = self.github_client.get_repo(f"{self.base_owner}/{self.base_repo}")
|
464 |
main_branch = upstream_repo.get_branch(base_branch)
|
465 |
main_sha = main_branch.commit.sha
|
466 |
|
467 |
print(f"🌿 Creating branch: {branch_name} in fork repository")
|
468 |
+
branch_result = self.create_branch(self.user_owner, self.user_repo, branch_name, main_sha)
|
469 |
|
470 |
# Check branch creation result
|
471 |
if branch_result.startswith("ERROR"):
|
472 |
return {
|
473 |
"status": "error",
|
474 |
+
"message": f"Branch creation failed: {branch_result}\n\nTarget: {self.user_owner}/{self.user_repo}\nBranch: {branch_name}\nBase SHA: {main_sha[:8]}",
|
475 |
"branch": branch_name,
|
476 |
+
"error_details": branch_result,
|
477 |
}
|
478 |
elif branch_result.startswith("WARNING"):
|
479 |
print(f"⚠️ {branch_result}")
|
|
|
492 |
|
493 |
print(f"📄 Saving file: {target_filepath}")
|
494 |
file_result = self.create_or_update_file(
|
495 |
+
self.user_owner,
|
496 |
+
self.user_repo,
|
497 |
target_filepath,
|
498 |
commit_message,
|
499 |
translated_doc,
|
|
|
503 |
if not file_result.startswith("SUCCESS"):
|
504 |
return {
|
505 |
"status": "error",
|
506 |
+
"message": f"File save failed: {file_result}\n\n🎯 Target: {self.user_owner}/{self.user_repo} (expected: {target_language} fork of {self.base_owner}/{self.base_repo})\n🌿 Branch: {branch_name}\n📁 File: {target_filepath}",
|
507 |
"branch": branch_name,
|
508 |
"file_path": target_filepath,
|
509 |
+
"error_details": file_result,
|
510 |
}
|
511 |
|
512 |
print(f"{file_result}")
|
|
|
522 |
)
|
523 |
|
524 |
print(f"🔄 Creating PR: {pr_title}")
|
525 |
+
print(f" Head: {self.user_owner}:{branch_name} → Base: {self.base_owner}:{base_branch}")
|
526 |
|
527 |
# Create PR from fork to upstream repository
|
528 |
pr_result = self.create_pull_request(
|
529 |
+
self.base_owner, self.base_repo, pr_title, f"{self.user_owner}:{branch_name}", base_branch, pr_body, draft=True
|
530 |
)
|
531 |
|
532 |
if pr_result.startswith("ERROR"):
|
|
|
558 |
except Exception as e:
|
559 |
return {
|
560 |
"status": "error",
|
561 |
+
"message": f"Workflow execution failed: {str(e)}\n\nConfig: {self.user_owner}/{self.user_repo} → {self.base_owner}/{self.base_repo}\nFile: {filepath if 'filepath' in locals() else 'Unknown'}",
|
562 |
+
"error_details": str(e),
|
563 |
}
|
564 |
|
565 |
|
translator/content.py
CHANGED
@@ -6,15 +6,18 @@ from langchain.callbacks import get_openai_callback
|
|
6 |
from langchain_anthropic import ChatAnthropic
|
7 |
|
8 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
|
|
9 |
|
10 |
|
11 |
-
def get_content(filepath: str) -> str:
|
12 |
if filepath == "":
|
13 |
raise ValueError("No files selected for translation.")
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
response = requests.get(url)
|
19 |
if response.status_code == 200:
|
20 |
content = response.text
|
@@ -170,4 +173,4 @@ def llm_translate(to_translate: str) -> tuple[str, str]:
|
|
170 |
)
|
171 |
ai_message = model.invoke(to_translate)
|
172 |
print("cb:", cb)
|
173 |
-
return cb, ai_message.content
|
|
|
6 |
from langchain_anthropic import ChatAnthropic
|
7 |
|
8 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
9 |
+
from translator.project_config import get_project_config
|
10 |
|
11 |
|
12 |
+
def get_content(filepath: str, project: str = "transformers") -> str:
|
13 |
if filepath == "":
|
14 |
raise ValueError("No files selected for translation.")
|
15 |
|
16 |
+
config = get_project_config(project)
|
17 |
+
# Extract repo path from repo_url (e.g., "huggingface/transformers")
|
18 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
19 |
+
|
20 |
+
url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}"
|
21 |
response = requests.get(url)
|
22 |
if response.status_code == 200:
|
23 |
content = response.text
|
|
|
173 |
)
|
174 |
ai_message = model.invoke(to_translate)
|
175 |
print("cb:", cb)
|
176 |
+
return str(cb), ai_message.content
|
translator/project_config.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Project configuration for different HuggingFace repositories."""
|
2 |
+
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from typing import Dict
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
class ProjectConfig:
    """Settings describing one HuggingFace project handled by the translator."""

    # Display name of the project (e.g. "Transformers").
    name: str
    # Web URL of the project's GitHub repository.
    repo_url: str
    # GitHub API URL used to list the repository tree.
    api_url: str
    # Path of the documentation sources inside the repository.
    docs_path: str
    # Maps a language code to the id of its GitHub issue.
    github_issues: Dict[str, str]
    # URL of a pull request used as the reference for generated PRs.
    reference_pr_url: str
|
16 |
+
|
17 |
+
|
18 |
+
# Project configurations
|
19 |
+
# Registry of supported projects, keyed by the project key used for lookups.
PROJECTS = dict(
    transformers=ProjectConfig(
        name="Transformers",
        repo_url="https://github.com/huggingface/transformers",
        api_url="https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1",
        docs_path="docs/source",
        github_issues={"ko": "20179"},
        reference_pr_url="https://github.com/huggingface/transformers/pull/24968",
    ),
    smolagents=ProjectConfig(
        name="SmolAgents",
        repo_url="https://github.com/huggingface/smolagents",
        api_url="https://api.github.com/repos/huggingface/smolagents/git/trees/main?recursive=1",
        docs_path="docs/source",
        # NOTE(review): same issue id as the transformers entry; it was marked
        # "to be filled when issue is created" - confirm the real issue id.
        github_issues={"ko": "20179"},
        # NOTE(review): was marked "to be filled with actual PR URL" - confirm.
        reference_pr_url="https://github.com/huggingface/smolagents/pull/1581",
    ),
)
|
37 |
+
|
38 |
+
|
39 |
+
def get_project_config(project_key: str) -> ProjectConfig:
    """Return the ``ProjectConfig`` registered under ``project_key``.

    Raises:
        ValueError: if ``project_key`` is not a key of ``PROJECTS``; the
            message lists the available keys.
    """
    # Single lookup; registry values are always ProjectConfig instances,
    # so None can only mean "key not present".
    selected = PROJECTS.get(project_key)
    if selected is None:
        raise ValueError(f"Unknown project: {project_key}. Available: {list(PROJECTS.keys())}")
    return selected
|
44 |
+
|
45 |
+
|
46 |
+
def get_available_projects() -> list[str]:
    """Return the keys of ``PROJECTS`` as a new list, in registration order."""
    return [project_key for project_key in PROJECTS]
|
translator/retriever.py
CHANGED
@@ -5,15 +5,26 @@ from pathlib import Path
|
|
5 |
import requests
|
6 |
|
7 |
from .model import Languages, Summary, TranslationDoc
|
|
|
8 |
|
9 |
-
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
|
10 |
|
11 |
-
|
12 |
-
def get_github_repo_files():
|
13 |
"""
|
14 |
Get github repo files
|
15 |
"""
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
data = response.json()
|
19 |
all_items = data.get("tree", [])
|
@@ -26,30 +37,42 @@ def get_github_repo_files():
|
|
26 |
return file_paths
|
27 |
|
28 |
|
29 |
-
def get_github_issue_open_pr(lang: str = "ko"):
|
30 |
"""
|
31 |
-
Get open PR in the github issue, filtered by title
|
32 |
"""
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
)
|
39 |
|
|
|
|
|
|
|
|
|
40 |
headers = {
|
41 |
"Accept": "application/vnd.github+json",
|
42 |
}
|
43 |
|
|
|
|
|
|
|
|
|
|
|
44 |
all_open_prs = []
|
45 |
page = 1
|
46 |
per_page = 100 # Maximum allowed by GitHub API
|
47 |
|
48 |
while True:
|
49 |
-
|
|
|
50 |
response = requests.get(url, headers=headers)
|
51 |
|
52 |
-
if response.status_code
|
|
|
|
|
53 |
raise Exception(f"GitHub API error: {response.status_code} {response.text}")
|
54 |
|
55 |
page_prs = response.json()
|
@@ -63,19 +86,61 @@ def get_github_issue_open_pr(lang: str = "ko"):
|
|
63 |
if len(page_prs) < per_page:
|
64 |
break
|
65 |
|
66 |
-
filtered_prs = [pr for pr in all_open_prs if
|
67 |
|
68 |
-
|
|
|
69 |
|
70 |
-
|
71 |
-
"
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
return filenames, pr_info_list
|
80 |
|
81 |
|
@@ -99,11 +164,12 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
|
|
99 |
return report, first_missing_docs
|
100 |
|
101 |
|
102 |
-
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
103 |
"""
|
104 |
Generate a report for the translated docs
|
105 |
"""
|
106 |
-
docs_file
|
|
|
107 |
|
108 |
base_docs_path = Path("docs/source")
|
109 |
en_docs_path = Path("docs/source/en")
|
|
|
5 |
import requests
|
6 |
|
7 |
from .model import Languages, Summary, TranslationDoc
|
8 |
+
from .project_config import get_project_config
|
9 |
|
|
|
10 |
|
11 |
+
def get_github_repo_files(project: str = "transformers"):
|
|
|
12 |
"""
|
13 |
Get github repo files
|
14 |
"""
|
15 |
+
config = get_project_config(project)
|
16 |
+
|
17 |
+
# Add GitHub token if available to avoid rate limiting (optional)
|
18 |
+
headers = {}
|
19 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
20 |
+
if github_token:
|
21 |
+
headers["Authorization"] = f"token {github_token}"
|
22 |
+
|
23 |
+
response = requests.get(config.api_url, headers=headers)
|
24 |
+
|
25 |
+
# Handle rate limit with helpful message
|
26 |
+
if response.status_code == 403 and "rate limit" in response.text.lower():
|
27 |
+
raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
|
28 |
|
29 |
data = response.json()
|
30 |
all_items = data.get("tree", [])
|
|
|
37 |
return file_paths
|
38 |
|
39 |
|
40 |
+
def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko", all_files: list = None):
|
41 |
"""
|
42 |
+
Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
|
43 |
"""
|
44 |
+
config = get_project_config(project)
|
45 |
+
issue_id = config.github_issues.get(lang)
|
46 |
+
|
47 |
+
# A GitHub issue must be registered for this project/language; otherwise abort
|
48 |
+
if not issue_id:
|
49 |
+
raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
|
50 |
|
51 |
+
# Require all_files parameter
|
52 |
+
if all_files is None:
|
53 |
+
raise ValueError("Repository file list must be provided")
|
54 |
+
|
55 |
headers = {
|
56 |
"Accept": "application/vnd.github+json",
|
57 |
}
|
58 |
|
59 |
+
# Add GitHub token if available to avoid rate limiting (optional)
|
60 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
61 |
+
if github_token:
|
62 |
+
headers["Authorization"] = f"token {github_token}"
|
63 |
+
|
64 |
all_open_prs = []
|
65 |
page = 1
|
66 |
per_page = 100 # Maximum allowed by GitHub API
|
67 |
|
68 |
while True:
|
69 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
70 |
+
url = f"https://api.github.com/repos/{repo_path}/pulls?state=open&page={page}&per_page={per_page}"
|
71 |
response = requests.get(url, headers=headers)
|
72 |
|
73 |
+
if response.status_code == 403 and "rate limit" in response.text.lower():
|
74 |
+
raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
|
75 |
+
elif response.status_code != 200:
|
76 |
raise Exception(f"GitHub API error: {response.status_code} {response.text}")
|
77 |
|
78 |
page_prs = response.json()
|
|
|
86 |
if len(page_prs) < per_page:
|
87 |
break
|
88 |
|
89 |
+
filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
|
90 |
|
91 |
+
# Pattern to match filenames after "Translated" keyword
|
92 |
+
pattern = re.compile(r"Translated\s+(?:`([^`]+)`|(\S+))\s+to")
|
93 |
|
94 |
+
def find_original_file_path(filename_from_title, all_files):
    """Resolve a filename taken from a PR title to a path under ``docs/source/en/``.

    Args:
        filename_from_title: File name extracted from a PR title, with or
            without a trailing ``.md``. Falsy values (``None``, ``""``) are
            treated as "no filename".
        all_files: Iterable of repository file paths (strings) to search.

    Returns:
        ``None`` when ``filename_from_title`` is falsy. Otherwise the first
        path in ``all_files`` that starts with ``docs/source/en/``, ends with
        ``.md`` and whose final component (minus the ``.md`` extension) equals
        the requested name; if nothing matches, the fallback
        ``docs/source/en/<filename_from_title>``.

    NOTE(review): in the visible part of the enclosing function the loop over
    PRs repeats this lookup inline instead of calling this helper -- confirm
    whether the helper is meant to replace that code.
    """
    if not filename_from_title:
        return None

    en_prefix = "docs/source/en/"

    # Strip only a trailing ".md". The previous str.replace('.md', '') removed
    # every occurrence, so "a.mdx_guide.md" wrongly matched "ax_guide.md".
    base_name = filename_from_title.removesuffix(".md")

    for file_path in all_files:
        if not (file_path.startswith(en_prefix) and file_path.endswith(".md")):
            continue
        file_base = file_path.rsplit("/", 1)[-1].removesuffix(".md")
        if file_base == base_name:
            return file_path

    # No file with that name in the English docs: fall back to a flat path.
    return f"{en_prefix}{filename_from_title}"
|
111 |
+
|
112 |
+
filenames = []
|
113 |
+
pr_info_list = []
|
114 |
+
|
115 |
+
for pr in filtered_prs:
|
116 |
+
match = pattern.search(pr["title"])
|
117 |
+
if match:
|
118 |
+
# Use group 1 (with backticks) or group 2 (without backticks)
|
119 |
+
filename = match.group(1) or match.group(2)
|
120 |
+
# Add .md extension if not present
|
121 |
+
if not filename.endswith('.md'):
|
122 |
+
filename += '.md'
|
123 |
+
|
124 |
+
# Find the correct file path by matching filename
|
125 |
+
correct_path = None
|
126 |
+
if filename:
|
127 |
+
# Remove .md extension for matching
|
128 |
+
base_name = filename.replace('.md', '')
|
129 |
+
|
130 |
+
# Look for exact matches in repo files
|
131 |
+
for file_path in all_files:
|
132 |
+
if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
|
133 |
+
file_base = file_path.split("/")[-1].replace('.md', '')
|
134 |
+
if file_base == base_name:
|
135 |
+
correct_path = file_path
|
136 |
+
break
|
137 |
+
|
138 |
+
# If no exact match, fallback to simple path
|
139 |
+
if not correct_path:
|
140 |
+
correct_path = f"docs/source/en/{filename}"
|
141 |
+
if correct_path:
|
142 |
+
filenames.append(correct_path)
|
143 |
+
pr_info_list.append(f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}")
|
144 |
return filenames, pr_info_list
|
145 |
|
146 |
|
|
|
164 |
return report, first_missing_docs
|
165 |
|
166 |
|
167 |
+
def report(project: str, target_lang: str, top_k: int = 1, docs_file: list = None) -> tuple[str, list[str]]:
|
168 |
"""
|
169 |
Generate a report for the translated docs
|
170 |
"""
|
171 |
+
if docs_file is None:
|
172 |
+
raise ValueError("Repository file list must be provided")
|
173 |
|
174 |
base_docs_path = Path("docs/source")
|
175 |
en_docs_path = Path("docs/source/en")
|