wony617 committed on
Commit
6c707e1
·
unverified ·
2 Parent(s): fe6c90f d229b84

Merge pull request #4 from Jwaminju/part-of-support-smolagent

Browse files
agent/handler.py CHANGED
@@ -13,25 +13,48 @@ from agent.workflow import (
13
  )
14
  from pr_generator.searcher import find_reference_pr_simple_stream
15
  from translator.content import get_full_prompt, get_content, preprocess_content
 
16
 
17
 
18
  # State management
19
  class ChatState:
20
  def __init__(self):
21
  self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
 
 
 
22
  self.target_language = "ko"
23
  self.k_files = 10
24
  self.files_to_translate = []
25
  self.additional_instruction = ""
26
  self.current_file_content = {"translated": ""}
27
- self.pr_result = None # Store PR creation result
28
- # GitHub configuration
29
- self.github_config = {
30
- "token": "",
31
- "owner": "",
32
- "repo_name": "",
33
- "reference_pr_url": "https://github.com/huggingface/transformers/pull/24968",
 
 
 
 
34
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
 
37
  state = ChatState()
@@ -53,25 +76,41 @@ def _extract_content_for_display(content: str) -> str:
53
 
54
 
55
  def get_welcome_message():
56
- """Initial welcome message with file finding controls"""
57
  return """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**
58
 
59
  I'll help you find files that need translation and translate them in a streamlined workflow.
60
 
61
- **🔎 Let's start by finding files that need translation.**
62
 
63
- Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.
64
  """
65
 
66
 
67
- def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
68
  """Process file search request and update Gradio UI components."""
69
  global state
 
70
  state.target_language = lang
71
  state.k_files = k
72
  state.step = "find_files"
73
 
74
- status_report, files_list = report_translation_target_files(lang, k)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  state.files_to_translate = (
76
  [file[0] for file in files_list]
77
  if files_list
@@ -87,8 +126,10 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
87
  """
88
 
89
  if state.files_to_translate:
 
90
  for i, file in enumerate(state.files_to_translate, 1):
91
- response += f"\n{i}. `{file}`"
 
92
 
93
  # if len(state.files_to_translate) > 5:
94
  # response += f"\n... and {len(state.files_to_translate) - 5} more files"
@@ -100,14 +141,13 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
100
  # Add to history
101
  history.append(["Please find files that need translation", response])
102
  cleared_input = ""
103
- selected_tab = 1 if state.files_to_translate else 0
104
 
105
  # 드롭다운 choices로 쓸 파일 리스트 반환 추가
106
  return (
107
  history,
108
  cleared_input,
109
  update_status(),
110
- gr.Tabs(selected=selected_tab),
111
  update_dropdown_choices(state.files_to_translate),
112
  )
113
 
@@ -116,7 +156,30 @@ def update_dropdown_choices(file_list):
116
  return gr.update(choices=file_list, value=None)
117
 
118
 
119
- def start_translation_process():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  """Start the translation process for the first file"""
121
  if not state.files_to_translate:
122
  return "❌ No files available for translation.", ""
@@ -125,8 +188,8 @@ def start_translation_process():
125
 
126
  # Call translation function (simplified for demo)
127
  try:
128
- translated = translate_docs_interactive(
129
- state.target_language, [[current_file]], state.additional_instruction
130
  )
131
 
132
  state.current_file_content = {"translated": translated}
@@ -138,19 +201,22 @@ def start_translation_process():
138
  p.parent.mkdir(parents=True, exist_ok=True)
139
  p.write_text(translated, encoding="utf-8")
140
 
141
- original_file_link = (
142
- "https://github.com/huggingface/transformers/blob/main/" + current_file
143
- )
144
  print("Compeleted translation:\n")
145
  print(translated)
146
  print("----------------------------")
147
- response = (
148
- f"""🔄 Translation for: `{current_file}`\n"""
149
- "**📄 Original Content Link:**\n"
150
- ""
151
- f"{original_file_link}\n"
152
- "**🌐 Translated Content:**\n"
153
- )
 
 
 
 
154
  return response, translated
155
 
156
 
@@ -188,7 +254,12 @@ Currently available actions with quick controls:
188
  else:
189
  return """I understand you want to work on translations!
190
 
191
- To get started, please use the controls above to configure your translation settings and find files that need translation.
 
 
 
 
 
192
  """
193
 
194
 
@@ -226,12 +297,12 @@ def handle_user_message(message, history):
226
 
227
  def update_status():
228
  if state.step == "welcome":
229
- return """
230
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
231
  <div><strong>🔄 Step:</strong> Welcome</div>
 
232
  <div><strong>📁 Files:</strong> 0</div>
233
- <div><strong>🌍 Language:</strong> ko</div>
234
- <div><strong>⏳ Progress:</strong> Ready</div>
235
  </div>
236
  """
237
 
@@ -267,6 +338,7 @@ def update_status():
267
  status_html = f"""
268
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
269
  <div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
 
270
  <div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
271
  <div><strong>🌍 Language:</strong> {state.target_language}</div>
272
  <div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
@@ -284,26 +356,66 @@ def sync_language_displays(lang):
284
  return lang
285
 
286
 
287
- def update_github_config(token, owner, repo, reference_pr_url):
288
- """Update GitHub configuration settings."""
289
  global state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- # Set GitHub token in environment variables
292
- if token:
293
- os.environ["GITHUB_TOKEN"] = token
294
-
295
- # Save GitHub configuration to state
296
- state.github_config.update(
297
- {
298
- "token": token,
299
- "owner": owner,
300
- "repo_name": repo,
301
- "reference_pr_url": reference_pr_url
302
- or state.github_config["reference_pr_url"],
303
- }
304
- )
305
 
306
- return f"✅ GitHub configuration updated: {owner}/{repo}"
 
 
307
 
308
 
309
  def update_prompt_preview(language, file_path, additional_instruction):
@@ -319,7 +431,7 @@ def update_prompt_preview(language, file_path, additional_instruction):
319
  translation_lang = language
320
 
321
  # Get sample content (first 500 characters)
322
- content = get_content(file_path)
323
  to_translate = preprocess_content(content)
324
 
325
  # Truncate for preview
@@ -330,7 +442,10 @@ def update_prompt_preview(language, file_path, additional_instruction):
330
 
331
  return prompt
332
  except Exception as e:
333
- return f"Error generating prompt preview: {str(e)}"
 
 
 
334
 
335
 
336
  def send_message(message, history):
@@ -339,14 +454,39 @@ def send_message(message, history):
339
 
340
 
341
  # Button handlers with tab switching
342
- def start_translate_handler(history, anthropic_key, file_to_translate, additional_instruction=""):
 
 
 
 
 
 
 
343
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
344
 
 
 
 
 
 
 
345
  state.additional_instruction = additional_instruction
346
  state.files_to_translate = [file_to_translate]
347
- new_hist, cleared_input = handle_user_message("start translation", history)
348
- selected_tabs = 2 if state.current_file_content["translated"] else 0
349
- return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
 
352
  def approve_handler(history, owner, repo, reference_pr_url):
@@ -354,18 +494,34 @@ def approve_handler(history, owner, repo, reference_pr_url):
354
  global state
355
  state.step = "create_github_pr"
356
 
357
- # Update github config from the latest UI values
358
- state.github_config["owner"] = owner
359
- state.github_config["repo_name"] = repo
360
- state.github_config["reference_pr_url"] = reference_pr_url
361
-
362
- # Validate GitHub configuration
363
- github_config = state.github_config
364
- if not all([github_config.get("token"), owner, repo]):
365
- response = "❌ GitHub configuration incomplete. Please provide GitHub Token, Owner, and Repository Name in Tab 3."
 
 
 
 
 
 
366
  history.append(["GitHub PR creation request", response])
367
  return history, "", update_status()
368
 
 
 
 
 
 
 
 
 
 
 
369
  # If reference PR is not provided, use the agent to find one
370
  if not github_config.get("reference_pr_url"):
371
  response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
@@ -421,6 +577,7 @@ def approve_handler(history, owner, repo, reference_pr_url):
421
  translated_content=translated_content,
422
  github_config=state.github_config,
423
  en_title=file_name,
 
424
  )
425
  response += f"\n{pr_response}"
426
  else:
@@ -431,9 +588,23 @@ def approve_handler(history, owner, repo, reference_pr_url):
431
 
432
 
433
  def restart_handler(history):
434
- """Resets the state and UI."""
435
  global state
 
 
 
 
436
  state = ChatState()
 
 
 
 
 
 
 
 
 
 
437
  welcome_msg = get_welcome_message()
438
  new_hist = [[None, welcome_msg]]
439
  return new_hist, "", update_status(), gr.Tabs(selected=0)
 
13
  )
14
  from pr_generator.searcher import find_reference_pr_simple_stream
15
  from translator.content import get_full_prompt, get_content, preprocess_content
16
+ from translator.project_config import get_available_projects, get_project_config
17
 
18
 
19
  # State management
20
class ChatState:
    """Mutable state shared by the chat workflow handlers.

    Two groups of data are kept:

    * transient workflow fields (current step, selected project, target
      language, number of files, file list, extra instruction, current
      translation, PR result), which ``reset_transient_state`` restores to
      their defaults;
    * ``persistent_settings`` (Anthropic API key and GitHub configuration),
      which ``reset_transient_state`` leaves untouched.
    """

    def __init__(self):
        # Persistent settings (preserved across restarts)
        self.persistent_settings = {
            "anthropic_api_key": "",
            "github_config": {
                "token": "",
                "owner": "",
                "repo_name": "",
                "reference_pr_url": "",
            },
        }

        # Transient state (reset on restart). The defaults are defined once,
        # in reset_transient_state, so a fresh object and a reset object can
        # never disagree.
        self.reset_transient_state()

    def reset_transient_state(self):
        """Reset only the workflow state, keep persistent settings."""
        self.step = "welcome"  # welcome -> find_files -> translate -> create_github_pr
        self.selected_project = "transformers"
        self.target_language = "ko"
        self.k_files = 10
        self.files_to_translate = []
        self.additional_instruction = ""
        self.current_file_content = {"translated": ""}
        self.pr_result = None

    @property
    def github_config(self):
        """Return the GitHub configuration dict stored in the persistent settings.

        The same dict object is returned (not a copy), so mutating it
        updates ``persistent_settings["github_config"]``.
        """
        return self.persistent_settings["github_config"]
58
 
59
 
60
  state = ChatState()
 
76
 
77
 
78
def get_welcome_message():
    """Return the Markdown greeting that asks the user to pick a project first."""
    welcome_markdown = """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**

I'll help you find files that need translation and translate them in a streamlined workflow.

**🎯 First, select which project you want to translate:**

Use the **`Quick Controls`** on the right to select a project, or **ask me `what`, `how`, or `help`** to get started.
"""
    return welcome_markdown
88
 
89
 
90
+ def process_file_search_handler(project: str, lang: str, k: int, history: list) -> tuple:
91
  """Process file search request and update Gradio UI components."""
92
  global state
93
+ state.selected_project = project
94
  state.target_language = lang
95
  state.k_files = k
96
  state.step = "find_files"
97
 
98
+ try:
99
+ status_report, files_list = report_translation_target_files(project, lang, k)
100
+ except Exception as e:
101
+ if "rate limit" in str(e).lower():
102
+ response = f"""❌ **GitHub API Rate Limit Exceeded**
103
+
104
+ {str(e)}
105
+
106
+ **💡 To fix this:**
107
+ 1. Set GitHub Token in Configuration panel above
108
+ 2. Click "💾 Save Configuration"
109
+ 3. Try "Find Files" again"""
110
+ history.append(["File search request", response])
111
+ return history, "", update_status(), gr.Tabs(selected=0), gr.update(choices=[]), gr.update(visible=False)
112
+ else:
113
+ raise # Re-raise non-rate-limit errors
114
  state.files_to_translate = (
115
  [file[0] for file in files_list]
116
  if files_list
 
126
  """
127
 
128
  if state.files_to_translate:
129
+ config = get_project_config(state.selected_project)
130
  for i, file in enumerate(state.files_to_translate, 1):
131
+ file_link = f"{config.repo_url}/blob/main/{file}"
132
+ response += f"\n{i}. [`{file}`]({file_link})"
133
 
134
  # if len(state.files_to_translate) > 5:
135
  # response += f"\n... and {len(state.files_to_translate) - 5} more files"
 
141
  # Add to history
142
  history.append(["Please find files that need translation", response])
143
  cleared_input = ""
 
144
 
145
  # 드롭다운 choices로 쓸 파일 리스트 반환 추가
146
  return (
147
  history,
148
  cleared_input,
149
  update_status(),
150
+ gr.Tabs(), # Don't change tab
151
  update_dropdown_choices(state.files_to_translate),
152
  )
153
 
 
156
  return gr.update(choices=file_list, value=None)
157
 
158
 
159
def confirm_and_go_translate_handler(history):
    """Log the confirmed project/language and switch to the translate tab.

    Args:
        history: Chat history list; a confirmation exchange is appended to
            it in place.

    Returns:
        ``(history, "", update_status(), gr.Tabs(selected=1))``.
    """
    # Only attributes of the module-level ``state`` are read here, so no
    # ``global`` declaration is needed.
    confirmation = (
        "✅ **Selection confirmed!**\n\n"
        f"🎯 **Project:** {state.selected_project}\n"
        f"🌍 **Language:** {state.target_language}\n\n"
        "**➡️ Go to Tab 2 to start translation.**"
    )
    history.append(["Confirm selection", confirmation])
    return history, "", update_status(), gr.Tabs(selected=1)
166
+
167
+
168
def confirm_translation_and_go_upload_handler(history):
    """Confirm the current translation and switch to the upload-PR tab.

    When ``state.current_file_content`` holds no translated text, an error
    reply is appended and the tab is left unchanged (``gr.Tabs()``).
    Otherwise a confirmation naming the first file to translate (or
    ``Unknown`` when the list is empty) is appended and tab index 2 is
    selected.

    Args:
        history: Chat history list, mutated in place.

    Returns:
        ``(history, "", update_status(), tabs)``.
    """
    if state.current_file_content.get("translated"):
        file_label = (
            state.files_to_translate[0] if state.files_to_translate else "Unknown"
        )
        reply = (
            "✅ **Translation confirmed!**\n\n"
            f"📄 **File:** `{file_label}`\n\n"
            "**➡️ Go to Tab 3 to upload PR.**"
        )
        tab_kwargs = {"selected": 2}
    else:
        reply = "❌ No translation available. Please complete translation first."
        tab_kwargs = {}

    history.append(["Upload PR request", reply])
    return history, "", update_status(), gr.Tabs(**tab_kwargs)
180
+
181
+
182
+ def start_translation_process(force_retranslate=False):
183
  """Start the translation process for the first file"""
184
  if not state.files_to_translate:
185
  return "❌ No files available for translation.", ""
 
188
 
189
  # Call translation function (simplified for demo)
190
  try:
191
+ status, translated = translate_docs_interactive(
192
+ state.target_language, [[current_file]], state.additional_instruction, state.selected_project, force_retranslate
193
  )
194
 
195
  state.current_file_content = {"translated": translated}
 
201
  p.parent.mkdir(parents=True, exist_ok=True)
202
  p.write_text(translated, encoding="utf-8")
203
 
204
+ config = get_project_config(state.selected_project)
205
+ original_file_link = f"{config.repo_url}/blob/main/{current_file}"
 
206
  print("Compeleted translation:\n")
207
  print(translated)
208
  print("----------------------------")
209
+
210
+ # Different response format for existing vs new translation
211
+ if isinstance(status, str) and "Existing translation loaded" in status:
212
+ response = f"{status}\n**📄 Original Content Link:** {original_file_link}\n\n**🌐 Translated Content:**"
213
+ else:
214
+ response = (
215
+ f"""🔄 Translation for: `{current_file}`\n"""
216
+ f"**📄 Original Content Link:** {original_file_link}\n\n"
217
+ f"{status}\n\n"
218
+ "**🌐 Translated Content:**"
219
+ )
220
  return response, translated
221
 
222
 
 
254
  else:
255
  return """I understand you want to work on translations!
256
 
257
+ **Two ways to get started:**
258
+
259
+ 1. **🔍 Find Files first** - Use Tab 1 to discover files that need translation
260
+ 2. **🚀 Direct Translation** - Go to Tab 2 and enter a file path directly (e.g., `docs/source/en/model_doc/bert.md`)
261
+
262
+ Make sure to configure your API keys in the Configuration panel above.
263
  """
264
 
265
 
 
297
 
298
  def update_status():
299
  if state.step == "welcome":
300
+ return f"""
301
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
302
  <div><strong>🔄 Step:</strong> Welcome</div>
303
+ <div><strong>🎯 Project:</strong> {state.selected_project}</div>
304
  <div><strong>📁 Files:</strong> 0</div>
305
+ <div><strong>🌍 Language:</strong> {state.target_language}</div>
 
306
  </div>
307
  """
308
 
 
338
  status_html = f"""
339
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
340
  <div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
341
+ <div><strong>🎯 Project:</strong> {state.selected_project}</div>
342
  <div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
343
  <div><strong>🌍 Language:</strong> {state.target_language}</div>
344
  <div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
 
356
  return lang
357
 
358
 
359
def update_project_selection(project, history):
    """Store the chosen project in the shared state and acknowledge it in chat.

    Args:
        project: Project identifier to store in ``state.selected_project``.
        history: Chat history list; the acknowledgement is appended in place.

    Returns:
        ``(history, "", update_status())``.
    """
    state.selected_project = project
    history.append(
        ["Project selection", f"Selection confirmed: 🎯 Project → **{project}**"]
    )
    return history, "", update_status()
366
+
367
+
368
def update_language_selection(lang, history):
    """Store the chosen target language in the shared state and acknowledge it.

    Args:
        lang: Language code to store in ``state.target_language``.
        history: Chat history list; the acknowledgement is appended in place.

    Returns:
        ``(history, "", update_status(), lang)`` — the language is echoed
        back as the last element.
    """
    state.target_language = lang
    history.append(
        ["Language selection", f"Selection confirmed: 🌍 Language → **{lang}**"]
    )
    return history, "", update_status(), lang
375
+
376
+
377
def update_persistent_config(anthropic_key, github_token, github_owner, github_repo, reference_pr_url, history):
    """Update persistent configuration settings.

    Args:
        anthropic_key: Anthropic API key. When truthy it is stored in the
            persistent settings and exported as ``ANTHROPIC_API_KEY``.
        github_token: GitHub token. When truthy it is exported as
            ``GITHUB_TOKEN``; it is always written to the GitHub config
            (empty string when falsy).
        github_owner: GitHub owner written to the GitHub config.
        github_repo: Repository name written to the GitHub config.
        reference_pr_url: Reference PR URL. When falsy, the selected
            project's ``reference_pr_url`` is used if it can be looked up.
        history: Chat history list; a status exchange is appended in place.

    Returns:
        ``(history, "", update_status())``.
    """
    # Update API keys
    if anthropic_key:
        state.persistent_settings["anthropic_api_key"] = anthropic_key
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key

    if github_token:
        os.environ["GITHUB_TOKEN"] = github_token

    # Get default reference PR URL from project config if not provided
    if not reference_pr_url and state.selected_project:
        try:
            reference_pr_url = get_project_config(state.selected_project).reference_pr_url
        except Exception:
            # Best effort: keep going without a default URL. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

    # Save GitHub configuration to persistent settings
    state.persistent_settings["github_config"].update(
        {
            "token": github_token or "",
            "owner": github_owner or "",
            "repo_name": github_repo or "",
            "reference_pr_url": reference_pr_url or "",
        }
    )

    # Build response message based on what was configured
    response = "✅ Configuration saved!"
    if github_owner and github_repo:
        response += f" GitHub: {github_owner}/{github_repo}"
    elif anthropic_key:
        response += " Anthropic API key updated."

    history.append(["Configuration update", response])
    return history, "", update_status()
414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
def update_github_config(token, owner, repo, reference_pr_url):
    """Legacy function for backward compatibility.

    Forwards to ``update_persistent_config`` with an empty Anthropic key.
    That function requires a ``history`` argument; the previous call omitted
    it, so every call raised ``TypeError``. A throwaway history list is
    passed here.

    Returns:
        The status message string that ``update_persistent_config``
        appended for this update.
    """
    scratch_history, _, _ = update_persistent_config(
        "", token, owner, repo, reference_pr_url, []
    )
    # Each history entry is a [user_text, response] pair; return the response.
    return scratch_history[-1][1]
419
 
420
 
421
  def update_prompt_preview(language, file_path, additional_instruction):
 
431
  translation_lang = language
432
 
433
  # Get sample content (first 500 characters)
434
+ content = get_content(file_path, state.selected_project)
435
  to_translate = preprocess_content(content)
436
 
437
  # Truncate for preview
 
442
 
443
  return prompt
444
  except Exception as e:
445
+ error_str = str(e)
446
+ if "Failed to retrieve content from the URL" in error_str:
447
+ return f"❌ **File not found:** `{file_path}`\n\n💡 **Please check:**\n1. Is this file in the **{state.selected_project}** project?\n2. Use \"🔍 Find Files to Translate\" to see available files\n3. Verify the file path is correct"
448
+ return f"Error generating prompt preview: {error_str}"
449
 
450
 
451
  def send_message(message, history):
 
454
 
455
 
456
  # Button handlers with tab switching
457
def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
    """Validate the inputs, run the translation, and report the result in chat.

    Args:
        history: Chat history list, mutated in place.
        file_to_translate: Path of the file to translate.
        additional_instruction: Extra instruction stored on the shared state.
        force_retranslate: Forwarded to ``start_translation_process``; when
            truthy a "force retranslation" notice is appended first.

    Returns:
        A 6-tuple ``(history, "", status, tabs, start_button_update,
        confirm_button_update)``. On validation failure the two button
        updates are empty ``gr.update()`` calls.
    """

    def _reject(message):
        # Shared exit for validation failures: report and leave the UI as is.
        history.append(["Translation request", message])
        return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()

    # The Anthropic key comes from the persistent settings
    api_key = state.persistent_settings["anthropic_api_key"]
    if not api_key:
        return _reject("❌ Please set Anthropic API key in Configuration panel first.")

    os.environ["ANTHROPIC_API_KEY"] = api_key

    # A missing or whitespace-only path is rejected
    if not (file_to_translate and file_to_translate.strip()):
        return _reject(
            "❌ Please select a file from the dropdown or enter a file path to translate."
        )

    state.additional_instruction = additional_instruction
    state.files_to_translate = [file_to_translate]
    state.step = "translate"

    # Run the translation right away
    if force_retranslate:
        history.append(["Translation request", "🔄 **Force retranslation started...**"])
    reply, translated_text = start_translation_process(force_retranslate)
    history.append(["", reply])
    if translated_text:
        history.append(["", translated_text])

    # With a translation in state, relabel the start button and show the
    # confirm button
    has_translation = bool(state.current_file_content["translated"])
    button_label = "🔄 Retranslation" if has_translation else "🚀 Start Translation"

    return (
        history,
        "",
        update_status(),
        gr.Tabs(),
        gr.update(value=button_label),
        gr.update(visible=has_translation),
    )
490
 
491
 
492
  def approve_handler(history, owner, repo, reference_pr_url):
 
494
  global state
495
  state.step = "create_github_pr"
496
 
497
+ # Check all required GitHub configuration at once
498
+ github_config = state.persistent_settings["github_config"]
499
+ missing_config = []
500
+
501
+ if not github_config.get("token"):
502
+ missing_config.append("GitHub Token")
503
+ if not owner:
504
+ missing_config.append("GitHub Owner")
505
+ if not repo:
506
+ missing_config.append("Repository Name")
507
+
508
+ if missing_config:
509
+ config = get_project_config(state.selected_project)
510
+ repo_name = config.repo_url.split('/')[-1] # Extract repo name from URL
511
+ response = f"❌ Please set the following in Configuration panel first: {', '.join(missing_config)}\n\n💡 **Note:** GitHub Owner/Repository should be your fork of [`{repo_name}`]({config.repo_url}) (e.g., Owner: `your-username`, Repository: `{repo_name}`)"
512
  history.append(["GitHub PR creation request", response])
513
  return history, "", update_status()
514
 
515
+ # Update reference PR URL (can be set per PR)
516
+ if reference_pr_url:
517
+ state.persistent_settings["github_config"]["reference_pr_url"] = reference_pr_url
518
+
519
+ # Use persistent settings
520
+ github_config = state.persistent_settings["github_config"]
521
+
522
+ # Initialize response variable
523
+ response = ""
524
+
525
  # If reference PR is not provided, use the agent to find one
526
  if not github_config.get("reference_pr_url"):
527
  response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
 
577
  translated_content=translated_content,
578
  github_config=state.github_config,
579
  en_title=file_name,
580
+ project=state.selected_project,
581
  )
582
  response += f"\n{pr_response}"
583
  else:
 
588
 
589
 
590
def restart_handler(history):
    """Resets the workflow state but preserves persistent settings.

    Args:
        history: Current chat history. It is not read; a fresh history
            holding only the welcome message is returned.

    Returns:
        ``(new_history, "", update_status(), gr.Tabs(selected=0))``.
    """
    # Reset in place instead of rebinding the global to a new ChatState.
    # The same object stays valid for any module that imported ``state``,
    # and the persistent settings need no backup/restore.
    state.reset_transient_state()

    # Re-export the credentials from the persistent settings
    settings = state.persistent_settings
    if settings["anthropic_api_key"]:
        os.environ["ANTHROPIC_API_KEY"] = settings["anthropic_api_key"]
    github_token = settings["github_config"]["token"]
    if github_token:
        os.environ["GITHUB_TOKEN"] = github_token

    new_hist = [[None, get_welcome_message()]]
    return new_hist, "", update_status(), gr.Tabs(selected=0)
agent/toctree_handler.py CHANGED
@@ -4,9 +4,17 @@ from typing import Dict, List, Any
4
  import os
5
 
6
  class TocTreeHandler:
7
- def __init__(self):
8
- self.en_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
9
- self.ko_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/ko/_toctree.yml"
 
 
 
 
 
 
 
 
10
  self.local_docs_path = "docs/source/ko"
11
 
12
  def fetch_toctree(self, url: str) -> Dict[str, Any]:
@@ -245,7 +253,8 @@ Korean title:"""
245
  translation_result: dict,
246
  filepath: str,
247
  pr_agent,
248
- github_config: dict
 
249
  ) -> dict:
250
  """Update toctree after successful translation PR.
251
 
 
4
  import os
5
 
6
  class TocTreeHandler:
7
+ def __init__(self, project: str = "transformers"):
8
+ from translator.project_config import get_project_config
9
+ self.project = project
10
+ self.project_config = get_project_config(project)
11
+
12
+ # Extract repository path from config
13
+ repo_path = self.project_config.repo_url.replace("https://github.com/", "")
14
+
15
+ # Build project-specific URLs
16
+ self.en_toctree_url = f"https://raw.githubusercontent.com/{repo_path}/main/docs/source/en/_toctree.yml"
17
+ self.ko_toctree_url = f"https://raw.githubusercontent.com/{repo_path}/main/docs/source/ko/_toctree.yml"
18
  self.local_docs_path = "docs/source/ko"
19
 
20
  def fetch_toctree(self, url: str) -> Dict[str, Any]:
 
253
  translation_result: dict,
254
  filepath: str,
255
  pr_agent,
256
+ github_config: dict,
257
+ project: str = "transformers"
258
  ) -> dict:
259
  """Update toctree after successful translation PR.
260
 
agent/workflow.py CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
11
  llm_translate,
12
  preprocess_content,
13
  )
14
- from translator.retriever import report, get_github_issue_open_pr
15
  # GitHub PR Agent import
16
  try:
17
  from pr_generator.agent import GitHubPRAgent
@@ -26,19 +26,23 @@ from logger.github_logger import GitHubLogger
26
 
27
 
28
  def report_translation_target_files(
29
- translate_lang: str, top_k: int = 1
30
  ) -> tuple[str, list[list[str]]]:
31
  """Return the top-k files that need translation, excluding files already in progress.
32
 
33
  Args:
 
34
  translate_lang: Target language to translate
35
  top_k: Number of top-first files to return for translation. (Default 1)
36
  """
37
- # Get files in progress
38
- docs_in_progress, pr_info_list = get_github_issue_open_pr(translate_lang)
39
-
40
- # Get all available files for translation
41
- all_status_report, all_filepath_list = report(translate_lang, top_k * 2) # Get more to account for filtering
 
 
 
42
 
43
  # Filter out files that are already in progress
44
  available_files = [f for f in all_filepath_list if f not in docs_in_progress]
@@ -52,29 +56,30 @@ def report_translation_target_files(
52
  if docs_in_progress:
53
  status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
54
  for i, file in enumerate(docs_in_progress):
55
- status_report += f"\n{i+1}. `{file}`: {pr_info_list[i]}"
56
  status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
57
 
58
  return status_report, [[file] for file in filepath_list]
59
 
60
 
61
- def translate_docs(lang: str, file_path: str, additional_instruction: str = "") -> tuple[str, str]:
62
  """Translate documentation."""
63
- # Check if translation already exists
64
  translation_file_path = (
65
  Path(__file__).resolve().parent.parent
66
  / f"translation_result/{file_path}"
67
  )
68
 
69
- if translation_file_path.exists():
70
  print(f"📄 Found existing translation: {translation_file_path}")
71
  with open(translation_file_path, "r", encoding="utf-8") as f:
72
  existing_content = f.read()
73
  if existing_content.strip():
74
- return "Existing translation loaded (no tokens used). If you want to translate again, please restart the gradio app.", existing_content
 
75
 
76
  # step 1. Get content from file path
77
- content = get_content(file_path)
78
  to_translate = preprocess_content(content)
79
 
80
  # step 2. Prepare prompt with docs content
@@ -97,7 +102,7 @@ def translate_docs(lang: str, file_path: str, additional_instruction: str = "")
97
 
98
 
99
  def translate_docs_interactive(
100
- translate_lang: str, selected_files: list[list[str]], additional_instruction: str = ""
101
  ) -> tuple[str, str]:
102
  """Interactive translation function that processes files one by one.
103
 
@@ -111,14 +116,22 @@ def translate_docs_interactive(
111
  # Start with the first file
112
  current_file = file_paths[0]
113
 
114
- status = f"✅ Translation completed: `{current_file}` → `{translate_lang}`\n\n"
115
- callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction)
116
- status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
 
 
 
 
 
 
 
 
117
 
118
  print(callback_result)
119
  print(status)
120
 
121
- return translated_content
122
 
123
 
124
  def generate_github_pr(
@@ -127,6 +140,7 @@ def generate_github_pr(
127
  translated_content: str = None,
128
  github_config: dict = None,
129
  en_title: str = None,
 
130
  ) -> str:
131
  """Generate a GitHub PR for translated documentation.
132
 
@@ -144,7 +158,7 @@ def generate_github_pr(
144
  return "❌ GitHub PR Agent is not available. Please install required libraries."
145
 
146
  if not github_config:
147
- return "❌ GitHub configuration not provided."
148
 
149
  # Validate required configuration
150
  required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
@@ -153,7 +167,7 @@ def generate_github_pr(
153
  ]
154
 
155
  if missing_fields:
156
- return f"❌ Missing required configuration: {', '.join(missing_fields)}. Please provide these values."
157
 
158
  # Set token in environment for the agent.
159
  os.environ["GITHUB_TOKEN"] = github_config["token"]
@@ -166,29 +180,39 @@ def generate_github_pr(
166
  / f"translation_result/{filepath}"
167
  )
168
  if not translation_file_path.exists():
169
- return f"❌ Translation file not found: {translation_file_path}"
170
 
171
  with open(translation_file_path, "r", encoding="utf-8") as f:
172
  translated_content = f.read()
173
 
174
  if not translated_content or not translated_content.strip():
175
- return "❌ Translated content is empty."
176
 
177
  # Execute GitHub PR Agent
 
 
 
 
 
 
178
  print(f"🚀 Starting GitHub PR creation...")
179
  print(f" 📁 File: {filepath}")
180
  print(f" 🌍 Language: {target_language}")
181
  print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
182
- print(f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}")
183
-
184
- agent = GitHubPRAgent()
 
 
 
 
 
 
185
  result = agent.run_translation_pr_workflow(
186
  reference_pr_url=github_config["reference_pr_url"],
187
  target_language=target_language,
188
  filepath=filepath,
189
  translated_doc=translated_content,
190
- owner=github_config["owner"],
191
- repo_name=github_config["repo_name"],
192
  base_branch=github_config.get("base_branch", "main"),
193
  )
194
  # TEST CODE
@@ -202,9 +226,9 @@ def generate_github_pr(
202
  toctree_result = None
203
  if en_title:
204
  from agent.toctree_handler import TocTreeHandler
205
- toctree_handler = TocTreeHandler()
206
  toctree_result = toctree_handler.update_toctree_after_translation(
207
- result, filepath, agent, github_config
208
  )
209
 
210
  # Process result
@@ -248,13 +272,29 @@ def generate_github_pr(
248
  {result.get("error_details", "Unknown error")}"""
249
 
250
  else:
 
251
  return f"""❌ **GitHub PR Creation Failed**
252
 
253
  **Error Message:**
254
- {result["message"]}"""
 
 
 
 
 
 
 
 
255
 
256
  except Exception as e:
257
- error_msg = f"❌ Unexpected error occurred during PR creation: {str(e)}"
 
 
 
 
 
 
 
258
  print(error_msg)
259
  return error_msg
260
 
 
11
  llm_translate,
12
  preprocess_content,
13
  )
14
+ from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files
15
  # GitHub PR Agent import
16
  try:
17
  from pr_generator.agent import GitHubPRAgent
 
26
 
27
 
28
  def report_translation_target_files(
29
+ project: str, translate_lang: str, top_k: int = 1
30
  ) -> tuple[str, list[list[str]]]:
31
  """Return the top-k files that need translation, excluding files already in progress.
32
 
33
  Args:
34
+ project: Project to translate (e.g., "transformers", "smolagents")
35
  translate_lang: Target language to translate
36
  top_k: Number of top-first files to return for translation. (Default 1)
37
  """
38
+ # Get repo files once to avoid duplicate API calls
39
+ all_repo_files = get_github_repo_files(project)
40
+
41
+ # Get all available files for translation using the file list
42
+ all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2, all_repo_files) # Get more to account for filtering
43
+
44
+ # Get files in progress using the same file list
45
+ docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, all_repo_files)
46
 
47
  # Filter out files that are already in progress
48
  available_files = [f for f in all_filepath_list if f not in docs_in_progress]
 
56
  if docs_in_progress:
57
  status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
58
  for i, file in enumerate(docs_in_progress):
59
+ status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
60
  status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
61
 
62
  return status_report, [[file] for file in filepath_list]
63
 
64
 
65
+ def translate_docs(lang: str, file_path: str, additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False) -> tuple[str, str]:
66
  """Translate documentation."""
67
+ # Check if translation already exists (unless force retranslate is enabled)
68
  translation_file_path = (
69
  Path(__file__).resolve().parent.parent
70
  / f"translation_result/{file_path}"
71
  )
72
 
73
+ if not force_retranslate and translation_file_path.exists():
74
  print(f"📄 Found existing translation: {translation_file_path}")
75
  with open(translation_file_path, "r", encoding="utf-8") as f:
76
  existing_content = f.read()
77
  if existing_content.strip():
78
+ existing_msg = f"♻️ **Existing translation loaded** (no tokens used)\n📁 **File:** `{file_path}`\n📅 **Loaded from:** `{translation_file_path}`\n💡 **To retranslate:** Check 'Force Retranslate' option."
79
+ return existing_msg, existing_content
80
 
81
  # step 1. Get content from file path
82
+ content = get_content(file_path, project)
83
  to_translate = preprocess_content(content)
84
 
85
  # step 2. Prepare prompt with docs content
 
102
 
103
 
104
  def translate_docs_interactive(
105
+ translate_lang: str, selected_files: list[list[str]], additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False
106
  ) -> tuple[str, str]:
107
  """Interactive translation function that processes files one by one.
108
 
 
116
  # Start with the first file
117
  current_file = file_paths[0]
118
 
119
+ callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction, project, force_retranslate)
120
+
121
+ # Check if existing translation was loaded
122
+ if isinstance(callback_result, str) and "Existing translation loaded" in callback_result:
123
+ status = callback_result # Use the existing translation message
124
+ else:
125
+ if force_retranslate:
126
+ status = f"🔄 **Force Retranslation completed**: `{current_file}` → `{translate_lang}`\n\n"
127
+ else:
128
+ status = f"✅ Translation completed: `{current_file}` → `{translate_lang}`\n\n"
129
+ status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
130
 
131
  print(callback_result)
132
  print(status)
133
 
134
+ return status, translated_content
135
 
136
 
137
  def generate_github_pr(
 
140
  translated_content: str = None,
141
  github_config: dict = None,
142
  en_title: str = None,
143
+ project: str = "transformers",
144
  ) -> str:
145
  """Generate a GitHub PR for translated documentation.
146
 
 
158
  return "❌ GitHub PR Agent is not available. Please install required libraries."
159
 
160
  if not github_config:
161
+ return "❌ GitHub configuration not provided. Please set up GitHub token, owner, and repository in Configuration panel."
162
 
163
  # Validate required configuration
164
  required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
 
167
  ]
168
 
169
  if missing_fields:
170
+ return f"❌ Missing required GitHub configuration: {', '.join(missing_fields)}\n\n💡 Go to Configuration panel and set:\n" + "\n".join([f" • {field}" for field in missing_fields])
171
 
172
  # Set token in environment for the agent.
173
  os.environ["GITHUB_TOKEN"] = github_config["token"]
 
180
  / f"translation_result/{filepath}"
181
  )
182
  if not translation_file_path.exists():
183
+ return f"❌ Translation file not found: {translation_file_path}\n\n💡 Please complete translation first in Tab 2 for file: {filepath}"
184
 
185
  with open(translation_file_path, "r", encoding="utf-8") as f:
186
  translated_content = f.read()
187
 
188
  if not translated_content or not translated_content.strip():
189
+ return f"❌ Translated content is empty for file: {filepath}\n\n💡 Please complete translation first in Tab 2."
190
 
191
  # Execute GitHub PR Agent
192
+ # Get base repository from project config
193
+ from translator.project_config import get_project_config
194
+ project_config = get_project_config(project)
195
+ base_repo_path = project_config.repo_url.replace("https://github.com/", "")
196
+ base_owner, base_repo = base_repo_path.split("/")
197
+
198
  print(f"🚀 Starting GitHub PR creation...")
199
  print(f" 📁 File: {filepath}")
200
  print(f" 🌍 Language: {target_language}")
201
  print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
202
+ print(f" 🏠 User Fork: {github_config['owner']}/{github_config['repo_name']}")
203
+ print(f" 🎯 Base Repository: {base_owner}/{base_repo}")
204
+
205
+ agent = GitHubPRAgent(
206
+ user_owner=github_config["owner"],
207
+ user_repo=github_config["repo_name"],
208
+ base_owner=base_owner,
209
+ base_repo=base_repo,
210
+ )
211
  result = agent.run_translation_pr_workflow(
212
  reference_pr_url=github_config["reference_pr_url"],
213
  target_language=target_language,
214
  filepath=filepath,
215
  translated_doc=translated_content,
 
 
216
  base_branch=github_config.get("base_branch", "main"),
217
  )
218
  # TEST CODE
 
226
  toctree_result = None
227
  if en_title:
228
  from agent.toctree_handler import TocTreeHandler
229
+ toctree_handler = TocTreeHandler(project)
230
  toctree_result = toctree_handler.update_toctree_after_translation(
231
+ result, filepath, agent, github_config, project
232
  )
233
 
234
  # Process result
 
272
  {result.get("error_details", "Unknown error")}"""
273
 
274
  else:
275
+ error_details = result.get("error_details", "No additional details")
276
  return f"""❌ **GitHub PR Creation Failed**
277
 
278
  **Error Message:**
279
+ {result["message"]}
280
+
281
+ **Error Details:**
282
+ {error_details}
283
+
284
+ 💡 **Common Solutions:**
285
+ 1. **Project Mismatch**: Selected project '{project}' but fork is '{github_config.get('repo_name', 'REPO')}' - ensure they match
286
+ 2. Check if your GitHub fork exists: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
287
+ 3. Verify GitHub token has write access to your fork"""
288
 
289
  except Exception as e:
290
+ error_msg = f"""**Unexpected Error During PR Creation**
291
+
292
+ **Error:** {str(e)}
293
+
294
+ **Configuration:**
295
+ • Project: {project}
296
+ • File: {filepath}
297
+ • Target: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')} → {base_owner if 'base_owner' in locals() else 'BASE'}/{base_repo if 'base_repo' in locals() else 'REPO'}"""
298
  print(error_msg)
299
  return error_msg
300
 
app.py CHANGED
@@ -8,17 +8,23 @@ from dotenv import load_dotenv
8
 
9
  from agent.handler import (
10
  approve_handler,
 
 
11
  get_welcome_message,
12
  process_file_search_handler,
13
  restart_handler,
14
  send_message,
15
  start_translate_handler,
16
  sync_language_displays,
 
 
17
  update_prompt_preview,
18
  update_status,
19
  update_github_config,
 
20
  )
21
  from translator.model import Languages
 
22
 
23
  load_dotenv()
24
 
@@ -111,12 +117,54 @@ with gr.Blocks(
111
  gr.Markdown("### 🌐 Hugging Face i18n Agent")
112
 
113
  chatbot = gr.Chatbot(
114
- value=[[None, get_welcome_message()]], scale=1, height=585,
115
  show_copy_button=True
116
  )
 
 
 
 
 
 
 
 
 
 
117
 
118
  # Controller interface
119
  with gr.Column(scale=2):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # Quick Controller
121
  with gr.Column(elem_classes=["control-panel"]):
122
  gr.Markdown("### 🛠️ Quick Controls")
@@ -125,6 +173,11 @@ with gr.Blocks(
125
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
126
  with gr.TabItem("1. Find Files", id=0):
127
  with gr.Group():
 
 
 
 
 
128
  lang_dropdown = gr.Radio(
129
  choices=[language.value for language in Languages],
130
  label="🌍 Translate To",
@@ -139,6 +192,11 @@ with gr.Blocks(
139
  "🔍 Find Files to Translate",
140
  elem_classes="action-button",
141
  )
 
 
 
 
 
142
 
143
  with gr.TabItem("2. Translate", id=1):
144
  with gr.Group():
@@ -159,19 +217,19 @@ with gr.Blocks(
159
  value="ko",
160
  interactive=False,
161
  )
162
- anthropic_key = gr.Textbox(
163
- label="🔑 Anthropic API key for translation generation",
164
- type="password",
165
- )
166
  additional_instruction = gr.Textbox(
167
  label="📝 Additional instructions (Optional - e.g., custom glossary)",
168
  placeholder="Example: Translate 'model' as '모델' consistently",
169
  lines=2,
170
  )
171
 
172
- with gr.Accordion("🔍 Preview Prompt", open=False):
 
 
 
 
 
173
  prompt_preview = gr.Textbox(
174
- label="Current Translation Prompt",
175
  lines=8,
176
  interactive=False,
177
  placeholder="Select a file and language to see the prompt preview...",
@@ -181,29 +239,18 @@ with gr.Blocks(
181
  start_translate_btn = gr.Button(
182
  "🚀 Start Translation", elem_classes="action-button"
183
  )
 
 
 
 
 
 
184
 
185
  with gr.TabItem("3. Upload PR", id=2):
186
  with gr.Group():
187
- github_token = gr.Textbox(
188
- label="🔑 GitHub Token",
189
- type="password",
190
- placeholder="ghp_xxxxxxxxxxxxxxxxxxxx",
191
- )
192
- github_owner = gr.Textbox(
193
- label="👤 GitHub Owner/Username",
194
- placeholder="your-username",
195
- )
196
- github_repo = gr.Textbox(
197
- label="📁 Repository Name",
198
- placeholder="your-repository",
199
- )
200
  reference_pr_url = gr.Textbox(
201
- label="🔗 Reference PR URL (Optional - Agent will find one if not provided)",
202
- placeholder="reference PR URL",
203
- )
204
-
205
- save_config_btn = gr.Button(
206
- "💾 Save GitHub Config", elem_classes="action-button"
207
  )
208
  approve_btn = gr.Button(
209
  "✅ Generate GitHub PR", elem_classes="action-button"
@@ -212,29 +259,38 @@ with gr.Blocks(
212
  "🔄 Restart Translation", elem_classes="action-button"
213
  )
214
 
215
- # Chat Controller
216
- with gr.Column(elem_classes=["control-panel"]):
217
- gr.Markdown("### 💬 Chat with agent (Only simple chat is available)")
218
- msg_input = gr.Textbox(
219
- placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
220
- container=False,
221
- scale=4,
222
- )
223
- send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
224
-
225
  # Event Handlers
226
 
227
  find_btn.click(
228
  fn=process_file_search_handler,
229
- inputs=[lang_dropdown, k_input, chatbot],
230
  outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
231
  )
 
 
 
 
 
 
232
 
233
- # Sync language across tabs
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  lang_dropdown.change(
235
- fn=sync_language_displays,
236
- inputs=[lang_dropdown],
237
- outputs=[translate_lang_display],
238
  )
239
 
240
  #
@@ -247,20 +303,26 @@ with gr.Blocks(
247
  # Button event handlers
248
  start_translate_btn.click(
249
  fn=start_translate_handler,
250
- inputs=[chatbot, anthropic_key, file_to_translate_input, additional_instruction],
 
 
 
 
 
 
251
  outputs=[chatbot, msg_input, status_display, control_tabs],
252
  )
253
 
254
- # GitHub Config Save
255
  save_config_btn.click(
256
- fn=update_github_config,
257
- inputs=[github_token, github_owner, github_repo, reference_pr_url],
258
- outputs=[msg_input],
259
  )
260
 
261
  approve_btn.click(
262
  fn=approve_handler,
263
- inputs=[chatbot, github_owner, github_repo, reference_pr_url],
264
  outputs=[chatbot, msg_input, status_display],
265
  )
266
 
 
8
 
9
  from agent.handler import (
10
  approve_handler,
11
+ confirm_and_go_translate_handler,
12
+ confirm_translation_and_go_upload_handler,
13
  get_welcome_message,
14
  process_file_search_handler,
15
  restart_handler,
16
  send_message,
17
  start_translate_handler,
18
  sync_language_displays,
19
+ update_language_selection,
20
+ update_project_selection,
21
  update_prompt_preview,
22
  update_status,
23
  update_github_config,
24
+ update_persistent_config,
25
  )
26
  from translator.model import Languages
27
+ from translator.project_config import get_available_projects
28
 
29
  load_dotenv()
30
 
 
117
  gr.Markdown("### 🌐 Hugging Face i18n Agent")
118
 
119
  chatbot = gr.Chatbot(
120
+ value=[[None, get_welcome_message()]], scale=1, height=525,
121
  show_copy_button=True
122
  )
123
+
124
+ # Chat input directly under main chat
125
+ gr.Markdown("### 💬 Chat with agent")
126
+ with gr.Row():
127
+ msg_input = gr.Textbox(
128
+ placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
129
+ container=False,
130
+ scale=4,
131
+ )
132
+ send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
133
 
134
  # Controller interface
135
  with gr.Column(scale=2):
136
+ # Configuration Panel
137
+ with gr.Column(elem_classes=["control-panel"]):
138
+ gr.Markdown("### ⚙️ Configuration")
139
+
140
+ with gr.Accordion("🔧 API & GitHub Settings", open=True):
141
+ config_anthropic_key = gr.Textbox(
142
+ label="🔑 Anthropic API Key",
143
+ type="password",
144
+ placeholder="sk-ant-...",
145
+ )
146
+ config_github_token = gr.Textbox(
147
+ label="🔑 GitHub Token (Required for PR, Optional for file search)",
148
+ type="password",
149
+ placeholder="ghp_...",
150
+ )
151
+
152
+ with gr.Row():
153
+ config_github_owner = gr.Textbox(
154
+ label="👤 GitHub Owner",
155
+ placeholder="your-username",
156
+ scale=1,
157
+ )
158
+ config_github_repo = gr.Textbox(
159
+ label="📁 Repository Name",
160
+ placeholder="your-repository",
161
+ scale=1,
162
+ )
163
+
164
+ save_config_btn = gr.Button(
165
+ "💾 Save Configuration", elem_classes="action-button"
166
+ )
167
+
168
  # Quick Controller
169
  with gr.Column(elem_classes=["control-panel"]):
170
  gr.Markdown("### 🛠️ Quick Controls")
 
173
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
174
  with gr.TabItem("1. Find Files", id=0):
175
  with gr.Group():
176
+ project_dropdown = gr.Radio(
177
+ choices=get_available_projects(),
178
+ label="🎯 Select Project",
179
+ value="transformers",
180
+ )
181
  lang_dropdown = gr.Radio(
182
  choices=[language.value for language in Languages],
183
  label="🌍 Translate To",
 
192
  "🔍 Find Files to Translate",
193
  elem_classes="action-button",
194
  )
195
+
196
+ confirm_go_btn = gr.Button(
197
+ "✅ Confirm Selection & Go to Translate",
198
+ elem_classes="action-button",
199
+ )
200
 
201
  with gr.TabItem("2. Translate", id=1):
202
  with gr.Group():
 
217
  value="ko",
218
  interactive=False,
219
  )
 
 
 
 
220
  additional_instruction = gr.Textbox(
221
  label="📝 Additional instructions (Optional - e.g., custom glossary)",
222
  placeholder="Example: Translate 'model' as '모델' consistently",
223
  lines=2,
224
  )
225
 
226
+ force_retranslate = gr.Checkbox(
227
+ label="🔄 Force Retranslate (ignore existing translations)",
228
+ value=False,
229
+ )
230
+
231
+ with gr.Accordion("🔍 Preview Translation Prompt", open=False):
232
  prompt_preview = gr.Textbox(
 
233
  lines=8,
234
  interactive=False,
235
  placeholder="Select a file and language to see the prompt preview...",
 
239
  start_translate_btn = gr.Button(
240
  "🚀 Start Translation", elem_classes="action-button"
241
  )
242
+
243
+ confirm_upload_btn = gr.Button(
244
+ "✅ Confirm Translation & Upload PR",
245
+ elem_classes="action-button",
246
+ visible=False,
247
+ )
248
 
249
  with gr.TabItem("3. Upload PR", id=2):
250
  with gr.Group():
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  reference_pr_url = gr.Textbox(
252
+ label="🔗 Reference PR URL (Optional)",
253
+ placeholder="Auto-filled based on project selection",
 
 
 
 
254
  )
255
  approve_btn = gr.Button(
256
  "✅ Generate GitHub PR", elem_classes="action-button"
 
259
  "🔄 Restart Translation", elem_classes="action-button"
260
  )
261
 
 
 
 
 
 
 
 
 
 
 
262
  # Event Handlers
263
 
264
  find_btn.click(
265
  fn=process_file_search_handler,
266
+ inputs=[project_dropdown, lang_dropdown, k_input, chatbot],
267
  outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
268
  )
269
+
270
+ confirm_go_btn.click(
271
+ fn=confirm_and_go_translate_handler,
272
+ inputs=[chatbot],
273
+ outputs=[chatbot, msg_input, status_display, control_tabs],
274
+ )
275
 
276
+ # Auto-save selections to state and update prompt preview
277
+ project_dropdown.change(
278
+ fn=update_project_selection,
279
+ inputs=[project_dropdown, chatbot],
280
+ outputs=[chatbot, msg_input, status_display],
281
+ )
282
+
283
+ # Update prompt preview when project changes
284
+ project_dropdown.change(
285
+ fn=update_prompt_preview,
286
+ inputs=[translate_lang_display, file_to_translate_input, additional_instruction],
287
+ outputs=[prompt_preview],
288
+ )
289
+
290
  lang_dropdown.change(
291
+ fn=update_language_selection,
292
+ inputs=[lang_dropdown, chatbot],
293
+ outputs=[chatbot, msg_input, status_display, translate_lang_display],
294
  )
295
 
296
  #
 
303
  # Button event handlers
304
  start_translate_btn.click(
305
  fn=start_translate_handler,
306
+ inputs=[chatbot, file_to_translate_input, additional_instruction, force_retranslate],
307
+ outputs=[chatbot, msg_input, status_display, control_tabs, start_translate_btn, confirm_upload_btn],
308
+ )
309
+
310
+ confirm_upload_btn.click(
311
+ fn=confirm_translation_and_go_upload_handler,
312
+ inputs=[chatbot],
313
  outputs=[chatbot, msg_input, status_display, control_tabs],
314
  )
315
 
316
+ # Configuration Save
317
  save_config_btn.click(
318
+ fn=update_persistent_config,
319
+ inputs=[config_anthropic_key, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
320
+ outputs=[chatbot, msg_input, status_display],
321
  )
322
 
323
  approve_btn.click(
324
  fn=approve_handler,
325
+ inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
326
  outputs=[chatbot, msg_input, status_display],
327
  )
328
 
pr_generator/agent.py CHANGED
@@ -34,9 +34,13 @@ except ImportError as e:
34
  class GitHubPRAgent:
35
  """Agent class for GitHub PR creation"""
36
 
37
- def __init__(self):
38
  self._github_client = None
39
  self._llm = None
 
 
 
 
40
 
41
  @property
42
  def github_client(self) -> Optional[Github]:
@@ -433,8 +437,6 @@ Please return only the commit message. No other explanation is needed."""
433
  target_language: str,
434
  filepath: str,
435
  translated_doc: str,
436
- owner: str,
437
- repo_name: str,
438
  base_branch: str = "main",
439
  ) -> Dict[str, Any]:
440
  """Execute translation document PR creation workflow."""
@@ -458,19 +460,20 @@ Please return only the commit message. No other explanation is needed."""
458
  )
459
 
460
  # 3. Get main branch SHA from upstream and create branch in fork
461
- upstream_repo = self.github_client.get_repo(f"huggingface/{repo_name}")
462
  main_branch = upstream_repo.get_branch(base_branch)
463
  main_sha = main_branch.commit.sha
464
 
465
  print(f"🌿 Creating branch: {branch_name} in fork repository")
466
- branch_result = self.create_branch(owner, repo_name, branch_name, main_sha)
467
 
468
  # Check branch creation result
469
  if branch_result.startswith("ERROR"):
470
  return {
471
  "status": "error",
472
- "message": f"Branch creation failed: {branch_result}",
473
  "branch": branch_name,
 
474
  }
475
  elif branch_result.startswith("WARNING"):
476
  print(f"⚠️ {branch_result}")
@@ -489,8 +492,8 @@ Please return only the commit message. No other explanation is needed."""
489
 
490
  print(f"📄 Saving file: {target_filepath}")
491
  file_result = self.create_or_update_file(
492
- owner,
493
- repo_name,
494
  target_filepath,
495
  commit_message,
496
  translated_doc,
@@ -500,9 +503,10 @@ Please return only the commit message. No other explanation is needed."""
500
  if not file_result.startswith("SUCCESS"):
501
  return {
502
  "status": "error",
503
- "message": "An issue occurred while saving the file.",
504
  "branch": branch_name,
505
  "file_path": target_filepath,
 
506
  }
507
 
508
  print(f"{file_result}")
@@ -518,11 +522,11 @@ Please return only the commit message. No other explanation is needed."""
518
  )
519
 
520
  print(f"🔄 Creating PR: {pr_title}")
521
- print(f" Head: {owner}:{branch_name} → Base: huggingface:{base_branch}")
522
 
523
  # Create PR from fork to upstream repository
524
  pr_result = self.create_pull_request(
525
- "huggingface", "transformers", pr_title, f"{owner}:{branch_name}", base_branch, pr_body, draft=True
526
  )
527
 
528
  if pr_result.startswith("ERROR"):
@@ -554,7 +558,8 @@ Please return only the commit message. No other explanation is needed."""
554
  except Exception as e:
555
  return {
556
  "status": "error",
557
- "message": f"Error occurred during workflow execution: {str(e)}",
 
558
  }
559
 
560
 
 
34
  class GitHubPRAgent:
35
  """Agent class for GitHub PR creation"""
36
 
37
+ def __init__(self, user_owner: str = None, user_repo: str = None, base_owner: str = None, base_repo: str = None):
38
  self._github_client = None
39
  self._llm = None
40
+ self.user_owner = user_owner
41
+ self.user_repo = user_repo
42
+ self.base_owner = base_owner
43
+ self.base_repo = base_repo
44
 
45
  @property
46
  def github_client(self) -> Optional[Github]:
 
437
  target_language: str,
438
  filepath: str,
439
  translated_doc: str,
 
 
440
  base_branch: str = "main",
441
  ) -> Dict[str, Any]:
442
  """Execute translation document PR creation workflow."""
 
460
  )
461
 
462
  # 3. Get main branch SHA from upstream and create branch in fork
463
+ upstream_repo = self.github_client.get_repo(f"{self.base_owner}/{self.base_repo}")
464
  main_branch = upstream_repo.get_branch(base_branch)
465
  main_sha = main_branch.commit.sha
466
 
467
  print(f"🌿 Creating branch: {branch_name} in fork repository")
468
+ branch_result = self.create_branch(self.user_owner, self.user_repo, branch_name, main_sha)
469
 
470
  # Check branch creation result
471
  if branch_result.startswith("ERROR"):
472
  return {
473
  "status": "error",
474
+ "message": f"Branch creation failed: {branch_result}\n\nTarget: {self.user_owner}/{self.user_repo}\nBranch: {branch_name}\nBase SHA: {main_sha[:8]}",
475
  "branch": branch_name,
476
+ "error_details": branch_result,
477
  }
478
  elif branch_result.startswith("WARNING"):
479
  print(f"⚠️ {branch_result}")
 
492
 
493
  print(f"📄 Saving file: {target_filepath}")
494
  file_result = self.create_or_update_file(
495
+ self.user_owner,
496
+ self.user_repo,
497
  target_filepath,
498
  commit_message,
499
  translated_doc,
 
503
  if not file_result.startswith("SUCCESS"):
504
  return {
505
  "status": "error",
506
+ "message": f"File save failed: {file_result}\n\n🎯 Target: {self.user_owner}/{self.user_repo} (expected: {target_language} fork of {self.base_owner}/{self.base_repo})\n🌿 Branch: {branch_name}\n📁 File: {target_filepath}",
507
  "branch": branch_name,
508
  "file_path": target_filepath,
509
+ "error_details": file_result,
510
  }
511
 
512
  print(f"{file_result}")
 
522
  )
523
 
524
  print(f"🔄 Creating PR: {pr_title}")
525
+ print(f" Head: {self.user_owner}:{branch_name} → Base: {self.base_owner}:{base_branch}")
526
 
527
  # Create PR from fork to upstream repository
528
  pr_result = self.create_pull_request(
529
+ self.base_owner, self.base_repo, pr_title, f"{self.user_owner}:{branch_name}", base_branch, pr_body, draft=True
530
  )
531
 
532
  if pr_result.startswith("ERROR"):
 
558
  except Exception as e:
559
  return {
560
  "status": "error",
561
+ "message": f"Workflow execution failed: {str(e)}\n\nConfig: {self.user_owner}/{self.user_repo} → {self.base_owner}/{self.base_repo}\nFile: {filepath if 'filepath' in locals() else 'Unknown'}",
562
+ "error_details": str(e),
563
  }
564
 
565
 
translator/content.py CHANGED
@@ -6,15 +6,18 @@ from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
7
 
8
  from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
 
9
 
10
 
11
- def get_content(filepath: str) -> str:
12
  if filepath == "":
13
  raise ValueError("No files selected for translation.")
14
 
15
- url = string.Template(
16
- "https://raw.githubusercontent.com/huggingface/" "transformers/main/$filepath"
17
- ).safe_substitute(filepath=filepath)
 
 
18
  response = requests.get(url)
19
  if response.status_code == 200:
20
  content = response.text
@@ -170,4 +173,4 @@ def llm_translate(to_translate: str) -> tuple[str, str]:
170
  )
171
  ai_message = model.invoke(to_translate)
172
  print("cb:", cb)
173
- return cb, ai_message.content
 
6
  from langchain_anthropic import ChatAnthropic
7
 
8
  from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
9
+ from translator.project_config import get_project_config
10
 
11
 
12
+ def get_content(filepath: str, project: str = "transformers") -> str:
13
  if filepath == "":
14
  raise ValueError("No files selected for translation.")
15
 
16
+ config = get_project_config(project)
17
+ # Extract repo path from repo_url (e.g., "huggingface/transformers")
18
+ repo_path = config.repo_url.replace("https://github.com/", "")
19
+
20
+ url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}"
21
  response = requests.get(url)
22
  if response.status_code == 200:
23
  content = response.text
 
173
  )
174
  ai_message = model.invoke(to_translate)
175
  print("cb:", cb)
176
+ return str(cb), ai_message.content
translator/project_config.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Project configuration for different HuggingFace repositories."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+
7
@dataclass
class ProjectConfig:
    """Settings describing one HuggingFace project that can be translated.

    Field order is part of the constructor signature and must not change.
    """

    # Human-readable project name (e.g. "Transformers").
    name: str
    # Browser URL of the upstream GitHub repository.
    repo_url: str
    # GitHub API URL that returns the repository's recursive file tree.
    api_url: str
    # Root directory of the documentation inside the repository.
    docs_path: str
    # Maps a language code to the id of its GitHub tracking issue.
    github_issues: Dict[str, str]
    # Default pull request used as a reference when generating a new PR.
    reference_pr_url: str
16
+
17
+
18
# Registry of supported projects, keyed by the identifier used across the app.
_TRANSFORMERS_CONFIG = ProjectConfig(
    name="Transformers",
    repo_url="https://github.com/huggingface/transformers",
    api_url="https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1",
    docs_path="docs/source",
    github_issues={"ko": "20179"},
    reference_pr_url="https://github.com/huggingface/transformers/pull/24968",
)

_SMOLAGENTS_CONFIG = ProjectConfig(
    name="SmolAgents",
    repo_url="https://github.com/huggingface/smolagents",
    api_url="https://api.github.com/repos/huggingface/smolagents/git/trees/main?recursive=1",
    docs_path="docs/source",
    # NOTE(review): same issue id as the transformers entry; marked as a
    # placeholder to be filled in once a smolagents issue is created.
    github_issues={"ko": "20179"},
    # NOTE(review): marked as "to be filled with actual PR URL" - confirm.
    reference_pr_url="https://github.com/huggingface/smolagents/pull/1581",
)

PROJECTS = {
    "transformers": _TRANSFORMERS_CONFIG,
    "smolagents": _SMOLAGENTS_CONFIG,
}
37
+
38
+
39
def get_project_config(project_key: str) -> ProjectConfig:
    """Return the configuration registered under ``project_key``.

    Args:
        project_key: Key into ``PROJECTS`` (e.g. "transformers").

    Raises:
        ValueError: If ``project_key`` is not registered; the message lists
            the available keys.
    """
    if project_key in PROJECTS:
        return PROJECTS[project_key]

    known_keys = list(PROJECTS.keys())
    raise ValueError(f"Unknown project: {project_key}. Available: {known_keys}")
44
+
45
+
46
def get_available_projects() -> list[str]:
    """Return the keys of every registered project, in registration order."""
    return [*PROJECTS]
translator/retriever.py CHANGED
@@ -5,15 +5,26 @@ from pathlib import Path
5
  import requests
6
 
7
  from .model import Languages, Summary, TranslationDoc
 
8
 
9
- URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
10
 
11
-
12
- def get_github_repo_files():
13
  """
14
  Get github repo files
15
  """
16
- response = requests.get(URL)
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  data = response.json()
19
  all_items = data.get("tree", [])
@@ -26,30 +37,42 @@ def get_github_repo_files():
26
  return file_paths
27
 
28
 
29
- def get_github_issue_open_pr(lang: str = "ko"):
30
  """
31
- Get open PR in the github issue, filtered by title starting with '🌐 [i18n-KO]'.
32
  """
33
- if lang == "ko":
34
- issue_id = "20179"
35
- else:
36
- raise ValueError(
37
- "No Github issue has been registered to the server. (Only 'ko' is supported - please contact us to support this.)"
38
- )
39
 
 
 
 
 
40
  headers = {
41
  "Accept": "application/vnd.github+json",
42
  }
43
 
 
 
 
 
 
44
  all_open_prs = []
45
  page = 1
46
  per_page = 100 # Maximum allowed by GitHub API
47
 
48
  while True:
49
- url = f"https://api.github.com/repos/huggingface/transformers/pulls?state=open&page={page}&per_page={per_page}"
 
50
  response = requests.get(url, headers=headers)
51
 
52
- if response.status_code != 200:
 
 
53
  raise Exception(f"GitHub API error: {response.status_code} {response.text}")
54
 
55
  page_prs = response.json()
@@ -63,19 +86,61 @@ def get_github_issue_open_pr(lang: str = "ko"):
63
  if len(page_prs) < per_page:
64
  break
65
 
66
- filtered_prs = [pr for pr in all_open_prs if pr["title"].startswith("🌐 [i18n-KO]")]
67
 
68
- pattern = re.compile(r"`([^`]+\.md)`")
 
69
 
70
- filenames = [
71
- "docs/source/en/" + match.group(1)
72
- for pr in filtered_prs
73
- if (match := pattern.search(pr["title"]))
74
- ]
75
- pr_info_list = [
76
- f"https://github.com/huggingface/transformers/pull/{pr['url'].rstrip('/').split('/')[-1]}"
77
- for pr in filtered_prs
78
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return filenames, pr_info_list
80
 
81
 
@@ -99,11 +164,12 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
99
  return report, first_missing_docs
100
 
101
 
102
- def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
103
  """
104
  Generate a report for the translated docs
105
  """
106
- docs_file = get_github_repo_files()
 
107
 
108
  base_docs_path = Path("docs/source")
109
  en_docs_path = Path("docs/source/en")
 
5
  import requests
6
 
7
  from .model import Languages, Summary, TranslationDoc
8
+ from .project_config import get_project_config
9
 
 
10
 
11
+ def get_github_repo_files(project: str = "transformers"):
 
12
  """
13
  Get github repo files
14
  """
15
+ config = get_project_config(project)
16
+
17
+ # Add GitHub token if available to avoid rate limiting (optional)
18
+ headers = {}
19
+ github_token = os.environ.get("GITHUB_TOKEN")
20
+ if github_token:
21
+ headers["Authorization"] = f"token {github_token}"
22
+
23
+ response = requests.get(config.api_url, headers=headers)
24
+
25
+ # Handle rate limit with helpful message
26
+ if response.status_code == 403 and "rate limit" in response.text.lower():
27
+ raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
28
 
29
  data = response.json()
30
  all_items = data.get("tree", [])
 
37
  return file_paths
38
 
39
 
40
+ def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko", all_files: list = None):
41
  """
42
+ Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
43
  """
44
+ config = get_project_config(project)
45
+ issue_id = config.github_issues.get(lang)
46
+
47
+ # For projects without GitHub issue tracking, still search for PRs
48
+ if not issue_id:
49
+ raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
50
 
51
+ # Require all_files parameter
52
+ if all_files is None:
53
+ raise ValueError("Repository file list must be provided")
54
+
55
  headers = {
56
  "Accept": "application/vnd.github+json",
57
  }
58
 
59
+ # Add GitHub token if available to avoid rate limiting (optional)
60
+ github_token = os.environ.get("GITHUB_TOKEN")
61
+ if github_token:
62
+ headers["Authorization"] = f"token {github_token}"
63
+
64
  all_open_prs = []
65
  page = 1
66
  per_page = 100 # Maximum allowed by GitHub API
67
 
68
  while True:
69
+ repo_path = config.repo_url.replace("https://github.com/", "")
70
+ url = f"https://api.github.com/repos/{repo_path}/pulls?state=open&page={page}&per_page={per_page}"
71
  response = requests.get(url, headers=headers)
72
 
73
+ if response.status_code == 403 and "rate limit" in response.text.lower():
74
+ raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
75
+ elif response.status_code != 200:
76
  raise Exception(f"GitHub API error: {response.status_code} {response.text}")
77
 
78
  page_prs = response.json()
 
86
  if len(page_prs) < per_page:
87
  break
88
 
89
+ filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
90
 
91
+ # Pattern to match filenames after "Translated" keyword
92
+ pattern = re.compile(r"Translated\s+(?:`([^`]+)`|(\S+))\s+to")
93
 
94
+ def find_original_file_path(filename_from_title, all_files):
95
+ """Find the exact file path from repo files by matching filename"""
96
+ if not filename_from_title:
97
+ return None
98
+
99
+ # Remove .md extension for matching
100
+ base_name = filename_from_title.replace('.md', '')
101
+
102
+ # Look for exact matches in repo files
103
+ for file_path in all_files:
104
+ if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
105
+ file_base = file_path.split("/")[-1].replace('.md', '')
106
+ if file_base == base_name:
107
+ return file_path
108
+
109
+ # If no exact match, fallback to simple path
110
+ return f"docs/source/en/{filename_from_title}"
111
+
112
+ filenames = []
113
+ pr_info_list = []
114
+
115
+ for pr in filtered_prs:
116
+ match = pattern.search(pr["title"])
117
+ if match:
118
+ # Use group 1 (with backticks) or group 2 (without backticks)
119
+ filename = match.group(1) or match.group(2)
120
+ # Add .md extension if not present
121
+ if not filename.endswith('.md'):
122
+ filename += '.md'
123
+
124
+ # Find the correct file path by matching filename
125
+ correct_path = None
126
+ if filename:
127
+ # Remove .md extension for matching
128
+ base_name = filename.replace('.md', '')
129
+
130
+ # Look for exact matches in repo files
131
+ for file_path in all_files:
132
+ if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
133
+ file_base = file_path.split("/")[-1].replace('.md', '')
134
+ if file_base == base_name:
135
+ correct_path = file_path
136
+ break
137
+
138
+ # If no exact match, fallback to simple path
139
+ if not correct_path:
140
+ correct_path = f"docs/source/en/{filename}"
141
+ if correct_path:
142
+ filenames.append(correct_path)
143
+ pr_info_list.append(f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}")
144
  return filenames, pr_info_list
145
 
146
 
 
164
  return report, first_missing_docs
165
 
166
 
167
+ def report(project: str, target_lang: str, top_k: int = 1, docs_file: list = None) -> tuple[str, list[str]]:
168
  """
169
  Generate a report for the translated docs
170
  """
171
+ if docs_file is None:
172
+ raise ValueError("Repository file list must be provided")
173
 
174
  base_docs_path = Path("docs/source")
175
  en_docs_path = Path("docs/source/en")