wony617 committed on
Commit
1b1c0d8
·
1 Parent(s): a487d1c

Fix the docs path of the open PR list

Browse files
Files changed (2) hide show
  1. agent/workflow.py +9 -6
  2. translator/retriever.py +54 -10
agent/workflow.py CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
11
  llm_translate,
12
  preprocess_content,
13
  )
14
- from translator.retriever import report, get_github_issue_open_pr
15
  # GitHub PR Agent import
16
  try:
17
  from pr_generator.agent import GitHubPRAgent
@@ -35,11 +35,14 @@ def report_translation_target_files(
35
  translate_lang: Target language to translate
36
  top_k: Number of top-first files to return for translation. (Default 1)
37
  """
38
- # Get files in progress
39
- docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang)
40
-
41
- # Get all available files for translation
42
- all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2) # Get more to account for filtering
 
 
 
43
 
44
  # Filter out files that are already in progress
45
  available_files = [f for f in all_filepath_list if f not in docs_in_progress]
 
11
  llm_translate,
12
  preprocess_content,
13
  )
14
+ from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files
15
  # GitHub PR Agent import
16
  try:
17
  from pr_generator.agent import GitHubPRAgent
 
35
  translate_lang: Target language to translate
36
  top_k: Number of top-first files to return for translation. (Default 1)
37
  """
38
+ # Get repo files once to avoid duplicate API calls
39
+ all_repo_files = get_github_repo_files(project)
40
+
41
+ # Get all available files for translation using the file list
42
+ all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2, all_repo_files) # Get more to account for filtering
43
+
44
+ # Get files in progress using the same file list
45
+ docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, all_repo_files)
46
 
47
  # Filter out files that are already in progress
48
  available_files = [f for f in all_filepath_list if f not in docs_in_progress]
translator/retriever.py CHANGED
@@ -37,7 +37,7 @@ def get_github_repo_files(project: str = "transformers"):
37
  return file_paths
38
 
39
 
40
- def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko"):
41
  """
42
  Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
43
  """
@@ -48,6 +48,10 @@ def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko"):
48
  if not issue_id:
49
  raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
50
 
 
 
 
 
51
  headers = {
52
  "Accept": "application/vnd.github+json",
53
  }
@@ -84,20 +88,59 @@ def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko"):
84
 
85
  filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
86
 
87
- # Pattern to match both `filename.md` and filename.md formats
88
- pattern = re.compile(r"(?:`([^`]+\.md)`|(\w+\.md))")
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  filenames = []
 
 
91
  for pr in filtered_prs:
92
  match = pattern.search(pr["title"])
93
  if match:
94
  # Use group 1 (with backticks) or group 2 (without backticks)
95
  filename = match.group(1) or match.group(2)
96
- filenames.append("docs/source/en/" + filename)
97
- pr_info_list = [
98
- f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}"
99
- for pr in filtered_prs
100
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return filenames, pr_info_list
102
 
103
 
@@ -121,11 +164,12 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
121
  return report, first_missing_docs
122
 
123
 
124
- def report(project: str, target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
125
  """
126
  Generate a report for the translated docs
127
  """
128
- docs_file = get_github_repo_files(project)
 
129
 
130
  base_docs_path = Path("docs/source")
131
  en_docs_path = Path("docs/source/en")
 
37
  return file_paths
38
 
39
 
40
+ def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko", all_files: list = None):
41
  """
42
  Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
43
  """
 
48
  if not issue_id:
49
  raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
50
 
51
+ # Require all_files parameter
52
+ if all_files is None:
53
+ raise ValueError("Repository file list must be provided")
54
+
55
  headers = {
56
  "Accept": "application/vnd.github+json",
57
  }
 
88
 
89
  filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
90
 
91
+ # Pattern to match filenames after "Translated" keyword
92
+ pattern = re.compile(r"Translated\s+(?:`([^`]+)`|(\S+))\s+to")
93
 
94
+ def find_original_file_path(filename_from_title, all_files):
95
+ """Find the exact file path from repo files by matching filename"""
96
+ if not filename_from_title:
97
+ return None
98
+
99
+ # Remove .md extension for matching
100
+ base_name = filename_from_title.replace('.md', '')
101
+
102
+ # Look for exact matches in repo files
103
+ for file_path in all_files:
104
+ if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
105
+ file_base = file_path.split("/")[-1].replace('.md', '')
106
+ if file_base == base_name:
107
+ return file_path
108
+
109
+ # If no exact match, fallback to simple path
110
+ return f"docs/source/en/{filename_from_title}"
111
+
112
  filenames = []
113
+ pr_info_list = []
114
+
115
  for pr in filtered_prs:
116
  match = pattern.search(pr["title"])
117
  if match:
118
  # Use group 1 (with backticks) or group 2 (without backticks)
119
  filename = match.group(1) or match.group(2)
120
+ # Add .md extension if not present
121
+ if not filename.endswith('.md'):
122
+ filename += '.md'
123
+
124
+ # Find the correct file path by matching filename
125
+ correct_path = None
126
+ if filename:
127
+ # Remove .md extension for matching
128
+ base_name = filename.replace('.md', '')
129
+
130
+ # Look for exact matches in repo files
131
+ for file_path in all_files:
132
+ if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
133
+ file_base = file_path.split("/")[-1].replace('.md', '')
134
+ if file_base == base_name:
135
+ correct_path = file_path
136
+ break
137
+
138
+ # If no exact match, fallback to simple path
139
+ if not correct_path:
140
+ correct_path = f"docs/source/en/{filename}"
141
+ if correct_path:
142
+ filenames.append(correct_path)
143
+ pr_info_list.append(f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}")
144
  return filenames, pr_info_list
145
 
146
 
 
164
  return report, first_missing_docs
165
 
166
 
167
+ def report(project: str, target_lang: str, top_k: int = 1, docs_file: list = None) -> tuple[str, list[str]]:
168
  """
169
  Generate a report for the translated docs
170
  """
171
+ if docs_file is None:
172
+ raise ValueError("Repository file list must be provided")
173
 
174
  base_docs_path = Path("docs/source")
175
  en_docs_path = Path("docs/source/en")