|
"""Module for gradio interfaces.""" |
|
|
|
import os |
|
from pathlib import Path |
|
import gradio as gr |
|
|
|
from translator.content import ( |
|
fill_scaffold, |
|
get_content, |
|
get_full_prompt, |
|
llm_translate, |
|
preprocess_content, |
|
) |
|
from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files |
|
|
|
try: |
|
from pr_generator.agent import GitHubPRAgent |
|
|
|
GITHUB_PR_AVAILABLE = True |
|
except ImportError as e: |
|
print(f"β οΈ GitHub PR Agent is not available: {e}") |
|
GITHUB_PR_AVAILABLE = False |
|
|
|
import json |
|
from logger.github_logger import GitHubLogger |
|
|
|
|
|
def report_translation_target_files(
    project: str, translate_lang: str, top_k: int = 1
) -> tuple[str, list[list[str]]]:
    """Report translation candidates, skipping files that already have open work.

    Args:
        project: Project to translate (e.g., "transformers", "smolagents")
        translate_lang: Target language to translate
        top_k: Maximum number of candidate files to return. (Default 1)

    Returns:
        A ``(status_report, rows)`` pair. ``status_report`` is the text produced
        by ``report`` with, when in-progress files exist, a list of those files
        linked to their PR info appended. ``rows`` holds at most ``top_k``
        single-element lists, one per available file path.
    """
    repo_files = get_github_repo_files(project)

    # Twice the requested amount is fetched, presumably so that enough
    # candidates remain once in-progress files are filtered out.
    status_report, candidate_paths = report(project, translate_lang, top_k * 2, repo_files)
    docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, repo_files)

    filepath_list = [path for path in candidate_paths if path not in docs_in_progress][:top_k]

    if docs_in_progress:
        sections = [
            status_report,
            f"\n\nπ€ Found {len(docs_in_progress)} files in progress for translation:",
        ]
        for position, doc in enumerate(docs_in_progress, start=1):
            sections.append(f"\n{position}. [`{doc}`]({pr_info_list[position - 1]})")
        sections.append(
            f"\n\nπ Showing {len(filepath_list)} available files (excluding in-progress):"
        )
        status_report = "".join(sections)

    rows = []
    for path in filepath_list:
        rows.append([path])
    return status_report, rows
|
|
|
|
|
def translate_docs(
    lang: str,
    file_path: str,
    additional_instruction: str = "",
    project: str = "transformers",
    force_retranslate: bool = False,
) -> tuple[str, str]:
    """Translate one documentation file, reusing a saved translation when present.

    Args:
        lang: Target language code (e.g., "ko").
        file_path: Path of the document to translate; also used to locate a
            saved result under ``translation_result/``.
        additional_instruction: Extra instruction forwarded to the prompt builder.
        project: Project the document is fetched from.
        force_retranslate: When True, ignore any saved translation.

    Returns:
        A ``(status, document)`` pair: either the "existing translation loaded"
        notice with the saved text, or the first value returned by
        ``llm_translate`` together with the re-assembled translated document.
    """
    translation_file_path = (
        Path(__file__).resolve().parent.parent / f"translation_result/{file_path}"
    )

    if not force_retranslate and translation_file_path.exists():
        print(f"π Found existing translation: {translation_file_path}")
        existing_content = translation_file_path.read_text(encoding="utf-8")
        # A blank saved file is treated as missing: fall through and translate.
        if existing_content.strip():
            # "\x85" stands for the control character embedded in the original literal.
            existing_msg = (
                "β»οΈ **Existing translation loaded** (no tokens used)\n"
                f"π **File:** `{file_path}`\n"
                f"π\x85 **Loaded from:** `{translation_file_path}`\n"
                "π‘ **To retranslate:** Check 'Force Retranslate' option."
            )
            return existing_msg, existing_content

    content = get_content(file_path, project)
    to_translate = preprocess_content(content)

    # Resolve the language name used in the prompt. Unknown codes are passed
    # through unchanged; previously any code other than "ko" left the name
    # unbound and the call crashed with UnboundLocalError.
    language_names = {"ko": "Korean"}
    translation_lang = language_names.get(lang, lang)
    to_translate_with_prompt = get_full_prompt(translation_lang, to_translate, additional_instruction)

    print("to_translate_with_prompt:\n", to_translate_with_prompt)

    callback_result, translated_content = llm_translate(to_translate_with_prompt)
    print("translated_content:\n")
    print(translated_content)

    # Put the translated text back into the structure of the original document.
    translated_doc = fill_scaffold(content, to_translate, translated_content)
    print("translated_doc:\n")
    print(translated_doc)
    return callback_result, translated_doc
|
|
|
|
|
def translate_docs_interactive(
    translate_lang: str,
    selected_files: list[list[str]],
    additional_instruction: str = "",
    project: str = "transformers",
    force_retranslate: bool = False,
) -> tuple[str, str]:
    """Translate the first selected file and build a status message for the UI.

    Args:
        translate_lang: Target language to translate
        selected_files: Rows of selected files; the first cell of each row is
            the file path. Only the first usable row is translated.
        additional_instruction: Extra instruction forwarded to ``translate_docs``.
        project: Project the document belongs to.
        force_retranslate: When True, ignore any saved translation.

    Returns:
        A ``(status, translated_content)`` pair. When no file is selected the
        status explains that and the content is an empty string.
    """
    rows = selected_files if selected_files is not None else []
    # Skip empty rows and rows whose first cell is empty.
    file_paths = [row[0] for row in rows if row and row[0]]

    # Nothing selected: report it instead of failing with IndexError on [0].
    if not file_paths:
        return "No file selected for translation. Please select a file first.", ""

    current_file = file_paths[0]

    callback_result, translated_content = translate_docs(
        translate_lang, current_file, additional_instruction, project, force_retranslate
    )

    # A cached translation comes back with its own ready-made status message.
    if isinstance(callback_result, str) and "Existing translation loaded" in callback_result:
        status = callback_result
    else:
        if force_retranslate:
            status = f"π **Force Retranslation completed**: `{current_file}` β `{translate_lang}`\n\n"
        else:
            # "\x85" stands for the control character embedded in the original literal.
            status = f"β\x85 Translation completed: `{current_file}` β `{translate_lang}`\n\n"
        status += f"π° Used token and cost: \n```\n{callback_result}\n```"

    print(callback_result)
    print(status)

    return status, translated_content
|
|
|
|
|
def generate_github_pr(
    target_language: str,
    filepath: str,
    translated_content: str = None,
    github_config: dict = None,
    en_title: str = None,
    project: str = "transformers",
) -> str:
    """Generate a GitHub PR for translated documentation.

    Validates the GitHub configuration, obtains the translated text (from the
    argument or from ``translation_result/<filepath>``), runs the
    ``GitHubPRAgent`` translation workflow, optionally updates the toctree,
    appends a JSON line describing the outcome through ``GitHubLogger`` and
    renders the outcome as a message.

    Side effect: sets ``os.environ["GITHUB_TOKEN"]`` from the configuration.

    Args:
        target_language: Target language for translation (e.g., "ko")
        filepath: Original file path (e.g., "docs/source/en/accelerator_selection.md")
        translated_content: Translated content (if None, read from file)
        github_config: GitHub configuration dictionary; must provide "token",
            "owner", "repo_name" and "reference_pr_url", may provide "base_branch"
        en_title: English title for toctree mapping; the toctree update is
            only attempted when this is truthy
        project: Project whose configured repository is the PR base

    Returns:
        PR creation result message. Failures are reported in the message
        rather than raised.
    """
    if not GITHUB_PR_AVAILABLE:
        return "β GitHub PR Agent is not available. Please install required libraries."

    if not github_config:
        return "β GitHub configuration not provided. Please set up GitHub token, owner, and repository in Configuration panel."

    required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
    missing_fields = [field for field in required_fields if not github_config.get(field)]

    if missing_fields:
        bullet_lines = "\n".join([f" β’ {field}" for field in missing_fields])
        return (
            f"β Missing required GitHub configuration: {', '.join(missing_fields)}"
            "\n\nπ‘ Go to Configuration panel and set:\n" + bullet_lines
        )

    # Exposed via the environment, presumably for the PR agent to pick up.
    os.environ["GITHUB_TOKEN"] = github_config["token"]

    # Placeholders shown in the error report when the failure happens before
    # the base repository has been resolved.
    base_owner, base_repo = "BASE", "REPO"

    try:
        if translated_content is None:
            translation_file_path = (
                Path(__file__).resolve().parent.parent / f"translation_result/{filepath}"
            )
            if not translation_file_path.exists():
                return f"β Translation file not found: {translation_file_path}\n\nπ‘ Please complete translation first in Tab 2 for file: {filepath}"

            with open(translation_file_path, "r", encoding="utf-8") as f:
                translated_content = f.read()

        if not translated_content or not translated_content.strip():
            return f"β Translated content is empty for file: {filepath}\n\nπ‘ Please complete translation first in Tab 2."

        from translator.project_config import get_project_config

        project_config = get_project_config(project)
        # Tolerate a trailing slash or ".git" suffix in the configured URL so
        # that "owner/repo" still splits into exactly two parts.
        base_repo_path = (
            project_config.repo_url.replace("https://github.com/", "")
            .rstrip("/")
            .removesuffix(".git")
        )
        base_owner, base_repo = base_repo_path.split("/")

        print("π Starting GitHub PR creation...")
        print(f" π File: {filepath}")
        print(f" π Language: {target_language}")
        print(f" π Reference PR: {github_config['reference_pr_url']}")
        print(f" π User Fork: {github_config['owner']}/{github_config['repo_name']}")
        print(f" π― Base Repository: {base_owner}/{base_repo}")

        agent = GitHubPRAgent(
            user_owner=github_config["owner"],
            user_repo=github_config["repo_name"],
            base_owner=base_owner,
            base_repo=base_repo,
        )
        result = agent.run_translation_pr_workflow(
            reference_pr_url=github_config["reference_pr_url"],
            target_language=target_language,
            filepath=filepath,
            translated_doc=translated_content,
            base_branch=github_config.get("base_branch", "main"),
        )

        toctree_result = None
        if en_title:
            from agent.toctree_handler import TocTreeHandler

            toctree_handler = TocTreeHandler(project)
            toctree_result = toctree_handler.update_toctree_after_translation(
                result, filepath, agent, github_config, project
            )

        toctree_status = ""
        if toctree_result:
            if toctree_result["status"] == "success":
                # "\x85" stands for the control character embedded in the original literal.
                toctree_status = f"\nπ **Toctree Updated:** β\x85 {toctree_result['message']}"
            else:
                toctree_status = f"\nπ **Toctree Update Failed:** β {toctree_result['message']}"

        # Logging is best-effort: a logging failure must not hide the PR result.
        try:
            log_data = result.copy()
            if toctree_result:
                log_data["toctree_result"] = toctree_result
            log_entry = json.dumps(log_data, ensure_ascii=False) + "\n"
            log_res = GitHubLogger().append_jsonl(log_entry)
            print(f"π Log append result: {log_res}")
        except Exception as e:
            print(f"β Failed to append PR log via GitHub API: {e}")

        if result["status"] == "success":
            return (
                "β\x85 **GitHub PR Creation Successful!**\n"
                "\n"
                f"π **PR URL:** {result.get('pr_url', 'NO_PR_URL')}\n"
                f"πΏ **Branch:** {result['branch']}\n"
                f"π **File:** {result['file_path']}{toctree_status}\n"
                "\n"
                f"{result['message']}"
            )

        elif result["status"] == "partial_success":
            return (
                "β οΈ **Partial Success**\n"
                "\n"
                f"πΏ **Branch:** {result['branch']}\n"
                f"π **File:** {result['file_path']}{toctree_status}\n"
                "\n"
                f"{result['message']}\n"
                "\n"
                "**Error Details:**\n"
                f"{result.get('error_details', 'Unknown error')}"
            )

        else:
            error_details = result.get("error_details", "No additional details")
            return (
                "β **GitHub PR Creation Failed**\n"
                "\n"
                "**Error Message:**\n"
                f"{result['message']}\n"
                "\n"
                "**Error Details:**\n"
                f"{error_details}\n"
                "\n"
                "π‘ **Common Solutions:**\n"
                f"1. **Project Mismatch**: Selected project '{project}' but fork is '{github_config.get('repo_name', 'REPO')}' - ensure they match\n"
                f"2. Check if your GitHub fork exists: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}\n"
                "3. Verify GitHub token has write access to your fork"
            )

    except Exception as e:
        # Top-level boundary for the UI: report the failure as a message.
        error_msg = (
            "β **Unexpected Error During PR Creation**\n"
            "\n"
            f"**Error:** {str(e)}\n"
            "\n"
            "**Configuration:**\n"
            f"β’ Project: {project}\n"
            f"β’ File: {filepath}\n"
            f"β’ Target: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')} β {base_owner}/{base_repo}"
        )
        print(error_msg)
        return error_msg
|
|
|
|
|
|
|
def mock_generate_PR():
    """Deprecated stand-in kept for backward compatibility; returns only a notice."""
    deprecation_notice = "β οΈ mock_generate_PR() is deprecated. Please use generate_github_pr() instead."
    return deprecation_notice
|
|