import argparse import os import json import re import threading from urllib.parse import urlencode from urllib.request import urlopen from searcher import search_in_file import subprocess # 正则表达式匹配中文字符 chinese_pattern = re.compile(r'[\u4e00-\u9fff]+') # 全局翻译缓存字典 translation_cache = { "复刻": ("Fork", False), "问题": ("issues", False), # 可以继续添加其他常见的翻译 } def insert_into_meta(file_path, content): with open(file_path, 'r', encoding='utf-8') as file: lines = file.readlines() if len(lines) < 1: lines.append("\n") lines.insert(1, content + "\n") with open(file_path, 'w', encoding='utf-8') as file: file.writelines(lines) print(f"区域化已插入 '{file_path}' 的元数据下") # 删除中文 def remove_zh_cn_lines(file_path): try: with open(file_path, 'r', encoding='utf-8') as file: lines = file.readlines() filtered_lines = [ line for line in lines if not line.strip().startswith("// @name:zh-CN") and not line.strip().startswith("// @description:zh-CN") ] with open(file_path, 'w', encoding='utf-8') as file: file.writelines(filtered_lines) except FileNotFoundError: print("文件未找到,请检查文件路径。") except Exception as e: print(f"发生错误: {e}") # 读取文件并查找中文简介 def read_file_to_memory(file_path): search_results = search_in_file(file_path, "zh-CN") lines = [] for name_match in search_results.name_matches: lines.append("\n".join(search_results.name_matches)) for description_match in search_results.description_matches: lines.append("\n".join(search_results.description_matches)) remove_zh_cn_lines(file_path) return lines # 排序文本 def sort_userscript_section(file_path): with open(file_path, "r", encoding="utf-8") as file: content = file.readlines() # 找到 // ==UserScript== 和 // ==/UserScript== 的范围 start_index = None end_index = None for i, line in enumerate(content): if "// ==UserScript==" in line: start_index = i elif "// ==/UserScript==" in line: end_index = i break # 如果没有找到对应范围,直接返回 if start_index is None or end_index is None: print("未找到有效的 UserScript 区域") return # 提取范围内的内容 userscript_section = content[start_index + 1:end_index] # 按类别分组 descriptions = [] names = [] others = [] for line in userscript_section: if re.match(r"// @description", line): descriptions.append(line.strip()) elif re.match(r"// @name", line) and not re.match(r"// @namespace", line): names.append(line.strip()) else: others.append(line.strip()) # 按语言后缀排序(按规范排列规则,语言后缀优先) def sort_by_suffix(lines): return sorted(lines, key=lambda x: re.search(r"[:\-]([a-zA-Z\-]*)", x).group(1) if re.search(r"[:\-]([a-zA-Z\-]*)", x) else "") sorted_descriptions = sort_by_suffix(descriptions) sorted_names = sort_by_suffix(names) # 合并排序结果 sorted_section = sorted_names + sorted_descriptions + others # 替换原始内容中的 UserScript 区域 content[start_index + 1:end_index] = [line + "\n" for line in sorted_section] # 写入文件 with open(file_path, "w", encoding="utf-8") as file: file.writelines(content) # 翻译函数 def translate_text(text, target_lang): text = text.replace("// @name:zh-CN", '').replace("// @description:zh-CN", '') if text in translation_cache: cached_translation, needs_api_translation = translation_cache[text] if not needs_api_translation: return cached_translation api_url = 'https://translate.googleapis.com/translate_a/single' params = {'client': 'gtx', 'dt': 't', 'sl': 'auto', 'tl': target_lang, 'q': text} full_url = api_url + '?' + urlencode(params) try: response = urlopen(full_url) data = response.read().decode('utf-8') translated_text = ''.join(item[0] for item in json.loads(data.replace("'", "\u2019"))[0]) return translated_text except Exception as e: print(f"翻译错误:{e}") return None # 翻译锁,确保多个线程不会同时修改 translations translation_lock = threading.Lock() # 用于保存翻译结果的线程函数 def translate_worker(chinese_texts, translations, lang): for idx, chinese_text in chinese_texts: translated_text = translate_text(chinese_text, lang) if translated_text: with translation_lock: translations[(idx, chinese_text)] = translated_text # 翻译并返回翻译结果 def translate_and_collect(lines, chinese_texts, lang): translations = {} threads = [] chunk_size = len(chinese_texts) // 5 or 1 for i in range(0, len(chinese_texts), chunk_size): chunk = chinese_texts[i:i + chunk_size] thread = threading.Thread(target=translate_worker, args=(chunk, translations, lang)) threads.append(thread) thread.start() for thread in threads: thread.join() # 收集翻译结果,保存在字符串中 translation_output = "" for line_number, chinese_text, translated_text in [(ln, ct, translations.get((ln, ct), None)) for ln, ct in chinese_texts if (ln, ct) in translations]: if line_number == 0: # 假设第一行是标题 translation_output += f'// @name:{lang} {translated_text}\n' if lang == 'en': translation_output += f'// @name {translated_text}\n' else: translation_output += f'// @description:{lang} {translated_text}' if lang == 'en': translation_output += f'\n// @description {translated_text}' return translation_output # 处理翻译逻辑 def translate_localized(readme_path, target_langs): lines = read_file_to_memory(readme_path) # 保存整行包含中文文本的位置信息 chinese_texts = [] for line_number, line in enumerate(lines): if chinese_pattern.search(line): # 整行匹配中文文本 chinese_texts.append((line_number, line)) # 遍历 target_langs 中的语言 for lang_code in target_langs: print(f"开始翻译 {lang_code} ...") translation_output = translate_and_collect(lines, chinese_texts, lang_code) insert_into_meta(readme_path, translation_output) def main(): # 创建命令行参数解析器 parser = argparse.ArgumentParser(description="UserScript 多语言自动化翻译与优化工具") parser.add_argument("file_path", type=str, help="需要处理的 UserScript 文件路径") parser.add_argument("--langs", nargs="+", default=['ar', 'bg', 'cs', 'da', 'de', 'el', 'en', 'eo', 'es', 'fi', 'fr', 'he', 'hr', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'nb', 'pl', 'pt-BR', 'ro', 'ru', 'sk', 'sr', 'sv', 'th', 'tr', 'ug', 'uk', 'vi', 'zh-SG', 'zh', 'zh-TW', 'zh-HK', 'zh-CN', 'fr-CA'], help="目标翻译语言列表,默认包含 所有语言") args = parser.parse_args() file_path = args.file_path target_langs = args.langs translate_localized(file_path, target_langs) sort_userscript_section(file_path) subprocess.run(['node', 'utils/single-format-monkey-meta.js', file_path], check=True) print("翻译和排序完成!") if __name__ == "__main__": main()