Spaces:

freefor
/

dev

Running

App Files Files Community

dev / translate.js

github-actions[bot]

GitHub deploy: fe3a9c4cecbd9cba7d18b8a7c1f8af7c90bed87d

913d114 6 months ago

history blame contribute delete

7.73 kB

	import { readFileSync, writeFileSync } from 'fs';
	import { OpenAI } from 'openai';
	import dotenv from 'dotenv';

	// Load environment configuration via dotenv before the OpenAI client below
	// reads OPENAI_BASE_URL / OPENAI_API_KEY from process.env.
	dotenv.config();

	/**
	 * Create a promise that settles once a timer of `ms` milliseconds fires.
	 *
	 * @param {number} ms - Timer duration handed to setTimeout.
	 * @returns {Promise<void>} Promise that resolves (with no value) when the timer fires.
	 */
	function delay(ms) {
		return new Promise((done) => {
			setTimeout(done, ms);
		});
	}


	// Shared OpenAI client used for every translation request in this script.
	// The endpoint and credentials come from the OPENAI_BASE_URL and
	// OPENAI_API_KEY environment variables.
	const openai = new OpenAI({
	baseURL: process.env.OPENAI_BASE_URL,
	apiKey: process.env.OPENAI_API_KEY,
	});



	// Size limit, in string length units, that splitContent compares against
	// when deciding whether to close the current chunk of a section's text.
	const maxSectionLength = 8000;

	/**
	 * Turn a header title into a lowercase, hyphen-separated identifier.
	 *
	 * Every run of characters other than a-z / 0-9 (after lowercasing) becomes a
	 * single hyphen, and a hyphen left at the very start or end is removed, so
	 * "Table of Contents" becomes "table-of-contents" and "Hello World!" becomes
	 * "hello-world". Titles with no ASCII letters or digits yield "".
	 *
	 * @param {string} header - Header text without the leading `#` markers.
	 * @returns {string} The derived identifier.
	 */
	function convertToId(header) {
		return header
			.toLowerCase()
			.replace(/[^a-z0-9]+/g, '-')
			// Alternation (not a literal pipe): strip one leading and one trailing hyphen.
			.replace(/^-|-$/g, '');
	}
	/**
	 * Split a Markdown document into sections at level-1 and level-2 headers and
	 * cut each section body into chunks of bounded size.
	 *
	 * Notes on behavior:
	 * - Line endings are normalised to "\n" before splitting.
	 * - Text that precedes the first matching header is not part of the result.
	 * - Headers of level 3 and deeper stay inside the body of their parent section.
	 * - Any line that starts with one or two `#` followed by whitespace is treated
	 *   as a header; fenced code blocks are not given special treatment.
	 * - Chunks are built line by line. When adding a line would push the current
	 *   chunk past `maxLength`, the chunk is closed and that line starts the next
	 *   one, so no line is dropped. A single line longer than `maxLength` becomes
	 *   a chunk of its own.
	 *
	 * @param {string} content - Full Markdown text.
	 * @param {number} [maxLength=maxSectionLength] - Chunk size limit in string length units.
	 * @returns {Array<{id: string, header: string, text: string, sections: string[], leadingSpaces: string[], translations: string[]}>}
	 *   One entry per header. `sections[i]` is the i-th chunk of the body and
	 *   `leadingSpaces[i]` is the leading whitespace of that chunk's first line.
	 */
	function splitContent(content, maxLength = maxSectionLength) {
		// Normalise CRLF and lone CR to "\n" first so the regexes only see one form.
		const normalized = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

		// The capture group keeps each header (with its trailing newlines) in the
		// output, so `parts` is [preamble, header1, body1, header2, body2, ...].
		const parts = normalized.split(/^(#{1,2}\s.+(?:\n+|$))/m);
		const result = [];

		for (let i = 1; i < parts.length; i += 2) {
			const [header] = parts[i].split('\n');
			const text = parts[i + 1] || '';

			const section = {
				id: convertToId(header.replace(/^#+\s/, '')),
				header,
				text,
				sections: [],
				leadingSpaces: [],
				translations: [],
			};

			let chunk = '';
			let chunkLeadingSpace = '';

			// Store the current chunk (unless it is only whitespace) and start over.
			const flushChunk = () => {
				if (chunk.trim().length > 0) {
					section.sections.push(chunk);
					section.leadingSpaces.push(chunkLeadingSpace);
				}
				chunk = '';
				chunkLeadingSpace = '';
			};

			for (const line of text.split('\n')) {
				// Close the chunk before it overflows; the current line is then
				// appended to the fresh chunk below instead of being skipped.
				if (chunk !== '' && chunk.length + line.length > maxLength) {
					flushChunk();
				}

				if (chunk === '') {
					chunkLeadingSpace = line.match(/^(\s*)/)[1];
				}

				chunk += `${line}\n`;
			}

			flushChunk();
			result.push(section);
		}

		return result;
	}

	/**
	 * Translate the "table-of-contents" section and collect a map from each
	 * original anchor id to its translated link text.
	 *
	 * Each chunk of the TOC section is sent to the chat completions endpoint in
	 * parallel. In the reply, every line of the form `* [text](#id)` is rewritten
	 * to `<chunk leading space>* [text](#text)` with all whitespace removed from
	 * `text`; other lines are kept as returned. The rewritten chunks are stored on
	 * `tocSection.translations`. Afterwards, any `(#id)` that still appears in the
	 * stored translations for a collected id is replaced by the URL-encoded,
	 * lowercased translated text.
	 *
	 * @param {Array<object>} sections - Sections as produced by splitContent; the
	 *   entry with id "table-of-contents" is mutated (its `translations` is set).
	 * @param {object} openai - Client exposing `chat.completions.create(request)`.
	 * @returns {Promise<Object<string, string>>} Map of original anchor id to
	 *   translated text; empty when there is no table of contents. The map has no
	 *   prototype, so only ids actually found in the TOC are ever present.
	 */
	async function translateTableOfContents(sections, openai) {
		// Null-prototype object: a lookup such as map['constructor'] must not hit
		// an inherited property and be mistaken for a translation.
		const idTextMap = Object.create(null);

		const tocSection = sections.find((section) => section.id === 'table-of-contents');
		if (!tocSection) {
			console.log("Table of Contents not found");
			return idTextMap;
		}

		// Matches a bullet link line: optional indent, "*", optional space, [text](#id).
		const linkRegex = /^\s*\*\s*\[(.+)\]\(#(.+)\)\s*$/;

		// The inner lines of the prompt are deliberately not re-indented: their
		// whitespace is part of the string sent to the model.
		const translatedSections = await Promise.all(tocSection.sections.map(async (section, index) => {
			const response = await openai.chat.completions.create({
				model: process.env.OPENAI_MODEL_ID,
				messages: [
					{
						role: "system",
						content: `请翻译以下目录项，保持原有的格式:
	1. 保持星号和缩进不变
	2. 只翻译方括号[]内的文本
	3. 保持圆括号()内的链接不变`
					},
					{ role: "user", content: section }
				],
			});

			const translatedContent = response.choices[0].message.content;
			console.log(`Translated TOC section ${index + 1} of ${tocSection.sections.length}`);

			const translatedLines = translatedContent.split('\n').map((line) => {
				const match = line.match(linkRegex);
				if (!match) {
					return line;
				}
				const [, rawText, id] = match;
				const translatedText = rawText.replace(/\s+/g, '');
				idTextMap[id] = translatedText;
				return `${tocSection.leadingSpaces[index]}* [${translatedText}](#${translatedText})`;
			});

			return translatedLines.join('\n');
		}));

		tocSection.translations = translatedSections;

		// Rewrite any remaining references to the original ids. Literal string
		// replacement is used because ids may contain regex metacharacters
		// (e.g. "c++"), and the replacer function keeps "$" sequences inert.
		for (const [id, text] of Object.entries(idTextMap)) {
			const encodedText = encodeURIComponent(text.toLowerCase().replace(/\s+/g, '-'));
			tocSection.translations = tocSection.translations.map((translation) =>
				translation.replaceAll(`(#${id})`, () => `(#${encodedText})`));
		}

		return idTextMap;
	}
	/**
	 * Translate a Markdown file to Chinese and write the result to disk.
	 *
	 * Steps: read `inputFile`, split it with splitContent, translate the table of
	 * contents (which also yields translated header titles), translate every other
	 * chunk in parallel through the chat completions endpoint, then reassemble
	 * headers and translated chunks separated by blank lines and write the trimmed
	 * result to `outputFile`.
	 *
	 * Any error is logged and the process exit code is set to 1; the returned
	 * promise itself does not reject.
	 *
	 * @param {string} inputFile - Path of the Markdown file to translate.
	 * @param {string} outputFile - Path the translated Markdown is written to.
	 * @returns {Promise<void>}
	 */
	async function translateToChineseAndSave(inputFile, outputFile) {
		try {
			const content = readFileSync(inputFile, 'utf8');
			const sections = splitContent(content);
			console.log(`Split into ${sections.length} sections`);

			const idTextMap = await translateTableOfContents(sections, openai);

			// Collect every chunk that still needs translating. Each job carries a
			// reference to its section object rather than the section id, because
			// two sections with identical header text share the same id.
			const allSectionsToTranslate = sections
				.filter((section) => section.id !== 'table-of-contents')
				.flatMap((section) =>
					section.sections.map((text, index) => ({ section, index, text }))
				);

			console.log(`Preparing to translate ${allSectionsToTranslate.length} subsections`);

			// Translate all chunks concurrently. The inner lines of the prompt are
			// deliberately not re-indented: their whitespace is part of the string.
			const translatedSections = await Promise.all(allSectionsToTranslate.map(async ({ section, index, text }) => {
				const response = await openai.chat.completions.create({
					model: process.env.OPENAI_MODEL_ID,
					messages: [
						{
							role: "system", content: `
	请将Markdown文本翻译成中文，同时遵守以下规则:
	1. 严格保持原文的Markdown格式不变，包括但不限于标题、列表、代码块、引用等。
	2. 专有名词、缩写等可以保留英文,但在首次出现时可在括号内提供中文解释。
	3. 代码块、命令行指令等技术内容保持原样不翻译。
	4. 注意调整语序,使翻译后的文本符合中文的表达习惯,同时保持原意。
	5. 保持原文的链接格式不变，只翻译链接文本。
	`
						},
						{ role: "user", content: text.trim() }
					],
				});
				console.log(`Translated subsection ${index + 1} of section ${section.id}`);
				return { section, index, content: response.choices[0].message.content };
			}));

			// Attach each translated chunk to the section it came from, at its
			// original position, restoring the chunk's leading whitespace.
			for (const section of sections) {
				if (section.id !== 'table-of-contents') {
					section.translations = [];
				}
			}
			for (const { section, index, content: translated } of translatedSections) {
				section.translations[index] = section.leadingSpaces[index] + translated;
			}

			// Swap in translated header titles where the TOC provided one, then
			// join headers and chunks into the final document.
			const translatedContent = sections.map((section) => {
				const headerLevel = section.header.match(/^(#+)/)[1];
				const translatedTitle = idTextMap[section.id];
				// Only a non-empty string counts as a translation; this also ignores
				// inherited properties if the map is a plain object.
				const hasTitle = typeof translatedTitle === 'string' && translatedTitle !== '';
				const newHeader = hasTitle ? `${headerLevel} ${translatedTitle}` : section.header;
				return [newHeader, ...section.translations].join('\n\n');
			}).join('\n\n');

			writeFileSync(outputFile, translatedContent.trim());
			console.log(`Translation completed and saved to ${outputFile}`);
		} catch (error) {
			console.error('Error:', error);
			// Signal the failure to the calling shell / CI job instead of exiting 0.
			process.exitCode = 1;
		}
	}
	// Read the command-line arguments: <inputFile> [outputFile].
	// The output path falls back to README.md when it is omitted.
	const inputFile = process.argv[2];
	const outputFile = process.argv[3] || 'README.md';

	if (!inputFile) {
		// Without an input path there is nothing to translate; report usage and
		// mark the run as failed rather than attempting to read `undefined`.
		console.error('Usage: node translate.js <inputFile> [outputFile]');
		process.exitCode = 1;
	} else {
		translateToChineseAndSave(inputFile, outputFile);
	}