Loading...
Loading...
Compare original and translation side by side
| Task | Approach |
|---|---|
| Read/analyze content | |
| Create new document | Use docx-js |
| Edit existing document | Unpack → edit XML → repack - see Editing Existing Documents below |
| 任务 | 实现方式 |
|---|---|
| 读取/分析内容 | 使用 |
| 创建新文档 | 使用 docx-js |
| 编辑现有文档 | 解压 → 编辑XML → 重新打包 - 详见下方「编辑现有文档」部分 |
.doc
python scripts/office/soffice.py --headless --convert-to docx document.doc
.doc
python scripts/office/soffice.py --headless --convert-to docx document.doc
python scripts/office/soffice.py --headless --convert-to pdf document.docx
pdftoppm -jpeg -r 150 document.pdf pagepython scripts/office/soffice.py --headless --convert-to pdf document.docx
pdftoppm -jpeg -r 150 document.pdf pagepython scripts/accept_changes.py input.docx output.docxpython scripts/accept_changes.py input.docx output.docxnpm install -g docxnpm install -g docxconst { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell, ImageRun,
Header, Footer, AlignmentType, PageOrientation, LevelFormat, ExternalHyperlink,
TableOfContents, HeadingLevel, BorderStyle, WidthType, ShadingType,
VerticalAlign, PageNumber, PageBreak } = require('docx');
const doc = new Document({ sections: [{ children: [/* content */] }] });
Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer));const { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell, ImageRun,
Header, Footer, AlignmentType, PageOrientation, LevelFormat, ExternalHyperlink,
TableOfContents, HeadingLevel, BorderStyle, WidthType, ShadingType,
VerticalAlign, PageNumber, PageBreak } = require('docx');
const doc = new Document({ sections: [{ children: [/* 内容 */] }] });
Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer));python scripts/office/validate.py doc.docxpython scripts/office/validate.py doc.docx// CRITICAL: docx-js defaults to A4, not US Letter
// Always set page size explicitly for consistent results
sections: [{
properties: {
page: {
size: {
width: 12240, // 8.5 inches in DXA
height: 15840 // 11 inches in DXA
},
margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } // 1 inch margins
}
},
children: [/* content */]
}]| Paper | Width | Height | Content Width (1" margins) |
|---|---|---|---|
| US Letter | 12,240 | 15,840 | 9,360 |
| A4 (default) | 11,906 | 16,838 | 9,026 |
size: {
width: 12240, // Pass SHORT edge as width
height: 15840, // Pass LONG edge as height
orientation: PageOrientation.LANDSCAPE // docx-js swaps them in the XML
},
// Content width = 15840 - left margin - right margin (uses the long edge)// 重要提示:docx-js默认使用A4纸,而非美国信纸
// 为确保结果一致,请始终显式设置页面尺寸
sections: [{
properties: {
page: {
size: {
width: 12240, // 8.5英寸,单位为DXA
height: 15840 // 11英寸,单位为DXA
},
margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } // 1英寸边距
}
},
children: [/* 内容 */]
}]| 纸张类型 | 宽度 | 高度 | 内容宽度(1英寸边距) |
|---|---|---|---|
| 美国信纸 | 12,240 | 15,840 | 9,360 |
| A4(默认) | 11,906 | 16,838 | 9,026 |
size: {
width: 12240, // 将短边设为width
height: 15840, // 将长边设为height
orientation: PageOrientation.LANDSCAPE // docx-js会在XML中自动交换宽高
},
// 内容宽度 = 15840 - 左边距 - 右边距(使用长边计算)const doc = new Document({
styles: {
default: { document: { run: { font: "Arial", size: 24 } } }, // 12pt default
paragraphStyles: [
// IMPORTANT: Use exact IDs to override built-in styles
{ id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true,
run: { size: 32, bold: true, font: "Arial" },
paragraph: { spacing: { before: 240, after: 240 }, outlineLevel: 0 } }, // outlineLevel required for TOC
{ id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true,
run: { size: 28, bold: true, font: "Arial" },
paragraph: { spacing: { before: 180, after: 180 }, outlineLevel: 1 } },
]
},
sections: [{
children: [
new Paragraph({ heading: HeadingLevel.HEADING_1, children: [new TextRun("Title")] }),
]
}]
});const doc = new Document({
styles: {
default: { document: { run: { font: "Arial", size: 24 } } }, // 默认12号字
paragraphStyles: [
// 重要提示:使用精确ID覆盖内置样式
{ id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true,
run: { size: 32, bold: true, font: "Arial" },
paragraph: { spacing: { before: 240, after: 240 }, outlineLevel: 0 } }, // outlineLevel为生成目录所必需
{ id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true,
run: { size: 28, bold: true, font: "Arial" },
paragraph: { spacing: { before: 180, after: 180 }, outlineLevel: 1 } },
]
},
sections: [{
children: [
new Paragraph({ heading: HeadingLevel.HEADING_1, children: [new TextRun("标题")] }),
]
}]
});// ❌ WRONG - never manually insert bullet characters
new Paragraph({ children: [new TextRun("• Item")] }) // BAD
new Paragraph({ children: [new TextRun("\u2022 Item")] }) // BAD
// ✅ CORRECT - use numbering config with LevelFormat.BULLET
const doc = new Document({
numbering: {
config: [
{ reference: "bullets",
levels: [{ level: 0, format: LevelFormat.BULLET, text: "•", alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
{ reference: "numbers",
levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.", alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
]
},
sections: [{
children: [
new Paragraph({ numbering: { reference: "bullets", level: 0 },
children: [new TextRun("Bullet item")] }),
new Paragraph({ numbering: { reference: "numbers", level: 0 },
children: [new TextRun("Numbered item")] }),
]
}]
});
// ⚠️ Each reference creates INDEPENDENT numbering
// Same reference = continues (1,2,3 then 4,5,6)
// Different reference = restarts (1,2,3 then 1,2,3)// ❌ 错误 - 切勿手动插入项目符号字符
new Paragraph({ children: [new TextRun("• 项目")] }) // 错误
new Paragraph({ children: [new TextRun("\u2022 项目")] }) // 错误
// ✅ 正确 - 使用编号配置与LevelFormat.BULLET
const doc = new Document({
numbering: {
config: [
{ reference: "bullets",
levels: [{ level: 0, format: LevelFormat.BULLET, text: "•", alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
{ reference: "numbers",
levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.", alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
]
},
sections: [{
children: [
new Paragraph({ numbering: { reference: "bullets", level: 0 },
children: [new TextRun("项目符号项")] }),
new Paragraph({ numbering: { reference: "numbers", level: 0 },
children: [new TextRun("编号项")] }),
]
}]
});
// ⚠️ 每个reference对应独立的编号序列
// 相同reference:编号连续(1,2,3 之后 4,5,6)
// 不同reference:编号重置(1,2,3 之后 1,2,3)columnWidthswidth// CRITICAL: Always set table width for consistent rendering
// CRITICAL: Use ShadingType.CLEAR (not SOLID) to prevent black backgrounds
const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
const borders = { top: border, bottom: border, left: border, right: border };
new Table({
width: { size: 9360, type: WidthType.DXA }, // Always use DXA (percentages break in Google Docs)
columnWidths: [4680, 4680], // Must sum to table width (DXA: 1440 = 1 inch)
rows: [
new TableRow({
children: [
new TableCell({
borders,
width: { size: 4680, type: WidthType.DXA }, // Also set on each cell
shading: { fill: "D5E8F0", type: ShadingType.CLEAR }, // CLEAR not SOLID
margins: { top: 80, bottom: 80, left: 120, right: 120 }, // Cell padding (internal, not added to width)
children: [new Paragraph({ children: [new TextRun("Cell")] })]
})
]
})
]
})WidthType.DXAWidthType.PERCENTAGE// Table width = sum of columnWidths = content width
// US Letter with 1" margins: 12240 - 2880 = 9360 DXA
width: { size: 9360, type: WidthType.DXA },
columnWidths: [7000, 2360] // Must sum to table widthWidthType.DXAWidthType.PERCENTAGEcolumnWidthswidthcolumnWidthmarginscolumnWidthswidth// 重要提示:始终设置表格宽度以保证显示一致
// 重要提示:使用ShadingType.CLEAR(而非SOLID)避免黑色背景
const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
const borders = { top: border, bottom: border, left: border, right: border };
new Table({
width: { size: 9360, type: WidthType.DXA }, // 始终使用DXA(百分比在Google Docs中会失效)
columnWidths: [4680, 4680], // 必须与表格宽度总和一致(DXA:1440 = 1英寸)
rows: [
new TableRow({
children: [
new TableCell({
borders,
width: { size: 4680, type: WidthType.DXA }, // 同时为每个单元格设置宽度
shading: { fill: "D5E8F0", type: ShadingType.CLEAR }, // 使用CLEAR而非SOLID
margins: { top: 80, bottom: 80, left: 120, right: 120 }, // 单元格内边距(内部填充,不增加单元格宽度)
children: [new Paragraph({ children: [new TextRun("单元格")] })]
})
]
})
]
})WidthType.DXAWidthType.PERCENTAGE// 表格宽度 = columnWidths总和 = 内容宽度
// 带1英寸边距的美国信纸:12240 - 2880 = 9360 DXA
width: { size: 9360, type: WidthType.DXA },
columnWidths: [7000, 2360] // 必须与表格宽度总和一致WidthType.DXAWidthType.PERCENTAGEcolumnWidthswidthcolumnWidthmargins// CRITICAL: type parameter is REQUIRED
new Paragraph({
children: [new ImageRun({
type: "png", // Required: png, jpg, jpeg, gif, bmp, svg
data: fs.readFileSync("image.png"),
transformation: { width: 200, height: 150 },
altText: { title: "Title", description: "Desc", name: "Name" } // All three required
})]
})// 重要提示:type参数为必填项
new Paragraph({
children: [new ImageRun({
type: "png", // 必填:png, jpg, jpeg, gif, bmp, svg
data: fs.readFileSync("image.png"),
transformation: { width: 200, height: 150 },
altText: { title: "标题", description: "描述", name: "名称" } // 三项均为必填
})]
})// CRITICAL: PageBreak must be inside a Paragraph
new Paragraph({ children: [new PageBreak()] })
// Or use pageBreakBefore
new Paragraph({ pageBreakBefore: true, children: [new TextRun("New page")] })// 重要提示:PageBreak必须嵌套在Paragraph内
new Paragraph({ children: [new PageBreak()] })
// 或使用pageBreakBefore属性
new Paragraph({ pageBreakBefore: true, children: [new TextRun("新页面")] })// CRITICAL: Headings must use HeadingLevel ONLY - no custom styles
new TableOfContents("Table of Contents", { hyperlink: true, headingStyleRange: "1-3" })// 重要提示:标题必须仅使用HeadingLevel - 不能使用自定义样式
new TableOfContents("目录", { hyperlink: true, headingStyleRange: "1-3" })sections: [{
properties: {
page: { margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } } // 1440 = 1 inch
},
headers: {
default: new Header({ children: [new Paragraph({ children: [new TextRun("Header")] })] })
},
footers: {
default: new Footer({ children: [new Paragraph({
children: [new TextRun("Page "), new TextRun({ children: [PageNumber.CURRENT] })]
})] })
},
children: [/* content */]
}]sections: [{
properties: {
page: { margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } } // 1440 = 1英寸
},
headers: {
default: new Header({ children: [new Paragraph({ children: [new TextRun("页眉")] })] })
},
footers: {
default: new Footer({ children: [new Paragraph({
children: [new TextRun("第 "), new TextRun({ children: [PageNumber.CURRENT] }), new TextRun(" 页")]
})] })
},
children: [/* 内容 */]
}]widthheightorientation: PageOrientation.LANDSCAPE\nLevelFormat.BULLETtypewidthWidthType.PERCENTAGEcolumnWidthswidthmargins: { top: 80, bottom: 80, left: 120, right: 120 }ShadingType.CLEARoutlineLevelwidthheightorientation: PageOrientation.LANDSCAPE\nLevelFormat.BULLETtypewidthWidthType.PERCENTAGEcolumnWidthswidthmargins: { top: 80, bottom: 80, left: 120, right: 120 }ShadingType.CLEARoutlineLevelpython scripts/office/unpack.py document.docx unpacked/“--merge-runs falsepython scripts/office/unpack.py document.docx unpacked/“--merge-runs falseunpacked/word/<!-- Use these entities for professional typography -->
<w:t>Here&#x2019;s a quote: &#x201C;Hello&#x201D;</w:t>
| Entity | Character |
|---|---|
| &#x2018; | ‘ (left single) |
| &#x2019; | ’ (right single / apostrophe) |
| &#x201C; | “ (left double) |
| &#x201D; | ” (right double) |
comment.py
python scripts/comment.py unpacked/ 0 "Comment text with &amp; and &#x2019;"
python scripts/comment.py unpacked/ 1 "Reply text" --parent 0 # reply to comment 0
python scripts/comment.py unpacked/ 0 "Text" --author "Custom Author" # custom author nameunpacked/word/<!-- 使用以下实体保证专业排版 -->
<w:t>这是一段引用:&#x201C;你好&#x201D;</w:t>
| 实体 | 字符 |
|---|---|
| &#x2018; | ‘ (左单引号) |
| &#x2019; | ’ (右单引号/撇号) |
| &#x201C; | “ (左双引号) |
| &#x201D; | ” (右双引号) |
comment.py
python scripts/comment.py unpacked/ 0 "包含&amp;和&#x2019;的批注文本"
python scripts/comment.py unpacked/ 1 "回复文本" --parent 0 # 回复批注0
python scripts/comment.py unpacked/ 0 "文本" --author "自定义作者" # 自定义作者名称python scripts/office/pack.py unpacked/ output.docx --original document.docx--validate falsedurableIdxml:space="preserve"<w:t>python scripts/office/pack.py unpacked/ output.docx --original document.docx--validate falsedurableId<w:t>xml:space="preserve"<w:r><w:r>...</w:r><w:del>...<w:ins>...<w:rPr><w:rPr><w:r><w:r>...</w:r><w:del>...<w:ins>...<w:rPr><w:rPr><w:pPr><w:pStyle><w:numPr><w:spacing><w:ind><w:jc><w:rPr>xml:space="preserve"<w:t>00AB1234<w:pPr><w:pStyle><w:numPr><w:spacing><w:ind><w:jc><w:rPr><w:t>xml:space="preserve"00AB1234<w:ins w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:t>inserted text</w:t></w:r>
</w:ins><w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>deleted text</w:delText></w:r>
</w:del><w:del><w:delText><w:t><w:delInstrText><w:instrText><!-- Change "30 days" to "60 days" -->
<w:r><w:t>The term is </w:t></w:r>
<w:del w:id="1" w:author="Claude" w:date="...">
<w:r><w:delText>30</w:delText></w:r>
</w:del>
<w:ins w:id="2" w:author="Claude" w:date="...">
<w:r><w:t>60</w:t></w:r>
</w:ins>
<w:r><w:t> days.</w:t></w:r><w:del/><w:pPr><w:rPr><w:p>
<w:pPr>
<w:numPr>...</w:numPr> <!-- list numbering if present -->
<w:rPr>
<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"/>
</w:rPr>
</w:pPr>
<w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>Entire paragraph content being deleted...</w:delText></w:r>
</w:del>
</w:p><w:del/><w:pPr><w:rPr><w:ins w:author="Jane" w:id="5">
<w:del w:author="Claude" w:id="10">
<w:r><w:delText>their inserted text</w:delText></w:r>
</w:del>
</w:ins><w:del w:author="Jane" w:id="5">
<w:r><w:delText>deleted text</w:delText></w:r>
</w:del>
<w:ins w:author="Claude" w:id="10">
<w:r><w:t>deleted text</w:t></w:r>
</w:ins><w:ins w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:t>插入的文本</w:t></w:r>
</w:ins><w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>删除的文本</w:delText></w:r>
</w:del><w:del><w:delText><w:t><w:delInstrText><w:instrText><!-- 将"30天"改为"60天" -->
<w:r><w:t>期限为 </w:t></w:r>
<w:del w:id="1" w:author="Claude" w:date="...">
<w:r><w:delText>30</w:delText></w:r>
</w:del>
<w:ins w:id="2" w:author="Claude" w:date="...">
<w:r><w:t>60</w:t></w:r>
</w:ins>
<w:r><w:t> 天。</w:t></w:r><w:pPr><w:rPr><w:del/><w:p>
<w:pPr>
<w:numPr>...</w:numPr> <!-- 若为列表项则保留编号配置 -->
<w:rPr>
<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"/>
</w:rPr>
</w:pPr>
<w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>待删除的整个段落内容...</w:delText></w:r>
</w:del>
</w:p><w:pPr><w:rPr><w:del/><w:ins w:author="Jane" w:id="5">
<w:del w:author="Claude" w:id="10">
<w:r><w:delText>他们插入的文本</w:delText></w:r>
</w:del>
</w:ins><w:del w:author="Jane" w:id="5">
<w:r><w:delText>被删除的文本</w:delText></w:r>
</w:del>
<w:ins w:author="Claude" w:id="10">
<w:r><w:t>被删除的文本</w:t></w:r>
</w:ins>comment.py--parent<w:commentRangeStart><w:commentRangeEnd><w:r><w:r><!-- Comment markers are direct children of w:p, never inside w:r -->
<w:commentRangeStart w:id="0"/>
<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>deleted</w:delText></w:r>
</w:del>
<w:r><w:t> more text</w:t></w:r>
<w:commentRangeEnd w:id="0"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
<!-- Comment 0 with reply 1 nested inside -->
<w:commentRangeStart w:id="0"/>
<w:commentRangeStart w:id="1"/>
<w:r><w:t>text</w:t></w:r>
<w:commentRangeEnd w:id="1"/>
<w:commentRangeEnd w:id="0"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="1"/></w:r>comment.py--parent<w:commentRangeStart><w:commentRangeEnd><w:r><w:r><!-- 批注标记是w:p的直接子元素,切勿嵌套在w:r内部 -->
<w:commentRangeStart w:id="0"/>
<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z">
<w:r><w:delText>已删除</w:delText></w:r>
</w:del>
<w:r><w:t> 更多文本</w:t></w:r>
<w:commentRangeEnd w:id="0"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
<!-- 批注0包含嵌套的回复批注1 -->
<w:commentRangeStart w:id="0"/>
<w:commentRangeStart w:id="1"/>
<w:r><w:t>文本</w:t></w:r>
<w:commentRangeEnd w:id="1"/>
<w:commentRangeEnd w:id="0"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="1"/></w:r>word/media/word/_rels/document.xml.rels<Relationship Id="rId5" Type=".../image" Target="media/image1.png"/>[Content_Types].xml<Default Extension="png" ContentType="image/png"/><w:drawing>
<wp:inline>
<wp:extent cx="914400" cy="914400"/> <!-- EMUs: 914400 = 1 inch -->
<a:graphic>
<a:graphicData uri=".../picture">
<pic:pic>
<pic:blipFill><a:blip r:embed="rId5"/></pic:blipFill>
</pic:pic>
</a:graphicData>
</a:graphic>
</wp:inline>
</w:drawing>word/media/word/_rels/document.xml.rels<Relationship Id="rId5" Type=".../image" Target="media/image1.png"/>[Content_Types].xml<Default Extension="png" ContentType="image/png"/><w:drawing>
<wp:inline>
<wp:extent cx="914400" cy="914400"/> <!-- EMUs:914400 = 1英寸 -->
<a:graphic>
<a:graphicData uri=".../picture">
<pic:pic>
<pic:blipFill><a:blip r:embed="rId5"/></pic:blipFill>
</pic:pic>
</a:graphicData>
</a:graphic>
</wp:inline>
</w:drawing>npm install -g docxscripts/office/soffice.pypdftoppmnpm install -g docxscripts/office/soffice.pypdftoppm