Created
May 27, 2024 22:07
-
-
Save HeimMatthias/f628b8dd2ed9062e72cea544fffb3f40 to your computer and use it in GitHub Desktop.
MuPDF OutlineIterator Extension to get outline item color and style (italic/bold) information
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Use MuPDF's JavaScript API to get "table of contents" / document outline styling information | |
| * Sets up an OutlineIterator (as in the main function below) and additionally lets you retrieve the PDF object for each entry and get and set its style and color | |
| * (Also: full example of an outline parser in MuPDF in main function below) | |
| * v.1 - 27.05.2024 | |
| * run as follows: mutool.exe run document-outline-extension.js filename.pdf | |
| * | |
| * Copyright (C) 2024 Matthias Heim | |
| This program is free software: you can redistribute it and/or modify | |
| it under the terms of the GNU Affero General Public License as | |
| published by the Free Software Foundation, either version 3 of the | |
| License, or (at your option) any later version. | |
| This program is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU Affero General Public License for more details. | |
| You should have received a copy of the GNU Affero General Public License | |
| along with this program. If not, see <https://www.gnu.org/licenses/>. | |
| */ | |
| /* Takes a mupdf document and outline iterator, establishes the iterator position by parsing upwards towards the bookmarkRoot, then resets the iterator position and returns the corresponding pdf object */ | |
/*
 * Returns the PDF object that corresponds to the iterator's current outline entry.
 *
 * The iterator is first walked back to the very first top-level entry, recording
 * for every successful move the dictionary key ("Next" for a sibling step,
 * "First" for a child step) that leads back again. The recorded path is then
 * replayed both on the iterator (restoring its position) and on the PDF outline
 * tree, starting at trailer.Root.Outlines.First.
 *
 * doc     - document whose getTrailer() exposes Root.Outlines.First
 * outline - outline iterator offering item(), prev(), up(), next() and down()
 *
 * Returns undefined when the iterator has no current item.
 */
function getOutlineItemObject(doc, outline) {
	if (typeof outline.item() == "undefined") {
		return undefined;
	}

	// Path from the first top-level entry to the current entry, in forward order.
	var path = [];
	for (;;) {
		if (outline.prev() != -1) {
			path.unshift("Next");
		} else if (outline.up() != -1) {
			path.unshift("First");
		} else {
			// Neither a previous sibling nor a parent entry: the start has been reached.
			break;
		}
	}

	var current = doc.getTrailer().Root.Outlines.First;
	for (var i = 0; i < path.length; i++) {
		var key = path[i];
		// Move the iterator back towards its original position ...
		if (key == "Next") {
			outline.next();
		} else {
			outline.down();
		}
		// ... and follow the same link in the PDF object tree.
		current = current[key];
	}
	return current;
}
| // cf. https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/JS_API_AcroJS.html#color | |
| // Specifies the color for a bookmark. Values are defined by using RGB color. ... A color is represented in JavaScript as an array containing 3 elements corresponding to a RGB color space. The elements are numbers that range between zero and one inclusive. For example, the color red can be represented as [1, 0, 0]. | |
/*
 * Reads the color of an outline entry.
 *
 * outlineItemObject - PDF object of the outline entry
 *
 * Returns the value of the entry's "C" key (converted with valueOf()),
 * or undefined when the entry has no "C" key.
 */
function outlineItemGetColor(outlineItemObject) {
	var hasColor = "C" in outlineItemObject;
	return hasColor ? outlineItemObject.C.valueOf() : undefined;
}
/*
 * Sets the color of an outline entry.
 *
 * outlineItemObject - PDF object of the outline entry; must support the `in`
 *                     operator as well as put(key, value) and delete(key)
 * color             - array of three RGB components, each expected to lie
 *                     between 0 and 1 inclusive (not validated here)
 *
 * Black ([0, 0, 0]) removes an existing "C" key instead of overwriting it;
 * every other color is stored under "C".
 */
function outlineItemSetColor(outlineItemObject, color) {
	// All three components must be checked individually: the blue component
	// lives at index 2, and comparing each one avoids treating out-of-range
	// inputs that merely sum to zero as black.
	var isBlack = (color[0] == 0) && (color[1] == 0) && (color[2] == 0);
	if (isBlack && ("C" in outlineItemObject)) outlineItemObject.delete("C");
	else outlineItemObject.put("C", color);
}
| // cf. https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/JS_API_AcroJS.html#id247 | |
| // "Specifies the style for the bookmark’s font: 0 is normal, 1 is italic, 2 is bold, and 3 is bold-italic." | |
| // font-style: ((outlineItemGetStyle(outlineItemObject) >> 0) & 0x1) == 1 ? "italic":"normal"; | |
| // font-weight:((outlineItemGetStyle(outlineItemObject) >> 1) & 0x1) == 1 ? "bold":"normal"; | |
/*
 * Reads the font style flags of an outline entry.
 *
 * outlineItemObject - PDF object of the outline entry
 *
 * Returns the value of the entry's "F" key (converted with valueOf()),
 * or 0 when the entry has no "F" key. Per the reference quoted above:
 * 0 is normal, 1 is italic, 2 is bold and 3 is bold-italic.
 */
function outlineItemGetStyle(outlineItemObject) {
	if (!("F" in outlineItemObject)) {
		return 0;
	}
	return outlineItemObject.F.valueOf();
}
/*
 * Sets the font style flags of an outline entry.
 *
 * outlineItemObject - PDF object of the outline entry; must support the `in`
 *                     operator as well as put(key, value) and delete(key)
 * style             - style value to store (0 normal, 1 italic, 2 bold,
 *                     3 bold-italic); not validated here
 *
 * Style 0 removes an existing "F" key; in every other case the value is
 * stored under "F".
 */
function outlineItemSetStyle(outlineItemObject, style) {
	var dropExistingFlag = (style == 0) && ("F" in outlineItemObject);
	if (dropExistingFlag) {
		outlineItemObject.delete("F");
		return;
	}
	outlineItemObject.put("F", style);
}
/*
 * Script entry point (run as: mutool run document-outline-extension.js filename.pdf).
 *
 * Opens the document named by the single script argument, walks its outline
 * depth-first and, for every entry:
 *   - prints the iterator item, the underlying PDF object and its current
 *     style and color,
 *   - sets the style to (nesting level % 4) and the color to a random RGB triple.
 * Finally the modified document is saved under a new name.
 */
(function() {
	if (scriptArgs.length != 1) {
		console.log("PDF file name missing");
		return;
	}
	var inputPath = scriptArgs[0];
	var doc = Document.openDocument(inputPath);
	var outline = doc.outlineIterator();
	if (typeof outline.item() == "undefined") {
		console.log("This document does not have an outline.");
		return;
	}

	// Nesting depth of the current entry; top-level entries are at level 0.
	var level = 0;
	// The traversal only ends via "break iterate", once the iterator can
	// neither advance to a sibling nor climb to a parent entry.
	iterate: while (true) {
		var item = outline.item();
		console.log(JSON.stringify(item));

		var outlineItemObject = getOutlineItemObject(doc, outline);
		var outlineItemColor = outlineItemGetColor(outlineItemObject);
		var outlineItemStyle = outlineItemGetStyle(outlineItemObject);
		console.log('pdf Object is '+outlineItemObject.toString());
		switch (outlineItemStyle) {
			case 1 : console.log("This outline entry is italic."); break;
			case 2 : console.log("This outline entry is bold."); break;
			case 3 : console.log("This outline entry is italic and bold."); break;
		}
		if (typeof outlineItemColor != "undefined") {
			console.log('The RGB color of this outline entry is ('+Math.round(outlineItemColor[0]*255)+', '+Math.round(outlineItemColor[1]*255)+', '+Math.round(outlineItemColor[2]*255)+')');
		}

		// use outline level to style entry and give each outline entry a random color
		outlineItemSetStyle(outlineItemObject, level % 4);
		outlineItemSetColor(outlineItemObject, [Math.random(),Math.random(),Math.random()]);

		// Depth-first: visit the first child next if there is one.
		if (outline.down() == 0) {
			level++;
			continue;
		}
		// No child entry: step back onto the current entry, then advance to the
		// next sibling, climbing one level each time a sibling list is exhausted.
		outline.up();
		while (outline.next() != 0) {
			if (outline.up() != 0) break iterate;
			level--;
		}
	}

	// save the document with the colorful, depth-formatted outline to a new document
	// If the input name does not end in ".pdf", append the suffix rather than
	// reusing the input path, so the input file is never the save target.
	var pdfExtension = /\.pdf$/i;
	var outputPath = pdfExtension.test(inputPath)
		? inputPath.replace(pdfExtension, "_colorful_toc.pdf")
		: inputPath + "_colorful_toc.pdf";
	// NOTE(review): "incremental" saving is normally meant for appending changes to
	// the original file; confirm that saving incrementally to a new path
	// produces a complete, standalone PDF with the MuPDF version in use.
	doc.save(outputPath, "incremental");
})();
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Use MuPDF's JavaScript API to get "table of contents" / document outline styling information
Set up an OutlineIterator (as in the main function below) but additionally retrieve the pdf object for each entry and get and set style and color
(Also: full example of an outline parser in MuPDF in main function below)