Created
February 3, 2026 08:39
-
-
Save puhitaku/be41bf25d2ac578c166885e8e1188d48 to your computer and use it in GitHub Desktop.
Keynote text extractor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pathlib | |
| import os | |
| import pipes | |
| import subprocess | |
| import sys | |
| # Add env to prevent the Protobuf error | |
| os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' | |
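| # Third-party packages required by the imports below: presumably PyYAML (yaml) and keynote-parser (keynote_parser) -- TODO confirm exact package names/versions | |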
| import yaml | |
| from keynote_parser.command_line import unpack_command | |
def main():
    """Command-line entry point.

    Treats every argument after the program name as an input file and runs
    ``extract`` on each, in order. With no input files, prints a usage line
    and exits with status 1.
    """
    inputs = sys.argv[1:]
    if not inputs:
        print(f'Usage: {sys.argv[0]} INFILE')
        sys.exit(1)

    for keynote_file in inputs:
        extract(keynote_file)
def _load_yaml(path):
    """Parse one of the ``.iwa.yaml`` files produced by the unpack step."""
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. The
    # files read here are generated locally by unpack_command, but they are
    # derived from a user-supplied document; consider yaml.safe_load if the
    # unpacker's output only uses plain YAML types -- confirm before switching.
    with open(path, encoding='utf-8') as raw:
        return yaml.load(raw, yaml.Loader)


def _first_objects(parsed):
    """Yield the first object of each archive in the first chunk of ``parsed``."""
    for archive in parsed['chunks'][0]['archives']:
        objects = archive.get('objects')
        # Skip archives without objects instead of failing with IndexError.
        if objects:
            yield objects[0]


def extract(f: str):
    """Unpack a Keynote file and copy the text of its visible slides to the clipboard.

    Steps:
      1. Unpack ``f`` with ``unpack_command``.
      2. Read ``Index/Document.iwa.yaml`` and collect the identifiers of all
         ``KN.SlideNodeArchive`` entries that are not flagged ``isHidden``.
      3. For each such slide whose ``Slide-<id>.iwa.yaml`` exists, gather the
         first ``text`` entry of every archive's first object.
      4. Wait for the user to press Enter, then pipe the collected text
         (paragraphs separated by blank lines) to ``pbcopy``.

    Args:
        f: Path to the Keynote file to extract.

    Raises:
        FileNotFoundError: if ``Document.iwa.yaml`` is not where it is expected.
        subprocess.CalledProcessError: if ``pbcopy`` exits with a non-zero status.
    """
    # unpack_command receives only the input path; its output is expected
    # under that path with '.key' removed (presumably the unpacker's default
    # output directory -- confirm against keynote_parser). Note that
    # str.replace removes every '.key' occurrence, not just the suffix; this is
    # kept as-is so the directory matches what the unpacker creates.
    index = pathlib.Path(f.replace('.key', '')) / 'Index'
    unpack_command(f)

    document = _load_yaml(index / 'Document.iwa.yaml')
    slide_ids = [
        obj['slide']['identifier']
        for obj in _first_objects(document)
        if obj.get('_pbtype') == 'KN.SlideNodeArchive'
        and not obj.get('isHidden', False)
    ]

    lines = []
    for slide_id in slide_ids:
        slide_path = index / f'Slide-{slide_id}.iwa.yaml'
        # Slides listed in the document but absent on disk are ignored.
        if not slide_path.is_file():
            continue

        for obj in _first_objects(_load_yaml(slide_path)):
            texts = obj.get('text')
            if not texts:
                continue
            # Drop U+FFFC (object replacement character) placeholders.
            text = texts[0].replace('\ufffc', '')
            if text:
                # Double every newline so each line becomes its own paragraph.
                lines.append(text.replace('\n', '\n\n'))

    # Pause so the user decides when the clipboard gets overwritten; the typed
    # value itself is not inspected.
    input('Press f if you are ready')
    print(f'Copying all text from {f}')
    # check=True surfaces a failing pbcopy instead of silently ignoring it.
    subprocess.run(
        ['pbcopy'],
        input='\n\n'.join(lines).encode('utf-8'),
        check=True,
    )
# Run the CLI only when executed as a script, so that importing this module
# does not parse sys.argv or exit the interpreter as a side effect.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment