Skip to content

Instantly share code, notes, and snippets.

@puhitaku
Created February 3, 2026 08:39
Show Gist options
  • Select an option

  • Save puhitaku/be41bf25d2ac578c166885e8e1188d48 to your computer and use it in GitHub Desktop.

Select an option

Save puhitaku/be41bf25d2ac578c166885e8e1188d48 to your computer and use it in GitHub Desktop.
Keynote text extractor
import pathlib
import os
import pipes
import subprocess
import sys
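# Select the pure-Python protobuf implementation to prevent the Protobuf error.
# This is set before keynote_parser is imported so that it is already in place
# when protobuf is loaded.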
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
# Third-party dependencies: PyYAML (provides `yaml`) and keynote-parser
# (provides `keynote_parser`).
import yaml
from keynote_parser.command_line import unpack_command
def main():
    """Run ``extract`` on every file path given on the command line.

    Exits with status 1 and prints a usage message to stderr when no input
    file is supplied.
    """
    if len(sys.argv) < 2:
        # Usage errors belong on stderr so they do not mix with normal output.
        print(f'Usage: {sys.argv[0]} INFILE [INFILE ...]', file=sys.stderr)
        sys.exit(1)

    for f in sys.argv[1:]:
        extract(f)
def _load_yaml(path):
    """Parse the YAML file at *path* and return the resulting object."""
    # NOTE(review): yaml.Loader is not a safe loader. It is kept because the
    # files read here are produced locally by unpack_command(); do not point
    # this helper at YAML from an untrusted source.
    with open(path, encoding='utf-8') as raw:
        return yaml.load(raw, yaml.Loader)


def _visible_slide_ids(document: dict) -> list:
    """Return the slide identifiers of all non-hidden slide nodes.

    Only the first chunk of *document* and the first object of each archive
    are inspected.
    """
    slide_ids = []
    for archive in document['chunks'][0]['archives']:
        node = archive['objects'][0]
        if node.get('_pbtype') != 'KN.SlideNodeArchive':
            continue
        if node.get('isHidden', False):
            continue
        slide_ids.append(node['slide']['identifier'])
    return slide_ids


def _slide_text_runs(body: dict) -> list:
    """Return the non-empty text entries found in one parsed slide file.

    For every archive in the first chunk, the first object's first ``text``
    entry is taken, U+FFFC (object replacement character) is removed, and
    each newline is doubled.
    """
    runs = []
    for archive in body['chunks'][0]['archives']:
        obj = archive['objects'][0]
        texts = obj.get('text')
        if not texts:
            # Missing or empty 'text': nothing to extract from this object.
            continue
        text = texts[0].replace('\ufffc', '')
        if text:
            runs.append(text.replace('\n', '\n\n'))
    return runs


def extract(f: str) -> None:
    """Unpack the Keynote file *f* and copy its slide text to the clipboard.

    The file is passed to ``unpack_command``; afterwards the YAML files under
    ``<f without '.key'>/Index/`` are read. Text from every non-hidden slide
    whose ``Slide-<id>.iwa.yaml`` file exists is collected, the user is
    prompted to press Enter, and the text (joined by blank lines) is piped to
    the ``pbcopy`` command.

    Args:
        f: Path of the Keynote file to process.

    Raises:
        FileNotFoundError: If ``Document.iwa.yaml`` is missing after
            unpacking, or if ``pbcopy`` is not available.
        subprocess.CalledProcessError: If ``pbcopy`` exits with a non-zero
            status.
    """
    # NOTE(review): presumably this matches the directory unpack_command()
    # writes to when no output path is given -- confirm against the installed
    # keynote_parser version before changing how the name is derived.
    index = pathlib.Path(f.replace('.key', '')) / 'Index'
    unpack_command(f)

    document = _load_yaml(index / 'Document.iwa.yaml')

    lines = []
    for slide in _visible_slide_ids(document):
        slide_path = index / f'Slide-{slide}.iwa.yaml'
        if not slide_path.exists():
            # The document lists a slide that has no file of its own; skip it.
            continue
        lines.extend(_slide_text_runs(_load_yaml(slide_path)))

    input('Press f if you are ready')
    print(f'Copying all text from {f}')
    # check=True surfaces a failing pbcopy instead of silently ignoring it.
    subprocess.run(['pbcopy'], input='\n\n'.join(lines).encode(), check=True)
# Run the extractor only when this file is executed as a script, so that
# importing it from another module does not start an extraction.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment