Created
October 3, 2024 09:03
-
-
Save ras0q/7cb903bc8689eab22934b9f6541d6629 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import zipfile | |
| import xml.etree.ElementTree as ET | |
| from pprint import pprint | |
def get_placeholder_positions(xml_content):
    """Collect the offset of every placeholder shape in an XML document.

    Every ``<p:sp>`` element that carries a ``<p:ph>`` placeholder marker
    and an ``<a:xfrm>/<a:off>`` offset element contributes one record.
    Placeholder shapes without an offset element are skipped.

    Args:
        xml_content: The XML document, handed as-is to ``ET.fromstring``.

    Returns:
        A list of dicts in document order, each with the keys ``"type"``
        (the ``type`` attribute of ``<p:ph>``, or ``"undefined"`` when that
        attribute is absent), ``"x"`` and ``"y"`` (the raw attribute strings
        of ``<a:off>``).
    """
    # The element paths are namespace-qualified, so the prefixes used in the
    # queries below must be mapped explicitly.
    namespaces = {
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    }
    document_root = ET.fromstring(xml_content)

    def describe(shape):
        """Return the position record for *shape*, or None if it is skipped."""
        marker = shape.find(".//p:nvSpPr/p:nvPr/p:ph", namespaces)
        if marker is None:
            # Not a placeholder shape.
            return None
        offset = shape.find(".//a:xfrm/a:off", namespaces)
        if offset is None:
            # Placeholder without explicit position information.
            return None
        return {
            "type": marker.attrib.get("type", "undefined"),
            "x": offset.attrib["x"],
            "y": offset.attrib["y"],
        }

    candidates = (
        describe(shape) for shape in document_root.findall(".//p:sp", namespaces)
    )
    return [record for record in candidates if record is not None]
def get_slide_size(pptx_filename):
    """Read the slide dimensions from a .pptx package.

    Opens ``ppt/presentation.xml`` inside the archive and reads the ``cx``
    and ``cy`` attributes of its ``<p:sldSz>`` element. The outcome is also
    reported on stdout.

    Args:
        pptx_filename: Path or file-like object accepted by
            ``zipfile.ZipFile``.

    Returns:
        ``(width, height)`` as ints (reported as EMU), or ``(None, None)``
        when the document has no ``<p:sldSz>`` element.

    Raises:
        KeyError: If the archive has no ``ppt/presentation.xml`` member, or
            ``<p:sldSz>`` lacks a ``cx``/``cy`` attribute.
        xml.etree.ElementTree.ParseError: If the part is not well-formed XML.
    """
    ns = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        with pptx.open("ppt/presentation.xml") as presentation_xml:
            # Give the parser the raw bytes so it honours the document's own
            # encoding (BOM / XML declaration) instead of assuming UTF-8.
            root = ET.fromstring(presentation_xml.read())

    sld_sz = root.find(".//p:sldSz", ns)
    if sld_sz is None:
        print("Slide size not found!")
        return None, None

    width = int(sld_sz.attrib["cx"])
    height = int(sld_sz.attrib["cy"])
    print(f"Slide size: width = {width} EMU, height = {height} EMU")
    return width, height
def extract_placeholders_from_pptx(pptx_filename):
    """Print the placeholder positions of every slide layout in a .pptx file.

    For each ``ppt/slideLayouts/slideLayout*.xml`` member of the archive the
    raw placeholder records are pretty-printed, followed by one line per
    placeholder giving its offset as a percentage of the slide width and
    height. The percentage lines are omitted when the slide size is
    unavailable or has a zero dimension.

    Args:
        pptx_filename: Path of the .pptx file to inspect.
    """
    w, h = get_slide_size(pptx_filename)
    # get_slide_size yields (None, None) when the size is missing, and a zero
    # dimension would divide by zero; percentages are only meaningful otherwise.
    can_scale = bool(w) and bool(h)

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        # Layout parts are located by their conventional member names.
        slide_files = [
            f
            for f in pptx.namelist()
            if f.startswith("ppt/slideLayouts/slideLayout") and f.endswith(".xml")
        ]
        pprint(f"Found {len(slide_files)} slides in the presentation.")

        for slide_file in slide_files:
            pprint(f"Processing slide: {slide_file}")
            with pptx.open(slide_file) as slide_xml:
                # Pass raw bytes so the XML parser picks the encoding from the
                # document itself rather than assuming UTF-8.
                xml_content = slide_xml.read()
            positions = get_placeholder_positions(xml_content)
            pprint(positions)
            if can_scale:
                for p in positions:
                    print(
                        "x: ", 100 * int(p["x"]) / w,
                        "y: ", 100 * int(p["y"]) / h,
                    )
            print()
if __name__ == "__main__":
    # Exactly one positional argument (the .pptx path) must follow the
    # script name; anything else gets the usage message and exit status 1.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        pprint("Usage: python script.py <pptx_filename>")
        sys.exit(1)

    (pptx_filename,) = cli_args
    pprint(f"Extracting placeholders from {pptx_filename}")
    extract_placeholders_from_pptx(pptx_filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment