ras0q · October 3, 2024 09:03
diff --git a/pptx_get_placeholder_positions.py b/pptx_get_placeholder_positions.py
 import sys
 import zipfile
 import xml.etree.ElementTree as ET
 from pprint import pprint


 # Function that collects the positions of placeholder shapes.
 def get_placeholder_positions(xml_content):
    """Return the offset of every placeholder shape found in an XML document.

    Args:
        xml_content: XML document handed to ``ET.fromstring``.

    Returns:
        A list of dicts with the keys ``"type"``, ``"x"`` and ``"y"``, one
        per ``<p:sp>`` element that contains a ``<p:ph>`` placeholder marker
        and an ``<a:off>`` element under an ``<a:xfrm>``. ``"type"`` is the
        ``type`` attribute of ``<p:ph>`` (``"undefined"`` when absent);
        ``"x"`` and ``"y"`` are the attribute values of ``<a:off>`` as
        strings. Shapes lacking either element are left out.

    Raises:
        KeyError: if a matched ``<a:off>`` has no ``x`` or ``y`` attribute.
    """
    # Prefix -> namespace URI map used by every XPath query below.
    namespaces = {
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    }
    document_root = ET.fromstring(xml_content)

    found = []
    for shape in document_root.findall(".//p:sp", namespaces):
        # Only shapes that carry a <p:ph> marker are placeholders.
        placeholder = shape.find(".//p:nvSpPr/p:nvPr/p:ph", namespaces)
        if placeholder is None:
            continue

        # A placeholder without its own offset has nothing to report.
        offset = shape.find(".//a:xfrm/a:off", namespaces)
        if offset is None:
            continue

        entry = {"type": placeholder.attrib.get("type", "undefined")}
        entry["x"] = offset.attrib["x"]
        entry["y"] = offset.attrib["y"]
        found.append(entry)

    return found


 # Function that reads the slide size.
 def get_slide_size(pptx_filename):
    """Return the slide size stored in ``ppt/presentation.xml`` of a .pptx.

    The result is also reported on stdout (labelled as EMU in the message).

    Args:
        pptx_filename: Path of the .pptx (zip) file to open.

    Returns:
        ``(width, height)`` as ints taken from the ``cx`` / ``cy`` attributes
        of the first ``<p:sldSz>`` element, or ``(None, None)`` when the
        document has no such element.

    Raises:
        KeyError: if ``<p:sldSz>`` lacks a ``cx`` or ``cy`` attribute.
        ValueError: if ``cx`` or ``cy`` is not an integer literal.
    """
    ns = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        with pptx.open("ppt/presentation.xml") as presentation_xml:
            # Feed the raw bytes to the parser instead of decoding them as
            # UTF-8 first: the parser then honours the BOM / XML declaration,
            # so a part saved as UTF-16 no longer fails with
            # UnicodeDecodeError.
            root = ET.fromstring(presentation_xml.read())

    # Read the slide size (cx, cy) from the <p:sldSz> element.
    sldSz = root.find(".//p:sldSz", ns)
    if sldSz is None:
        print("Slide size not found!")
        return None, None

    width = int(sldSz.attrib["cx"])
    height = int(sldSz.attrib["cy"])
    print(f"Slide size: width = {width} EMU, height = {height} EMU")
    return width, height


 # Function that opens the PowerPoint archive and analyses the placeholders
 # of each layout part.
 def extract_placeholders_from_pptx(pptx_filename):
    """Print the placeholder positions of every slide-layout part in a .pptx.

    Only archive members named ``ppt/slideLayouts/slideLayout*.xml`` are
    scanned (the progress messages nevertheless call them "slides"). For each
    part the raw positions are pretty-printed, followed by each position as a
    percentage of the slide size.

    Args:
        pptx_filename: Path of the .pptx (zip) file to inspect.

    Returns:
        None. All results are written to stdout.
    """
    w, h = get_slide_size(pptx_filename)
    # get_slide_size returns (None, None) when the size is missing, and a
    # zero dimension cannot be used as a divisor. In both cases the raw
    # positions are still printed; only the percentage lines are skipped
    # instead of crashing with TypeError / ZeroDivisionError.
    can_scale = bool(w) and bool(h)
    if can_scale:
        slide_w, slide_h = int(w), int(h)

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        # Locate the layout parts inside the archive.
        slide_files = [
            f
            for f in pptx.namelist()
            if f.startswith("ppt/slideLayouts/slideLayout") and f.endswith(".xml")
        ]

        pprint(f"Found {len(slide_files)} slides in the presentation.")

        for slide_file in slide_files:
            pprint(f"Processing slide: {slide_file}")
            with pptx.open(slide_file) as slide_xml:
                xml_content = slide_xml.read().decode("utf-8")
            positions = get_placeholder_positions(xml_content)
            pprint(positions)
            if can_scale:
                for p in positions:
                    print(
                        "x: ",
                        100 * int(p["x"]) / slide_w,
                        "y: ",
                        100 * int(p["y"]) / slide_h,
                    )
            print()


 if __name__ == "__main__":
    # The script expects exactly one argument: the path of the .pptx file.
    if len(sys.argv) == 2:
        pptx_filename = sys.argv[1]
        pprint(f"Extracting placeholders from {pptx_filename}")
        extract_placeholders_from_pptx(pptx_filename)
    else:
        # Wrong argument count: show the usage line and exit with status 1.
        pprint("Usage: python script.py <pptx_filename>")
        sys.exit(1)
	import sys
	import zipfile
	import xml.etree.ElementTree as ET
	from pprint import pprint


	# Function that collects the positions of placeholder shapes.
	def get_placeholder_positions(xml_content):
	    """Return the offset of every placeholder shape found in an XML document.

	    Args:
	        xml_content: XML document handed to ``ET.fromstring``.

	    Returns:
	        A list of ``{"type": ..., "x": ..., "y": ...}`` dicts, one per
	        ``<p:sp>`` element that has both a ``<p:ph>`` placeholder marker
	        and an ``<a:off>`` element under an ``<a:xfrm>``. ``"type"`` falls
	        back to ``"undefined"`` when ``<p:ph>`` has no ``type`` attribute;
	        ``"x"`` and ``"y"`` are returned as the attribute strings.

	    Raises:
	        KeyError: if a matched ``<a:off>`` has no ``x`` or ``y`` attribute.
	    """
	    # fromstring already returns the root element, so no ElementTree
	    # wrapper is needed.
	    root = ET.fromstring(xml_content)

	    # Prefix -> namespace URI map required by the XPath queries.
	    ns = {
	        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
	        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
	    }

	    positions = []
	    # Walk every <p:sp> shape in the document.
	    for sp in root.findall(".//p:sp", ns):
	        # Check whether the shape is a placeholder (has a <p:ph> marker).
	        ph = sp.find(".//p:nvSpPr/p:nvPr/p:ph", ns)
	        if ph is None:
	            continue

	        # Placeholder type as declared on <p:ph>.
	        ph_type = ph.attrib.get("type", "undefined")

	        # Position information; shapes without an offset are skipped.
	        el = sp.find(".//a:xfrm/a:off", ns)
	        if el is None:
	            continue

	        positions.append(
	            {
	                "type": ph_type,
	                "x": el.attrib["x"],
	                "y": el.attrib["y"],
	            }
	        )

	    return positions


	# Function that reads the slide size.
	def get_slide_size(pptx_filename):
	    """Return the slide size stored in ``ppt/presentation.xml`` of a .pptx.

	    The outcome is also printed to stdout (the message labels the values
	    as EMU).

	    Args:
	        pptx_filename: Path of the .pptx (zip) file to open.

	    Returns:
	        ``(width, height)`` as ints read from the ``cx`` / ``cy``
	        attributes of the first ``<p:sldSz>`` element, or ``(None, None)``
	        when no such element exists.

	    Raises:
	        KeyError: if ``<p:sldSz>`` lacks a ``cx`` or ``cy`` attribute.
	        ValueError: if ``cx`` or ``cy`` is not an integer literal.
	    """
	    with zipfile.ZipFile(pptx_filename, "r") as pptx:
	        # Open the presentation.xml part.
	        with pptx.open("ppt/presentation.xml") as presentation_xml:
	            # Parse the raw bytes so the parser picks the encoding from
	            # the BOM / XML declaration; decoding as UTF-8 up front would
	            # raise UnicodeDecodeError on a UTF-16 encoded part.
	            root = ET.fromstring(presentation_xml.read())

	    # Namespace used by the query below.
	    ns = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}

	    # Read the slide size (cx, cy) from the <p:sldSz> element.
	    sldSz = root.find(".//p:sldSz", ns)
	    if sldSz is not None:
	        width = int(sldSz.attrib["cx"])
	        height = int(sldSz.attrib["cy"])
	        print(f"Slide size: width = {width} EMU, height = {height} EMU")
	        return width, height

	    print("Slide size not found!")
	    return None, None


	# Function that opens the PowerPoint archive and analyses the placeholders
	# of each layout part.
	def extract_placeholders_from_pptx(pptx_filename):
	    """Print the placeholder positions of every slide-layout part in a .pptx.

	    Only archive members named ``ppt/slideLayouts/slideLayout*.xml`` are
	    scanned (the progress messages nevertheless call them "slides"). For
	    each part the raw positions are pretty-printed, followed by each
	    position as a percentage of the slide size.

	    Args:
	        pptx_filename: Path of the .pptx (zip) file to inspect.

	    Returns:
	        None. All results are written to stdout.
	    """
	    w, h = get_slide_size(pptx_filename)
	    # The size may be (None, None) when it is missing from the file, and
	    # a zero dimension cannot be divided by. Percentages are only printed
	    # when both dimensions are usable; the raw positions are always
	    # printed.
	    size_known = bool(w) and bool(h)

	    with zipfile.ZipFile(pptx_filename, "r") as pptx:
	        # Locate the layout parts inside the archive.
	        slide_files = [
	            f
	            for f in pptx.namelist()
	            if f.startswith("ppt/slideLayouts/slideLayout") and f.endswith(".xml")
	        ]

	        pprint(f"Found {len(slide_files)} slides in the presentation.")

	        for slide_file in slide_files:
	            pprint(f"Processing slide: {slide_file}")
	            with pptx.open(slide_file) as slide_xml:
	                xml_content = slide_xml.read().decode("utf-8")
	                positions = get_placeholder_positions(xml_content)
	                pprint(positions)
	                if size_known:
	                    for p in positions:
	                        print("x: ", 100 * int(p["x"]) / int(w), "y: ", 100 * int(p["y"]) / int(h))
	                print()


	if __name__ == "__main__":
	    # Exactly one command-line argument (the .pptx path) is required;
	    # otherwise print the usage line and exit with status 1.
	    if len(sys.argv) != 2:
	        pprint("Usage: python script.py <pptx_filename>")
	        sys.exit(1)

	    pptx_filename = sys.argv[1]
	    pprint(f"Extracting placeholders from {pptx_filename}")
	    extract_placeholders_from_pptx(pptx_filename)
No results found