Skip to content

Instantly share code, notes, and snippets.

@ras0q
Created October 3, 2024 09:03
Show Gist options
  • Select an option

  • Save ras0q/7cb903bc8689eab22934b9f6541d6629 to your computer and use it in GitHub Desktop.

Select an option

Save ras0q/7cb903bc8689eab22934b9f6541d6629 to your computer and use it in GitHub Desktop.
import sys
import zipfile
import xml.etree.ElementTree as ET
from pprint import pprint
# Collect the position of every placeholder shape found in one XML part.
def get_placeholder_positions(xml_content):
    """Return the type and offset of each placeholder shape in *xml_content*.

    Args:
        xml_content: XML text of a single part, as ``str`` or ``bytes``
            (both are accepted by ``ET.fromstring``).

    Returns:
        A list of dicts with the keys ``"type"``, ``"x"`` and ``"y"``.
        ``"type"`` is the ``type`` attribute of the shape's ``<p:ph>``
        element, or ``"undefined"`` when that attribute is absent. ``"x"``
        and ``"y"`` are the raw attribute strings of the shape's ``<a:off>``
        element; they are not converted to numbers here.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_content* is not
            well-formed XML.
    """
    # The elements of interest are namespace-qualified, so the prefixes used
    # in the search paths below have to be mapped explicitly.
    ns = {
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    }
    root = ET.fromstring(xml_content)

    positions = []
    # Walk every <p:sp> shape and keep only the ones that are placeholders.
    for sp in root.findall(".//p:sp", ns):
        ph = sp.find(".//p:nvSpPr/p:nvPr/p:ph", ns)
        if ph is None:
            # Not a placeholder shape.
            continue

        off = sp.find(".//a:xfrm/a:off", ns)
        if off is None:
            # The shape carries no explicit offset; nothing to report.
            continue

        x = off.get("x")
        y = off.get("y")
        if x is None or y is None:
            # An incomplete offset is treated like a missing one instead of
            # aborting the whole scan with a KeyError.
            continue

        positions.append(
            {
                "type": ph.get("type", "undefined"),
                "x": x,
                "y": y,
            }
        )
    return positions
# Read the slide size declared in the presentation part of a .pptx archive.
def get_slide_size(pptx_filename):
    """Return the slide size stored in ``ppt/presentation.xml``.

    The result is also reported on standard output.

    Args:
        pptx_filename: Path of (or file-like object for) the .pptx archive;
            it is handed to ``zipfile.ZipFile`` unchanged.

    Returns:
        ``(width, height)`` as integers taken from the ``cx`` and ``cy``
        attributes of ``<p:sldSz>``, or ``(None, None)`` when the document
        has no such element.

    Raises:
        KeyError: If the archive has no ``ppt/presentation.xml`` member, or
            ``<p:sldSz>`` lacks ``cx``/``cy``.
        xml.etree.ElementTree.ParseError: If the part is not well-formed XML.
    """
    ns = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        with pptx.open("ppt/presentation.xml") as presentation_xml:
            # Feed the raw bytes to the parser so that the encoding declared
            # by the XML itself is honored rather than assuming UTF-8.
            root = ET.parse(presentation_xml).getroot()

    # <p:sldSz> carries the slide dimensions in its cx / cy attributes.
    sld_sz = root.find(".//p:sldSz", ns)
    if sld_sz is None:
        print("Slide size not found!")
        return None, None

    width = int(sld_sz.attrib["cx"])
    height = int(sld_sz.attrib["cy"])
    print(f"Slide size: width = {width} EMU, height = {height} EMU")
    return width, height
# Open a PowerPoint file as a zip archive and report the placeholders of
# every slide layout part it contains.
def extract_placeholders_from_pptx(pptx_filename):
    """Print the placeholder positions of each slide layout in a .pptx file.

    For every archive member named ``ppt/slideLayouts/slideLayout*.xml`` the
    placeholder list returned by ``get_placeholder_positions`` is printed,
    followed by each position expressed as a percentage of the slide size
    returned by ``get_slide_size``.

    Args:
        pptx_filename: Path of the .pptx archive; passed to
            ``get_slide_size`` and ``zipfile.ZipFile``.

    Returns:
        None. All results are written to standard output.
    """
    w, h = get_slide_size(pptx_filename)
    # Percentages need a usable, non-zero slide size; without one only the
    # raw positions are printed instead of failing on int(None) or a
    # division by zero.
    can_scale = bool(w) and bool(h)

    with zipfile.ZipFile(pptx_filename, "r") as pptx:
        # Pick out the slide layout parts of the archive.
        slide_files = [
            f
            for f in pptx.namelist()
            if f.startswith("ppt/slideLayouts/slideLayout") and f.endswith(".xml")
        ]
        # Plain print: pprint would emit the quoted repr of the message.
        print(f"Found {len(slide_files)} slides in the presentation.")

        for slide_file in slide_files:
            print(f"Processing slide: {slide_file}")
            with pptx.open(slide_file) as slide_xml:
                # Hand over the raw bytes so the XML parser applies the
                # encoding declared by the document itself.
                positions = get_placeholder_positions(slide_xml.read())

            pprint(positions)
            if can_scale:
                for p in positions:
                    print(
                        "x: ", 100 * int(p["x"]) / w,
                        "y: ", 100 * int(p["y"]) / h,
                    )
            print()
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the .pptx file to scan.
    if len(sys.argv) != 2:
        # Plain print: pprint would show the quoted repr of the message.
        print("Usage: python script.py <pptx_filename>")
        sys.exit(1)

    pptx_filename = sys.argv[1]
    # pprint could also wrap a long path across several quoted lines.
    print(f"Extracting placeholders from {pptx_filename}")
    extract_placeholders_from_pptx(pptx_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment