ksasao · December 25, 2025 15:31
diff --git a/cliptest.py b/cliptest.py
 import gradio as gr
 from PIL import Image, ImageOps
 import torch
 from transformers import AutoImageProcessor, AutoModel, AutoTokenizer
 import shlex

 # Enable HEIF/HEIC decoding in Pillow when the optional pillow-heif package is installed.
 try:
    from pillow_heif import register_heif_opener
    register_heif_opener()
    print("HEIF/HEIC形式のサポートが有効化されました")
 except ImportError:
    # pillow-heif is optional: only print a warning and carry on without it.
    print("警告: pillow-heifがインストールされていません。HEIF/HEIC形式を使用する場合は 'pip install pillow-heif' を実行してください")

 # Global model initialization: executed once, at import time.
 HF_MODEL_PATH = 'line-corporation/clip-japanese-base-v2'
 # Use the GPU when torch reports one, otherwise fall back to the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"

 print(f"デバイス: {device}")
 print("モデル読み込み中...")

 # NOTE(review): trust_remote_code=True allows Python code shipped with the
 # model repository to run locally - confirm the repository is trusted.
 tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, legacy=False)
 processor = AutoImageProcessor.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, use_fast=True)
 model = AutoModel.from_pretrained(HF_MODEL_PATH, trust_remote_code=True).to(device)

 print("モデル読み込み完了!")

 def load_image_preview(file):
    """Build a preview image from an uploaded file.

    Args:
        file: Uploaded file as accepted by ``Image.open``; ``None`` means
            nothing is uploaded.

    Returns:
        An RGB image with the EXIF orientation applied, or ``None`` when no
        file was given or the file could not be read (the error is printed).
    """
    if file is None:
        return None

    try:
        # The context manager releases the file handle deterministically;
        # exif_transpose() returns a new image, so the result stays usable
        # after the source has been closed.
        with Image.open(file) as source:
            # Rotate/flip according to the EXIF orientation tag.
            preview = ImageOps.exif_transpose(source)
            if preview.mode != 'RGB':
                preview = preview.convert('RGB')
        return preview
    except Exception as e:
        # Best effort: a broken file only means there is no preview.
        print(f"プレビュー生成エラー: {str(e)}")
        return None

 def classify_image(file, labels_text):
    """Score an uploaded image against a list of text labels.

    Args:
        file: Uploaded image as accepted by ``Image.open``; ``None`` means
            nothing has been uploaded yet.
        labels_text: Labels separated by whitespace (half- or full-width).
            A double-quoted part is kept together as a single label.

    Returns:
        A text report with one ``label: score`` line per label (scores are
        clipped to the 0-100 range), or a message describing what is
        missing or why the image could not be read.
    """
    if file is None:
        return "画像をアップロードしてください"

    if not labels_text or labels_text.strip() == "":
        return "ラベルを入力してください"

    # Same tokenization as shlex.split(), except that the full-width space
    # (U+3000) typed by Japanese input methods also separates labels,
    # which is what the str.split() fallback below already does.
    lexer = shlex.shlex(labels_text.strip(), posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ""
    lexer.whitespace += "\u3000"
    try:
        labels = list(lexer)
    except ValueError:
        # Unbalanced quotes and similar: fall back to plain splitting.
        labels = labels_text.split()

    # Input such as '""' yields only empty labels; treat it like missing
    # input instead of sending empty strings to the tokenizer.
    labels = [label for label in labels if label.strip()]
    if not labels:
        return "ラベルを入力してください"

    # Load the image (HEIF works when the opener has been registered).
    try:
        # The context manager releases the file handle deterministically;
        # exif_transpose() returns a new image, so it outlives the source.
        with Image.open(file) as source:
            image = ImageOps.exif_transpose(source)
            # The processor is fed RGB only (input may be RGBA, palette, ...).
            if image.mode != 'RGB':
                image = image.convert('RGB')
    except Exception as e:
        return f"画像の読み込みエラー: {str(e)}"

    # Pre-process both modalities and move them to the model's device.
    processed_image = processor(image, return_tensors="pt").to(device)
    text = tokenizer(labels).to(device)

    # Inference: one row of image-vs-label scores.
    with torch.no_grad():
        image_features = model.get_image_features(**processed_image)
        text_features = model.get_text_features(**text)
        similarity_scores = (image_features @ text_features.T)[0]

    # One "label: score" line per label, score clipped to 0-100.
    lines = [
        f"{label}: {max(0, min(100, score.item())):.1f}\n"
        for label, score in zip(labels, similarity_scores)
    ]
    return "結果:\n\n" + "".join(lines)

 def compare_images(file1, file2):
    """Compute the cosine similarity between two uploaded images.

    Args:
        file1: First uploaded image, as accepted by ``Image.open``.
        file2: Second uploaded image, as accepted by ``Image.open``.

    Returns:
        A text report containing the cosine similarity and the same value
        rescaled from [-1, 1] to [0, 100], or a message when an image is
        missing or cannot be read.
    """
    if file1 is None or file2 is None:
        return "2つの画像をアップロードしてください"

    def _open_rgb(path):
        """Open *path*, apply its EXIF orientation and return an RGB image."""
        # The context manager releases the file handle deterministically;
        # exif_transpose() returns a new image, so it outlives the source.
        with Image.open(path) as source:
            rgb = ImageOps.exif_transpose(source)
            if rgb.mode != 'RGB':
                rgb = rgb.convert('RGB')
        return rgb

    try:
        image1 = _open_rgb(file1)
        image2 = _open_rgb(file2)
    except Exception as e:
        return f"画像の読み込みエラー: {str(e)}"

    # Pre-process both images and move them to the model's device.
    processed_image1 = processor(image1, return_tensors="pt").to(device)
    processed_image2 = processor(image2, return_tensors="pt").to(device)

    # Inference
    with torch.no_grad():
        image_features1 = model.get_image_features(**processed_image1)
        image_features2 = model.get_image_features(**processed_image2)

        # L2-normalize so that the dot product is the cosine similarity.
        image_features1 = image_features1 / image_features1.norm(dim=-1, keepdim=True)
        image_features2 = image_features2 / image_features2.norm(dim=-1, keepdim=True)

        similarity_score = (image_features1 @ image_features2.T)[0][0]

    raw_score = similarity_score.item()
    # Map the cosine range -1..1 onto 0..100.
    scaled_score = (raw_score + 1) * 50

    return (
        "画像同士の類似度:\n\n"
        f"スコア: {scaled_score:.1f}\n"
        f"コサイン類似度: {raw_score:.4f}"
    )

 def compare_texts(text1, text2):
    """Compute the cosine similarity between one text and several others.

    Args:
        text1: Reference text; used as a whole after stripping.
        text2: Candidate texts separated by whitespace (half- or
            full-width). A double-quoted part is kept together as one text.

    Returns:
        A text report with one line per candidate showing the cosine
        similarity and the same value rescaled from [-1, 1] to [0, 100], or
        a message naming the input that is missing.
    """
    if not text1 or text1.strip() == "":
        return "テキスト1を入力してください"

    if not text2 or text2.strip() == "":
        return "テキスト2を入力してください"

    # Same tokenization as shlex.split(), except that the full-width space
    # (U+3000) typed by Japanese input methods also separates texts,
    # which is what the str.split() fallback below already does.
    lexer = shlex.shlex(text2.strip(), posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ""
    lexer.whitespace += "\u3000"
    try:
        text2_list = list(lexer)
    except ValueError:
        # Unbalanced quotes and similar: fall back to plain splitting.
        text2_list = text2.split()

    # Input such as '""' yields only empty candidates; treat it like
    # missing input instead of sending empty strings to the tokenizer.
    text2_list = [candidate for candidate in text2_list if candidate.strip()]
    if not text2_list:
        return "テキスト2を入力してください"

    # Encode the reference and all candidates in a single batch.
    texts = [text1.strip()] + text2_list
    tokenized = tokenizer(texts).to(device)

    # Inference
    with torch.no_grad():
        text_features = model.get_text_features(**tokenized)

        # L2-normalize so that the dot product is the cosine similarity.
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # Row 0 is the reference; the remaining rows are the candidates.
        similarity_scores = text_features[0] @ text_features[1:].T

    # One line per candidate; (cos + 1) * 50 maps -1..1 onto 0..100.
    lines = []
    for text, score in zip(text2_list, similarity_scores):
        raw_score = score.item()
        scaled_score = (raw_score + 1) * 50
        lines.append(f"{text}: {scaled_score:.1f} (コサイン類似度: {raw_score:.4f})\n")

    return "テキスト同士の類似度:\n\n" + "".join(lines)

 # Gradio interface: three tabs, one per comparison mode.
 with gr.Blocks(title="CLIP Japanese 画像分類") as demo:
    gr.Markdown("# CLIP Japanese Base V2 画像分類・類似度比較")
    gr.Markdown("画像とテキストの類似度、画像同士の類似度、テキスト同士の類似度を計算できます")

    with gr.Tabs():
        # Tab 1: image-to-text similarity
        with gr.TabItem("画像 × テキスト"):
            with gr.Row():
                with gr.Column():
                    # gr.File is used for the upload and the preview is
                    # rendered separately by load_image_preview.
                    image_input = gr.File(
                        label="画像をアップロード（HEIF/HEIC対応）",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview = gr.Image(
                        label="プレビュー",
                        type="pil",
                        interactive=False
                    )
                    labels_input = gr.Textbox(
                        label="ラベル（空白区切り）",
                        placeholder="例: 犬 猫 象",
                        value="犬 猫 象"
                    )
                    submit_btn = gr.Button("処理", variant="primary")

                with gr.Column():
                    output = gr.Textbox(label="Raw Score", lines=10)

            # Refresh the preview whenever the uploaded file changes.
            image_input.change(
                fn=load_image_preview,
                inputs=[image_input],
                outputs=[image_preview]
            )

            submit_btn.click(
                fn=classify_image,
                inputs=[image_input, labels_input],
                outputs=output
            )

            gr.Markdown("### 使い方\n1. 画像をアップロード\n2. ラベルを空白区切りで入力\n3. 処理ボタンをクリック")

        # Tab 2: image-to-image similarity
        with gr.TabItem("画像 × 画像"):
            with gr.Row():
                with gr.Column():
                    image_input1 = gr.File(
                        label="画像1をアップロード",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview1 = gr.Image(
                        label="プレビュー1",
                        type="pil",
                        interactive=False
                    )

                with gr.Column():
                    image_input2 = gr.File(
                        label="画像2をアップロード",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview2 = gr.Image(
                        label="プレビュー2",
                        type="pil",
                        interactive=False
                    )

            submit_btn2 = gr.Button("類似度を計算", variant="primary")
            output2 = gr.Textbox(label="類似度スコア", lines=5)

            image_input1.change(
                fn=load_image_preview,
                inputs=[image_input1],
                outputs=[image_preview1]
            )

            image_input2.change(
                fn=load_image_preview,
                inputs=[image_input2],
                outputs=[image_preview2]
            )

            submit_btn2.click(
                fn=compare_images,
                inputs=[image_input1, image_input2],
                outputs=output2
            )

            gr.Markdown("### 使い方\n1. 2つの画像をアップロード\n2. 類似度を計算ボタンをクリック")

        # Tab 3: text-to-text similarity
        with gr.TabItem("テキスト × テキスト"):
            with gr.Row():
                with gr.Column():
                    text_input1 = gr.Textbox(
                        label="テキスト1",
                        placeholder="例: 犬が走っている",
                        lines=3
                    )
                    text_input2 = gr.Textbox(
                        # The quotes must be escaped: an unescaped "" ends one
                        # string literal and starts another, so the two
                        # pieces were concatenated and the quotes vanished
                        # from the label.
                        label="テキスト2（空白区切りまたは\"\"で囲んで複数指定可）",
                        placeholder="例: 猫が歩いている \"犬が歩いている\" 象が走っている",
                        lines=3
                    )
                    submit_btn3 = gr.Button("類似度を計算", variant="primary")

                with gr.Column():
                    output3 = gr.Textbox(label="類似度スコア", lines=5)

            submit_btn3.click(
                fn=compare_texts,
                inputs=[text_input1, text_input2],
                outputs=output3
            )

            gr.Markdown("### 使い方\n1. テキスト1を入力\n2. テキスト2を空白区切りで複数入力（\"\"で囲むとスペースを含む文も可）\n3. 類似度を計算ボタンをクリック")

 # Launch the app only when this file is executed as a script.
 if __name__ == "__main__":
    demo.launch()
diff --git a/requirements.txt b/requirements.txt
 aiofiles==24.1.0
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.12.0
 brotli==1.2.0
 certifi==2025.11.12
 charset-normalizer==3.4.4
 click==8.3.1
 colorama==0.4.6
 fastapi==0.127.0
 ffmpy==1.0.0
 filelock==3.20.1
 fsspec==2025.12.0
 gradio==6.2.0
 gradio_client==2.0.2
 groovy==0.1.2
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
 huggingface-hub==0.36.0
 idna==3.11
 Jinja2==3.1.6
 markdown-it-py==4.0.0
 MarkupSafe==3.0.3
 mdurl==0.1.2
 mpmath==1.3.0
 networkx==3.6.1
 numpy==2.4.0
 orjson==3.11.5
 packaging==25.0
 pandas==2.3.3
 pillow==12.0.0
 pillow_heif==1.1.1
 pydantic==2.12.5
 pydantic_core==2.41.5
 pydub==0.25.1
 Pygments==2.19.2
 python-dateutil==2.9.0.post0
 python-multipart==0.0.21
 pytz==2025.2
 PyYAML==6.0.3
 regex==2025.11.3
 requests==2.32.5
 rich==14.2.0
 safehttpx==0.1.7
 safetensors==0.7.0
 semantic-version==2.10.0
 sentencepiece==0.2.1
 shellingham==1.5.4
 six==1.17.0
 starlette==0.50.0
 sympy==1.14.0
 timm==1.0.22
 tokenizers==0.22.1
 tomlkit==0.13.3
 torch==2.9.1
 torchvision==0.24.1
 tqdm==4.67.1
 transformers==4.57.3
 typer==0.20.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 tzdata==2025.3
 urllib3==2.6.2
 uvicorn==0.40.0
	import gradio as gr
	from PIL import Image, ImageOps
	import torch
	from transformers import AutoImageProcessor, AutoModel, AutoTokenizer
	import shlex

	# Enable HEIF/HEIC decoding in Pillow when the optional pillow-heif package is installed.
	try:
	    from pillow_heif import register_heif_opener
	    register_heif_opener()
	    print("HEIF/HEIC形式のサポートが有効化されました")
	except ImportError:
	    # pillow-heif is optional: only print a warning and carry on without it.
	    print("警告: pillow-heifがインストールされていません。HEIF/HEIC形式を使用する場合は 'pip install pillow-heif' を実行してください")

	# Global model initialization: executed once, at import time.
	HF_MODEL_PATH = 'line-corporation/clip-japanese-base-v2'
	# Use the GPU when torch reports one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	print(f"デバイス: {device}")
	print("モデル読み込み中...")

	# NOTE(review): trust_remote_code=True allows Python code shipped with the
	# model repository to run locally - confirm the repository is trusted.
	tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, legacy=False)
	processor = AutoImageProcessor.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, use_fast=True)
	model = AutoModel.from_pretrained(HF_MODEL_PATH, trust_remote_code=True).to(device)

	print("モデル読み込み完了!")

	def load_image_preview(file):
	    """Build a preview image from an uploaded file.

	    Args:
	        file: Uploaded file as accepted by ``Image.open``; ``None`` means
	            nothing is uploaded.

	    Returns:
	        An RGB image with the EXIF orientation applied, or ``None`` when no
	        file was given or the file could not be read (the error is printed).
	    """
	    if file is None:
	        return None

	    try:
	        # The context manager releases the file handle deterministically;
	        # exif_transpose() returns a new image, so the result stays usable
	        # after the source has been closed.
	        with Image.open(file) as source:
	            # Rotate/flip according to the EXIF orientation tag.
	            preview = ImageOps.exif_transpose(source)
	            if preview.mode != 'RGB':
	                preview = preview.convert('RGB')
	        return preview
	    except Exception as e:
	        # Best effort: a broken file only means there is no preview.
	        print(f"プレビュー生成エラー: {str(e)}")
	        return None

	def classify_image(file, labels_text):
	    """Score an uploaded image against a list of text labels.

	    Args:
	        file: Uploaded image as accepted by ``Image.open``; ``None`` means
	            nothing has been uploaded yet.
	        labels_text: Labels separated by whitespace (half- or full-width).
	            A double-quoted part is kept together as a single label.

	    Returns:
	        A text report with one ``label: score`` line per label (scores are
	        clipped to the 0-100 range), or a message describing what is
	        missing or why the image could not be read.
	    """
	    if file is None:
	        return "画像をアップロードしてください"

	    if not labels_text or labels_text.strip() == "":
	        return "ラベルを入力してください"

	    # Same tokenization as shlex.split(), except that the full-width space
	    # (U+3000) typed by Japanese input methods also separates labels,
	    # which is what the str.split() fallback below already does.
	    lexer = shlex.shlex(labels_text.strip(), posix=True)
	    lexer.whitespace_split = True
	    lexer.commenters = ""
	    lexer.whitespace += "\u3000"
	    try:
	        labels = list(lexer)
	    except ValueError:
	        # Unbalanced quotes and similar: fall back to plain splitting.
	        labels = labels_text.split()

	    # Input such as '""' yields only empty labels; treat it like missing
	    # input instead of sending empty strings to the tokenizer.
	    labels = [label for label in labels if label.strip()]
	    if not labels:
	        return "ラベルを入力してください"

	    # Load the image (HEIF works when the opener has been registered).
	    try:
	        # The context manager releases the file handle deterministically;
	        # exif_transpose() returns a new image, so it outlives the source.
	        with Image.open(file) as source:
	            image = ImageOps.exif_transpose(source)
	            # The processor is fed RGB only (input may be RGBA, palette, ...).
	            if image.mode != 'RGB':
	                image = image.convert('RGB')
	    except Exception as e:
	        return f"画像の読み込みエラー: {str(e)}"

	    # Pre-process both modalities and move them to the model's device.
	    processed_image = processor(image, return_tensors="pt").to(device)
	    text = tokenizer(labels).to(device)

	    # Inference: one row of image-vs-label scores.
	    with torch.no_grad():
	        image_features = model.get_image_features(**processed_image)
	        text_features = model.get_text_features(**text)
	        similarity_scores = (image_features @ text_features.T)[0]

	    # One "label: score" line per label, score clipped to 0-100.
	    lines = [
	        f"{label}: {max(0, min(100, score.item())):.1f}\n"
	        for label, score in zip(labels, similarity_scores)
	    ]
	    return "結果:\n\n" + "".join(lines)

	def compare_images(file1, file2):
	    """Compute the cosine similarity between two uploaded images.

	    Args:
	        file1: First uploaded image, as accepted by ``Image.open``.
	        file2: Second uploaded image, as accepted by ``Image.open``.

	    Returns:
	        A text report containing the cosine similarity and the same value
	        rescaled from [-1, 1] to [0, 100], or a message when an image is
	        missing or cannot be read.
	    """
	    if file1 is None or file2 is None:
	        return "2つの画像をアップロードしてください"

	    def _open_rgb(path):
	        """Open *path*, apply its EXIF orientation and return an RGB image."""
	        # The context manager releases the file handle deterministically;
	        # exif_transpose() returns a new image, so it outlives the source.
	        with Image.open(path) as source:
	            rgb = ImageOps.exif_transpose(source)
	            if rgb.mode != 'RGB':
	                rgb = rgb.convert('RGB')
	        return rgb

	    try:
	        image1 = _open_rgb(file1)
	        image2 = _open_rgb(file2)
	    except Exception as e:
	        return f"画像の読み込みエラー: {str(e)}"

	    # Pre-process both images and move them to the model's device.
	    processed_image1 = processor(image1, return_tensors="pt").to(device)
	    processed_image2 = processor(image2, return_tensors="pt").to(device)

	    # Inference
	    with torch.no_grad():
	        image_features1 = model.get_image_features(**processed_image1)
	        image_features2 = model.get_image_features(**processed_image2)

	        # L2-normalize so that the dot product is the cosine similarity.
	        image_features1 = image_features1 / image_features1.norm(dim=-1, keepdim=True)
	        image_features2 = image_features2 / image_features2.norm(dim=-1, keepdim=True)

	        similarity_score = (image_features1 @ image_features2.T)[0][0]

	    raw_score = similarity_score.item()
	    # Map the cosine range -1..1 onto 0..100.
	    scaled_score = (raw_score + 1) * 50

	    return (
	        "画像同士の類似度:\n\n"
	        f"スコア: {scaled_score:.1f}\n"
	        f"コサイン類似度: {raw_score:.4f}"
	    )

	def compare_texts(text1, text2):
	    """Compute the cosine similarity between one text and several others.

	    Args:
	        text1: Reference text; used as a whole after stripping.
	        text2: Candidate texts separated by whitespace (half- or
	            full-width). A double-quoted part is kept together as one text.

	    Returns:
	        A text report with one line per candidate showing the cosine
	        similarity and the same value rescaled from [-1, 1] to [0, 100], or
	        a message naming the input that is missing.
	    """
	    if not text1 or text1.strip() == "":
	        return "テキスト1を入力してください"

	    if not text2 or text2.strip() == "":
	        return "テキスト2を入力してください"

	    # Same tokenization as shlex.split(), except that the full-width space
	    # (U+3000) typed by Japanese input methods also separates texts,
	    # which is what the str.split() fallback below already does.
	    lexer = shlex.shlex(text2.strip(), posix=True)
	    lexer.whitespace_split = True
	    lexer.commenters = ""
	    lexer.whitespace += "\u3000"
	    try:
	        text2_list = list(lexer)
	    except ValueError:
	        # Unbalanced quotes and similar: fall back to plain splitting.
	        text2_list = text2.split()

	    # Input such as '""' yields only empty candidates; treat it like
	    # missing input instead of sending empty strings to the tokenizer.
	    text2_list = [candidate for candidate in text2_list if candidate.strip()]
	    if not text2_list:
	        return "テキスト2を入力してください"

	    # Encode the reference and all candidates in a single batch.
	    texts = [text1.strip()] + text2_list
	    tokenized = tokenizer(texts).to(device)

	    # Inference
	    with torch.no_grad():
	        text_features = model.get_text_features(**tokenized)

	        # L2-normalize so that the dot product is the cosine similarity.
	        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

	        # Row 0 is the reference; the remaining rows are the candidates.
	        similarity_scores = text_features[0] @ text_features[1:].T

	    # One line per candidate; (cos + 1) * 50 maps -1..1 onto 0..100.
	    lines = []
	    for text, score in zip(text2_list, similarity_scores):
	        raw_score = score.item()
	        scaled_score = (raw_score + 1) * 50
	        lines.append(f"{text}: {scaled_score:.1f} (コサイン類似度: {raw_score:.4f})\n")

	    return "テキスト同士の類似度:\n\n" + "".join(lines)

	# Gradio interface: three tabs, one per comparison mode.
	with gr.Blocks(title="CLIP Japanese 画像分類") as demo:
	    gr.Markdown("# CLIP Japanese Base V2 画像分類・類似度比較")
	    gr.Markdown("画像とテキストの類似度、画像同士の類似度、テキスト同士の類似度を計算できます")

	    with gr.Tabs():
	        # Tab 1: image-to-text similarity
	        with gr.TabItem("画像 × テキスト"):
	            with gr.Row():
	                with gr.Column():
	                    # gr.File is used for the upload and the preview is
	                    # rendered separately by load_image_preview.
	                    image_input = gr.File(
	                        label="画像をアップロード（HEIF/HEIC対応）",
	                        file_types=["image", ".heic", ".heif"]
	                    )
	                    image_preview = gr.Image(
	                        label="プレビュー",
	                        type="pil",
	                        interactive=False
	                    )
	                    # The example labels need separators: the field is
	                    # documented as whitespace-separated.
	                    labels_input = gr.Textbox(
	                        label="ラベル（空白区切り）",
	                        placeholder="例: 犬 猫 象",
	                        value="犬 猫 象"
	                    )
	                    submit_btn = gr.Button("処理", variant="primary")

	                with gr.Column():
	                    output = gr.Textbox(label="Raw Score", lines=10)

	            # Refresh the preview whenever the uploaded file changes.
	            image_input.change(
	                fn=load_image_preview,
	                inputs=[image_input],
	                outputs=[image_preview]
	            )

	            submit_btn.click(
	                fn=classify_image,
	                inputs=[image_input, labels_input],
	                outputs=output
	            )

	            gr.Markdown("### 使い方\n1. 画像をアップロード\n2. ラベルを空白区切りで入力\n3. 処理ボタンをクリック")

	        # Tab 2: image-to-image similarity
	        with gr.TabItem("画像 × 画像"):
	            with gr.Row():
	                with gr.Column():
	                    image_input1 = gr.File(
	                        label="画像1をアップロード",
	                        file_types=["image", ".heic", ".heif"]
	                    )
	                    image_preview1 = gr.Image(
	                        label="プレビュー1",
	                        type="pil",
	                        interactive=False
	                    )

	                with gr.Column():
	                    image_input2 = gr.File(
	                        label="画像2をアップロード",
	                        file_types=["image", ".heic", ".heif"]
	                    )
	                    image_preview2 = gr.Image(
	                        label="プレビュー2",
	                        type="pil",
	                        interactive=False
	                    )

	            submit_btn2 = gr.Button("類似度を計算", variant="primary")
	            output2 = gr.Textbox(label="類似度スコア", lines=5)

	            image_input1.change(
	                fn=load_image_preview,
	                inputs=[image_input1],
	                outputs=[image_preview1]
	            )

	            image_input2.change(
	                fn=load_image_preview,
	                inputs=[image_input2],
	                outputs=[image_preview2]
	            )

	            submit_btn2.click(
	                fn=compare_images,
	                inputs=[image_input1, image_input2],
	                outputs=output2
	            )

	            gr.Markdown("### 使い方\n1. 2つの画像をアップロード\n2. 類似度を計算ボタンをクリック")

	        # Tab 3: text-to-text similarity
	        with gr.TabItem("テキスト × テキスト"):
	            with gr.Row():
	                with gr.Column():
	                    text_input1 = gr.Textbox(
	                        label="テキスト1",
	                        placeholder="例: 犬が走っている",
	                        lines=3
	                    )
	                    text_input2 = gr.Textbox(
	                        # The quotes must be escaped: an unescaped "" ends one
	                        # string literal and starts another, so the two
	                        # pieces were concatenated and the quotes vanished
	                        # from the label.
	                        label="テキスト2（空白区切りまたは\"\"で囲んで複数指定可）",
	                        placeholder="例: 猫が歩いている \"犬が歩いている\" 象が走っている",
	                        lines=3
	                    )
	                    submit_btn3 = gr.Button("類似度を計算", variant="primary")

	                with gr.Column():
	                    output3 = gr.Textbox(label="類似度スコア", lines=5)

	            submit_btn3.click(
	                fn=compare_texts,
	                inputs=[text_input1, text_input2],
	                outputs=output3
	            )

	            gr.Markdown("### 使い方\n1. テキスト1を入力\n2. テキスト2を空白区切りで複数入力（\"\"で囲むとスペースを含む文も可）\n3. 類似度を計算ボタンをクリック")

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()
	aiofiles==24.1.0
	annotated-doc==0.0.4
	annotated-types==0.7.0
	anyio==4.12.0
	brotli==1.2.0
	certifi==2025.11.12
	charset-normalizer==3.4.4
	click==8.3.1
	colorama==0.4.6
	fastapi==0.127.0
	ffmpy==1.0.0
	filelock==3.20.1
	fsspec==2025.12.0
	gradio==6.2.0
	gradio_client==2.0.2
	groovy==0.1.2
	h11==0.16.0
	httpcore==1.0.9
	httpx==0.28.1
	huggingface-hub==0.36.0
	idna==3.11
	Jinja2==3.1.6
	markdown-it-py==4.0.0
	MarkupSafe==3.0.3
	mdurl==0.1.2
	mpmath==1.3.0
	networkx==3.6.1
	numpy==2.4.0
	orjson==3.11.5
	packaging==25.0
	pandas==2.3.3
	pillow==12.0.0
	pillow_heif==1.1.1
	pydantic==2.12.5
	pydantic_core==2.41.5
	pydub==0.25.1
	Pygments==2.19.2
	python-dateutil==2.9.0.post0
	python-multipart==0.0.21
	pytz==2025.2
	PyYAML==6.0.3
	regex==2025.11.3
	requests==2.32.5
	rich==14.2.0
	safehttpx==0.1.7
	safetensors==0.7.0
	semantic-version==2.10.0
	sentencepiece==0.2.1
	shellingham==1.5.4
	six==1.17.0
	starlette==0.50.0
	sympy==1.14.0
	timm==1.0.22
	tokenizers==0.22.1
	tomlkit==0.13.3
	torch==2.9.1
	torchvision==0.24.1
	tqdm==4.67.1
	transformers==4.57.3
	typer==0.20.1
	typing-inspection==0.4.2
	typing_extensions==4.15.0
	tzdata==2025.3
	urllib3==2.6.2
	uvicorn==0.40.0