Trying out clip-japanese-base-v2: https://x.com/ksasao/status/2003085398113952029

A Gradio demo for line-corporation/clip-japanese-base-v2 with three tabs: image × text similarity, image × image similarity, and text × text similarity. HEIF/HEIC images are supported via pillow-heif.
import gradio as gr
from PIL import Image, ImageOps
import torch
from transformers import AutoImageProcessor, AutoModel, AutoTokenizer
import shlex

# Enable support for the HEIF/HEIC image format
try:
    from pillow_heif import register_heif_opener
    register_heif_opener()
    print("HEIF/HEIC形式のサポートが有効化されました")
except ImportError:
    print("警告: pillow-heifがインストールされていません。HEIF/HEIC形式を使用する場合は 'pip install pillow-heif' を実行してください")

# Global model initialization
HF_MODEL_PATH = 'line-corporation/clip-japanese-base-v2'
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"デバイス: {device}")
print("モデル読み込み中...")
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, legacy=False)
processor = AutoImageProcessor.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, use_fast=True)
model = AutoModel.from_pretrained(HF_MODEL_PATH, trust_remote_code=True).to(device)
print("モデル読み込み完了!")

def load_image_preview(file):
    """Generate an image preview from the uploaded file."""
    if file is None:
        return None
    try:
        image = Image.open(file)
        # Correct the image orientation based on EXIF metadata
        image = ImageOps.exif_transpose(image)
        if image.mode != 'RGB':
            image = image.convert('RGB')
        return image
    except Exception as e:
        print(f"プレビュー生成エラー: {str(e)}")
        return None

def classify_image(file, labels_text):
    """Compute similarity scores between an image and a set of labels."""
    if file is None:
        return "画像をアップロードしてください"
    if not labels_text or labels_text.strip() == "":
        return "ラベルを入力してください"
    # Split the labels on whitespace (a phrase wrapped in "" is treated as one label)
    try:
        labels = shlex.split(labels_text.strip())
    except ValueError:
        # Fall back to plain whitespace splitting, e.g. when a quote is left unclosed
        labels = labels_text.strip().split()
    # Load the image from the uploaded file (HEIF supported)
    try:
        image = Image.open(file)
        # Correct the image orientation based on EXIF metadata
        image = ImageOps.exif_transpose(image)
        # Convert to RGB (in case the image is RGBA, etc.)
        if image.mode != 'RGB':
            image = image.convert('RGB')
    except Exception as e:
        return f"画像の読み込みエラー: {str(e)}"
    # Preprocess the image and tokenize the labels
    processed_image = processor(image, return_tensors="pt").to(device)
    text = tokenizer(labels).to(device)
    # Inference
    with torch.no_grad():
        image_features = model.get_image_features(**processed_image)
        text_features = model.get_text_features(**text)
        similarity_scores = (image_features @ text_features.T)[0]
    # Format the results
    results = "結果:\n\n"
    for label, score in zip(labels, similarity_scores):
        raw_score = score.item()
        # Clip to the 0-100 range
        clipped_score = max(0, min(100, raw_score))
        results += f"{label}: {clipped_score:.1f}\n"
    return results

def compare_images(file1, file2):
    """Compute the similarity score between two images."""
    if file1 is None or file2 is None:
        return "2つの画像をアップロードしてください"
    # Load both images
    try:
        image1 = Image.open(file1)
        image1 = ImageOps.exif_transpose(image1)
        if image1.mode != 'RGB':
            image1 = image1.convert('RGB')
        image2 = Image.open(file2)
        image2 = ImageOps.exif_transpose(image2)
        if image2.mode != 'RGB':
            image2 = image2.convert('RGB')
    except Exception as e:
        return f"画像の読み込みエラー: {str(e)}"
    # Preprocess both images
    processed_image1 = processor(image1, return_tensors="pt").to(device)
    processed_image2 = processor(image2, return_tensors="pt").to(device)
    # Inference
    with torch.no_grad():
        image_features1 = model.get_image_features(**processed_image1)
        image_features2 = model.get_image_features(**processed_image2)
        # Normalize the embeddings and compute the cosine similarity
        image_features1 = image_features1 / image_features1.norm(dim=-1, keepdim=True)
        image_features2 = image_features2 / image_features2.norm(dim=-1, keepdim=True)
        similarity_score = (image_features1 @ image_features2.T)[0][0]
    raw_score = similarity_score.item()
    # Rescale to the 0-100 range
    scaled_score = (raw_score + 1) * 50  # map -1..1 to 0..100
    results = "画像同士の類似度:\n\n"
    results += f"スコア: {scaled_score:.1f}\n"
    results += f"コサイン類似度: {raw_score:.4f}"
    return results

def compare_texts(text1, text2):
    """Compute similarity scores between two texts."""
    if not text1 or text1.strip() == "":
        return "テキスト1を入力してください"
    if not text2 or text2.strip() == "":
        return "テキスト2を入力してください"
    # Split text 2 on whitespace (a phrase wrapped in "" is treated as one sentence)
    try:
        text2_list = shlex.split(text2.strip())
    except ValueError:
        # Fall back to plain whitespace splitting, e.g. when a quote is left unclosed
        text2_list = text2.strip().split()
    # Tokenize text 1 together with each entry from text 2
    texts = [text1.strip()] + text2_list
    tokenized = tokenizer(texts).to(device)
    # Inference
    with torch.no_grad():
        text_features = model.get_text_features(**tokenized)
        # Normalize the embeddings and compute cosine similarities
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        # Similarity between text 1 and each entry from text 2
        similarity_scores = text_features[0] @ text_features[1:].T
    # Format the results
    results = "テキスト同士の類似度:\n\n"
    for text, score in zip(text2_list, similarity_scores):
        raw_score = score.item()
        # Rescale to the 0-100 range
        scaled_score = (raw_score + 1) * 50  # map -1..1 to 0..100
        results += f"{text}: {scaled_score:.1f} (コサイン類似度: {raw_score:.4f})\n"
    return results

# Gradio interface
with gr.Blocks(title="CLIP Japanese 画像分類") as demo:
    gr.Markdown("# CLIP Japanese Base V2 画像分類・類似度比較")
    gr.Markdown("画像とテキストの類似度、画像同士の類似度、テキスト同士の類似度を計算できます")
    with gr.Tabs():
        # Tab 1: image x text similarity
        with gr.TabItem("画像 × テキスト"):
            with gr.Row():
                with gr.Column():
                    image_input = gr.File(
                        label="画像をアップロード(HEIF/HEIC対応)",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview = gr.Image(
                        label="プレビュー",
                        type="pil",
                        interactive=False
                    )
                    labels_input = gr.Textbox(
                        label="ラベル(空白区切り)",
                        placeholder="例: 犬 猫 象",
                        value="犬 猫 象"
                    )
                    submit_btn = gr.Button("処理", variant="primary")
                with gr.Column():
                    output = gr.Textbox(label="Raw Score", lines=10)
            # Show a preview when a file is uploaded
            image_input.change(
                fn=load_image_preview,
                inputs=[image_input],
                outputs=[image_preview]
            )
            submit_btn.click(
                fn=classify_image,
                inputs=[image_input, labels_input],
                outputs=output
            )
            gr.Markdown("### 使い方\n1. 画像をアップロード\n2. ラベルを空白区切りで入力\n3. 処理ボタンをクリック")
        # Tab 2: image x image similarity
        with gr.TabItem("画像 × 画像"):
            with gr.Row():
                with gr.Column():
                    image_input1 = gr.File(
                        label="画像1をアップロード",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview1 = gr.Image(
                        label="プレビュー1",
                        type="pil",
                        interactive=False
                    )
                with gr.Column():
                    image_input2 = gr.File(
                        label="画像2をアップロード",
                        file_types=["image", ".heic", ".heif"]
                    )
                    image_preview2 = gr.Image(
                        label="プレビュー2",
                        type="pil",
                        interactive=False
                    )
            submit_btn2 = gr.Button("類似度を計算", variant="primary")
            output2 = gr.Textbox(label="類似度スコア", lines=5)
            image_input1.change(
                fn=load_image_preview,
                inputs=[image_input1],
                outputs=[image_preview1]
            )
            image_input2.change(
                fn=load_image_preview,
                inputs=[image_input2],
                outputs=[image_preview2]
            )
            submit_btn2.click(
                fn=compare_images,
                inputs=[image_input1, image_input2],
                outputs=output2
            )
            gr.Markdown("### 使い方\n1. 2つの画像をアップロード\n2. 類似度を計算ボタンをクリック")
        # Tab 3: text x text similarity
        with gr.TabItem("テキスト × テキスト"):
            with gr.Row():
                with gr.Column():
                    text_input1 = gr.Textbox(
                        label="テキスト1",
                        placeholder="例: 犬が走っている",
                        lines=3
                    )
                    text_input2 = gr.Textbox(
                        label="テキスト2(空白区切りまたは\"\"で囲んで複数指定可)",
                        placeholder="例: 猫が歩いている \"犬が歩いている\" 象が走っている",
                        lines=3
                    )
                    submit_btn3 = gr.Button("類似度を計算", variant="primary")
                with gr.Column():
                    output3 = gr.Textbox(label="類似度スコア", lines=5)
            submit_btn3.click(
                fn=compare_texts,
                inputs=[text_input1, text_input2],
                outputs=output3
            )
            gr.Markdown("### 使い方\n1. テキスト1を入力\n2. テキスト2を空白区切りで複数入力(\"\"で囲むとスペースを含む文も可)\n3. 類似度を計算ボタンをクリック")

if __name__ == "__main__":
    demo.launch()
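For reference, the same inference can be run without the Gradio UI. The following is a minimal sketch distilled from the script above; the image path "sample.jpg" and the label list are placeholders, and the model, tokenizer, and processor are loaded exactly as in the demo. Note that the image × text tab reports raw (unnormalized) dot-product scores clipped to 0-100, while the image × image and text × text tabs normalize the embeddings and map the cosine similarity from -1..1 to 0..100.

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel, AutoTokenizer

HF_MODEL_PATH = 'line-corporation/clip-japanese-base-v2'
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, legacy=False)
processor = AutoImageProcessor.from_pretrained(HF_MODEL_PATH, trust_remote_code=True, use_fast=True)
model = AutoModel.from_pretrained(HF_MODEL_PATH, trust_remote_code=True).to(device)

image = Image.open("sample.jpg").convert("RGB")  # placeholder path
labels = ["犬", "猫", "象"]  # example labels, as in the demo's default value

inputs = processor(image, return_tensors="pt").to(device)
texts = tokenizer(labels).to(device)

with torch.no_grad():
    image_features = model.get_image_features(**inputs)
    text_features = model.get_text_features(**texts)
    # Raw dot-product scores, as in the image x text tab
    raw_scores = (image_features @ text_features.T)[0]
    # Cosine similarities, as in the image x image and text x text tabs
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    cosine_scores = (image_features @ text_features.T)[0]

for label, r, c in zip(labels, raw_scores, cosine_scores):
    print(f"{label}: raw={r.item():.1f} cosine={c.item():.4f}")

The pinned dependency list for the demo environment follows.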
aiofiles==24.1.0
annotated-doc==0.0.4
annotated-types==0.7.0
anyio==4.12.0
brotli==1.2.0
certifi==2025.11.12
charset-normalizer==3.4.4
click==8.3.1
colorama==0.4.6
fastapi==0.127.0
ffmpy==1.0.0
filelock==3.20.1
fsspec==2025.12.0
gradio==6.2.0
gradio_client==2.0.2
groovy==0.1.2
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.36.0
idna==3.11
Jinja2==3.1.6
markdown-it-py==4.0.0
MarkupSafe==3.0.3
mdurl==0.1.2
mpmath==1.3.0
networkx==3.6.1
numpy==2.4.0
orjson==3.11.5
packaging==25.0
pandas==2.3.3
pillow==12.0.0
pillow_heif==1.1.1
pydantic==2.12.5
pydantic_core==2.41.5
pydub==0.25.1
Pygments==2.19.2
python-dateutil==2.9.0.post0
python-multipart==0.0.21
pytz==2025.2
PyYAML==6.0.3
regex==2025.11.3
requests==2.32.5
rich==14.2.0
safehttpx==0.1.7
safetensors==0.7.0
semantic-version==2.10.0
sentencepiece==0.2.1
shellingham==1.5.4
six==1.17.0
starlette==0.50.0
sympy==1.14.0
timm==1.0.22
tokenizers==0.22.1
tomlkit==0.13.3
torch==2.9.1
torchvision==0.24.1
tqdm==4.67.1
transformers==4.57.3
typer==0.20.1
typing-inspection==0.4.2
typing_extensions==4.15.0
tzdata==2025.3
urllib3==2.6.2
uvicorn==0.40.0
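A quick way to confirm that the pinned stack above is installed and that a GPU is visible (a minimal sketch; it only prints versions and CUDA availability):

import torch
import transformers
import gradio

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("gradio:", gradio.__version__)
print("CUDA available:", torch.cuda.is_available())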