Cog packaging for Marqo's marqo-fashionSigLIP embedding model (gist alexgodin/e1abfeb8bdfa4d25a2678bd1cb07928a, created February 12, 2026): a `cog.yaml` build configuration and a `predict.py` predictor that returns a single embedding for either an image or a text input.
`cog.yaml`:

```yaml
# Configuration for Cog ⚙️
# Reference: https://cog.run/yaml

build:
  # set to true if your model requires a GPU
  gpu: true

  # a list of ubuntu apt packages to install
  # system_packages:
  #   - "libgl1-mesa-glx"
  #   - "libglib2.0-0"

  # python version in the form '3.11' or '3.11.4'
  python_version: "3.11"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - "torch"
    - "Pillow"          # for PIL/Image
    - "open_clip_torch" # for open_clip
    - "numpy"
    - "transformers"

  # commands run after the environment is set up
  # run:
  #   - "echo env is ready!"
  #   - "echo another command if needed"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
image: "r8.im/alexgodin/fashion-siglip"
```
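Once this image is built and pushed with `cog push r8.im/alexgodin/fashion-siglip`, the predictor can be called remotely. A minimal sketch using the official `replicate` Python client, assuming the model is published on Replicate under the same `alexgodin/fashion-siglip` slug as the image name (that slug, the filename `dress.jpg`, and the example query string are assumptions for illustration):

```python
# Sketch: query the deployed model via the `replicate` Python client.
# Assumes the image from cog.yaml was pushed and the model is live on Replicate.
import replicate

# Text embedding: pass exactly one of `text` or `image`, matching predict.py
text_embedding = replicate.run(
    "alexgodin/fashion-siglip",
    input={"text": "red floral summer dress"},
)

# Image embedding: pass an open file handle for the `image` input
with open("dress.jpg", "rb") as f:
    image_embedding = replicate.run(
        "alexgodin/fashion-siglip",
        input={"image": f},
    )

print(len(text_embedding), len(image_embedding))  # both are flat lists of floats
```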
`predict.py`:

```python
# Prediction interface for Cog ⚙️
# https://cog.run/python
from typing import List

import numpy as np
import open_clip
import torch
from cog import BasePredictor, Input, Path
from PIL import Image


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            'hf-hub:Marqo/marqo-fashionSigLIP'
        )
        self.tokenizer = open_clip.get_tokenizer('hf-hub:Marqo/marqo-fashionSigLIP')
        # Move the model onto the GPU requested in cog.yaml (falling back to CPU)
        # and switch to eval mode for inference
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)
        self.model.eval()

    def predict(
        self,
        image: Path = Input(description="Input image to generate embeddings for", default=None),
        text: str = Input(description="Input text to generate embeddings for", default=None),
    ) -> List[float]:
        """Generate a fashionSigLIP embedding for either the input image or text"""
        # Exactly one of the two inputs must be provided
        if (image is None) == (text is None):
            raise ValueError("Please provide either an image or text input, but not both or neither")

        if text is not None:
            # Tokenize the text and encode it to a normalized embedding
            tokens = self.tokenizer(text).to(self.device)
            with torch.no_grad(), torch.cuda.amp.autocast():
                text_features = self.model.encode_text(tokens, normalize=True)
            return text_features.cpu().numpy().flatten().tolist()
        else:
            # Load the image (converting to RGB handles grayscale/RGBA files),
            # preprocess it, and encode it to a normalized embedding
            img = Image.open(image).convert("RGB")
            processed_image = self.preprocess(img).unsqueeze(0).to(self.device)
            with torch.no_grad(), torch.cuda.amp.autocast():
                image_features = self.model.encode_image(processed_image, normalize=True)
            return image_features.cpu().numpy().flatten().tolist()
```
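Because both `encode_text` and `encode_image` are called with `normalize=True`, every returned embedding is unit-length, so the cosine similarity between an image and a text query reduces to a plain dot product. A self-contained sketch of ranking candidate texts against an image this way, using synthetic stand-in vectors (the 768-dim size and the `fake_embedding` helper are assumptions for illustration; in practice the vectors come from `predict()` above):

```python
import numpy as np

# Stand-ins for real predict() outputs: unit-length vectors, like normalize=True produces
rng = np.random.default_rng(0)

def fake_embedding(dim: int = 768) -> np.ndarray:
    v = rng.normal(size=dim)
    return v / np.linalg.norm(v)

image_emb = fake_embedding()  # embedding of a product photo
candidates = ["red dress", "blue jeans", "leather boots"]
text_embs = np.stack([fake_embedding() for _ in candidates])

# Embeddings are L2-normalized, so cosine similarity is just a dot product
scores = text_embs @ image_emb  # shape (len(candidates),)
best = candidates[int(np.argmax(scores))]
print(dict(zip(candidates, scores.round(3))), "->", best)
```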