Skip to content

Instantly share code, notes, and snippets.

View keivalya's full-sized avatar
:octocat:
git happens

Keivalya Pandya keivalya

:octocat:
git happens
View GitHub Profile
@keivalya
keivalya / fusion.py
Created December 16, 2025 23:59
The Fusion module (that fuses image, text and state tokens)
"""Fusion module to combine image, text, and state tokens."""
import torch
import torch.nn as nn
class FusionMLP(nn.Module):
def __init__(self, d_model=128):
super().__init__()
self.net = nn.Sequential(
nn.Linear(3 * d_model, d_model),
@keivalya
keivalya / state_encoder.py
Created December 16, 2025 16:03
Robot State Encoder.
# Encoding robot state using a simple Multi-Layer Perceptron
class StateEncoderMLP(nn.Module):
    """Small MLP that lifts a robot state vector to ``d_model`` features.

    Builds ``Linear(state_dim, 64) -> ReLU -> Linear(64, d_model)`` as
    ``self.net`` and a ``LayerNorm(d_model)`` as ``self.ln``.

    NOTE(review): only ``__init__`` is visible in this excerpt; ``forward``
    presumably applies ``net`` and then ``ln`` -- confirm against the full
    file.
    """

    def __init__(self, state_dim: int, d_model: int = 128) -> None:
        """Create the layers.

        Args:
            state_dim: Number of input features of the first linear layer.
            d_model: Output width of the MLP and the size normalised by
                ``self.ln``.
        """
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, d_model),
        )
        self.ln = nn.LayerNorm(d_model)
@keivalya
keivalya / text_encoder.py
Created December 16, 2025 15:56
Text Encoder.
# Encoding text using a gated recurrent unit (GRU)
class TextEncoderTinyGRU(nn.Module):
def __init__(self, vocab_size, d_word=64, d_model=128):
super().__init__()
self.embed = nn.Embedding(vocab_size, d_word)
self.gru = nn.GRU(d_word, d_model, batch_first=True)
self.ln = nn.LayerNorm(d_model)
def forward(self, token_ids):
x = self.embed(token_ids)
@keivalya
keivalya / vision_encoder.py
Last active December 16, 2025 15:49
Image Encoder.
# Encoding an image using a convolutional neural network (CNN)
class ImageEncoderTinyCNN(nn.Module):
def __init__(self, d_model=128):
super().__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
self.proj = nn.Linear(128, d_model)
self.ln = nn.LayerNorm(d_model)