This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Fusion module to combine image, text, and state tokens.""" | |
| import torch | |
| import torch.nn as nn | |
| class FusionMLP(nn.Module): | |
| def __init__(self, d_model=128): | |
| super().__init__() | |
| self.net = nn.Sequential( | |
| nn.Linear(3 * d_model, d_model), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Encodes the robot state with a simple multi-layer perceptron (MLP). | |
| class StateEncoderMLP(nn.Module): | |
| def __init__(self, state_dim, d_model=128): | |
| super().__init__() | |
| self.net = nn.Sequential( | |
| nn.Linear(state_dim, 64), | |
| nn.ReLU(), | |
| nn.Linear(64, d_model), | |
| ) | |
| self.ln = nn.LayerNorm(d_model) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Encodes text with a gated recurrent unit (GRU). | |
| class TextEncoderTinyGRU(nn.Module): | |
| def __init__(self, vocab_size, d_word=64, d_model=128): | |
| # Build the encoder's layers: embedding -> GRU -> LayerNorm. | |
| # vocab_size: number of entries in the embedding table. | |
| # d_word: embedding width, which is also the GRU input size. | |
| # d_model: GRU hidden size, which is also the LayerNorm width. | |
| super().__init__() | |
| self.embed = nn.Embedding(vocab_size, d_word) | |
| # batch_first=True: the GRU takes inputs laid out as (batch, seq, feature). | |
| self.gru = nn.GRU(d_word, d_model, batch_first=True) | |
| self.ln = nn.LayerNorm(d_model) | |
| def forward(self, token_ids): | |
| x = self.embed(token_ids) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Encodes an image with a convolutional neural network (CNN). | |
| class ImageEncoderTinyCNN(nn.Module): | |
| def __init__(self, d_model=128): | |
| super().__init__() | |
| self.conv1 = nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2) | |
| self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1) | |
| self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1) | |
| self.proj = nn.Linear(128, d_model) | |
| self.ln = nn.LayerNorm(d_model) |