Skip to content

Instantly share code, notes, and snippets.

@abikoushi
Created December 21, 2025 13:02
Show Gist options
  • Select an option

  • Save abikoushi/97b92eae7353f1821589b4b3077e55dc to your computer and use it in GitHub Desktop.

Select an option

Save abikoushi/97b92eae7353f1821589b4b3077e55dc to your computer and use it in GitHub Desktop.
gather & spread of ALTO (Adaptive Linearized Tensor Order)
#ref:
#Jan Laukemann et al. (2025) Accelerating Sparse Tensor Decomposition Using Adaptive Linearized Representation
#https://arxiv.org/abs/2403.06348
# Build a bit-packed linear index from the right-hand-side terms of a formula.
#
# Every term is treated as a factor (non-factors are converted with a
# warning). Each factor's 0-based level code is stored in its own bit field;
# the fields are laid out from the least significant bit upwards in term
# order and summed into one integer per row.
#
# Arguments:
#   object  a model formula; its term labels select the columns to pack.
#   data    data frame (or environment) in which the formula is evaluated.
#   ...     forwarded to model.frame().
#
# Returns a list of class "alto_index" with elements
#   index         packed index, one value per row of the model frame
#   bitwidth      bits reserved for each factor (named by term label)
#   shift         bit offset of each factor's field (named by term label)
#   mask          bit mask of each field after shifting down
#   startpos      0-based offset of each factor's first level inside
#                 unlist(factorlevels), plus a trailing total
#   factorlevels  level labels of each factor (named by term label)
ALTO_indexing <- function(object, data = environment(object), ...) {
  frame <- model.frame(object, data, ...)
  trm <- if (missing(data)) terms(object) else terms(object, data = data)
  labs <- attr(trm, "term.labels")
  factors <- lapply(labs, function(lab) {
    column <- frame[[lab]]
    if (is.factor(column)) {
      return(column)
    }
    warning(paste0("auto-converted `", lab, "` as factor"))
    factor(column)
  })
  ## 0-based level codes
  codes <- lapply(factors, function(f) {
    as.integer(f) - 1L
  })
  ## levels of each factor
  factorlevels <- lapply(factors, levels)
  names(factorlevels) <- labs
  ## number of levels of each factor
  n_cate <- vapply(factors, nlevels, integer(1))
  ## required bit width
  bitwidth <- ceiling(log2(n_cate))
  names(bitwidth) <- labs
  ## R integers are 32-bit signed, so only 31 bits can be packed safely
  if (sum(bitwidth) > 31) {
    stop(
      "combined bit width (", sum(bitwidth),
      ") exceeds the 31 bits available in an R integer"
    )
  }
  ## cumulative bit shift, starting from the lowest-order factor
  shift <- cumsum(c(0, bitwidth))[seq_along(bitwidth)]
  names(shift) <- labs
  ## packed index
  index <- 0L
  for (i in seq_along(codes)) {
    index <- index + bitwShiftL(codes[[i]], shift[i])
  }
  ## Offsets into the flattened level vector: these advance by the number of
  ## levels of each factor, not by 2^bitwidth, otherwise any factor whose
  ## level count is not a power of two shifts every later lookup.
  startpos <- unname(c(0L, cumsum(n_cate)))
  result <- list(
    index = index,
    bitwidth = bitwidth,
    shift = shift,
    mask = bitwShiftL(1L, bitwidth) - 1L,
    startpos = startpos,
    factorlevels = factorlevels
  )
  class(result) <- "alto_index"
  result
}
# Split one packed index back into the 0-based level code of every factor.
#
# Arguments:
#   x           a single packed index value.
#   alto_index  an object inheriting from class "alto_index"; its `shift`,
#               `mask` and `bitwidth` elements are used.
#
# Returns an integer vector with one code per factor, named like
# `alto_index$bitwidth`.
ALTO_unpack <- function(x, alto_index) {
  # inherits() also accepts objects that carry additional classes, which a
  # `class(x) == "..."` comparison inside stopifnot() would reject.
  stopifnot(inherits(alto_index, "alto_index"))
  # Only one index is decoded per call; a longer `x` would be silently
  # recycled against the per-factor shifts below.
  stopifnot(length(x) == 1L)
  # Shift each factor's field down to bit 0, then mask off the higher fields.
  res <- bitwAnd(bitwShiftR(x, alto_index$shift), alto_index$mask)
  names(res) <- names(alto_index$bitwidth)
  res
}
# Decode one packed index into the level label of every factor.
#
# Arguments:
#   index       a single packed index value.
#   alto_index  an object inheriting from class "alto_index"; its
#               `factorlevels` element supplies the labels.
#
# Returns an unnamed character vector with one label per factor, in the
# order of `alto_index$factorlevels`.
ALTO_unpack_name <- function(index, alto_index) {
  stopifnot(inherits(alto_index, "alto_index"))
  codes <- ALTO_unpack(index, alto_index)
  level_sets <- alto_index$factorlevels
  # Look each code up in its own factor's level vector rather than through
  # offsets into the flattened levels, so the result stays correct when a
  # factor's level count is not a power of two.
  vapply(
    seq_along(codes),
    function(i) level_sets[[i]][codes[i] + 1L],
    character(1)
  )
}
# Demo: pack the factor columns of the built-in HairEyeColor table.
df_haireye <- as.data.frame(HairEyeColor)
alto_haireye <- ALTO_indexing(Freq ~ ., data = df_haireye)
# First rows of the data side by side with their packed index.
print(head(cbind(df_haireye, alto_haireye$index)))
# Hair Eye Sex Freq alto_haireye$index
# 1 Black Brown Male 32 0
# 2 Brown Brown Male 53 1
# 3 Red Brown Male 10 2
# 4 Blond Brown Male 3 3
# 5 Black Blue Male 11 4
# 6 Brown Blue Male 50 5
# Decode packed index 4 into per-factor 0-based codes.
print(ALTO_unpack(4, alto_haireye))
# Hair Eye Sex
# 0 1 0
# Decode the same index into level labels.
print(ALTO_unpack_name(4, alto_haireye))
#[1] "Black" "Blue" "Male"
####
# Convert integers to their binary digits, most significant bit first.
#
# Arguments:
#   x      integer vector (or values coercible by intToBits()).
#   width  optional number of low-order bits to keep (0 to 32); NULL keeps
#          all 32 bits.
#
# Returns, for a single value, an integer vector of 0/1 digits. For several
# values it returns an integer matrix with one row per element of `x` and
# one column per digit.
int_to_bin <- function(x, width = NULL) {
  n_bits <- 32L
  if (!is.null(width)) {
    stopifnot(
      length(width) == 1L, !is.na(width), width >= 0, width <= 32
    )
    n_bits <- as.integer(width)
  }
  # intToBits() emits 32 bits per element, least significant first, all
  # concatenated; one column per element keeps the values apart instead of
  # truncating or reversing across element boundaries.
  bits <- matrix(as.integer(intToBits(x)), nrow = 32L)
  digits <- t(bits[rev(seq_len(n_bits)), , drop = FALSE])
  if (length(x) == 1L) {
    return(as.vector(digits))
  }
  digits
}
# Pass the demo's field masks to int_to_bin() with a width of 5 bits.
int_to_bin(alto_haireye$mask, width = 5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment