abikoushi · December 20, 2025 11:31
diff --git a/ALTO_indexing.R b/ALTO_indexing.R
 #ref:
 #Jan Laukemann et al. (2025) Accelerating Sparse Tensor Decomposition Using Adaptive Linearized Representation
 #https://arxiv.org/abs/2403.06348

 library(dplyr)
 ALTO_indexing <- function(object, data = environment(object), ...) {
  mf <- model.frame(object, data, ...)
  t <- if (missing(data)) terms(object) else terms(object, data = data)
  labs <- attr(t, "term.labels")

  mf <- lapply(labs, function(x) {
    if (is.factor(mf[[x]])) {
      return(mf[[x]])
    } else {
      warning(paste0("auto-converted `", x, "` as factor"))
      return(factor(mf[[x]]))
    }
  })

  ## 0-based index
  li <- lapply(mf, function(x) {
    as.integer(x) - 1L
  })

  ## 各因子の水準
  factorlevels <- lapply(mf, levels)
  names(factorlevels) <- labs

  ## 各因子の水準数
  n_cate <- sapply(mf, nlevels)

  ## 必要 bit 幅
  bitwidth <- ceiling(log2(n_cate))
  names(bitwidth) <- labs

  ## 下位因子からの累積 bit shift
  shift <- c(0L, cumsum(bitwidth[-length(bitwidth)]))
  names(shift) <- labs

  ## ALTO index
  index <- 0L
  for (i in seq_along(li)) {
    index <- index + bitwShiftL(li[[i]], shift[i])
  }

  result <- list(
    index = index,
    bitwidth = bitwidth,
    shift = shift,
    factorlevels = factorlevels
  )
  class(result) <- "alto_index"
  return(result)
 }


 ALTO_unpack <- function(x, alto_index) {
  stopifnot(class(alto_index) == "alto_index")
  k <- length(alto_index$bitwidth)
  res <- integer(k)
  names(res) <- names(alto_index$bitwidth)
  for (i in seq_len(k)) {
    mask <- bitwShiftL(1L, alto_index$bitwidth[i]) - 1L
    res[i] <- bitwAnd(bitwShiftR(x, alto_index$shift[i]), mask)
  }
  return(res)
 }


 ALTO_unpack_factor <- function(index, alto_index) {
  stopifnot(class(alto_index) == "alto_index")
  res_un = ALTO_unpack(index, alto_index)
  res = character(length = length(res_un))
  for (i in seq_along(res)) {
    res[i] <- alto_index$factorlevels[[i]][res_un[i] + 1L]
  }
  return(res)
 }


 library(dplyr)
 df_Titanic <- as.data.frame(Titanic)
 df_Hair <- as.data.frame(HairEyeColor) %>%
  mutate(Hair = as.character(Hair))
 f = ~ Hair + Eye + Sex
 f <- Freq ~ .
 ind_hair = ALTO_indexing(f, data = df_Hair)

 ALTO_unpack(6, ind_hair)
 ALTO_unpack_factor(6, ind_hair)
 df_Hair[ind_hair$index == 6, ]


 f = ~ Class + Sex + Age + Survived
 f = Freq ~ .
 ind_titanic = ALTO_indexing(f, data = df_Titanic)

 ALTO_unpack(9, ind_titanic)
 ALTO_unpack_factor(9, ind_titanic)
 df_Titanic[ind_titanic$index == 9, ]

 ###
 df_Hair <- as.data.frame(HairEyeColor)
 mutate(df_Hair, alto = ind_hair$index) %>%
  arrange(alto) %>%
  dplyr::filter(Hair == "Black")

 mutate(df_Hair, alto = ind_hair$index) %>%
  arrange(alto) %>%
  dplyr::filter(Eye == "Brown")

 mutate(df_Hair, alto = ind_hair$index) %>%
  arrange(alto) %>%
  dplyr::filter(Sex == "Male")

 ####

 i1_fixed <- 1L
 vals <- 0:7
 inds <- (i1_fixed - 1L) + bitwShiftL(vals, ind_hair$shift[2])


 df_Hair[ind_hair$index %in% inds, ]
 as.data.frame(HairEyeColor[i1_fixed, , , drop = FALSE])

 print(
  all(
    df_Hair[ind_hair$index %in% inds, 2:4] ==
      as.data.frame(HairEyeColor[i1_fixed, , ])
  )
 )
	#ref:
	#Jan Laukemann et al. (2025) Accelerating Sparse Tensor Decomposition Using Adaptive Linearized Representation
	#https://arxiv.org/abs/2403.06348

	library(dplyr)
	ALTO_indexing <- function(object, data = environment(object), ...) {
	mf <- model.frame(object, data, ...)
	t <- if (missing(data)) terms(object) else terms(object, data = data)
	labs <- attr(t, "term.labels")

	mf <- lapply(labs, function(x) {
	if (is.factor(mf[[x]])) {
	return(mf[[x]])
	} else {
	warning(paste0("auto-converted `", x, "` as factor"))
	return(factor(mf[[x]]))
	}
	})

	## 0-based index
	li <- lapply(mf, function(x) {
	as.integer(x) - 1L
	})

	## 各因子の水準
	factorlevels <- lapply(mf, levels)
	names(factorlevels) <- labs

	## 各因子の水準数
	n_cate <- sapply(mf, nlevels)

	## 必要 bit 幅
	bitwidth <- ceiling(log2(n_cate))
	names(bitwidth) <- labs

	## 下位因子からの累積 bit shift
	shift <- c(0L, cumsum(bitwidth[-length(bitwidth)]))
	names(shift) <- labs

	## ALTO index
	index <- 0L
	for (i in seq_along(li)) {
	index <- index + bitwShiftL(li[[i]], shift[i])
	}

	result <- list(
	index = index,
	bitwidth = bitwidth,
	shift = shift,
	factorlevels = factorlevels
	)
	class(result) <- "alto_index"
	return(result)
	}


	ALTO_unpack <- function(x, alto_index) {
	stopifnot(class(alto_index) == "alto_index")
	k <- length(alto_index$bitwidth)
	res <- integer(k)
	names(res) <- names(alto_index$bitwidth)
	for (i in seq_len(k)) {
	mask <- bitwShiftL(1L, alto_index$bitwidth[i]) - 1L
	res[i] <- bitwAnd(bitwShiftR(x, alto_index$shift[i]), mask)
	}
	return(res)
	}


	ALTO_unpack_factor <- function(index, alto_index) {
	stopifnot(class(alto_index) == "alto_index")
	res_un = ALTO_unpack(index, alto_index)
	res = character(length = length(res_un))
	for (i in seq_along(res)) {
	res[i] <- alto_index$factorlevels[[i]][res_un[i] + 1L]
	}
	return(res)
	}


	library(dplyr)
	df_Titanic <- as.data.frame(Titanic)
	df_Hair <- as.data.frame(HairEyeColor) %>%
	mutate(Hair = as.character(Hair))
	f = ~ Hair + Eye + Sex
	f <- Freq ~ .
	ind_hair = ALTO_indexing(f, data = df_Hair)

	ALTO_unpack(6, ind_hair)
	ALTO_unpack_factor(6, ind_hair)
	df_Hair[ind_hair$index == 6, ]


	f = ~ Class + Sex + Age + Survived
	f = Freq ~ .
	ind_titanic = ALTO_indexing(f, data = df_Titanic)

	ALTO_unpack(9, ind_titanic)
	ALTO_unpack_factor(9, ind_titanic)
	df_Titanic[ind_titanic$index == 9, ]

	###
	df_Hair <- as.data.frame(HairEyeColor)
	mutate(df_Hair, alto = ind_hair$index) %>%
	arrange(alto) %>%
	dplyr::filter(Hair == "Black")

	mutate(df_Hair, alto = ind_hair$index) %>%
	arrange(alto) %>%
	dplyr::filter(Eye == "Brown")

	mutate(df_Hair, alto = ind_hair$index) %>%
	arrange(alto) %>%
	dplyr::filter(Sex == "Male")

	####

	i1_fixed <- 1L
	vals <- 0:7
	inds <- (i1_fixed - 1L) + bitwShiftL(vals, ind_hair$shift[2])


	df_Hair[ind_hair$index %in% inds, ]
	as.data.frame(HairEyeColor[i1_fixed, , , drop = FALSE])

	print(
	all(
	df_Hair[ind_hair$index %in% inds, 2:4] ==
	as.data.frame(HairEyeColor[i1_fixed, , ])
	)
	)
No results found