Skip to content

Instantly share code, notes, and snippets.

@abikoushi
Last active December 26, 2025 00:51
Show Gist options
  • Select an option

  • Save abikoushi/1e4ca098e178917200be7643c189aa64 to your computer and use it in GitHub Desktop.

Select an option

Save abikoushi/1e4ca098e178917200be7643c189aa64 to your computer and use it in GitHub Desktop.
Read a Matrix Market (MM) file as ALTO format
#' Read the size line of a Matrix Market coordinate file.
#'
#' Skips the first line of the file (the `%%MatrixMarket` banner in a
#' well-formed file) and reads one line of integers from what follows.
#'
#' @param file_path Path to the Matrix Market file.
#' @return A named integer vector with elements `row`, `column` and
#'   `nonzero`.
#' @details Stops with an error if the line read does not hold exactly three
#'   integers. The connection is closed on every exit path, including when
#'   `scan()` itself fails.
size_mtx <- function(file_path) {
  con <- file(file_path, open = "r") # open for reading in text mode
  # Registered immediately so a failing scan() cannot leak the connection.
  on.exit(close(con), add = TRUE)

  # NOTE(review): this reads exactly one line after the first line. Files
  # carrying extra "%" comment lines between the banner and the size line
  # may not be handled -- confirm against the inputs actually used.
  size <- scan(
    con,
    what = integer(),
    comment.char = "%",
    nlines = 1,
    skip = 1,
    quiet = TRUE
  )

  if (length(size) != 3L) {
    stop(
      "expected 3 integers (rows, columns, nonzeros) on the size line of '",
      file_path, "', got ", length(size),
      call. = FALSE
    )
  }

  names(size) <- c("row", "column", "nonzero")
  size
}
# Return the final element of `x` via single-bracket subsetting, so the
# result keeps the container type (and name, if any) of `x`. A zero-length
# `x` yields a zero-length result.
take_last <- function(x) {
  last_pos <- length(x)
  x[last_pos]
}
#' Read a Matrix Market coordinate file into an ALTO-style index.
#'
#' Each nonzero entry `(i, j, v)` is stored as a single 0-based integer code
#' in which `i - 1` occupies the low bits and `j - 1` is shifted left by the
#' bit width of the row dimension.
#'
#' @param file_path Path to the Matrix Market file.
#' @param zero_index If `TRUE`, also return the codes of every cell of the
#'   matrix that has no entry in the file.
#' @param n_header Number of lines to skip before the first data record
#'   (banner line plus size line in the default layout). Lines are counted
#'   physically, so extra comment lines before the data must be included.
#' @param order Number of index dimensions. Only `2` is supported: the size
#'   line read by `size_mtx()` carries a row and a column count only.
#' @return A list of class `"alto_index"` with elements
#'   * `value`: numeric values of the stored entries,
#'   * `index`: 0-based bit-packed codes of the stored entries,
#'   * `zero_index`: codes of the cells without an entry, in column-major
#'     order (`NULL` when `zero_index = FALSE`),
#'   * `shift`: per-dimension left shift used to pack the codes,
#'   * `mask`: per-dimension bit mask (`2^bitwidth - 1`),
#'   * `headp`: `0` followed by the cumulative dimension sizes,
#'   * `factorlevels`: `seq_len()` of each dimension size.
#' @details Stops with an error when `order` is not 2 or when the file holds
#'   a different number of records than its size line announces.
read_mtx_asALTO <- function(file_path,
                            zero_index = TRUE,
                            n_header = 2L,
                            order = 2L) {
  # Records are read as (i, j, v) triples, so any other order would pack
  # meaningless codes; fail loudly instead.
  if (!identical(as.integer(order), 2L)) {
    stop("read_mtx_asALTO() only supports order = 2", call. = FALSE)
  }

  size <- size_mtx(file_path)
  n_cate <- size[1:order]
  len <- take_last(size)

  # Bits needed to represent the 0-based coordinates of each dimension.
  bitwidth <- ceiling(log2(n_cate))
  mask <- bitwShiftL(1L, bitwidth) - 1L
  # shift: cumulative bit width of the preceding dimensions.
  # NOTE(review): codes are built with 32-bit bitwShiftL(); very large
  # dimensions whose bit widths sum past the integer range are not guarded.
  shift <- c(0L, cumsum(bitwidth[-length(bitwidth)]))
  names(shift) <- names(n_cate)
  headp <- unname(c(0L, cumsum(n_cate)))

  con <- file(file_path, open = "r") # open for reading in text mode
  on.exit(close(con), add = TRUE)

  # Read every record in one call instead of one scan() per line.
  entries <- scan(
    con,
    what = list(i = integer(), j = integer(), v = numeric()),
    nmax = len,
    skip = n_header,
    comment.char = "%",
    quiet = TRUE
  )
  if (length(entries$v) != len) {
    stop(
      "size line announces ", len, " entries but ", length(entries$v),
      " were read from '", file_path, "'",
      call. = FALSE
    )
  }

  # Pack the 0-based (row, column) pair into one code per entry.
  index <- bitwShiftL(entries$i - 1L, shift[[1L]]) +
    bitwShiftL(entries$j - 1L, shift[[2L]])
  value <- entries$v

  zi <- NULL
  if (zero_index) {
    # Enumerate the code of every cell using the same packing as `index`,
    # then drop the codes that are present in the file.
    all_codes <- as.vector(outer(
      bitwShiftL(seq_len(n_cate[[1L]]) - 1L, shift[[1L]]),
      bitwShiftL(seq_len(n_cate[[2L]]) - 1L, shift[[2L]]),
      `+`
    ))
    zi <- setdiff(all_codes, index)
  }

  factorlevels <- lapply(n_cate, seq_len)
  names(factorlevels) <- names(n_cate)

  result <- list(
    value = value,
    index = index,
    zero_index = zi,
    shift = shift,
    mask = mask,
    headp = headp,
    factorlevels = factorlevels
  )
  class(result) <- "alto_index"
  result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment