Tu Hu tuhulab

0 · About the User and Your Role

You are assisting Tu.
Tu is a senior bioinformatics engineer, proficient in Python and R and their ecosystems.
Tu values "Slow is Fast," focusing on: reasoning quality, scientific rigour, and long-term maintainability—not short-term speed.
Your core objectives:
- Act as a strong-reasoning, strong-planning coding assistant, delivering high-quality solutions and implementations with minimal back-and-forth;
- Prioritize getting it right the first time; avoid superficial answers and unnecessary clarifications.

	# Snippet: read a serialized object, pull out its count matrix and
	# log2-transform it.
	# Attach the ComplexHeatmap package.
	library(ComplexHeatmap)

	# A bare object name at top level prints the object.
	# NOTE(review): `count_matrix` is only assigned further down, so this line
	# presumably stands for "start here if you already have the matrix" — confirm.
	count_matrix # if your data is called count_matrix

	# Read data
	# Load the object stored in data/se.rds and extract its assay.
	# NOTE(review): assay() is not defined in this snippet; presumably it is
	# SummarizedExperiment::assay() — confirm that package is attached.
	se <- readRDS("data/se.rds")
	count_matrix <- assay(se)

	# Data transformation
	# Add 1 before taking log2 so that a zero count maps to 0 instead of -Inf.
	count_matrix_log2 <- log2(count_matrix + 1)

	# The function to merge libs for counttable -----------------
	#
	# Every column name of `counttable_data` is scanned for a "lib<digits>" tag.
	# For each id in `lib_to_merge_vector`, the columns carrying that tag are
	# summed row-wise into one column named after the id.
	#
	# Arguments:
	#   counttable_data      Data frame (or matrix) of counts whose column names
	#                        contain a "lib<digits>" tag. The `...` default is a
	#                        fill-in placeholder kept from the original
	#                        signature; the argument must be supplied.
	#   lib_to_merge_vector  Library ids to merge, e.g. c("lib1", "lib2").
	#                        Same placeholder default; must be supplied.
	#
	# Returns:
	#   A tibble with one row per row of `counttable_data` and one column per
	#   requested library id. An id matching no column yields a column of zeros.
	counttable_merge_library_fun <- function(counttable_data = ...,
	                                         lib_to_merge_vector = ...) {
	  lib_id <- stringr::str_extract(colnames(counttable_data), "lib\\d{1,}")

	  # lapply() always returns a list, unlike sapply(), whose result type
	  # depends on the shape of the individual results.
	  merged_counttable <- lapply(lib_to_merge_vector, function(one_lib_id_to_merge) {
	    # which() drops the NA tags of columns without a "lib<digits>" label.
	    cols_to_merge <- which(lib_id == one_lib_id_to_merge)
	    # drop = FALSE keeps a 2-d object even when a single column matches.
	    unname(rowSums(counttable_data[, cols_to_merge, drop = FALSE]))
	  })

	  # Name each merged column by the library id it represents.
	  names(merged_counttable) <- as.character(lib_to_merge_vector)

	  MergedLib <- tibble::as_tibble(merged_counttable)
	  MergedLib
	}

	# Cluster analysis in R
	# inspired by Dima Gorenshteyn, DataCamp
	# `df` (numeric data) and `the_height` (cut height) are placeholders that
	# must exist before this snippet is run.

	## Standardize data
	# Centre and scale every column so that no variable dominates the distances
	# purely because of its units.
	df_st <- scale(df)

	## Hierarchical clustering
	# Distances are computed on the standardized data, not on the raw `df`.
	d <- dist(df_st)
	# method must be a real linkage name, e.g. "complete", "average", "single".
	hc <- hclust(d, method = "complete")
	# h: the height at which to cut the tree; the result assigns a cluster to
	# each observation. Calls such as c(1, 2) still reach base::c(), but a more
	# descriptive name than `c` is advisable in new code.
	c <- cutree(hc, h = the_height)

	# First install R (https://www.r-project.org/) and RStudio (https://rstudio.com/)

	# Install enrichR only when it is missing, so re-running this script does
	# not reinstall the package every time.
	if (!requireNamespace("enrichR", quietly = TRUE)) {
	  install.packages("enrichR")
	}

	# Load enrichR
	library(enrichR)

	# Get your gene list, e.g. type by hand
	Inflammatory_markers <- c(
	  "IL13", "MMP12", "IL22", "NTRK1", "CCL17", "IL36A", "ICOS", "CCL18",
	  "ALOX15", "CCL1", "CCR5", "IL13RA2", "IL19", "CCR7", "CCL20", "CCR4",
	  "CCR2", "CCL11", "CCL22", "CCR8", "CCL19", "CCL26", "CCL3"
	)

	# Drop gene names that match any of the exclusion patterns below.
	library(dplyr)

	a_vector_of_genes <- c("AP005212.4", "Z98257.1", "U62317.4", "CLIC4P3", "PGLYRP2", "NEK4P1")

	# Alternatives are separated by a plain `|`. The backslash-escaped form is
	# not a valid escape inside an R string literal and stops the file parsing.
	#   \d{1,}P$        name ends in <digits>P
	#   \d{1,}P\d{1,}$  name ends in <digits>P<digits>
	#   \.              name contains a dot
	#   -AS\d{1}        name contains "-AS" followed by a digit
	#   -DT             name contains "-DT"
	# NOTE(review): presumably these target pseudogenes, clone-based ids,
	# antisense and divergent transcripts — confirm against the naming scheme.
	gene_name_exclusion_pattern <- "\\d{1,}P$|\\d{1,}P\\d{1,}$|\\.|-AS\\d{1}|-DT"

	a_vector_of_cleaned_genes <- data.frame(a_vector_of_genes) %>%
	  filter(!stringr::str_detect(a_vector_of_genes, gene_name_exclusion_pattern)) %>%
	  pull(a_vector_of_genes)

	# Shift the feature numbers of `neg_data` past the largest feature number in
	# `pos_data`, then rebuild the labels as "F<number>".
	# `pos_data` and `neg_data` must already exist and have a `feature` column
	# whose values contain a run of digits.

	# str_extract() returns a plain character vector. as.integer() makes max()
	# and `+` numeric, avoiding lexicographic comparison ("9" > "10") and the
	# "non-numeric argument" error. Integers also keep paste0() from printing
	# large values in scientific notation.
	pos_n_max <- pos_data %>%
	  pull(feature) %>%
	  stringr::str_extract("\\d{1,}") %>%
	  as.integer() %>%
	  max()
	neg_feature_n <- neg_data %>%
	  pull(feature) %>%
	  stringr::str_extract("\\d{1,}") %>%
	  as.integer()
	# The result is left unassigned, as in the original, so it prints at top level.
	neg_data %>% mutate(feature = paste0("F", neg_feature_n + pos_n_max))