pacman::p_load(tidyfst,TopDom,R.utils)

# Command-line interface (positional trailing arguments):
#   1. sample      sample name, used as the prefix of every file name
#   2. chrom       chromosome label, used in file names and passed to readHiC()
#   3. resolution  bin size in units of 1000 (file names carry a "k" suffix and
#                  the script multiplies this value by 1000 for binSize)
#   4. norm        normalisation label, only used to build file names
#   5. out_dir     directory that receives the TopDom outputs
args <- commandArgs(trailingOnly = TRUE)

# Fail fast: with missing arguments the values below would silently become NA
# and produce paths such as "NA_NA_NA_NAk.txt.gz". Extra arguments are ignored.
if (length(args) < 5L) {
	stop(
		"Usage: Rscript <script> <sample> <chrom> <resolution> <norm> <out_dir>",
		call. = FALSE
	)
}

sample <- args[1]
chrom <- args[2]
resolution <- as.numeric(args[3])
norm <- args[4]
out_dir <- args[5]

# as.numeric() yields NA for non-numeric text; a non-positive bin size cannot
# describe a bin grid either.
if (is.na(resolution) || resolution <= 0) {
	stop(
		"resolution must be a positive number, got: ", args[3],
		call. = FALSE
	)
}

# Every file name shares the stem <sample>_<chrom>_<norm>_<resolution>k.
# The input and the intermediate matrix are relative to the working directory;
# only the .rdata path is placed under out_dir.
stem <- paste0(sample, "_", chrom, "_", norm, "_", resolution, "k")
file <- paste0(stem, ".txt.gz")
new_file <- paste0(stem, "_for_topdom.txt")
new_file_gz <- paste0(new_file, ".gz")
rdata_file <- paste0(out_dir, "/", stem, ".topdom.rdata")

# Build the dense, tab-separated matrix file that readHiC() is later given,
# unless a compressed copy from an earlier run already exists.
if (!file.exists(new_file_gz)) {
	# The code below uses columns V1 and V2 as bin coordinates and V3 as the
	# value, i.e. it presumably expects a headerless three-column table —
	# confirm against the upstream step that writes `file`.
	raw_matrix <- fread(file)

	# Without this guard an empty or header-bearing input ends in an opaque
	# "'to' must be a finite number" error from seq().
	if (nrow(raw_matrix) == 0L ||
		!all(c("V1", "V2", "V3") %in% names(raw_matrix))) {
		stop(
			"Expected a non-empty table with columns V1, V2, V3 in ", file,
			call. = FALSE
		)
	}

	bin_min <- resolution * 1000
	bin_max <- max(raw_matrix$V1, raw_matrix$V2)

	# NOTE(review): the grid starts at bin_min, not 0. If the input uses
	# 0-based bin start coordinates, rows with coordinate 0 are dropped by the
	# left join below — confirm the coordinate convention of the input.
	bins <- seq(bin_min, bin_max, bin_min)

	# Full bin-by-bin grid (expand.grid names the columns Var1 and Var2), so
	# that bin pairs absent from the sparse input still get a cell.
	model <- expand.grid(bins, bins) %>%
		data.table()

	# Attach the values, pivot to one row per Var1 and one column per Var2,
	# zero-fill pairs without a value, then drop the leading Var1 key column.
	new_format <- left_join_dt(
		model,
		raw_matrix,
		by = c("Var1" = "V1", "Var2" = "V2")
	) %>%
		wider_dt(Var1, name = "Var2", value = "V3") %>%
		replace_na_dt(to = 0) %>%
		select_dt(-1)

	# Bare numeric matrix: no header, no row names, no quoting.
	write.table(
		new_format,
		new_file,
		col.names = FALSE,
		row.names = FALSE,
		quote = FALSE,
		sep = "\t"
	)

	# Compress to new_file_gz, the path tested by the cache check above.
	gzip(new_file)
}


# Make sure the output directory exists before the expensive TopDom run, so a
# missing directory does not surface only when the results are written.
# dir.create() is a no-op (returns FALSE) when the directory already exists,
# hence the separate existence check.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(out_dir)) {
	stop("Cannot create output directory: ", out_dir, call. = FALSE)
}

# Load the dense matrix and call domains with a fixed window of 20 bins.
data <- readHiC(new_file_gz, chr = chrom, binSize = resolution * 1000)
fit <- TopDom(data, window.size = 20L)

domain_info <- fit$domain

# Render coordinates and sizes as plain decimal strings so that large values
# are not written in scientific notation (e.g. 1e+08).
plain_cols <- c("from.coord", "to.coord", "size")
domain_info[plain_cols] <- lapply(
	domain_info[plain_cols],
	format,
	scientific = FALSE,
	trim = TRUE
)

# Annotate every row with its provenance.
domain_info$sample <- sample
domain_info$resolution <- paste0(resolution, "k")

# Reorder by position. Presumably this yields chr, from.coord, to.coord,
# from.id, to.id, tag, size, sample, resolution — this assumes TopDom's
# documented seven-column layout for `domain`; confirm for the installed
# TopDom version.
domain_info <- domain_info[, c(1, 3, 5, 2, 4, 6:9)]

# Keep only rows tagged "domain"; which() also drops rows whose tag is NA.
domain_info <- domain_info[which(domain_info$tag == "domain"), ]

# Headerless, tab-separated domain table.
write.table(
	domain_info,
	paste0(out_dir, "/", sample, "_", chrom, "_", norm, "_", resolution, "k.topdom.domain"),
	quote = FALSE,
	col.names = FALSE,
	row.names = FALSE,
	sep = "\t"
)

# Keep the loaded matrix and the full TopDom fit for downstream inspection.
save(data, fit, file = rdata_file)
