## The following code tries to evaluate the performance of different likelihood ratio-based tests
## Yuanlong LIU
## 01-03-2018

############## model 1: keep the same structure ##############
## In this case, a tree structure should be provided
## Constraint: the top three levels have the same contact intensity parameter
## 01-03-2018

p_likelihood_ratio <- function( A, head, mid, tail, num )
{
    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=FALSE)]
    tri_Aa = A_a[upper.tri(A_a, diag=FALSE)]
    tri_Ab = A_b[upper.tri(A_b, diag=FALSE)]

    # gamma_fit(tri_Awhole, num)

    inner_a = get_prob( tri_Aa )
    inner_b = get_prob( tri_Ab )
    inter_ab = get_prob( A_ab )

    ## negative binomial
    # inner_a = get_prob_ng( tri_Aa )
    # inner_b = get_prob_ng( tri_Ab )
    # inter_ab = get_prob_ng( A_ab )

    ## log likelihood of H1
    LH1 = inner_a + inner_b + inter_ab
    LH0 = get_prob( tri_Awhole )
    Lambda = -2*( LH0 - LH1 )
    # cat(Lambda, '\n')

    df_h0 = 1*1
    df_h1 = 3*1
    df = df_h1 - df_h0
    p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
    info = list( Lambda=Lambda, p=p )
    return(info)
}

p_likelihood_ratio_nb <- function( A, head, mid, tail )
{
    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=TRUE)]
    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)]
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)]

    ## negative binomial
    inner_a = get_prob_nb( tri_Aa )
    inner_b = get_prob_nb( tri_Ab )
    inter_ab = get_prob_nb( A_ab )

    ## log likelihood of H1
    LH1 = inner_a + inner_b + inter_ab
    LH0 = get_prob_nb( tri_Awhole )
    Lambda = -2*( LH0 - LH1 )
    # cat(Lambda, '\n')

    n_parameters = 2
    df_h0 = 1*n_parameters
    df_h1 = 3*n_parameters
    df = df_h1 - df_h0
    p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
    info = list( Lambda=Lambda, p=p )
    return(info)
}

p_likelihood_ratio_norm <- function( A, head, mid, tail )
{
    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=TRUE)]
    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)]
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)]

    ## norm
    inner_a = likelihood_norm( tri_Aa )
    inner_b = likelihood_norm( tri_Ab )
    inter_ab = likelihood_norm( A_ab )

    ## log likelihood of H1
    LH1 = inner_a + inner_b + inter_ab
    LH0 = likelihood_norm( tri_Awhole )
    Lambda = -2*( LH0 - LH1 )
    # cat(Lambda, '\n')

    n_parameters = 2
    df_h0 = 1*n_parameters
    df_h1 = 3*n_parameters
    df = df_h1 - df_h0
    p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
    info = list( Lambda=Lambda, p=p )
    return(info)
}
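## Illustrative sketch (not part of the original pipeline): the tests above all share the same
## likelihood-ratio construction. Under H0, one parameter set is fitted to all values of the parent
## domain; under H1, the two child blocks and the inter-block each get their own parameters, and
## Lambda = -2*(LH0 - LH1) is compared to a chi-square with df_h1 - df_h0 degrees of freedom.
## The toy code below (kept as comments) uses a plain normal log-likelihood in place of the helpers
## get_prob()/get_prob_nb()/likelihood_norm(), which are defined elsewhere in this repository;
## loglik_toy and the x_* vectors are hypothetical names used only for this illustration.
# loglik_toy <- function(x) sum(dnorm(x, mean=mean(x), sd=sd(x), log=TRUE))
# set.seed(1)
# x_a = rnorm(50, mean=5); x_b = rnorm(50, mean=5); x_ab = rnorm(50, mean=2)
# LH1 = loglik_toy(x_a) + loglik_toy(x_b) + loglik_toy(x_ab)  ## three parameter sets (H1)
# LH0 = loglik_toy(c(x_a, x_b, x_ab))                         ## one shared parameter set (H0)
# Lambda = -2*(LH0 - LH1)
# pchisq(Lambda, df=3*2 - 1*2, lower.tail=FALSE)              ## small p: the blocks differ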
p_likelihood_ratio_gamma <- function( A, head, mid, tail, n_parameters, imputation )
{
    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=TRUE)]

    ## added 25-03-2018. If there are no zero values, no imputation is needed
    if( sum(tri_Awhole==0) == 0 ) imputation = FALSE

    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)]
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)]

    ## gamma
    inner_a = likelihood_gamma_mme( tri_Aa )
    inner_b = likelihood_gamma_mme( tri_Ab )
    inter_ab = likelihood_gamma_mme( A_ab )
    whole = likelihood_gamma_mme( tri_Awhole )

    if( imputation ) ## zero values are imputed from the distribution estimated on the non-zero values
    {
        inner_a = likelihood_gamma_mme( tri_Aa[tri_Aa!=0] )/length( tri_Aa[tri_Aa!=0] )*length( tri_Aa )
        inner_b = likelihood_gamma_mme( tri_Ab[tri_Ab!=0] )/length( tri_Ab[tri_Ab!=0] )*length( tri_Ab )
        inter_ab = likelihood_gamma_mme( A_ab )/length( A_ab[A_ab!=0] )*length( A_ab )
        whole = likelihood_gamma_mme( tri_Awhole[tri_Awhole!=0] )/length( tri_Awhole[tri_Awhole!=0] )*length( tri_Awhole )
        n_parameters = n_parameters - 1 ## the mixture parameter of 0 is not taken into account
    }

    ## log likelihood of H1
    LH1 = inner_a + inner_b + inter_ab
    LH0 = whole
    Lambda = -2*( LH0 - LH1 )
    # cat(Lambda, '\n')

    df_h0 = 1*n_parameters
    df_h1 = 3*n_parameters
    df = df_h1 - df_h0
    p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
    info = list( Lambda=Lambda, p=p )
    return(info)
}

lognormal_mean_test <- function( cA, head, mid, tail )
{
    A_whole = cA[head:tail, head:tail]
    A_a = cA[head:mid, head:mid]
    A_b = cA[(mid+1):tail, (mid+1):tail]
    A_ab = cA[head:mid, (mid+1):tail]

    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)]
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)]

    tri_Aa_vec = as.vector( tri_Aa )
    tri_Ab_vec = as.vector( tri_Ab )
    A_ab_vec = as.vector( A_ab )

    tri_Aa_vec_p = tri_Aa_vec[tri_Aa_vec!=0]
    tri_Ab_vec_p = tri_Ab_vec[tri_Ab_vec!=0]
    A_ab_vec_p = A_ab_vec[A_ab_vec!=0]

    p_Aa = p_lognormal_mean( tri_Aa_vec_p, A_ab_vec_p )
    p_Ab = p_lognormal_mean( tri_Ab_vec_p, A_ab_vec_p )
    return( list(p_Aa=p_Aa, p_Ab=p_Ab) )
}

## https://www.jstor.org/stable/2533570
## google: Methods for Comparing the Means of Two Independent Log-Normal Samples
## 03-07-2018
p_lognormal_mean <- function( vec_aa, vec_ab )
{
    if( all(vec_aa==0) | all(vec_ab==0) ) return(0)

    n_aa = length(vec_aa)
    n_ab = length(vec_ab)

    fited_info_aa = MASS::fitdistr(vec_aa, 'lognormal') ## intra
    mu_aa = fited_info_aa$estimate[1]
    sd_aa = fited_info_aa$estimate[2]
    s2_aa = sd_aa^2

    fited_info_ab = MASS::fitdistr(vec_ab, 'lognormal') ## inter
    mu_ab = fited_info_ab$estimate[1]
    sd_ab = fited_info_ab$estimate[2]
    s2_ab = sd_ab^2

    z_score = ( (mu_aa - mu_ab) + (1/2)*(s2_aa - s2_ab) ) / sqrt( s2_aa/n_aa + s2_ab/n_ab + (1/2)*( s2_aa^2/(n_aa-1) + s2_ab^2/(n_ab-1) ) )
    p = pnorm( z_score, lower.tail=FALSE )
    return(p)
}
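## Illustrative usage sketch (not part of the original pipeline): p_lognormal_mean() implements the
## z-test from the reference above (JSTOR 2533570) for comparing the means of two independent
## log-normal samples; a small p-value supports a higher intra-domain than inter-domain mean.
## The simulated samples below are only for illustration.
# set.seed(1)
# intra = rlnorm(200, meanlog=1.0, sdlog=0.5)  ## e.g. within-domain contact values
# inter = rlnorm(200, meanlog=0.5, sdlog=0.5)  ## e.g. between-domain contact values
# p_lognormal_mean(intra, inter)               ## one-sided p-value, expected to be small here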
get_corner_xy <- function(A_whole)
{
    n = nrow(A_whole)
    corner_size = floor(n/2)
    A_corner = A_whole[1:corner_size, (n - corner_size + 1):n]
    expected_high = A_corner[upper.tri(A_corner, diag=FALSE)]
    expected_low = A_corner[lower.tri(A_corner, diag=TRUE)]
    return(list(x=expected_low, y=expected_high))
}

get_half_mat_values <- function(mat)
{
    n1 = nrow(mat)
    n2 = ncol(mat)
    delta = n1/n2
    rows = lapply( 1:n2, function(x) (1+ceiling(x*delta)):n1 )
    rows = rows[ sapply(rows, function(v) max(v) <= n1) ]
    flag = which(diff(sapply(rows, length)) > 0)
    if(length(flag)>0) rows = rows[ 1:min(flag) ]
    row_col_indices = cbind( unlist(rows), rep(1:length(rows), sapply(rows, length)) )
    x = mat[row_col_indices]
    mat[row_col_indices] = NA
    y = na.omit(as.vector(mat))
    return(list(x=x, y=y))
}

get_half_mat_values_v2 <- function(mat)
{
    ## https://stackoverflow.com/questions/52990525/get-upper-triangular-matrix-from-nonsymmetric-matrix/52991508#52991508
    y = mat[nrow(mat) * (2 * col(mat) - 1) / (2 * ncol(mat)) - row(mat) > -1/2]
    x = mat[nrow(mat) * (2 * col(mat) - 1) / (2 * ncol(mat)) - row(mat) < -1/2]
    return(list(x=x, y=y))
}
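## Illustrative usage sketch (not part of the original pipeline): on a square matrix,
## get_half_mat_values_v2() returns the values above the diagonal as y and the values below the
## diagonal as x (the diagonal itself is dropped); for rectangular matrices the same cut is applied
## along the rescaled diagonal. The toy matrix below is only for illustration.
# m = matrix(1:16, nrow=4)             ## toy 4 x 4 matrix
# halves = get_half_mat_values_v2(m)
# halves$y                             ## values above the diagonal
# halves$x                             ## values below the diagonal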
p_wilcox_test_nested <- function( A, head, mid, tail, alternative, is_CD )
{
    test_1 = p_wilcox_test( A, head, mid, tail, alternative, is_CD, only_corner=FALSE ) #: corner + inter
    if( (tail - head <= 4) | (test_1$p > 0.05) ) ## when p > 0.05 or the domain is already small, do not consider it as nested
    {
        info = list( Lambda=NULL, p=0.555555, mean_diff=0 )
        return(info)
    }

    ## a try-error happens when the TAD to test is too small; therefore assign p=0 to that TAD
    test_left_tad = try(p_wilcox_test( A, head, mid=ceiling((head+mid)/2), mid, alternative, is_CD=FALSE, only_corner=TRUE )) #: corner + inter
    if( class(test_left_tad)=="try-error" ) test_left_tad = list(p=0)

    test_right_tad = try(p_wilcox_test( A, mid+1, mid=ceiling((mid+1+tail)/2), tail, alternative, is_CD=FALSE, only_corner=TRUE )) #: corner + inter
    if( class(test_right_tad)=="try-error" ) test_right_tad = list(p=0)

    info = list( Lambda=NULL, p=max( test_1$p, min(test_left_tad$p, test_right_tad$p) ), mean_diff=0 )
    return(info)
}

p_wilcox_test = function( A, head, mid, tail, alternative, is_CD=FALSE, only_corner=FALSE ) ## only_corner tests if a domain is a TAD (no nesting)
{
    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=TRUE)] ## need to check whether diag should be false or true
    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)] ## need to check whether diag should be false or true
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)] ## need to check whether diag should be false or true

    tri_Aa_vec = as.vector( tri_Aa )
    tri_Ab_vec = as.vector( tri_Ab )
    A_ab_vec = as.vector( A_ab )

    corner_mat_info = get_half_mat_values_v2(A_ab)
    A_ab_corner = as.vector(corner_mat_info$y)
    A_ab_ncorner = corner_mat_info$x

    p_corner = wilcox.test(x=A_ab_corner, y=A_ab_ncorner, alternative='greater', exact=F)
    p_inter = wilcox.test(x=c(tri_Ab_vec, tri_Aa_vec), y=A_ab_ncorner, alternative='greater', exact=F)
    # p_inter = wilcox.test(x=c(tri_Ab_vec, tri_Aa_vec), y=c(A_ab_ncorner, A_ab_corner), alternative='greater', exact=F)

    if(is_CD==FALSE) p = max(p_inter$p.value, p_corner$p.value)
    if(is_CD==TRUE) p = p_inter$p.value ## if the tested part is the CD
    # if(only_corner==TRUE) p = p_corner$p.value

    mean_diff_inter = mean(A_ab_ncorner) - mean(c(tri_Ab_vec, tri_Aa_vec)) ## negative is good
    mean_diff_corner = mean(A_ab_ncorner) - mean(A_ab_corner) ## negative is good

    if(is_CD==FALSE) mean_diff = min(mean_diff_corner, mean_diff_inter)
    if(is_CD==TRUE) mean_diff = mean_diff_inter ## if the tested part is the CD
    if(min(length(tri_Aa_vec), length(tri_Ab_vec)) < 10) mean_diff = 100 ## when one of the two twins is too small. 10: dim(4,4)

    # mean_diff = mean(A_ab_corner) - mean(A_ab_ncorner)
    # p_test_Aa = wilcox.test(x=A_ab_vec, y=tri_Aa_vec, alternative="less", exact=F)
    # p_test_Ab = wilcox.test(x=A_ab_vec, y=tri_Ab_vec, alternative="less", exact=F)
    # p = wilcox.test(x=c(tri_Ab_vec, tri_Aa_vec), y=A_ab_vec, alternative=alternative, exact=F)
    # xy = get_corner_xy(A_whole)
    # p = wilcox.test(x=xy$x, y=xy$y, alternative=alternative, exact=F)
    # p = max(p_test_Aab$p.value, p_test_Ab$p.value)

    info = list( Lambda=NULL, p=p, mean_diff=mean_diff )
    return(info)
}
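## Illustrative usage sketch (not part of the original pipeline): p_wilcox_test() compares the
## within-block values of the two candidate children against the near-diagonal half of the
## inter-block (and, for is_CD=FALSE, also the far-corner half against the near-diagonal half)
## with one-sided Wilcoxon rank-sum tests. The synthetic matrix below, two 10-bin blocks with
## elevated within-block signal, is only for illustration; with is_CD=TRUE only the intra-vs-inter
## comparison is used and its p-value is expected to be small here.
# set.seed(1)
# A_toy = matrix(rpois(400, lambda=5), 20, 20)
# A_toy[1:10, 1:10]   = A_toy[1:10, 1:10]   + rpois(100, lambda=20)
# A_toy[11:20, 11:20] = A_toy[11:20, 11:20] + rpois(100, lambda=20)
# A_toy = (A_toy + t(A_toy)) / 2            ## symmetrise, as for a Hi-C contact matrix
# p_wilcox_test(A_toy, head=1, mid=10, tail=20, alternative='greater', is_CD=TRUE)$p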
p_likelihood_ratio_lnorm <- function( A, head, mid, tail, n_parameters, imputation, imputation_num=1E2 )
{
    likelihood_fun = likelihood_lnorm_mle

    A_whole = A[head:tail, head:tail]
    A_a = A[head:mid, head:mid]
    A_b = A[(mid+1):tail, (mid+1):tail]
    A_ab = A[head:mid, (mid+1):tail]

    tri_Awhole = A_whole[upper.tri(A_whole, diag=TRUE)]

    ## added 25-03-2018. If there are no zero values, no imputation is needed
    no_zero_flag = 0
    if( sum(tri_Awhole==0) == 0 ) { no_zero_flag = 1; imputation = FALSE }

    tri_Aa = A_a[upper.tri(A_a, diag=TRUE)]
    tri_Ab = A_b[upper.tri(A_b, diag=TRUE)]

    tri_Aa_vec = as.vector( tri_Aa )
    tri_Ab_vec = as.vector( tri_Ab )
    A_ab_vec = as.vector( A_ab )

    mean_a = mean( tri_Aa_vec[tri_Aa_vec!=0] )
    mean_b = mean( tri_Ab_vec[tri_Ab_vec!=0] )
    mean_ab = mean( A_ab_vec[A_ab_vec!=0] )
    mean_diff = mean_ab - min(mean_a, mean_b)

    if( (all(tri_Aa_vec==0)) | (all(tri_Ab_vec==0)) | (all(A_ab_vec==0)) )
    {
        info = list( Lambda=NA, p=0, mean_diff=mean_diff )
        return(info)
    }

    ## lnorm
    if(!imputation)
    {
        inner_a = likelihood_fun( tri_Aa )
        inner_b = likelihood_fun( tri_Ab )
        inter_ab = likelihood_fun( A_ab )
        whole = likelihood_fun( tri_Awhole )

        ## log likelihood of H1
        LH1 = inner_a + inner_b + inter_ab
        LH0 = whole
        Lambda = -2*( LH0 - LH1 )

        if( no_zero_flag ) n_parameters = n_parameters - 1 ## no alpha parameter

        df_h0 = 1*n_parameters ## (theta_1 = theta_2 = theta_3)
        df_h1 = 3*n_parameters ## (theta_1, theta_2, theta_3)
        df = df_h1 - df_h0
        p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
        info = list( Lambda=Lambda, p=p, mean_diff=mean_diff )
        return(info)
    }

    if( imputation ) ## zero values are imputed from the distribution estimated on the non-zero values
    {
        vec_list = list( tri_Aa=tri_Aa, tri_Ab=tri_Ab, A_ab=A_ab )
        imputations = imputation_list( vec_list, imputation_num )

        inner_as = apply(imputations$tri_Aa, 1, likelihood_fun)
        inner_bs = apply(imputations$tri_Ab, 1, likelihood_fun)
        inter_abs = apply(imputations$A_ab, 1, likelihood_fun)
        wholes = apply(do.call( cbind, imputations ), 1, likelihood_fun)

        LH1s = inner_as + inner_bs + inter_abs
        LH0s = wholes
        Lambdas = -2*( LH0s - LH1s )
        Lambda = mean( Lambdas )

        n_parameters = n_parameters - 1 ## the mixture parameter is not taken into account
        # cat(Lambda, '\n')

        df_h0 = 1*n_parameters
        df_h1 = 3*n_parameters
        df = df_h1 - df_h0
        p = pchisq(Lambda, df=df, lower.tail=FALSE, log.p=FALSE)
        info = list( Lambda=Lambda, p=p, mean_diff=mean_diff )
        return(info)

        # if( imputation ) ## zero values are imputed by the estimated distribution based on non-zero values
        # {
        #     inner_a = likelihood_lnorm( tri_Aa[tri_Aa!=0] )/length( tri_Aa[tri_Aa!=0] )*length( tri_Aa )
        #     inner_b = likelihood_lnorm( tri_Ab[tri_Ab!=0] )/length( tri_Ab[tri_Ab!=0] )*length( tri_Ab )
        #     inter_ab = likelihood_lnorm( A_ab )/length( A_ab[A_ab!=0] )*length( A_ab )
        #     whole = likelihood_lnorm( tri_Awhole[tri_Awhole!=0] )/length( tri_Awhole[tri_Awhole!=0] )*length( tri_Awhole )
        #     n_parameters = n_parameters - 1 ## the mixture parameter of 0 is not taken into account
        # }
    }
}

imputation_list <- function( vec_list, imputation_num )
{
    imputations = lapply( vec_list, function(v)
    {
        if(sum(v!=0)==1) { final_vec = matrix( v[v!=0], imputation_num, length(v) ); return(final_vec) }
        fit = fit_lnorm(v)
        set.seed(1)
        ## there will be an error if sdlog is NA
        if(!is.na(fit['sdlog'])) imputation_vec = matrix(rlnorm( sum(v==0)*imputation_num, fit['meanlog'], fit['sdlog'] ), nrow=imputation_num)
        if(is.na(fit['sdlog'])) stop("In function imputation_list, sdlog=NA encountered")
        ori_vec = t(replicate(imputation_num, v[v!=0]))
        final_vec = cbind( ori_vec, imputation_vec )
    } )
    return( imputations )
}

fit_lnorm <- function(vec)
{
    vec = vec[vec!=0]
    fit = MASS::fitdistr(vec, 'lognormal')$estimate
    return(fit)
}
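## Illustrative usage sketch (not part of the original pipeline): fit_lnorm() fits a log-normal to
## the non-zero entries of a vector, and imputation_list() replaces the zero entries of each vector
## with draws from that fitted distribution, returning imputation_num imputed copies per vector
## (one copy per row). The toy vectors below are only for illustration.
# set.seed(2)
# v1 = c(rlnorm(30, 1, 0.5), 0, 0)   ## vector with two zero (missing) entries
# v2 = c(rlnorm(40, 2, 0.3), 0)
# fit_lnorm(v1)                      ## meanlog / sdlog estimated from the non-zero part
# imp = imputation_list(list(tri_Aa=v1, tri_Ab=v2), imputation_num=5)
# dim(imp$tri_Aa)                    ## 5 imputed copies, each of length(v1)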