Commit 67b532db authored by Srikanth Ravichandran

First commit

###Load required libraries###
source('Script_markov.R')
library(plyr)
library(igraph)
library(Matrix)
library(reshape2)
#read the background signaling interactome from Reactome and OmniPath
network = "Reactome_Omnipath_network.txt"
network=read.table(network,sep="\t",header=TRUE)
#Specify the expression cutoff, i.e. the minimum percentage of cells in which both interacting nodes must be expressed
cutoff = 20
#YOUNG DATA
#Read and run the Markov model for a1_young data
input_data = "a1_young.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
a1_young=output_markov(idata,cutoff,network)
#Read and run the Markov model for a2_young data
input_data = "a2_young.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
a2_young=output_markov(idata,cutoff,network)
#Read and run the Markov model for q1_young data
input_data = "q1_young.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
q1_young=output_markov(idata,cutoff,network)
#Read and run the Markov model for q2_young data
input_data = "q2_young.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
q2_young=output_markov(idata,cutoff,network)
#OLD DATA
#Read and run the Markov model for a1_old data
input_data = "a1_old.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
a1_old=output_markov(idata,cutoff,network)
#Read and run the Markov model for a2_old data
input_data = "a2_old.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
a2_old=output_markov(idata,cutoff,network)
#Read and run the Markov model for q1_old data
input_data = "q1_old.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
q1_old=output_markov(idata,cutoff,network)
#Read and run the Markov model for q2_old data
input_data = "q2_old.txt"
idata = read.table(input_data,header=TRUE,stringsAsFactors=FALSE,quote="")
q2_old=output_markov(idata,cutoff,network)
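###Illustrative note (not part of the original pipeline): the eight runs above could equivalently be
###written as the loop sketched below; note that output_markov() uses the global input_data when naming its output file
#for (f in c("a1_young.txt","a2_young.txt","q1_young.txt","q2_young.txt",
#            "a1_old.txt","a2_old.txt","q1_old.txt","q2_old.txt")) {
#  input_data = f
#  idata = read.table(f,header=TRUE,stringsAsFactors=FALSE,quote="")
#  assign(sub(".txt","",f,fixed=TRUE), output_markov(idata,cutoff,network))
#}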
###MERGE OUTPUT FILES OF ALL SUBPOPULATIONS TO IDENTIFY NICHE-SPECIFIC SIGNALING INTERMEDIATES###
#read young steady state files generated in the earlier step
files = list.files(pattern="SS.+..+.young")
listOfFiles <- lapply(files, function(x) read.table(x, header = T,sep='\t'))
d=join_all(listOfFiles, "Gene",type="full")
#comparing and identifying subpopulation-specific intermediates in young:
#a gene present in exactly one of the four subpopulations has NA in the other three SS columns
d$NA_count=rowSums(is.na(d))
ss_young=d[d$NA_count==3,]
#keep Gene and the four SS columns, dropping the NA_count helper column
ss_young=ss_young[1:5]
#read old SS files
files = list.files(pattern="SS.+..+.old")
listOfFiles <- lapply(files, function(x) read.table(x, header = T,sep='\t'))
d1=join_all(listOfFiles, "Gene",type="full")
#comparing and identifying subpopulation-specific intermediates in old (same logic as for young)
d1$NA_count=rowSums(is.na(d1))
ss_old=d1[d1$NA_count==3,]
#keep Gene and the four SS columns, dropping the NA_count helper column
ss_old=ss_old[1:5]
#merging both young and old steady state probabilities
ss_all=join(ss_young,ss_old,by=c("Gene"),type="full")
ss_all$NA_count=rowSums(is.na(ss_all))
#comparing young vs old to identify age-specific signaling molecules
#collect genes detected in both young and old for the same subpopulation (i.e. not age specific)
newdata <- data.frame()
newdata <- rbind(newdata,ss_all[which(ss_all$SS_20_q1_young.txt>0 & ss_all$SS_20_q1_old.txt>0),])
newdata <- rbind(newdata,ss_all[which(ss_all$SS_20_q2_young.txt>0 & ss_all$SS_20_q2_old.txt>0),])
newdata <- rbind(newdata,ss_all[which(ss_all$SS_20_a1_young.txt>0 & ss_all$SS_20_a1_old.txt>0),])
newdata <- rbind(newdata,ss_all[which(ss_all$SS_20_a2_young.txt>0 & ss_all$SS_20_a2_old.txt>0),])
#removing the molecules common to young and old, keeping only age-specific ones
ss_all_final=ss_all[!ss_all$Gene %in% newdata$Gene,]
#keep Gene and the eight SS columns, dropping the NA_count helper column
ss_all_final=ss_all_final[1:9]
#writing final result
write.table(ss_all_final,"SS_all_network_final.txt",sep="\t",quote=F,row.names = F)
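###Illustrative sketch of the merge logic above (toy data frames toy_a/toy_b are hypothetical, not part of the pipeline):
###after a full join on "Gene", a gene detected in only one table has NA in the other table's column,
###so counting NAs per row identifies table-specific genes
toy_a = data.frame(Gene=c("G1","G2"), SS_a=c(0.1,0.2))
toy_b = data.frame(Gene=c("G2","G3"), SS_b=c(0.3,0.4))
toy_m = join_all(list(toy_a,toy_b), "Gene", type="full")
toy_m$NA_count = rowSums(is.na(toy_m))
toy_specific = toy_m[toy_m$NA_count==1,]   #with two tables, NA_count==1 marks table-specific genes (here G1 and G3)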
###################
###PREPROCESSING###
###################
Data_preprocessing <- function(input_data,cutoff,network) ###Function for data preprocessing; returns an igraph graph object
{
b=input_data
a=network
###Set expression values with FPKM < 1 to 0; these entries are not considered in further analysis
b[b < 1] <- 0
###Add a new gene Dummy with expression value 1 in every cell; this only works if Dummy is present in the background network
b[nrow(b)+1, ] <- c("Dummy", rep(1,(ncol(b)-1)))
###Convert the character expression columns to numeric
b[2:ncol(b)]<-as.data.frame(lapply(b[2:ncol(b)],as.numeric,b[2:ncol(b)]))
###Rename the first column so expression can be joined onto the Source (and then Target) column of the network
colnames(b)[1] <- "Source"
ab=join(a,b,by=c("Source"),type="left",match="first")
colnames(ab)[3:ncol(ab)]="source"
colnames(b)[1] <- "Target"
ab1=join(a,b,by=c("Target"),type="left",match="first")
names(ab1) <- NULL
names(ab) <- NULL
ab=ab[,3:ncol(ab)]
ab1=ab1[,3:ncol(ab1)]
ab=as.matrix(ab)
ab1=as.matrix(ab1)
###Elementwise product###
g=ab * ab1
###Sum of elementwise product###
sum_product_expression=rowSums(g, na.rm = FALSE, dims = 1)
g3=cbind(a,sum_product_expression)
###Calculate the percentage of cells in which both interacting genes are expressed###
h=rowSums(g != 0)
percent_expressed=(h*100)/ncol(ab)
g3=cbind(g3,percent_expressed)
g3[is.na(g3)]<-0
###NETWORK preprocessing###
g <- graph.data.frame(as.data.frame(g3))
g=simplify(g,edge.attr.comb=list("first"))
###Delete edges whose summed product expression is zero or whose percentage of expressing cells is below the cutoff###
###convert cutoff to numeric
del=E(g)[sum_product_expression==0|percent_expressed<as.numeric(cutoff)]
g <- delete.edges(g,del)
###Since the TFs and receptors are already connected to Dummy, remove any node with zero in-degree or zero out-degree
###To ensure reachability for the Markov chain
V(g)$degree=igraph::degree(g, v=V(g), mode = c("in"))
###Select Nodes to be deleted
del=V(g)[degree==0]
###delete vertices from graph
while(length(del)!=0)
{
g <- delete.vertices(g,del)
V(g)$degree=igraph::degree(g, v=V(g), mode = c("in"))
del=V(g)[degree==0]
}
###Same as above, but remove nodes with zero out-degree
V(g)$degree=igraph::degree(g, v=V(g), mode = c("out"))
###Select Nodes to be deleted
del=V(g)[degree==0]
while(length(del)!=0)
{
g <- delete.vertices(g,del)
V(g)$degree=igraph::degree(g, v=V(g), mode = c("out"))
del=V(g)[degree==0]
}
###Extract the largest strongly connected component
SCC = clusters(g, mode="strong")
subg <- induced.subgraph(g, which(membership(SCC) == which.max(sizes(SCC))))
subg
}
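###Illustrative sketch of the edge weighting above (toy vectors, not part of the pipeline):
###an edge's sum_product_expression is the sum over cells of source*target expression, and
###percent_expressed is the percentage of cells in which both genes are expressed (product nonzero)
toy_src = c(0, 2, 3, 0, 1)                          #Source expression in 5 cells (FPKM < 1 already set to 0)
toy_tgt = c(4, 1, 0, 0, 2)                          #Target expression in the same 5 cells
toy_weight = sum(toy_src * toy_tgt)                 #sum_product_expression = 2 + 2 = 4
toy_percent = sum(toy_src * toy_tgt != 0)*100/5     #40 percent of cells co-express both genes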
#############################
###STATIONARY DISTRIBUTION###
#############################
Markov_chain_stationary_distribution <- function(g) #Takes an igraph graph as input and computes the stationary (steady-state) distribution of the Markov chain
{
###Extracting transition probabilities from the weighted graph
transition_probability=as.data.frame(as_edgelist(g,names=F))
transition_probability$probability=as.numeric(E(g)$sum_product_expression)
myMatrix = sparseMatrix(i = transition_probability[,1], j = transition_probability[,2], x = transition_probability[,3])
###Making a stochastic matrix
myMatrix = (myMatrix)/rowSums((myMatrix))
###Eigen value of the sparse matrix
ev=eigen(t(myMatrix))
###select the eigenvalue closest to 1
col=which.min(abs(ev$values - 1.00000000))
###STATIONARY DISTRIBUTION
SD=(abs(ev$vectors[,col]))/sum(abs(ev$vectors[,col]))
SD=as.data.frame(SD)
SD=cbind((as.data.frame(V(g)$name)),SD)
SD=as.data.frame(SD)
colnames(SD)[1] <- "Gene"
colnames(SD)[2] <- "Steady_state"
SD
}
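###Illustrative sanity check (toy 3-state chain, not part of the pipeline): the stationary distribution pi
###is the left eigenvector of the transition matrix P for eigenvalue 1, normalised to sum to 1,
###and satisfies pi %*% P = pi
toy_P = matrix(c(0.0, 0.5, 0.5,
                 1.0, 0.0, 0.0,
                 0.5, 0.5, 0.0), nrow=3, byrow=TRUE)  #rows sum to 1 (stochastic matrix)
toy_ev = eigen(t(toy_P))
toy_col = which.min(abs(toy_ev$values - 1))
toy_pi = abs(toy_ev$vectors[,toy_col])/sum(abs(toy_ev$vectors[,toy_col]))
#toy_pi %*% toy_P reproduces toy_pi (approximately 0.444, 0.333, 0.222)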
output_markov <- function(i_data,cut_off,net)
{
g1=Data_preprocessing(i_data,cut_off,net)
#Identification of steady state probability distribution
Steady_state_true=Markov_chain_stationary_distribution(g1)
#NOTE: the output file name uses the global variable input_data (the input file name set by the calling script)
out2_3=paste("SS_",cut_off,"_",input_data,sep="")
colnames(Steady_state_true)[2] <- out2_3
#steady state probability distribution for the given input subpopulation
write.table(Steady_state_true,out2_3,sep="\t",row.names=FALSE,quote=F)
Steady_state_true
}