library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library("RColorBrewer")
library("matrixStats")
library("plyr")
##
## Attaching package: 'plyr'
## The following object is masked from 'package:matrixStats':
##
## count
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("data.table")
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library("stringr")
library("ggplot2")
library("Rtsne")
#Import siGlo Control ST Data
All_ST_Clean <- read.csv("All_ST_Clean.csv") # Note this takes a long time to run
#Remove Final Low Quality Measures
numdata <- All_ST_Clean [,-c(1,2,17,18,19,21,56,57,60)]
numdatadf <- data.frame(numdata)
sapply(numdatadf,is.numeric)
## Nuclei.Area.wv1 Nuclei.Form.Factor.wv1
## TRUE TRUE
## Nuclei.Elongation.wv1 Nuclei.Compactness..wv1
## TRUE TRUE
## Nuclei.Chord.Ratio.wv1 Nuclei.Gyration.Radius.wv1
## TRUE TRUE
## Nuclei.Displacement.wv1 Nuclei.Diameter.wv1
## TRUE TRUE
## Nuclei.Perimeter.wv1 Nuclei.Intensity.wv1
## TRUE TRUE
## Nuclei.Total.Intensity.wv1 Nuclei.Intensity.CV.wv1
## TRUE TRUE
## Nuclei.Light.Flux..wv1 Nuclei.Intensity.SD.wv1
## TRUE TRUE
## Nuclei.Major.Axis.wv1 Nuclei.Major.Axis.Angle.wv1
## TRUE TRUE
## Nuclei.Spacing..SOI..wv1 Nuclei.Neighbor.Count..SOI...wv1
## TRUE TRUE
## Nuclei.Spacing..MIN...wv1 Nuclei.Neighbor.Count..MIN..wv1
## TRUE TRUE
## Nuclei.Spacing..Gabriel...wv1 Nuclei.Neighbor.Count..Gabriel...wv1
## TRUE TRUE
## Nuclei.Spacing..Lune...wv1 Nuclei.Neighbor.Count..Lune...wv1
## TRUE TRUE
## Nuclei.Skewness.wv1 Nuclei.Kurtosis.wv1
## TRUE TRUE
## Nuclei.Energy.wv1 Nuclei.Entropy.wv1
## TRUE TRUE
## Cells.Area.wv3 Cells.Form.Factor..wv3
## TRUE TRUE
## Cells.Elongation.wv3 Cells.Compactness.wv3
## TRUE TRUE
## Cells.Chord.Ratio..wv3 Cells.Gyration.Radius..wv3
## TRUE TRUE
## Cells.Nuc.Cell.Area.wv3 Cells.Diameter.wv3
## TRUE TRUE
## Cells.Perimeter.wv3 Cells.Intensity..Cell..wv3
## TRUE TRUE
## Cells.Intensity..Cyto..wv3 Cells.Total.Intensity..Cell...wv3
## TRUE TRUE
## Cells.Total.Intensity..Cyto...wv3 Cells.Intensity.CV..Cell...wv3
## TRUE TRUE
## Cells.Intensity.CV..Cyto...wv3 Cells.Intensity.Spreading..wv3
## TRUE TRUE
## Cells.Light.Flux..wv3 Cells.Nuc.Cyto.Intensity..wv3
## TRUE TRUE
## Cells.Intensity.SD..Cell..wv3 Cells.Intensity.SD..Cyto..wv3
## TRUE TRUE
## Cells.Max.Intensity.wv3 Cells.Major.Axis.wv3
## TRUE TRUE
## Cells.Minor.Axis.wv3 Cells.Skewness.wv3
## TRUE TRUE
## Cells.Kurtosis.wv3 Cells.Energy.wv3
## TRUE TRUE
## Cells.Entropy.wv3
## TRUE
#Generate Correlation Matrix
library(corrplot)
## corrplot 0.92 loaded
col <- colorRampPalette(c( "#77AADD", "#4477AA", "#FFFFFF", "#EE9988","#BB4444" ))
df_cor <- cor(numdatadf, use=c("pairwise.complete.obs")) # This will also take some time to run
#makes labels fit in margins
par(xpd=TRUE)
corrplot(df_cor, method="color", col=col(20),
type="full", order="hclust", tl.cex = 0.01, # Add coefficient of correlation
tl.col="black", tl.srt=90, #Text label color and rotation
# hide correlation coefficient on the principal diagonal
diag=T,mar = c(2, 0, 1, 0))

#Generate Correlation Tables
library("caret")
## Loading required package: lattice
High_corr <- findCorrelation(df_cor, cutoff = .9, verbose = TRUE, names = TRUE)
## Compare row 34 and column 36 with corr 0.99
## Means: 0.376 vs 0.205 so flagging column 34
## Compare row 36 and column 37 with corr 0.975
## Means: 0.364 vs 0.199 so flagging column 36
## Compare row 37 and column 50 with corr 0.948
## Means: 0.347 vs 0.193 so flagging column 37
## Compare row 45 and column 13 with corr 0.901
## Means: 0.327 vs 0.188 so flagging column 45
## Compare row 13 and column 6 with corr 0.904
## Means: 0.287 vs 0.182 so flagging column 13
## Compare row 8 and column 15 with corr 0.975
## Means: 0.272 vs 0.178 so flagging column 8
## Compare row 15 and column 6 with corr 0.969
## Means: 0.258 vs 0.175 so flagging column 15
## Compare row 6 and column 9 with corr 0.995
## Means: 0.238 vs 0.171 so flagging column 6
## Compare row 9 and column 1 with corr 0.978
## Means: 0.219 vs 0.169 so flagging column 9
## Compare row 1 and column 11 with corr 0.916
## Means: 0.193 vs 0.167 so flagging column 1
## Compare row 28 and column 27 with corr 0.951
## Means: 0.184 vs 0.167 so flagging column 28
## Compare row 55 and column 54 with corr 0.954
## Means: 0.254 vs 0.165 so flagging column 55
## Compare row 42 and column 46 with corr 0.959
## Means: 0.255 vs 0.16 so flagging column 42
## Compare row 46 and column 43 with corr 0.91
## Means: 0.23 vs 0.155 so flagging column 46
## Compare row 40 and column 41 with corr 0.955
## Means: 0.208 vs 0.153 so flagging column 40
## Compare row 47 and column 48 with corr 0.962
## Means: 0.2 vs 0.15 so flagging column 47
## Compare row 39 and column 38 with corr 0.998
## Means: 0.182 vs 0.147 so flagging column 39
## Compare row 38 and column 49 with corr 0.978
## Means: 0.156 vs 0.147 so flagging column 38
## Compare row 12 and column 14 with corr 0.963
## Means: 0.119 vs 0.146 so flagging column 14
## All correlations <= 0.9
hc = findCorrelation(df_cor, cutoff=0.9) # putt any value as a "cutoff"
hc = sort(hc)
reduced_Data = df_cor[,-c(hc)]
reduced_Data <- as.data.frame(reduced_Data)
#Full Correlation Matrix
write.csv(df_cor, "Correlation Matrix.csv")
#Retained Features Matrix - Columns
write.csv(reduced_Data, "Reduced Data.csv")
#Removed Features
write.csv(High_corr, "Correlated Measures.csv")