# xct-process-imagej-results.R ------------------
# Copyright 2017 Confluent Medical Technologies
# Released as part of nitinol-design-concepts
# https://github.com/confluentmedical/nitinol-design-concepts
# under terms of Apache 2.0 license
# http://www.apache.org/licenses/LICENSE-2.0.txt

# summarize and visualize inclusion distributions, orientation, etc.
# explore extreme value distributions
#
# IN:  ./image-data/*-mask-histogram.tsv
#      - table results for mask histogram
#      - count of pixel value 0 represents masked voxels
#      - count of pixel value 255 represents matrix voxels
#      ./image-data/*-lbl-morpho.tsv
#      - results table from MorphoLibJ particle analysis 3D
#      ./image-data/*-lbl-bounds.tsv
#      - results table from MorphoLibJ bounding box analysis
# OUT: ./out/count-by-scan.csv
#      ./out/count-by-type.csv
#      ./out/gumbel-parameters.csv
#      ./out/hist-count-loglog.pdf (and .png)
#      ./out/hist-volume-loglog.pdf (and .png)

# Setup -----------------------------------------

library(fitdistrplus) # for fitting Gumbel distribution
library(tidyverse)    # http://r4ds.had.co.nz/
rm(list=ls())

cutoffVolume <- 8             # filter out particles smaller than this value (cubic microns)
umPerVoxel <- 0.500973555972  # voxel edge size from scan (microns)
cubicUmPerVoxel <- (umPerVoxel^3)
factorID <- c('scan01','scan02','scan03') # valid file name prefixes
factorDesc <- c('SE508','SE508ELI')       # valid descriptions

# set working directory to location of this script
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# create output directory if it does not exist
dir.create('./out', showWarnings = FALSE)

# getSegmentation -------------------------------
# function to read segmentation details from specified
# tab separated value (.tsv) files
# baseName is the file name prefix (from the set factorID)
# description is the material description (from the set factorDesc)
getSegmentation <- function(baseName, description){

  # read volume information from histogram file
  # total volume is about 0.42mm^3, of which 0.25mm^3 is matrix
  # exact volumes vary depending on the mask used for each scan
  histogram000 <- read_tsv(paste0('./image-data/',baseName,'-mask-histogram.tsv'),
                           skip=1, col_names=FALSE)
  histogram255 <- read_tsv(paste0('./image-data/',baseName,'-mask-histogram.tsv'),
                           skip=256, col_names=FALSE)
  voxels000 <- histogram000[[1,2]]
  volume000 <- voxels000 * cubicUmPerVoxel
  voxels255 <- histogram255[[1,2]]
  volume255 <- voxels255 * cubicUmPerVoxel
  totalVolume <- volume255 + volume000
  matrixVolume <- volume255

  # read morphology data (MorphoLibJ > Analysis > Particle Analysis 3D)
  # organize into a new data table called morpho
  morpho <- read_tsv(paste0('./image-data/',baseName,'-lbl-morpho.tsv'),
                     col_names=TRUE)

  # read bounding box data (MorphoLibJ > Analysis > Bounding Box 3D)
  # convert voxels to microns and combine with morphology data
  # add to morphology data table
  bounds <- read_tsv(paste0('./image-data/',baseName,'-lbl-bounds.tsv'),
                     col_names=TRUE) %>%
    mutate(xBox = (XMax-XMin)*umPerVoxel,
           yBox = (YMax-YMin)*umPerVoxel,
           zBox = (ZMax-ZMin)*umPerVoxel) %>%
    select(xBox,yBox,zBox)
  morpho <- bind_cols(morpho,bounds)

  # finally, add the total matrix volume in the last column
  morpho <- morpho %>%
    mutate(vMatrix = matrixVolume)

  # prepend first and second columns to identify this data set
  # (-X1 drops the unnamed label column created by read_tsv)
  morpho <- morpho %>%
    mutate(scanID = baseName,
           scanDesc = description) %>%
    select(scanID, scanDesc, everything(), -X1)

  return(morpho)
}
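# quick sanity check, assuming the scan01 files described in the header
# are present; this is a minimal illustration and is not required by the
# pipeline below, which re-reads all three scans
seg01 <- getSegmentation('scan01','SE508')
stopifnot(all(c('scanID','scanDesc','Volume','vMatrix') %in% colnames(seg01)))
rm(seg01)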
# Read XCT data ---------------------------------
# create a new data frame called xct that binds together results
# from the selected data sets using the getSegmentation routine above
xct <- getSegmentation('scan01','SE508') %>%
  bind_rows(getSegmentation('scan02','SE508ELI')) %>%
  bind_rows(getSegmentation('scan03','SE508ELI')) %>%
  filter(Volume > cutoffVolume) %>%
  mutate(vPerCuMm = Volume / 1e9)

# convert scanID and scanDesc columns into factors
xct <- xct %>%
  mutate(scanID = parse_factor(scanID, levels = factorID),
         scanDesc = parse_factor(scanDesc, levels = factorDesc))

# Create histogram plots ------------------------

p.count <- ggplot(xct) +
  geom_histogram(aes(Volume, fill=scanID)) +
  facet_grid(scanID ~ .) +
  xlab('inclusion size (cubic micron)') +
  ylab('inclusion count') +
  ggtitle('inclusion count by size')
plot(p.count)

p.vol <- ggplot(xct) +
  geom_histogram(aes(Volume, fill=scanID, weight=Volume)) +
  facet_grid(scanID ~ .) +
  xlab('inclusion size (cubic micron)') +
  ylab('sum of inclusion volume (cubic micron)') +
  ggtitle('inclusion volume by size')
plot(p.vol)

p.count.ll <- ggplot(xct) +
  geom_histogram(aes(Volume, fill=scanID)) +
  facet_grid(scanID ~ .) +
  scale_y_log10() +
  scale_x_log10(limits = c(1,NA)) +
  xlab('inclusion size (cubic micron)') +
  ylab('inclusion count') +
  ggtitle('inclusion count by size')
plot(p.count.ll)
ggsave('./out/hist-count-loglog.pdf', width = 6, height = 4)
ggsave('./out/hist-count-loglog.png', width = 6, height = 4)

p.vol.ll <- ggplot(xct) +
  geom_histogram(aes(Volume, fill=scanID, weight=Volume)) +
  facet_grid(scanID ~ .) +
  scale_y_log10() +
  scale_x_log10(limits = c(1,NA)) +
  xlab('inclusion size (cubic micron)') +
  ylab('sum of inclusion volume (cubic micron)') +
  ggtitle('inclusion volume by size')
plot(p.vol.ll)
ggsave('./out/hist-volume-loglog.pdf', width = 6, height = 4)
ggsave('./out/hist-volume-loglog.png', width = 6, height = 4)

# Summarize inclusion density -------------------
# create a table of inclusion count for each scan
# each row represents a particle (inclusion), so counting
# the rows provides the total number of inclusions
inclusionCount <- count(xct, scanID)

# summarize the XCT data
# use max(vMatrix) to grab the max (only) value for matrix volume
# calculate density in terms of cubic microns and cubic millimeters
countByScan <- xct %>%
  group_by(scanID, scanDesc) %>%
  summarize(vMatrix = max(vMatrix)) %>%
  bind_cols(inclusionCount[,2]) %>%
  mutate(nPerUm3 = n/vMatrix,
         nPerMm3 = nPerUm3 * 1e9)

# print summary table by scan
print(countByScan)

# repeat the above, now summarizing by description
# this combines results for multiple scans of the same description
# and summarizes the combined result
countByDesc <- xct %>%
  group_by(scanID, scanDesc) %>%
  summarize(vMatrix = max(vMatrix)) %>%
  bind_cols(inclusionCount[,2]) %>%
  ungroup() %>%
  group_by(scanDesc) %>%
  summarize(vMatrix = sum(vMatrix),
            n = sum(n)) %>%
  mutate(nPerUm3 = n/vMatrix,
         nPerMm3 = nPerUm3 * 1e9)

# print summary table by description
print(countByDesc)

# write results to CSV files --------------------
write_csv(countByScan, path = './out/count-by-scan.csv', na = '0')
write_csv(countByDesc, path = './out/count-by-type.csv', na = '0')

# Estimate projected area -----------------------
# estimate projected area in each plane by dividing the volume
# by the orthogonal bounding box dimension, then calculate the
# root area for later use in the K (stress intensity) formula
xct <- xct %>%
  mutate(xyArea = Volume / zBox, # area projected in XY plane (transverse)
         xzArea = Volume / yBox, # area projected in XZ plane (longitudinal)
         yzArea = Volume / xBox, # area projected in YZ plane (longitudinal)
         rootXyArea = xyArea^(1/2),
         rootXzArea = xzArea^(1/2),
         rootYzArea = yzArea^(1/2))
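# the root-area values above are the size metric in Murakami-type
# stress intensity estimates of the form K = Y * sigma * sqrt(pi * sqrt(area));
# a minimal sketch follows, with a hypothetical helper name and an assumed
# default Y = 0.5 (a commonly cited value for internal defects; ~0.65 for
# surface defects) -- this helper is not part of the original analysis
kFromRootArea <- function(sigmaMpa, rootAreaUm, Y = 0.5){
  rootAreaM <- rootAreaUm * 1e-6       # root-area: microns -> meters
  Y * sigmaMpa * sqrt(pi * rootAreaM)  # stress intensity in MPa*sqrt(m)
}
# example: a 10 micron root-area inclusion under a 400 MPa stress
# kFromRootArea(400, 10)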
# create gumbel functions -----------------------
# http://stats.stackexchange.com/questions/71197/
# usable-estimators-for-parameters-in-gumbel-distribution
dgumbel <- function(x, mu, s){ # PDF
  exp((mu - x)/s - exp((mu - x)/s))/s
}
pgumbel <- function(q, mu, s){ # CDF
  exp(-exp(-((q - mu)/s)))
}
qgumbel <- function(p, mu, s){ # quantile function
  mu - s*log(-log(p))
}

# helper function for fitting Gumbel distribution
gumbelFit <- function(vector){
  fit <- fitdist(vector, "gumbel", start=list(mu=4, s=1), method="mle")
  return(fit)
}

# gumbel fits and plots -------------------------
# code from here down is quite inelegant and tailored
# to this specific example

# split into separate data frames for each material
xct.se508 <- filter(xct, scanDesc=='SE508')
xct.eli <- filter(xct, scanDesc=='SE508ELI')

# gumbel fit for root area (in microns)
# for each material and each plane
gumbel.se508.xz <- gumbelFit(xct.se508$rootXzArea)
gumbel.se508.yz <- gumbelFit(xct.se508$rootYzArea)
gumbel.se508.xy <- gumbelFit(xct.se508$rootXyArea)
gumbel.eli.xz <- gumbelFit(xct.eli$rootXzArea)
gumbel.eli.yz <- gumbelFit(xct.eli$rootYzArea)
gumbel.eli.xy <- gumbelFit(xct.eli$rootXyArea)

# assemble a summary table for 2 materials * 3 planes = 6 items
# columns will be assembled from vectors with 6 items each

# vector for material identification
# repeat se508 3x, and eli 3x
v.matl <- c(rep('se508',3), rep('eli',3))

# vector of the number of particles per mm^3 for each material
n.SE508 <- countByDesc[countByDesc$scanDesc=='SE508','nPerMm3'][[1]]
n.SE508ELI <- countByDesc[countByDesc$scanDesc=='SE508ELI','nPerMm3'][[1]]
v.nPerMm3 <- c(rep(n.SE508,3), rep(n.SE508ELI,3))

# three planes for each of the two materials
v.plane <- rep(c('xy','yz','xz'), 2)

# vector for the cutoff volume
v.cutoff <- rep(cutoffVolume, 6)

# helper function to extract "mu" parameter estimate from Gumbel model
gumbelMu <- function(gumbelModel){
  return(gumbelModel$estimate[[1]])
}

# helper function to extract "s" parameter estimate from Gumbel model
gumbelS <- function(gumbelModel){
  return(gumbelModel$estimate[[2]])
}

# vector of mu parameter for each case
v.mu <- c(gumbelMu(gumbel.se508.xy),
          gumbelMu(gumbel.se508.yz),
          gumbelMu(gumbel.se508.xz),
          gumbelMu(gumbel.eli.xy),
          gumbelMu(gumbel.eli.yz),
          gumbelMu(gumbel.eli.xz))

# vector of s parameter for each case
v.s <- c(gumbelS(gumbel.se508.xy),
         gumbelS(gumbel.se508.yz),
         gumbelS(gumbel.se508.xz),
         gumbelS(gumbel.eli.xy),
         gumbelS(gumbel.eli.yz),
         gumbelS(gumbel.eli.xz))

# new data frame combining all of these vectors
dfGumbel <- tibble(matl = v.matl,
                   plane = v.plane,
                   cutoff = v.cutoff,
                   nPerMm3 = v.nPerMm3,
                   mu = v.mu,
                   s = v.s)

# write volumetric probability and Gumbel parameter results to CSV
# this table includes all the necessary information to create
# probabilistic estimates for the presence and size of particles
# at each element in a Monte Carlo simulation
write_csv(dfGumbel, path = './out/gumbel-parameters.csv', na = '0')
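# minimal sketch of how the gumbel-parameters.csv table could drive the
# Monte Carlo particle sampling mentioned above: inversion sampling with
# qgumbel(); the row choice and sample size are illustrative assumptions
set.seed(1) # reproducible illustration
exampleRow <- dfGumbel[1, ]
rootAreaDraws <- qgumbel(runif(1000),
                         mu = exampleRow$mu,
                         s = exampleRow$s)
summary(rootAreaDraws) # simulated root-area values (microns)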