From 30c03f01d0475416ea8f639a1f703ff30761e67f Mon Sep 17 00:00:00 2001 From: Jens Sauer Date: Thu, 19 Nov 2020 13:16:34 +0100 Subject: [PATCH 1/4] Remove unused factor levels from data after limiting After limiting the data there may be factors with empty levels. This can be confusing and should be avoided. --- utils.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils.R b/utils.R index 33568de..06bb87c 100644 --- a/utils.R +++ b/utils.R @@ -77,6 +77,9 @@ sma_load_data <- function(file) { # At this time only solid tumors are important sma <- filter(sma, diagnosis_type == "solid") + # After filtering unused levels must be cleared from the data + sma <- droplevels(sma) + return(sma) } From e0faa24f40efcc7df69a4e2508cfe6b83d9f6f49 Mon Sep 17 00:00:00 2001 From: Jens Sauer Date: Thu, 19 Nov 2020 13:20:52 +0100 Subject: [PATCH 2/4] Add basic histograms This adds some basic histograms to show the distribution of age at transplantation and year of transplantation. --- plots.R | 2 ++ visualize.R | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 visualize.R diff --git a/plots.R b/plots.R index adf8398..7940691 100644 --- a/plots.R +++ b/plots.R @@ -5,8 +5,10 @@ source("survival.R") source("secmal.R") +source("visualize.R") # # Make all plots and save to file sma_plot_file_surv() sma_plot_file_secmal() +sma_plot_file_hist() diff --git a/visualize.R b/visualize.R new file mode 100644 index 0000000..422bdce --- /dev/null +++ b/visualize.R @@ -0,0 +1,22 @@ +# Visualize basics from data +# +# License: GPL version 3 +# Jens Mathis Sauer (c) 2020 + +source("utils.R") +sma_init() + +sma_hist_asct_age <- function() { + data <- data.frame(asct_age = secmal$asct_age) + ggplot(data, aes(x = asct_age)) + geom_histogram(binwidth = 5) +} + +sma_hist_asct_year <- function() { + data <- data.frame(asct_year = secmal$asct_year) + ggplot(data, aes(x = asct_year)) + geom_histogram(binwidth = 1) +} + +sma_plot_file_hist <- function() { + 
sma_plot_file("hist_asct_age.png", png, sma_hist_asct_age) + sma_plot_file("hist_asct_year.png", png, sma_hist_asct_year) +} From 066d6cedf4d93d83b2655ed3839d811cdcc6c187 Mon Sep 17 00:00:00 2001 From: Jens Sauer Date: Thu, 19 Nov 2020 16:12:04 +0100 Subject: [PATCH 3/4] Visualize asct per diagnosis per year This adds ggplots to visualize the number of ascts per diagnosis per year in various methods: * Histogram * Density plot * Frequency plot * Scatter plot --- visualize.R | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/visualize.R b/visualize.R index 422bdce..06c3328 100644 --- a/visualize.R +++ b/visualize.R @@ -16,7 +16,39 @@ sma_hist_asct_year <- function() { ggplot(data, aes(x = asct_year)) + geom_histogram(binwidth = 1) } +i_dx_year_data <- function() { + return(data.frame(asct_year = secmal$asct_year, dx = secmal$diagnosis)) +} + +sma_hist_dx_year <- function() { + data <- i_dx_year_data() + ggplot(data, aes(x = asct_year, colour = dx, fill = dx)) + + geom_histogram(binwidth = 1) +} + +sma_dens_dx_year <- function() { + data <- i_dx_year_data() + ggplot(data, aes(x = asct_year, colour = dx, fill = dx)) + + geom_density(alpha = 0.5) +} + +sma_freq_dx_year <- function() { + data <- i_dx_year_data() + ggplot(data, aes(x = asct_year, colour = dx)) + + geom_freqpoly(binwidth = 1) +} + +sma_jitt_dx_year <- function() { + data <- i_dx_year_data() + ggplot(data, aes(x = asct_year, y = dx, colour = dx, fill = dx)) + + geom_jitter(width = 0.4, height = 0.2) +} + sma_plot_file_hist <- function() { sma_plot_file("hist_asct_age.png", png, sma_hist_asct_age) sma_plot_file("hist_asct_year.png", png, sma_hist_asct_year) + sma_plot_file("dx_year_hist.png", png, sma_hist_dx_year) + sma_plot_file("dx_year_dens.png", png, sma_dens_dx_year) + sma_plot_file("dx_year_freq.png", png, sma_freq_dx_year) + sma_plot_file("dx_year_jitt.png", png, sma_jitt_dx_year) } From 0be00652e19dbd07c8bbcb64789aa91dc573f67e Mon Sep 17 00:00:00 2001 From: Jens 
Sauer Date: Thu, 19 Nov 2020 19:38:11 +0100 Subject: [PATCH 4/4] Visualize asct per diagnosis per age This adds ggplots to visualize the number of ascts per diagnosis per age in various methods: * Histogram * Density plot * Frequency plot * Scatter plot --- visualize.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/visualize.R b/visualize.R index 06c3328..2b78a73 100644 --- a/visualize.R +++ b/visualize.R @@ -11,6 +11,33 @@ sma_hist_asct_age <- function() { ggplot(data, aes(x = asct_age)) + geom_histogram(binwidth = 5) } +i_asct_age_data <- function() { + return(data.frame(asct_age = secmal$asct_age, dx = secmal$diagnosis)) +} + +sma_hist_dx_age <- function() { + data <- i_asct_age_data() + ggplot(data, aes(x = asct_age, colour = dx, fill = dx)) + + geom_histogram(binwidth = 1) +} + +sma_dens_dx_age <- function() { + data <- i_asct_age_data() + ggplot(data, aes(x = asct_age, colour = dx, fill = dx)) + + geom_density(alpha = 0.3) +} +sma_freq_dx_age <- function() { + data <- i_asct_age_data() + ggplot(data, aes(x = asct_age, colour = dx)) + + geom_freqpoly(binwidth = 1) +} + +sma_jitt_dx_age <- function() { + data <- i_asct_age_data() + ggplot(data, aes(x = asct_age, y = dx, colour = dx, fill = dx)) + + geom_jitter(width = 0.4, height = 0.2) +} + sma_hist_asct_year <- function() { data <- data.frame(asct_year = secmal$asct_year) ggplot(data, aes(x = asct_year)) + geom_histogram(binwidth = 1) @@ -51,4 +78,8 @@ sma_plot_file_hist <- function() { sma_plot_file("dx_year_dens.png", png, sma_dens_dx_year) sma_plot_file("dx_year_freq.png", png, sma_freq_dx_year) sma_plot_file("dx_year_jitt.png", png, sma_jitt_dx_year) + sma_plot_file("dx_age_hist.png", png, sma_hist_dx_age) + sma_plot_file("dx_age_dens.png", png, sma_dens_dx_age) + sma_plot_file("dx_age_freq.png", png, sma_freq_dx_age) + sma_plot_file("dx_age_jitt.png", png, sma_jitt_dx_age) }