diff --git a/plots.R b/plots.R
index adf8398..7940691 100644
--- a/plots.R
+++ b/plots.R
@@ -5,8 +5,10 @@
 
 source("survival.R")
 source("secmal.R")
+source("visualize.R")
 
 #
 # Make all plots and save to file
 sma_plot_file_surv()
 sma_plot_file_secmal()
+sma_plot_file_hist()
diff --git a/utils.R b/utils.R
index 5313b7a..635dc8e 100644
--- a/utils.R
+++ b/utils.R
@@ -81,6 +81,9 @@ sma_load_data <- function(file) {
   # At this time only solid tumors are important
   sma <- filter(sma, diagnosis_type == "solid")
 
+  # After filtering, unused levels must be cleared from the data
+  sma <- droplevels(sma)
+
   return(sma)
 }
 
diff --git a/visualize.R b/visualize.R
new file mode 100644
index 0000000..2b78a73
--- /dev/null
+++ b/visualize.R
@@ -0,0 +1,102 @@
+# Visualize basics from data
+#
+# License: GPL version 3
+# Jens Mathis Sauer (c) 2020
+
+source("utils.R")
+sma_init()
+
+# NOTE(review): every function below reads the global `secmal`, which is not
+# defined in this file -- presumably sma_init() provides it; confirm.
+
+# Histogram of `asct_age` over all rows of `secmal`, bin width 5.
+sma_hist_asct_age <- function() {
+  data <- data.frame(asct_age = secmal$asct_age)
+  ggplot(data, aes(x = asct_age)) + geom_histogram(binwidth = 5)
+}
+
+# Internal helper: data frame pairing `asct_age` with `diagnosis` (as `dx`).
+i_asct_age_data <- function() {
+  return(data.frame(asct_age = secmal$asct_age, dx = secmal$diagnosis))
+}
+
+# Histogram of `asct_age`, outline and fill mapped to `dx`, bin width 1.
+sma_hist_dx_age <- function() {
+  data <- i_asct_age_data()
+  ggplot(data, aes(x = asct_age, colour = dx, fill = dx)) +
+    geom_histogram(binwidth = 1)
+}
+
+# Density of `asct_age`, outline and fill mapped to `dx`, alpha 0.3.
+sma_dens_dx_age <- function() {
+  data <- i_asct_age_data()
+  ggplot(data, aes(x = asct_age, colour = dx, fill = dx)) +
+    geom_density(alpha = 0.3)
+}
+
+# Frequency polygon of `asct_age`, line colour mapped to `dx`, bin width 1.
+sma_freq_dx_age <- function() {
+  data <- i_asct_age_data()
+  ggplot(data, aes(x = asct_age, colour = dx)) +
+    geom_freqpoly(binwidth = 1)
+}
+
+# Jittered points with `asct_age` on x and `dx` on y, coloured by `dx`.
+sma_jitt_dx_age <- function() {
+  data <- i_asct_age_data()
+  ggplot(data, aes(x = asct_age, y = dx, colour = dx, fill = dx)) +
+    geom_jitter(width = 0.4, height = 0.2)
+}
+
+# Histogram of `asct_year` over all rows of `secmal`, bin width 1.
+sma_hist_asct_year <- function() {
+  data <- data.frame(asct_year = secmal$asct_year)
+  ggplot(data, aes(x = asct_year)) + geom_histogram(binwidth = 1)
+}
+
+# Internal helper: data frame pairing `asct_year` with `diagnosis` (as `dx`).
+i_dx_year_data <- function() {
+  return(data.frame(asct_year = secmal$asct_year, dx = secmal$diagnosis))
+}
+
+# Histogram of `asct_year`, outline and fill mapped to `dx`, bin width 1.
+sma_hist_dx_year <- function() {
+  data <- i_dx_year_data()
+  ggplot(data, aes(x = asct_year, colour = dx, fill = dx)) +
+    geom_histogram(binwidth = 1)
+}
+
+# Density of `asct_year`, outline and fill mapped to `dx`, alpha 0.5.
+sma_dens_dx_year <- function() {
+  data <- i_dx_year_data()
+  ggplot(data, aes(x = asct_year, colour = dx, fill = dx)) +
+    geom_density(alpha = 0.5)
+}
+
+# Frequency polygon of `asct_year`, line colour mapped to `dx`, bin width 1.
+sma_freq_dx_year <- function() {
+  data <- i_dx_year_data()
+  ggplot(data, aes(x = asct_year, colour = dx)) +
+    geom_freqpoly(binwidth = 1)
+}
+
+# Jittered points with `asct_year` on x and `dx` on y, coloured by `dx`.
+sma_jitt_dx_year <- function() {
+  data <- i_dx_year_data()
+  ggplot(data, aes(x = asct_year, y = dx, colour = dx, fill = dx)) +
+    geom_jitter(width = 0.4, height = 0.2)
+}
+
+# Hand every plot function of this file to sma_plot_file(), each together
+# with its own PNG file name and the `png` device function.
+sma_plot_file_hist <- function() {
+  sma_plot_file("hist_asct_age.png", png, sma_hist_asct_age)
+  sma_plot_file("hist_asct_year.png", png, sma_hist_asct_year)
+  sma_plot_file("dx_year_hist.png", png, sma_hist_dx_year)
+  sma_plot_file("dx_year_dens.png", png, sma_dens_dx_year)
+  sma_plot_file("dx_year_freq.png", png, sma_freq_dx_year)
+  sma_plot_file("dx_year_jitt.png", png, sma_jitt_dx_year)
+  sma_plot_file("dx_age_hist.png", png, sma_hist_dx_age)
+  sma_plot_file("dx_age_dens.png", png, sma_dens_dx_age)
+  sma_plot_file("dx_age_freq.png", png, sma_freq_dx_age)
+  sma_plot_file("dx_age_jitt.png", png, sma_jitt_dx_age)
+}