Grammar of graphics with ggplot

Raju Rimal Postdoctoral Researcher

University of Oslo

December 1, 2022

Introduction

Grammar of graphics

Image adapted from The Grammar of Graphics

Plotting system in R

Base

Plotting system in R

Base

Lattice

Plotting system in R

Base

Lattice

ggplot2

Components of ggplot2

ggplot(<DATA>, aes(<MAPPING>)) +
  geom_*(<MAPPING>) +
  scale_x_*(<SCALE_SPEC>) +
  scale_y_*(<SCALE_SPEC>) +
  facet_grid(ROW_FACET ~ COL_FACET) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)

Components of ggplot2

ggplot(<DATA>, aes(<MAPPING>)) +
  geom_*(<MAPPING>) +
  stat_*(geom = <GEOM>, fun = <FUN>) +
  facet_grid(ROW_FACET ~ COL_FACET) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_*(<MAPPING>) +
  stat_*(geom = <GEOM>, fun = <FUN>) +
  facet_grid(ROW_FACET ~ COL_FACET) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_point() +
  stat_*(geom = <GEOM>, fun = <FUN>) +
  facet_grid(ROW_FACET ~ COL_FACET) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_point() +
  stat_summary(fun = mean, color = "red") +
  facet_grid(ROW_FACET ~ COL_FACET) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_point() +
  stat_summary(fun = mean, color = "red") +
  facet_grid(status ~ .) +
  coord_<COORD_SYSTEM>(<SPEC>) +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_point() +
  stat_summary(fun = mean, color = "red") +
  facet_grid(status ~ .) +
  coord_cartesian() +
  theme(<THEME_ARGUMENTS>)
ggplot(cancer, aes(x = ph.karno, y = time)) +
  geom_point() +
  stat_summary(fun = mean, color = "red") +
  facet_grid(status ~ .) +
  coord_polar() +
  theme(<THEME_ARGUMENTS>)
# Final frame of the build-up: every placeholder of the template is filled in.
# NOTE(review): `cancer` is not defined in the lines shown here; presumably it
# is survival::cancer prepared in a setup chunk -- confirm.
ggplot(cancer, aes(x = ph.karno, y = time)) +
  # Raw observations.
  geom_point() +
  # Summary of `time` (its mean) at each `ph.karno` value, drawn in red.
  stat_summary(fun = mean, color = "red") +
  # One row of panels per value of `status`.
  facet_grid(status ~ .) +
  coord_cartesian() +
  theme_bw()

Customizing ggplot2

Aesthetic mappings

  • x, xmin, xmax
  • y, ymin, ymax
  • label
  • group
  • fill, color
  • size, alpha
  • shape, linetype
Code
# Mean and standard error of mpg per carburetor count (point range) with the
# raw observations jittered on top. The axes are flipped, and three text
# annotations carry the aesthetic names "xmin", "x" and "xmax".
ggplot(mtcars, aes(factor(carb), mpg)) +
  # `width` is not a parameter of stat_summary() or the pointrange geom, so it
  # is not passed here (it only produced an "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    color = "firebrick"
  ) +
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon"
  ) +
  # shape 21 is a filled circle, so `fill` is visible.
  geom_point(
    position = position_jitter(width = 0.05),
    shape = 21,
    fill = "whitesmoke"
  ) +
  coord_flip() +
  # Positions are given in data coordinates before the flip.
  annotate(
    geom = "text",
    y = c(23, 25.5, 27.5),
    x = c(0.8, 0.8, 0.8),
    label = c("xmin", "x", "xmax"),
    family = "consolas",
    color = "blue"
  )

Code
# Mean and standard error of mpg per carburetor count (point range) with the
# raw observations jittered on top. Three text annotations carry the
# aesthetic names "ymin", "y" and "ymax".
ggplot(mtcars, aes(factor(carb), mpg)) +
  # `width` is not a parameter of stat_summary() or the pointrange geom, so it
  # is not passed here (it only produced an "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    color = "firebrick"
  ) +
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon"
  ) +
  # shape 21 is a filled circle, so `fill` is visible.
  geom_point(
    position = position_jitter(width = 0.05),
    shape = 21,
    fill = "whitesmoke"
  ) +
  annotate(
    geom = "text",
    y = c(23, 25.5, 27.5),
    x = c(0.8, 0.8, 0.8),
    label = c("ymin", "y", "ymax"),
    family = "consolas",
    color = "blue"
  )

Code
# Mean and standard error of mpg per carburetor count with the raw
# observations on top; cars with six or more carburetors are labelled with
# their model name.
ggplot(mtcars, aes(factor(carb), mpg)) +
  # `width` is not a parameter of stat_summary() or the pointrange geom, so it
  # is not passed here (it only produced an "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    color = "firebrick"
  ) +
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon"
  ) +
  geom_point(
    position = position_jitter(width = 0.05),
    shape = 21,
    fill = "whitesmoke"
  ) +
  geom_text(
    # The layer data is derived from the plot data. The built-in mtcars keeps
    # the model names as row names, so a `model` column is created from them
    # when it is missing; an existing `model` column (e.g. added by an earlier
    # setup step) is used unchanged.
    data = function(d) {
      d <- subset(d, carb >= 6)
      if (!"model" %in% names(d)) {
        d$model <- rownames(d)
      }
      d
    },
    aes(label = model),
    check_overlap = TRUE,
    vjust = -1
  )

Code
# mpg per carburetor count, grouped and coloured by transmission type (`am`):
# jittered raw points, a line through the group means, and mean +/- standard
# error point ranges.
ggplot(mtcars, aes(factor(carb), mpg, group = am, color = factor(am))) +
  geom_point(
    position = position_jitter(width = 0.05),
    fill = "whitesmoke"
  ) +
  stat_summary(
    fun = mean,
    geom = "line"
  ) +
  # `width` is not a parameter of stat_summary() or the pointrange geom, so it
  # is not passed here (it only produced an "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    fill = "whitesmoke"
  ) +
  # `am` encodes the transmission type, so the legend is titled accordingly.
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon",
    color = "Transmission",
    fill = "Transmission"
  ) +
  # Legend placed inside the panel near the top-right corner.
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1) + 0.25
  )

Code
# mpg per carburetor count, grouped by transmission type (`am`) and
# distinguished by point shape and line type instead of colour.
ggplot(mtcars, aes(factor(carb), mpg, group = am)) +
  geom_point(
    position = position_jitter(width = 0.05),
    fill = "whitesmoke",
    aes(shape = factor(am))
  ) +
  # Line through the group means.
  stat_summary(
    fun = mean,
    geom = "line",
    aes(linetype = factor(am))
  ) +
  # Mean +/- standard error. `width` is not a parameter of stat_summary() or
  # the pointrange geom, so it is not passed here (it only produced an
  # "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    fill = "whitesmoke",
    aes(shape = factor(am), linetype = factor(am))
  ) +
  # Identical titles let the shape and linetype legends merge into one.
  # `am` encodes the transmission type, so the legend is titled accordingly.
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon",
    color = "Transmission",
    linetype = "Transmission",
    fill = "Transmission",
    shape = "Transmission"
  ) +
  # Hollow shapes.
  scale_shape_discrete(solid = FALSE) +
  # Legend placed inside the panel near the top-right corner.
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1) + 0.25
  )

Summary statistics

Code
# Summary statistics computed by the plot itself: stat_summary() draws the
# group means as a line and the mean +/- standard error as point ranges, on
# top of the jittered raw observations. Groups are the transmission types.
ggplot(mtcars, aes(factor(carb), mpg, group = am)) +
  geom_point(
    position = position_jitter(width = 0.05),
    fill = "whitesmoke",
    aes(shape = factor(am))
  ) +
  stat_summary(
    fun = mean,
    geom = "line",
    aes(linetype = factor(am))
  ) +
  # `width` is not a parameter of stat_summary() or the pointrange geom, so it
  # is not passed here (it only produced an "unknown parameters" warning).
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    fill = "whitesmoke",
    aes(shape = factor(am), linetype = factor(am))
  ) +
  # Identical titles let the shape and linetype legends merge into one.
  # `am` encodes the transmission type, so the legend is titled accordingly.
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon",
    color = "Transmission",
    linetype = "Transmission",
    fill = "Transmission",
    shape = "Transmission"
  ) +
  # Hollow shapes.
  scale_shape_discrete(solid = FALSE) +
  # Legend placed inside the panel near the top-right corner.
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1) + 0.25
  )

Code
# Jittered raw observations plus one fitted linear trend per transmission
# type, distinguished by point shape and line type.
ggplot(mtcars, aes(factor(carb), mpg)) +
  geom_point(
    position = position_jitter(width = 0.05),
    # shape needs a discrete variable; factor() also matches the other slides
    # and leaves `am` unchanged if it already is a factor.
    aes(shape = factor(am)),
    fill = "whitesmoke",
    # Plain number: rel() is meant for theme elements.
    size = 3
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    # linetype needs a discrete variable as well.
    aes(linetype = factor(am), group = am)
  ) +
  # Identical titles let the shape and linetype legends merge into one.
  labs(
    x = "Number of carburetors",
    y = "Miles per gallon",
    color = "Transmission",
    linetype = "Transmission",
    fill = "Transmission",
    shape = "Transmission"
  ) +
  # Hollow shapes.
  scale_shape_discrete(solid = FALSE) +
  # Legend placed inside the panel near the top-right corner.
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1) + 0.25
  )

Faceting

Code
# Histogram of `time` on the density scale with a density curve on top,
# arranged in a grid of panels: `status` in rows, `sex` in columns.
ggplot(data = survival::cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_grid(rows = vars(status), cols = vars(sex)) +
  labs(title = "Example of facet grid")

Code
# Bin `age` into 10-unit intervals from 35 to 85, then draw one
# histogram/density panel per age group.
cancer <- as.data.table(survival::cancer)
cancer[, age_group := cut(age, breaks = seq(from = 35, to = 85, by = 10))]
ggplot(data = cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_wrap(facets = vars(age_group)) +
  labs(title = "Example of facet wrap")

Code
# Same facet-wrap plot, now with readable strip labels.
cancer <- as.data.table(survival::cancer)
cancer[, age_group := cut(age, breaks = seq(from = 35, to = 85, by = 10))]
ggplot(data = cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_wrap(
    facets = vars(age_group),
    labeller = labeller(
      # Turns an interval label of the form "(a,b]" into "Age Group: a-b".
      age_group = function(intervals) {
        gsub(
          pattern = "\\((.*?),(.*?)\\]",
          replacement = "Age Group: \\1-\\2",
          x = intervals
        )
      }
    )
  ) +
  labs(title = "Example of facet wrap")

Scale

Functions to specify (customize) scales. They usually take the form scale_<AESTHETIC>_<TYPE>, e.g. scale_x_continuous.

Code
# Starting point for the scale examples: density histogram of `time` per age
# group, with default x and y scales.
cancer <- as.data.table(survival::cancer)
cancer[, age_group := cut(age, breaks = seq(from = 35, to = 85, by = 10))]
ggplot(data = cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_wrap(
    facets = vars(age_group),
    labeller = labeller(
      # Turns an interval label of the form "(a,b]" into "Age Group: a-b".
      age_group = function(intervals) {
        gsub(
          pattern = "\\((.*?),(.*?)\\]",
          replacement = "Age Group: \\1-\\2",
          x = intervals
        )
      }
    )
  ) +
  labs(title = "Example of facet wrap")

Code
# Custom x scale: breaks every 0.5 * 365.241 units of `time`, labelled with
# the break value divided by 365.241.
# NOTE(review): relies on `cancer` (with `age_group`) from an earlier chunk.
ggplot(data = cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_wrap(
    facets = vars(age_group),
    labeller = labeller(
      # Turns an interval label of the form "(a,b]" into "Age Group: a-b".
      age_group = function(intervals) {
        gsub(
          pattern = "\\((.*?),(.*?)\\]",
          replacement = "Age Group: \\1-\\2",
          x = intervals
        )
      }
    )
  ) +
  labs(title = "Example of facet wrap") +
  scale_x_continuous(
    breaks = seq(from = 0, to = 3, by = 0.5) * 365.241,
    labels = function(value) value / 365.241
  )

Code
# Custom x scale as before, plus y-axis labels in scientific notation.
# NOTE(review): relies on `cancer` (with `age_group`) from an earlier chunk.
ggplot(data = cancer, mapping = aes(x = time)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 365.241 / 4,
    colour = "#a0a0a0",
    fill = "#afafaf",
    na.rm = TRUE
  ) +
  geom_density(na.rm = TRUE) +
  facet_wrap(
    facets = vars(age_group),
    labeller = labeller(
      # Turns an interval label of the form "(a,b]" into "Age Group: a-b".
      age_group = function(intervals) {
        gsub(
          pattern = "\\((.*?),(.*?)\\]",
          replacement = "Age Group: \\1-\\2",
          x = intervals
        )
      }
    )
  ) +
  labs(title = "Example of facet wrap") +
  scale_x_continuous(
    breaks = seq(from = 0, to = 3, by = 0.5) * 365.241,
    labels = function(value) value / 365.241
  ) +
  scale_y_continuous(labels = scales::label_scientific())

Scale

Code
# Scatter plot of mpg against hp: colour by gear (converted to a factor so it
# gets a discrete colour scale) and point size by wt, with a customised size
# scale.
mtcars[["gear"]] <- as.factor(mtcars[["gear"]])
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg, colour = gear)) +
  geom_point(mapping = aes(size = wt), alpha = 0.8) +
  scale_size_continuous(
    limits = c(1, 6),
    breaks = seq(from = 1, to = 6, by = 2)
  ) +
  labs(
    x = "Horse power",
    y = "Mile per gallon",
    colour = "Gear",
    size = "Weight"
  ) +
  # Horizontal legend placed inside the panel, offset from the top-right corner.
  theme(
    legend.direction = "horizontal",
    legend.position = c(1, 1),
    legend.justification = c(1.5, 1.5)
  )

Theming ggplot2

axis

title, text, ticks, line

legend

background, margin, spacing, key, title,

position, direction, justification, box

panel

spacing, grid, background

plot

title, subtitle, caption, tag, margin

strip

background, text

Code
# Base plot for the theming and annotation examples, stored as `plt`:
# mpg against hp, coloured by gear, sized by wt, one row of panels per `am`.
mtcars[["gear"]] <- as.factor(mtcars[["gear"]])
plt <- ggplot(data = mtcars, mapping = aes(x = hp, y = mpg, colour = gear)) +
  geom_point(mapping = aes(size = wt), alpha = 0.8) +
  scale_size_continuous(
    limits = c(1, 6),
    breaks = seq(from = 1, to = 6, by = 2)
  ) +
  facet_grid(rows = vars(am)) +
  labs(
    title = "Power vs Consumption",
    x = "Horse power",
    y = "Mile per gallon",
    colour = "Gear",
    size = "Weight"
  ) +
  # Horizontal legend placed inside the panel, offset from the top-right corner.
  theme(
    legend.direction = "horizontal",
    legend.position = c(1, 1),
    legend.justification = c(1.5, 1.5)
  )
# Print the stored plot.
plt

Helpers:

  • element_text
  • element_line
  • element_rect
  • element_blank
  • margin
  • unit

Source: https://github.com/isabellabenabaye/ggplot2-reference/blob/master/ggplot2-theme-elements-reference.pdf

Annotations

  • Use annotate() with a geom to add custom elements
  • geom can be point, line, segment, rect and more
  • Add custom grob using annotation_custom
Code
# Adds two annotations to the stored plot `plt`: a translucent rectangle
# over hp 150-250 / mpg 10-20, and a text label centred on its top edge.
plt +
  annotate(
    geom = "rect",
    xmin = 150, xmax = 250,
    ymin = 10, ymax = 20,
    fill = "ghostwhite",
    alpha = 0.25,
    color = "grey"
  ) +
  annotate(
    geom = "text",
    x = 200,
    y = 20,
    label = "heavy vehicle and\nlow fuel efficiency",
    # Negative vjust lifts the text above y = 20, i.e. above the rectangle.
    vjust = -0.5,
    family = "mono"
  )

Extensions

patchwork, cowplot
Composing multiple ggplot plots
gganimate
Grammar of animated graphics with ggplot2
ggstatsplot and ggpubr
Enhance ggplot2 with statistics and annotations
ggthemes, ggtech, see, ggsci
Extra themes and schemes for ggplot2
ggraph
Visualize network, graphs and trees
ggrepel, geomtextpath
Manage and direct text in your plot

More: survminer, ggridges, ggh4x

Demo

Resources

Books and Resources

Dataset collection

https://vincentarelbundock.github.io/Rdatasets/datasets.html