Tutorial from here:

ggplot implements a grammar of graphics for plotting. It uses the following building blocks to create the final plot:

  • data
  • aesthetic mapping
  • geometric object
  • statistical transformations
  • scales
  • coordinate system
  • position adjustments
  • faceting
# Load the state-level housing data used by the examples below.
# read_csv() is not base R -- presumably readr::read_csv(); confirm that
# readr/tidyverse is attached before this line.
housing <- read_csv("../inst/extdata/landdata-states.csv")
# NOTE(review): the next five lines do not parse as written. The first
# statement ends in a dangling `+`, and `y=Home.Value, color=State))+` has no
# opening call. They look like fragments of several example plots whose other
# lines were lost; restore them from the original tutorial.
ggplot(housing, aes(x = Home.Value)) +
housing %>% 
filter(State %in% c("MA", "TX")) %>% 
           y=Home.Value, color=State))+
  geom_point(aes(x=Land.Value, y=Structure.Cost))

Aesthetics and geometric objects

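An aesthetic is a visual property of the plot (for example position, color, size or shape) that is mapped to a variable in the dataset. Geometric objects (geoms) are the marks actually drawn on the plot, such as points, lines or bars.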

# Keep only the rows of `housing` whose Date equals 2001.25.
hp2001Q1 <- filter(housing, Date == 2001.25) 
# NOTE(review): the line below is an orphaned fragment. The call that opens
# this aes(...) argument (presumably ggplot(hp2001Q1, ...) -- confirm) and the
# layer after the trailing `+` are missing, so it does not parse as written.
       aes(y = Structure.Cost, x = Land.Value)) +

One plot can have multiple geoms:

# Base plot reused by the examples below: structure cost against
# log-transformed land value for the hp2001Q1 subset.
p1 <- ggplot(hp2001Q1, aes(x = log(Land.Value), y = Structure.Cost))

# Points only, colored by home value.
# This statement used to end in a dangling `+`, which chained the following
# `p1` into the same expression; ggplot2 cannot add one plot object to
# another, so the two plots are now separate statements.
# NOTE(review): a second layer may have been lost after this line -- confirm
# against the original tutorial.
p1 + geom_point(aes(color = Home.Value))

# The same points with a linear-model smoother drawn on top.
p1 +
  geom_point(aes(color = Home.Value)) +
  geom_smooth(method = "lm")
Different geoms can have different aesthetics.

# Draw each observation as its state label instead of a point.
p1 + geom_text(mapping = aes(label = State), size = 3)

## install.packages("ggrepel") 
# Points plus labels placed by geom_text_repel().
# NOTE(review): geom_text_repel() is presumably provided by ggrepel -- confirm
# that the package is attached before this runs.
p1 + geom_point() + geom_text_repel(mapping = aes(label = State), size = 3)

Aesthetics vs. assignments

# Mapping vs. setting: the first argument goes through aes(), the second is
# passed to the geom directly.
p1 + geom_point(
  aes(size = 2),  # incorrect! 2 is not a variable
  color = "red"   # this is fine -- all points red
)

# Map both color and point shape to columns of the data.
p1 + geom_point(aes(color = Home.Value, shape = region))
# Base plot for the histogram examples: only x is mapped.
p2 <- ggplot(data = housing, mapping = aes(x = Home.Value))

# Histogram with the default binning.
p2 + geom_histogram()

# Same histogram with the stat named explicitly and a bin width of 4000.
p2 + geom_histogram(
  stat = "bin",
  binwidth = 4000
)

# Mean Home.Value per State, computed with aggregate().
housing.sum <- aggregate(
  x = housing["Home.Value"],
  by = housing["State"],
  FUN = mean
)

# Show the first and last rows of the summary in one table.
rbind(
  head(housing.sum),
  tail(housing.sum)
)
##    State Home.Value
## 1     AK  147385.14
## 2     AL   92545.22
## 3     AR   82076.84
## 4     AZ  140755.59
## 5     CA  282808.08
## 6     CO  158175.99
## 46    VA  155391.44
## 47    VT  132394.60
## 48    WA  178522.58
## 49    WI  108359.45
## 50    WV   77161.71
## 51    WY  122897.25
# NOTE(review): this statement ends in a dangling `+` with no layer after it,
# so it is incomplete as written. The geom for the per-state means
# (presumably a bar layer -- confirm) needs to be restored.
ggplot(housing.sum, aes(x=State, y=Home.Value)) + 

Scale modifications

# Base plot for the scale examples: Home.Price.Index by State.
# NOTE(review): this statement does not parse as written. The call that
# `axis.text=element_text(size = 6))` belongs to (presumably theme(...) --
# confirm) is missing, leaving a bare `axis.text=` argument and an unmatched
# closing parenthesis.
p3 <- ggplot(housing,
             aes(x = State,
                 y = Home.Price.Index)) + 
              axis.text=element_text(size = 6))
# Add semi-transparent points colored by Date, jittered horizontally only
# (height = 0).
p4 <- p3 +
  geom_point(
    mapping = aes(color = Date),
    position = position_jitter(width = 0.25, height = 0),
    size = 1.5,
    alpha = 0.5
  )
# NOTE(review): the three statements below are truncated and do not parse as
# written. In each one the call that takes the `breaks = ...` / `labels = ...`
# arguments (presumably a scale_color_*() function -- confirm) is missing, and
# the second statement also ends in a dangling comma. Restore the missing
# lines from the original tutorial.
p4 + scale_x_discrete(name="State Abbreviation") +
                         breaks = c(1976, 1994, 2013),
                         labels = c("'76", "'94", "'13"))

p4 +
  scale_x_discrete(name="State Abbreviation") +
                         breaks = c(1976, 1994, 2013),
                         labels = c("'76", "'94", "'13"),

p4 +
                        breaks = c(1976, 1994, 2013),
                        labels = c("'76", "'94", "'13"),
                        low = "blue",
                        high = "red",
                        mid = "gray60",
                        midpoint = 1994)


# Base plot for the faceting examples: Home.Value over Date, restricted to
# the MA and TX rows.
p5 <- ggplot(
  filter(housing, State %in% c("MA", "TX")),
  aes(x = Date, y = Home.Value)
)

# One line per state, told apart by color.
p5 + geom_line(aes(color = State))

# Store the faceted version back into p5 (one panel per State), then show it.
p5 <- p5 + geom_line() + facet_wrap(~State, ncol = 10)
p5

There is also a facet_grid() function for faceting in two dimensions.


Overriding theme defaults

Specific theme elements can be overridden using theme(). For example:

# Start from theme_minimal(), then override the color of the `text` element.
p5 +
  theme_minimal() +
  theme(
    text = element_text(color = "turquoise")
  )

All theme options are documented in ?theme.

Creating and saving new themes

You can create new themes, as in the following example:

# A custom theme: theme_bw() with a handful of element overrides.
theme_new <- theme_bw() +
  theme(
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # rect/line elements; switch once the minimum ggplot2 version is confirmed.
    plot.background = element_rect(size = 1, color = "blue", fill = "black"),
    # Use the lowercase family name: R's device-independent font families are
    # "serif", "sans" and "mono", and some devices (e.g. windows(), pdf()) do
    # not recognise "Serif".
    text = element_text(size = 12, family = "serif", color = "ivory"),
    axis.text.y = element_text(color = "purple"),
    axis.text.x = element_text(color = "red"),
    panel.background = element_rect(fill = "pink"),
    strip.background = element_rect(fill = "orange")
  )

# Apply the custom theme to the faceted plot.
p5 + theme_new

# Mean Home.Value and Land.Value for each Date value.
housing.byyear <- aggregate(
  cbind(Home.Value, Land.Value) ~ Date,
  data = housing,
  FUN = mean
)

# Long format: one row per (Date, type) pair, with the column name in `type`
# and the mean in `value`.
# The two assignments had been fused into a single
# `a <- aggregate(...) <- gather(...)` expression, which is not a valid
# assignment; they are separate statements again.
# NOTE(review): the name `home.land.byyear` is an assumption -- confirm it
# against the plotting call that consumes this data. That call is truncated
# below this block (only `color=type)) +` survives) and still needs restoring.
home.land.byyear <- gather(
  housing.byyear,
  key = "type",
  value = "value",
  Home.Value, Land.Value
)
           color=type)) +


  1. Use the iris dataset.
  2. Plot a scatter plot using Sepal.Length and Sepal.Width.
  3. Color the dots by Species.
  4. Add a linear regression line for the whole dataset.
  5. Add a linear regression line for each species, colored by species.
  6. Plot boxplots of Sepal.Length, one for each species, each in a different color.
  7. Plot the individual data points on top of the boxplots.
  8. Change the colors to red, blue and green.
  9. Change the axis labels.
  10. Flip the coordinates.
  11. Change the theme to theme_bw.