Datavisualisatievoorbeelden in R met ggplot2

Mooie lettertypes downloaden van Google Fonts

# Register the Lato typeface from Google Fonts under the family name "Lato"
# (regular weight 400, bold weight 700) so the plots can use it.
# library() is used instead of require(): a missing package then stops the
# script right here instead of returning FALSE and failing on the next call.
library(showtext)
font_add_google(
  name = "Lato",
  family = "Lato",
  regular.wt = 400,
  bold.wt = 700
)
# Let showtext render text automatically on graphics devices.
showtext_auto()

Opmaak via thema’s met ggthemes

# Attach the plotting packages. library() is used instead of require() so that
# a missing package raises an error immediately rather than returning FALSE.
library(ggplot2)
# NOTE(review): no ggthemes theme is used in the code visible here
# (theme_gray() below comes from ggplot2); the package is kept attached
# in case other code relies on it.
library(ggthemes)
# Make the gray theme, with the Lato family at base size 10, the default theme
# for every plot created afterwards.
theme_set(theme_gray(base_family = "Lato", base_size = 10))

Thema personaliseren

# Tweak the current default theme: centred bold title in "#a02521", centred
# bold axis titles, and size-9 "#7a7a7a" Lato text on the y axis.
theme_update(
  plot.title = element_text(hjust = 0.5, face = "bold", color = "#a02521"),
  axis.title = element_text(hjust = 0.5, face = "bold"),
  axis.text.y = element_text(family = "Lato", size = 9, color = "#7a7a7a")
)

Aanpassen labels van factor levels

# Read the data set; the path is relative to the current working directory.
data.df <- read.csv("train.csv")
# plyr provides mapvalues(). library() is used instead of require() so that a
# missing plyr fails here rather than at the mapvalues() call below.
library(plyr)
# Turn the zoning codes into a factor and replace each code with a readable
# label. `from` and `to` are matched by position.
data.df$MSZoning <- as.factor(data.df$MSZoning)
data.df$MSZoning <- mapvalues(
  data.df$MSZoning,
  from = c("C (all)", "FV", "RH", "RL", "RM"),
  to = c(
    "Commercial",
    "Floating Village",
    "High Density",
    "Low Density",
    "Medium Density"
  )
)

Voorbeeld: visualiseren uitschieters in boxplot

# Boxplot of SalePrice for each MSZoning level. Box width varies with group
# size (varwidth = TRUE); outliers use shape 1 in their own colour so they
# stand out from the boxes.
p <- ggplot(data.df, aes(MSZoning, SalePrice)) +
  geom_boxplot(
    varwidth = TRUE,
    fill = "white",
    colour = "#1779ba",
    outlier.colour = "#a02521",
    outlier.shape = 1
  ) +
  # Title only; both axis titles are set to empty strings.
  labs(title = "Verkoopprijs per type bestemmingsplan", x = "", y = "") +
  # Hide the x-axis text and the tick marks on both axes.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank()
  )
# Modify the stored plot afterwards: flip the coordinates so the boxes run
# horizontally.
p <- p + coord_flip()
print(p)

Voorbeeld: spreidingsdiagram met lineaire regressielijn

Spreidingsdiagram

# Scatter plot of SalePrice against OverallQual, drawn with shape 1 (circles).
# Axis text and tick marks are hidden on both axes; only the axis titles stay.
p <- ggplot(data.df, aes(OverallQual, SalePrice)) +
  geom_point(shape = 1) +
  labs(
    title = "Verkoopprijs versus kwaliteit",
    x = "Kwaliteit",
    y = "Verkoopprijs"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank()
  )
print(p)

Regressielijn toevoegen

# Add a smoothing layer on top of the existing plot object and redraw it.
p <- p +
  geom_smooth(
    method = lm, # linear regression line
    se = FALSE   # no shaded confidence band
  )
print(p)

Dat kan ook in één keer

# Scatter plot and linear regression line built in a single expression:
# SalePrice against GrLivArea, points drawn with shape 1 (circles), axis text
# and tick marks hidden on both axes.
# NOTE(review): the title says square metres ("vierkante meters"); the unit of
# GrLivArea is not visible in this file -- confirm it against the data source.
p <- ggplot(data.df, aes(GrLivArea, SalePrice)) +
  geom_point(shape = 1) +
  labs(
    title = "Verkoopprijs versus vierkante meters leefoppervlakte",
    x = "Leefoppervlakte",
    y = "Verkoopprijs"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  geom_smooth(
    method = lm, # linear regression line
    se = FALSE   # no shaded confidence band
  )
print(p)