# Data-visualization-exploration-DEMO-04oct17.R
# /home/baileraj/sta404
#
# Demo: a first look at the gapminder data, then a sequence of ggplot2
# displays asking "has life expectancy improved between 1952 and 2007?".
# Intended to be stepped through interactively; each top-level ggplot
# expression is displayed by auto-printing (wrap in print() if the file is
# source()'d without echo).

# load packages that we will use for initial exploration
library("tidyverse")  # includes ggplot2, dplyr and other tools
library("gapminder")  # provides the `gapminder` data set used below;
                      # install the package first if it is not available

# exploring a data set ========================================================

gapminder           # typing name displays contents of object
names(gapminder)    # list names of variables
str(gapminder)      # characteristics and structure of object.
                    # For data frames: variable names, type, ...
glimpse(gapminder)  # alternative to "str" for tibble

# open a tab displaying the gapminder data set
# (View() needs a data viewer, so only call it in an interactive session)
if (interactive()) {
  View(gapminder)
}

# summary of data set =========================================================

summary(gapminder)  # what is produced?

# has life expectancy improved between 1952 and 2007? =========================

# scatterplot
ggplot() +
  geom_point(data = gapminder, aes(x = year, y = lifeExp))

# jittering to deal with overplotting
ggplot() +
  geom_jitter(data = gapminder, aes(x = year, y = lifeExp))

# distribution changes: one boxplot per year (year treated as a factor)
ggplot() +
  geom_boxplot(data = gapminder, aes(x = as.factor(year), y = lifeExp))

# combining layers: data and aesthetics set once in ggplot() are shared by
# both geoms; alpha makes the jittered points semi-transparent
ggplot(data = gapminder, aes(x = as.factor(year), y = lifeExp)) +
  geom_boxplot() +
  geom_jitter(alpha = .2)

# What is being missed in this display?

# line graph - what happened here? (no grouping variable is supplied)
ggplot() +
  geom_line(data = gapminder, aes(x = year, y = lifeExp))

# one line per country
ggplot() +
  geom_line(data = gapminder, aes(x = year, y = lifeExp, group = country))

# color mapped to country as well - what happens here?
ggplot() +
  geom_line(
    data = gapminder,
    aes(x = year, y = lifeExp, group = country, color = country)
  )

# same plot with the color legend suppressed
ggplot() +
  geom_line(
    data = gapminder,
    aes(x = year, y = lifeExp, group = country, color = country)
  ) +
  guides(color = "none")

# highlighting countries ======================================================

# all countries in light grey; color is a constant here (not mapped inside
# aes()), so there is no color legend to suppress
ggplot() +
  geom_line(
    data = gapminder,
    aes(x = year, y = lifeExp, group = country),
    color = "lightgrey"
  )

# create dataframe with Rwanda and Japan
# What is a data frame?
# What does it mean to pipe commands?
# Reading " %>% " as " and then "
rwanda_japan <- gapminder %>%
  filter(country %in% c("Rwanda", "Japan"))

rwanda_japan

# grey background lines for every country, with the two selected countries
# drawn on top as slightly thicker colored lines
ggplot() +
  geom_line(
    data = gapminder,
    aes(x = year, y = lifeExp, group = country),
    color = "lightgrey"
  ) +
  geom_line(
    data = rwanda_japan,
    aes(x = year, y = lifeExp, color = country),
    lwd = 1.1
  ) +
  theme_minimal()

# cleaning up annotations: drop the legend and label the two highlighted
# lines directly on the plot; add axis labels and a data-source caption
ggplot() +
  geom_line(
    data = gapminder,
    aes(x = year, y = lifeExp, group = country),
    color = "lightgrey"
  ) +
  geom_line(
    data = rwanda_japan,
    aes(x = year, y = lifeExp, color = country),
    lwd = 1.1
  ) +
  guides(color = "none") +
  labs(
    x = "Year",
    y = "Life Expectancy",
    caption = "Source: Jennifer Bryan (2015). gapminder: Data from Gapminder. R package version 0.2.0."
  ) +
  annotate("text", x = 1985, y = 80, label = "Japan") +
  annotate("text", x = 1985, y = 30, label = "Rwanda") +
  theme_minimal()