1. setting things up
1.1 set global chunk options and load packages
# load the packages used in this document (one per line)
library(dplyr)
library(reshape2)
library(ggplot2)
library(knitr)
# also set the ggplot2 theme to something nicer than the default
theme_set(theme_minimal(base_size = 20))
1.2 read in dataset and inspect
# read in a dataset
# print the first few rows
1.3 clean up dataset as desired
# do some cleaning operations here -- e.g.
# - change column names;
# - recode a column;
# - reshape the data to long-format with melt();
# - convert factor columns to character or vice versa;
# - ...
2. summarize and visualize columns of interest
2.1 display some tables and summaries
# make a table by calling `table()` on a column of interest
# extend the `table()` call above to produce a formatted version:
# - wrap it in `as.data.frame()`, and then wrap that in `kable()`
# - then pass `kable()` a `col.names=` vector with the column names to display
# - you can also specify `digits=` in `kable()` to round numbers in the output
# example of combining `aggregate()` with `kable()`:
# mean mpg for each gear/cylinder combination that occurs in mtcars
cars_mpg_tab <- aggregate(
  mpg ~ gear + cyl,
  data = mtcars,
  FUN = mean
)
# column headers to show in the rendered table, in column order
my_colnames <- c(
  "number of gears",
  "number of cylinders",
  "gas mileage (mpg)"
)
# render the table (note kable() comes from the knitr package):
# first column centered, the other two left-aligned, values rounded to 1 digit
kable(
  cars_mpg_tab,
  col.names = my_colnames,
  align = c("c", "l", "l"),
  digits = 1
)
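| number of gears | number of cylinders | gas mileage (mpg) |
|:---------------:|:--------------------|:------------------|
|        3        | 4                   | 21.5              |
|        4        | 4                   | 26.9              |
|        5        | 4                   | 28.2              |
|        3        | 6                   | 19.8              |
|        4        | 6                   | 19.8              |
|        5        | 6                   | 19.7              |
|        3        | 8                   | 15.1              |
|        5        | 8                   | 15.4              |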
2.2 display some plots
# distribution of gas mileage (mpg) by number of cylinders
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot() +
  labs(
    title = "plot title",
    subtitle = "subtitle",
    caption = "a caption with some info or a statement"
  ) +
  # italicize the plot title
  theme(plot.title = element_text(face = "italic"))
# distribution of gas mileage (mpg) by number of gears
ggplot(data = mtcars, mapping = aes(x = factor(gear), y = mpg)) +
  geom_boxplot() +
  labs(x = "x axis label", y = "y axis label") +
  # color the x-axis tick labels blue
  theme(axis.text.x = element_text(color = "blue"))


# base-graphics versions of the two boxplots drawn with ggplot2 above
# (not displayed because of chunk option eval=FALSE)
boxplot(mpg ~ factor(cyl), data = mtcars)
boxplot(mpg ~ factor(gear), data = mtcars)
3. summarize the findings