Notes:
Notes:
library(ggplot2)
library(gridExtra)
# Read the tab-separated file into the data frame `pf`.
# NOTE(review): setwd() points at a hard-coded, machine-specific directory;
# the relative file name below is resolved against it.
setwd("C:/Users/Jeff/udacity/Data_Analysis_with_R")
pf <- read.delim("pseudo_facebook.tsv")
Notes:
library(ggplot2)
# Histogram of dob_day: one bin per unit, with an x-axis tick at every
# integer from 1 to 31.
ggplot(pf, aes(dob_day)) +
  geom_histogram(binwidth = 1) +
  scale_x_continuous(breaks = 1:31)
Response:
Notes:
Notes:
Response:
Response:
Notes:
Notes:
# Same dob_day histogram, split into one panel per dob_month value and
# laid out three panels per row.
ggplot(pf, aes(dob_day)) +
  geom_histogram(binwidth = 1) +
  scale_x_continuous(breaks = 1:31) +
  facet_wrap(~dob_month, ncol = 3)
Response:
Notes:
Notes:
#### Which case do you think applies to Moira’s outlier?
Response:
Notes:
# Histogram of friend_count in 30 bins, with the x axis limited to 0-1000.
ggplot(pf, aes(friend_count)) +
  geom_histogram(bins = 30) +
  scale_x_continuous(limits = c(0, 1000))
Response:
Notes:
Notes:
Notes:
# Exercise: what code would you add to facet the histogram by gender?
# The facet_wrap() layer at the end of the plot below is the answer.
qplot(
  x = friend_count,
  data = pf,
  binwidth = 10
) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, by = 50)) +
  facet_wrap(~gender)
Notes:
# Same faceted histogram, restricted to rows whose gender is not NA.
qplot(
  x = friend_count,
  data = subset(pf, !is.na(gender)),
  binwidth = 10
) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, by = 50)) +
  facet_wrap(~gender)
## Warning: Removed 2949 rows containing non-finite values (stat_bin).
Notes:
# Number of rows per gender level.
table(pf$gender)
## 
## female   male 
##  40254  58574
# summary() of friend_count computed separately for each gender level.
by(data = pf$friend_count, INDICES = pf$gender, FUN = summary)
## pf$gender: female
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      37      96     242     244    4923 
## -------------------------------------------------------- 
## pf$gender: male
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      27      74     165     182    4917
Response:
Response:
Response:
Notes:
# Histogram of tenure with a bin width of 30.
qplot(
  x = tenure,
  data = pf,
  binwidth = 30,
  color = I("black"),
  fill = I("#099DD9")
)
## Warning: Removed 2 rows containing non-finite values (stat_bin).
# Same variable divided by 365, binned in steps of 0.25, with the axis
# limited to 0-7 and ticks at 1 through 7.
qplot(
  x = tenure / 365,
  data = pf,
  binwidth = 0.25,
  color = I("black"),
  fill = I("#F79240")
) +
  scale_x_continuous(breaks = 1:7, limits = c(0, 7))
## Warning: Removed 26 rows containing non-finite values (stat_bin).
Notes:
# tenure / 365 histogram again, this time with explicit axis labels.
qplot(
  x = tenure / 365,
  data = pf,
  binwidth = 0.25,
  xlab = "Number of years using Facebook",
  ylab = "Number of users in sample",
  color = I("black"),
  fill = I("#F79240")
) +
  scale_x_continuous(breaks = 1:7, limits = c(0, 7))
## Warning: Removed 26 rows containing non-finite values (stat_bin).
Notes:
# Histogram of age with one bin per unit; the only x-axis ticks are the
# smallest and largest age in the data.
qplot(
  x = age,
  data = pf,
  binwidth = 1,
  xlab = "Age",
  ylab = "Frequency",
  color = I("black"),
  fill = I("#5760AB")
) +
  scale_x_continuous(breaks = range(pf$age))
Response:
Notes:
Notes:
Notes:
Notes:
library(gridExtra)
# Three friend_count histograms that differ only in title and x scale:
# untransformed, log10 and square root.
p1 <- qplot(
  x = friend_count,
  data = pf,
  main = "Friend Count Histogram",
  xlab = "Friend Count",
  ylab = "Frequency",
  color = I("black"),
  fill = I("#5760AB")
)
p2 <- qplot(
  x = friend_count,
  data = pf,
  main = "Friend Count Histogram (log10)",
  xlab = "Friend Count",
  ylab = "Frequency",
  color = I("black"),
  fill = I("#5760AB")
) +
  scale_x_log10()
p3 <- qplot(
  x = friend_count,
  data = pf,
  main = "Friend Count Histogram (sqrt)",
  xlab = "Friend Count",
  ylab = "Frequency",
  color = I("black"),
  fill = I("#5760AB")
) +
  scale_x_sqrt()
# Stack the three plots in a single column.
grid.arrange(p1, p2, p3, ncol = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1962 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Frequency polygon of friend_count coloured by gender (NA gender removed);
# y is each bin's count divided by the sum of the counts.
qplot(
  x = friend_count,
  y = ..count.. / sum(..count..),
  data = subset(pf, !is.na(gender)),
  geom = "freqpoly",
  binwidth = 10,
  color = gender,
  ylab = "Proportion of Users with Friend Count"
) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, by = 50))
## Warning: Removed 2949 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_path).
Notes:
# Frequency polygon of www_likes on a log10 x axis, one line per gender
# (rows with NA gender removed).
qplot(
  x = www_likes,
  data = subset(pf, !is.na(gender)),
  geom = "freqpoly",
  color = gender,
  xlab = "Likes on the WWW",
  ylab = "Frequency"
) +
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 60935 rows containing non-finite values (stat_bin).
# Total www_likes within each gender level.
by(pf$www_likes, pf$gender, sum)
## pf$gender: female
## [1] 3507665
## -------------------------------------------------------- 
## pf$gender: male
## [1] 1430175
Notes:
# Box plot of friend_count for each gender (rows with NA gender removed).
qplot(
  x = gender,
  y = friend_count,
  data = subset(pf, !is.na(gender)),
  geom = "boxplot"
)
# Same box plot with the visible y range set to 0-1000 through
# coord_cartesian().
qplot(
  x = gender,
  y = friend_count,
  data = subset(pf, !is.na(gender)),
  geom = "boxplot"
) +
  coord_cartesian(ylim = c(0, 1000))
Notes:
Response:
#### Write about some ways that you can verify your answer.
Response:
# Box plot of friendships_initiated for each gender (NA gender removed),
# with the visible y range set to 0-250.
qplot(
  x = gender,
  y = friendships_initiated,
  data = subset(pf, !is.na(gender)),
  geom = "boxplot"
) +
  coord_cartesian(ylim = c(0, 250))
# summary() of friendships_initiated computed separately for each gender.
by(data = pf$friendships_initiated, INDICES = pf$gender, FUN = summary)
## pf$gender: female
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    19.0    49.0   113.9   124.8  3654.0 
## -------------------------------------------------------- 
## pf$gender: male
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    15.0    44.0   103.1   111.0  4144.0
Response:
Notes:
# Derive a two-level factor: "1" when mobile_likes is greater than 0,
# otherwise "0". The column is created directly by this assignment, so no
# prior NA initialisation is needed.
pf$mobile_check_in <- factor(ifelse(pf$mobile_likes > 0, 1, 0))
summary(pf$mobile_check_in)
##     0     1 
## 35056 63947
levels(pf$mobile_check_in)
## [1] "0" "1"
# Percent that check in with mobile: the mean of a logical vector is the
# share of TRUE values.
print(mean(pf$mobile_check_in == "1") * 100)
## [1] 64.59097
Response:
Reflection:
Click KnitHTML to see all of your hard work and to have an html page of this lesson, your answers, and your notes!