Great Deal! Get Instant $10 FREE in Account on First Order + 10% Cashback on Every Order. Order Now

I attach the zip file, including all the necessary materials. Please do this based on the guideline file. Thank you.

1 answer below »
I attach the zip file, including all the necessary materials. Please do this based on the guideline file.
Thank you.
Answered Same Day Aug 20, 2021

Solution

Manish Kumar answered on Aug 21 2021
144 Votes
## Importing packages ----
# The library() calls in this section had been split mid-word by text
# extraction ("li" / "ary(...)", "lu" / "idate)"); they are restored here so
# the script parses. Statement order is intentionally unchanged.
library(tidyverse)  # utility functions

# Import dataset: tab-separated text file whose first row holds column names.
HW2 <- read.table(
  "../input/greynode1/assignment 1b.txt",
  sep = "\t",
  header = TRUE
)
# NOTE(review): attach() puts the columns of HW2 on the search path. It is
# kept because parts of the script that are not visible here may rely on bare
# column names; prefer explicit HW2$col references and drop this call once
# that is confirmed.
attach(HW2)
HW2  # print the raw data for a quick visual check

# Load packages
library(lubridate)  # date parsing, e.g. mdy()
library(dplyr)      # data-manipulation verbs and the %>% pipe
library(assertive)  # NOTE(review): no use visible in this part of the script
# Part E ----
# Drop every customer that has at least one row whose months_since_survey does
# not increase by exactly 1 relative to that customer's previous row.

# library() instead of require(): fail loudly if dplyr is missing rather than
# returning FALSE and erroring later at the first pipe.
library(dplyr)

# Master table for Part E and a two-column lookup used for the check
PartE_master <- HW2
PartE_lookup <- HW2 %>% select(cust_id, months_since_survey)

# Difference between consecutive rows within each customer. The lag default
# (first value minus 1) makes the first row of every customer evaluate to 1,
# so a first row is never flagged on its own.
PartE_month_diff <- PartE_lookup %>%
  group_by(cust_id) %>%
  mutate(
    month_diff = months_since_survey -
      lag(months_since_survey, default = months_since_survey[1] - 1)
  ) %>%
  ungroup()  # do not leak the grouping into later steps
PartE_month_diff

# Keep the rows whose month difference is anything other than 1.
# NOTE(review): filter() drops rows where the condition is NA, so a missing
# months_since_survey is not flagged here - confirm that is intended.
PartE_invalid <- PartE_month_diff %>%
  filter(month_diff != 1)

# Customers with at least one invalid row
PartE_cust_id_invalid <- unique(PartE_invalid$cust_id)

# Remove all rows belonging to those customers
PartE_final <- subset(PartE_master, !(cust_id %in% PartE_cust_id_invalid))

# How many customers have this data issue?
length(PartE_cust_id_invalid)
# How many rows were removed?
nrow(PartE_master) - nrow(PartE_final)
# How many unique customers are left in the dataset?
length(unique(PartE_final$cust_id))
# How many rows of data are left?
nrow(PartE_final)
# Part F ----
# Flag customers that do not have exactly one non-missing survey_date.

# library() instead of require(): fail loudly if lubridate is missing.
library(lubridate)

# Data frame with cust_id and survey_date only
column_F <- PartE_final %>% select(cust_id, survey_date)
column_F

# Parse survey_date as month-day-year into Date. The outer parentheses print
# the assigned value so the parsed dates can be inspected.
(column_F$survey_date <- mdy(column_F$survey_date))
class(column_F$survey_date)

# Count the non-missing survey_date values for each cust_id
Part_F_non <- column_F %>%
  group_by(cust_id) %>%
  summarise(non_na_count = sum(!is.na(survey_date)))
Part_F_non

# Invalid customers: anything other than exactly one non-missing survey_date
PartF_invalid <- Part_F_non %>%
  filter(non_na_count != 1)

# Number of customers having invalid values
PartF_cust_id_invalid <- unique(PartF_invalid$cust_id)
length(PartF_cust_id_invalid)
#final dataset for...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here