DROP TABLE IF EXISTS bellabeat.temp; -- drop temp table from previous queries
CREATE TABLE IF NOT EXISTS bellabeat.april_steps_minute(
Id BIGINT, Time DATETIME, Steps INT, PRIMARY KEY(Id, Time)
);
-- create "temporary" table to hold CSV data. Can be used later if INSERT query fails
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.April_minuteStepsNarrow_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.april_steps_minute (Id, Time, Steps)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_steps_minute.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp; -- drop temp table from previous queries
CREATE TABLE IF NOT EXISTS bellabeat.march_steps_minute(
Id BIGINT, Time DATETIME, Steps INT, PRIMARY KEY(Id, Time)
);
-- create "temporary" table to hold CSV data. Can be used later if INSERT query fails
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.March_minuteStepsNarrow_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.march_steps_minute (Id, Time, Steps)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_steps_minute.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.march_hourly_calories(Id BIGINT, Time DATETIME, Calories INT, PRIMARY KEY(Id, Time));
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.March_hourlyCalories_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.march_hourly_calories (Id, Time, Calories)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_hourly_calories.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_hourly_calories(Id BIGINT, Time DATETIME, Calories INT, PRIMARY KEY(Id, Time));
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.April_hourlyCalories_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.april_hourly_calories (Id, Time, Calories)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_hourly_calories.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.march_hourly_heartrate(Id BIGINT, Time DATETIME, Value INT, PRIMARY KEY(Id, Time));
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.March_hourlyHeartrate_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.march_hourly_heartrate (Id, Time, Value)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_hourly_heartrate.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_hourly_heartrate(Id BIGINT, Time DATETIME, Value INT, PRIMARY KEY(Id, Time));
CREATE TABLE bellabeat.temp (
Id BIGINT,
Time TEXT,
Value INT
);
LOAD DATA INFILE '.April_hourlyHeartrate_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.april_hourly_heartrate (Id, Time, Value)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_hourly_heartrate.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
DROP TABLE IF EXISTS bellabeat.march_daily_activity;
CREATE TABLE IF NOT EXISTS bellabeat.march_daily_activity
(Id BIGINT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
Calories INT, Date DATE,
PRIMARY KEY(Id, Date));
CREATE TABLE bellabeat.temp (
Id BIGINT, Date TEXT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
Calories INT
);
LOAD DATA INFILE '.March_dailyActivity_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newDate DATE;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newDate = STR_TO_DATE(Date, '%c/%e/%Y');
ALTER TABLE bellabeat.temp DROP COLUMN Date;
ALTER TABLE bellabeat.temp CHANGE newDate Date DATE;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.march_daily_activity (Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date)
SELECT Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_daily_activity.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_daily_activity
(Id BIGINT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
Calories INT, Date DATE,
PRIMARY KEY(Id, Date));
CREATE TABLE bellabeat.temp (
Id BIGINT, Date TEXT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
Calories INT
);
LOAD DATA INFILE '.April_dailyActivity_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
ALTER TABLE bellabeat.temp ADD newDate DATE;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newDate = STR_TO_DATE(Date, '%c/%e/%Y');
ALTER TABLE bellabeat.temp DROP COLUMN Date;
ALTER TABLE bellabeat.temp CHANGE newDate Date DATE;
SET SQL_SAFE_UPDATES = 1;
INSERT INTO bellabeat.april_daily_activity (Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date)
SELECT Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_daily_activity.Id = th.Id;
DROP TABLE IF EXISTS bellabeat.users;
CREATE TABLE IF NOT EXISTS bellabeat.users(id BIGINT PRIMARY KEY, activity VARCHAR(5),
weekday_activity DOUBLE, weekend_activity DOUBLE, step INT, step_rank VARCHAR(4),
intensity DOUBLE, int_rank VARCHAR(4), daily_use DOUBLE, daily_rank VARCHAR(4),
percent_days DOUBLE, day_rank VARCHAR(4), mets DOUBLE, mets_rank VARCHAR(4));
#LOAD IDS
INSERT INTO bellabeat.users(Id)
(SELECT DISTINCT Id FROM bellabeat.steps);
#LOAD STEPS
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id, AVG(step) AS avg_step,
NTILE(3) OVER (ORDER BY AVG(step)) AS quartile
FROM (
SELECT Id, DATE(Time) AS day, SUM(Steps) AS step
FROM bellabeat.steps
GROUP BY Id, DATE(Time)
) AS daily_avg
GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET
u.step = r.avg_step,
u.step_rank = CASE
WHEN r.quartile = 1 THEN 'low'
WHEN r.quartile = 2 THEN 'avg'
WHEN r.quartile = 3 THEN 'high'
END;
SET SQL_SAFE_UPDATES = 1;
#LOAD METS
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id,
AVG(met) AS avg_mets,
NTILE(3) OVER (ORDER BY AVG(met)) AS quartile
FROM (
SELECT Id, DATE(Time) AS day, AVG(Mets) AS met
FROM bellabeat.mets
GROUP BY Id, DATE(Time)
) AS daily_avg
GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET
u.mets = r.avg_mets,
u.mets_rank = CASE
WHEN r.quartile = 1 THEN 'low'
WHEN r.quartile = 2 THEN 'avg'
WHEN r.quartile = 3 THEN 'high'
END;
SET SQL_SAFE_UPDATES = 1;
#UPDATE INTENSITY
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id,
AVG(intent) AS avg_int,
NTILE(3) OVER (ORDER BY AVG(intent)) AS quartile
FROM (
SELECT Id, DATE(Time) AS day, AVG(Intensity) AS intent
FROM bellabeat.intensities
GROUP BY Id, DATE(Time)
) AS daily_avg
GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET
u.intensity = r.avg_int,
u.int_rank = CASE
WHEN r.quartile = 1 THEN 'low'
WHEN r.quartile = 2 THEN 'avg'
WHEN r.quartile = 3 THEN 'high'
END;
SET SQL_SAFE_UPDATES = 1;
#UPDATE INTENSITY
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id,
AVG(intent) AS avg_int,
NTILE(3) OVER (ORDER BY AVG(intent)) AS quartile
FROM (
SELECT Id, DATE(Time) AS day, AVG(Intensity) AS intent
FROM bellabeat.intensities
GROUP BY Id, DATE(Time)
) AS daily_avg
GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET
u.intensity = r.avg_int,
u.int_rank = CASE
WHEN r.quartile = 1 THEN 'low'
WHEN r.quartile = 2 THEN 'avg'
WHEN r.quartile = 3 THEN 'high'
END;
SET SQL_SAFE_UPDATES = 1;
#UPDATE ACTIVITY
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS(
WITH av AS (
SELECT
AVG(VeryActiveMinutes) AS avg_very,
AVG(FairlyActiveMinutes) AS avg_fair,
AVG(LightlyActiveMinutes) AS avg_light,
AVG(SedentaryActiveMinutes) AS avg_sedentary
FROM bellabeat.activity
),
daily_avg AS (
SELECT
Id,
Date AS day,
(VeryActiveMinutes + FairlyActiveMinutes + LightlyActiveMinutes +
SedentaryActiveMinutes) AS distance,
AVG(VeryActiveMinutes) AS very,
AVG(FairlyActiveMinutes) AS fair,
AVG(LightlyActiveMinutes) AS light,
AVG(SedentaryActiveMinutes) AS sedentary
FROM bellabeat.activity
GROUP BY Id, Date
),
diffs AS (
SELECT
da.Id,
AVG(da.distance) AS avg_minute,
AVG(da.very) AS avg_very,
AVG(da.fair) AS avg_fair,
AVG(da.light) AS avg_light,
AVG(da.sedentary) AS avg_sedentary,
-- Calculate the difference from the overall averages
AVG(da.very) - (SELECT avg_very FROM av) AS diff_very,
AVG(da.fair) - (SELECT avg_fair FROM av) AS diff_fair,
AVG(da.light) - (SELECT avg_light FROM av) AS diff_light,
AVG(da.sedentary) - (SELECT avg_sedentary FROM av) AS diff_sedentary
FROM daily_avg da
CROSS JOIN av
GROUP BY da.Id
)
SELECT
d.Id,
d.diff_very, d.diff_fair, d.diff_light, d.diff_sedentary,
CASE
WHEN d.diff_very > 0 THEN -- very or fairly active
CASE
WHEN d.diff_fair > d.diff_light OR
d.diff_very > d.diff_light THEN 'very'
ELSE "fair"
END
WHEN d.diff_fair > 0 THEN -- fairly or lightly active
CASE
WHEN d.diff_light > 0 THEN 'fair'
ELSE "light"
END
ELSE "light"
END AS ranking,
d.avg_very, d.avg_fair, d.avg_light, d.avg_sedentary
FROM diffs d
ORDER BY FIELD(ranking, "very", "fair", "light"), avg_very DESC
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET
u.activity = r.ranking;
SET SQL_SAFE_UPDATES = 1;
SELECT * FROM bellabeat.users ORDER BY FIELD(activity, "very", "fair", "light");
#UPDATE USAGE BY DAY OF WEEK
SET SQL_SAFE_UPDATES = 0;
WITH weeks AS(
WITH dow AS(
SELECT DISTINCT(Date),
CASE
WHEN WEEKDAY(Date) < 5 THEN "weekday"
ELSE "weekend"
END AS dow
FROM bellabeat.activity
)
SELECT Id,
AVG(
CASE
WHEN dow.dow = 'weekday'
THEN LightlyActiveMinutes + FairlyActiveMinutes + VeryActiveMinutes END
) AS weekday_activity,
AVG(
CASE
WHEN dow.dow = 'weekend'
THEN LightlyActiveMinutes + FairlyActiveMinutes + VeryActiveMinutes END
) AS weekend_activity
FROM bellabeat.activity
JOIN dow USING(Date)
GROUP BY Id
ORDER BY Id
)
UPDATE bellabeat.users AS u
JOIN weeks AS w USING(Id)
SET u.weekday_activity = w.weekday_activity, u.weekend_activity = w.weekend_activity;
SET SQL_SAFE_UPDATES = 1;
#UPDATE DAILY USAGE
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id, per_day,
NTILE(3) OVER (ORDER BY per_day) AS quartile
FROM (
SELECT Id, count(Date) / (SELECT count(distinct(Date))
FROM bellabeat.activity) AS per_day
FROM bellabeat.activity
GROUP BY Id
) AS daily_use
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r USING(Id)
SET
u.percent_days = r.per_day,
u.day_rank =
CASE
WHEN r.quartile = 1 THEN "low"
WHEN r.quartile = 2 THEN "avg"
WHEN r.quartile = 3 THEN "high"
END;
SET SQL_SAFE_UPDATES = 1;
#UPDATE USAGE BY DAY OF WEEK
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
SELECT Id, AVG(min) AS avg_min,
NTILE(3) OVER (ORDER BY AVG(min)) AS quartile
FROM (
SELECT Id, Date, VeryActiveMinutes + FairlyActiveMinutes +
LightlyActiveMinutes + SedentaryActiveMinutes AS min
FROM bellabeat.activity
GROUP BY Id, Date
) AS daily_avg
GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r USING(Id)
SET
u.daily_use = r.avg_min,
u.daily_rank = CASE
WHEN r.quartile = 1 THEN 'low'
WHEN r.quartile = 2 THEN 'avg'
WHEN r.quartile = 3 THEN 'high'
END;
SET SQL_SAFE_UPDATES = 1;
SELECT * FROM bellabeat.users ORDER BY FIELD(activity, "very", "fair", "light");
# Convert ID columns to lower case to match user_db
names(steps)[names(steps)=="Id"] <- "id"
names(heartrate)[names(heartrate)=="Id"] <- "id"
names(calories)[names(calories)=="Id"] <- "id"
# Convert the 'Time's to 'date's (removing the time component)
steps$date <- as.Date(steps$Time)
heartrate$date <- as.Date(heartrate$Time)
calories$date <- as.Date(calories$Time)
# Group by 'date' and 'id', then summarize for each day
daily_steps <- steps %>%
group_by(id, date) %>%
summarize(total_steps = sum(Steps, na.rm = TRUE))
daily_heartrate <- heartrate %>%
group_by(id, date) %>%
summarize(avg_rate = mean(Value, na.rm = TRUE)
, min_rate = min(Value, na.rm = TRUE),
.groups = 'drop')
daily_cals <- calories %>%
group_by(id, date) %>%
summarize(total_cals = sum(Calories, na.rm = TRUE))
# Join the dataframes
user_steps <- merge(user_db, daily_steps, by="id", all=TRUE)
user_heart <- merge(user_db, daily_heartrate, by="id", all=TRUE)
heart_steps <- merge(daily_heartrate, daily_steps, by=c("id", "date"))
heart_step_cal <- merge(heart_steps, daily_cals, by=c("id", "date"))
user_all <- merge(heart_step_cal, user_db, by="id")
# Convert activity levels to ordered factors
user_all <- user_all %>%
mutate(activity = factor(activity, levels = c("very", "fair", "light")))
# Convert columns named list for loops later
categories <- list("step_rank"="Steps",
"int_rank"="Intensity",
"daily_rank"="Daily Usage",
"day_rank"="Days Used",
"mets_rank"="METs",
"activity"="Activity Level")
# Convert all the other categories to ordered factors of high, avg, low
for (key in names(categories)) {
if (key != "activity"){
user_all <- user_all %>%
mutate(!!sym(key) := factor(!!sym(key), levels = c("high", "avg", "low")))
}
}
# Summarize average usages (daily and total) by activity levels
average_daily_usage <- user_all %>%
group_by(activity) %>%
summarize(average_daily_use = mean(daily_use, na.rm = TRUE)) %>%
ungroup()
average_usage <- user_all %>%
group_by(activity) %>%
summarize(average_usage = mean(percent_days, na.rm = TRUE)) %>%
ungroup()
# Capitalized labels:
cap_labels = c("Very", "Moderate", "Light")
cap_categories = c("High", "Average", "Low")
ggplot(user_all, aes(x=date, y=avg_rate, color=activity)) +
geom_smooth() + geom_point() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_labels) +
scale_x_date(date_breaks = "5 day", date_labels = "%b %d") +
labs(x="Days", y="Avg Heart Rate", title="Average Heart Rate by Day",
color="Activity Level") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
ggplot(user_all) +
geom_smooth() +
geom_point(position = position_jitter()) +
aes(x = date, y = total_cals, color = activity) +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_labels) +
scale_x_date(date_breaks = "5 day", date_labels = "%b %d") +
scale_y_continuous(labels = comma) +
labs(x = "Days", y = "Calories Burned", title = "Calories Burned Over Time",
color = "Activity Level") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
#Average Rates
ggplot(user_all, aes(x=activity, y=avg_rate, color=activity)) +
geom_boxplot() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels = cap_labels) +
scale_x_discrete(labels = cap_labels) +
labs(x="Activity Level", y="Heart Rate",
title="Heart Rates by Activity Levels",
color="Activity Levels") +
theme_minimal()
# Minimum Rates
ggplot(user_all, aes(x=activity, y=min_rate, color=activity)) +
geom_boxplot() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels = cap_labels) +
scale_x_discrete(labels = cap_labels) +
labs(x="Activity Level", y="Heart Rate",
title="Min Heart Rates by Activity Levels",
color="Activity Levels") +
theme_minimal()
ggplot(user_db, aes(x=activity, y=mets, color=activity)) +
geom_boxplot() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_labels) +
scale_x_discrete(labels = cap_labels) +
scale_y_continuous(labels=number_format(accuracy = 1)) +
labs(x="Activity Level", y="METs (x10)",
title="Average METs for Activity Levels",
color="Activity Levels") +
theme_minimal()
ggplot(user_db, aes(x=activity, y=intensity, color=activity)) +
geom_boxplot() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_labels) +
scale_x_discrete(labels = cap_labels) +
labs(x="Activity Level", y="Intensity",
title="Average Intensity for Activity Levels",
color="Activity Levels") +
theme_minimal()
ggplot(user_db, aes(x=activity, y=step, color=activity)) +
geom_boxplot() +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_labels) +
scale_x_discrete(labels = cap_labels) +
scale_y_continuous(labels=comma) +
labs(x="Activity Level", y="Steps",
title="Average Daily Steps for Activity Levels",
color="Activity Levels") +
theme_minimal()
# Daily Usage
ggplot(average_daily_usage, aes(x = activity, y = average_daily_use, fill=activity)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = brewer.pal(3, "Set1"), labels = cap_labels) +
scale_x_discrete(labels = cap_labels) +
scale_y_continuous(labels = comma) +
labs(x = "Activity Level", y = "Daily Minutes",
title = "Average Minutes/Day Usage by Activity Level",
fill="Activity Levels") +
theme_minimal()
# Days Used
ggplot(average_usage, aes(x = activity, y = average_usage, fill=activity)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = brewer.pal(3, "Set1"), labels = cap_labels) +
scale_x_discrete(labels = cap_labels) +
scale_y_continuous(labels = percent) +
labs(x = "Activity Level", y = "Percentage of Time Used",
title = "Percentage of Days Used by Activity Level",
fill="Activity Levels") +
theme_minimal()
user_long <- user_db %>%
pivot_longer(cols = c("weekday_activity", "weekend_activity"),
names_to = "Days", values_to = "Minutes")
user_long <- user_long %>%
group_by(activity) %>%
mutate(Proportion = Minutes / sum(Minutes))
ggplot(user_long, aes(x = activity, y = Minutes, fill = Days)) +
geom_bar(stat = "identity", position = "dodge") + # Side-by-side bars
scale_fill_manual(values = c("#F0E442", "#0072B2"),
labels = c("Weekday", "Weekend")) +
scale_x_discrete(labels=cap_labels) +
labs(x = "Activity Level", y = "Minutes", fill = "Days",
title = "Weekday vs Weekend Activity by Level") +
theme_minimal()
# Heart Rate and Calorie Burns Over Time by Various Categories
for (key in names(categories)){
hr <- ggplot(user_all) + geom_smooth() + geom_point() +
aes(x=date, y=avg_rate, color=!!sym(key)) +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_categories) +
scale_x_date(date_breaks = "5 day", date_labels = "%b %d") +
labs(x="Days", y="Avg Heart Rate",
title=glue("Average Heart Rate Daily by {categories[[key]]}"),
color=categories[[key]]) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
print(hr)
}
for (key in names(categories)){
cal <- ggplot(user_all) +
geom_smooth() +
geom_point(position = position_jitter()) +
aes(x = date, y = total_cals, color = !!sym(key)) +
scale_color_manual(values = brewer.pal(3, "Set1"), labels=cap_categories) +
scale_x_date(date_breaks = "5 day", date_labels = "%b %d") +
scale_y_continuous(labels = comma) +
labs(x = "Days", y = "Calories Burned",
title = glue("Calories Burned Over Time by {categories[[key]]}"),
color = categories[[key]]) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
print(cal)
}
This concludes all of the data processing and visualization creation code used to draw our conclusions from this set of FitBit users.