MySQL - Full Code (Phase III)

Importing CSVs

DROP TABLE IF EXISTS bellabeat.temp; -- drop temp table from previous queries
CREATE TABLE IF NOT EXISTS bellabeat.april_steps_minute(
    Id BIGINT, Time DATETIME, Steps INT, PRIMARY KEY(Id, Time)
    );

-- create "temporary" table to hold CSV data. Can be used later if INSERT query fails
CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.April_minuteStepsNarrow_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.april_steps_minute (Id, Time, Steps)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_steps_minute.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp; -- drop temp table from previous queries
CREATE TABLE IF NOT EXISTS bellabeat.march_steps_minute(
    Id BIGINT, Time DATETIME, Steps INT, PRIMARY KEY(Id, Time)
    );

-- create "temporary" table to hold CSV data. Can be used later if INSERT query fails
CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.March_minuteStepsNarrow_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.march_steps_minute (Id, Time, Steps)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_steps_minute.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.march_hourly_calories(Id BIGINT, Time DATETIME, Calories INT, PRIMARY KEY(Id, Time));

CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.March_hourlyCalories_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.march_hourly_calories (Id, Time, Calories)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_hourly_calories.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_hourly_calories(Id BIGINT, Time DATETIME, Calories INT, PRIMARY KEY(Id, Time));

CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.April_hourlyCalories_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.april_hourly_calories (Id, Time, Calories)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_hourly_calories.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.march_hourly_heartrate(Id BIGINT, Time DATETIME, Value INT, PRIMARY KEY(Id, Time));

CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.March_hourlyHeartrate_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.march_hourly_heartrate (Id, Time, Value)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_hourly_heartrate.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_hourly_heartrate(Id BIGINT, Time DATETIME, Value INT, PRIMARY KEY(Id, Time));

CREATE TABLE bellabeat.temp (
    Id BIGINT,
    Time TEXT,
    Value INT
);

LOAD DATA INFILE '.April_hourlyHeartrate_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newTime DATETIME;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newTime = STR_TO_DATE(Time, '%c/%e/%Y %l:%i:%s %p');
ALTER TABLE bellabeat.temp DROP COLUMN Time;
ALTER TABLE bellabeat.temp CHANGE newTime Time DATETIME;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.april_hourly_heartrate (Id, Time, Value)
SELECT th.Id, th.Time, th.Value
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_hourly_heartrate.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
DROP TABLE IF EXISTS bellabeat.march_daily_activity;
CREATE TABLE IF NOT EXISTS bellabeat.march_daily_activity
    (Id BIGINT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
    LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
    LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
    FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
    Calories INT, Date DATE,
    PRIMARY KEY(Id, Date));

CREATE TABLE bellabeat.temp (
    Id BIGINT, Date TEXT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
    LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
    LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
    FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
    Calories INT
);

LOAD DATA INFILE '.March_dailyActivity_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newDate DATE;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newDate = STR_TO_DATE(Date, '%c/%e/%Y');
ALTER TABLE bellabeat.temp DROP COLUMN Date;
ALTER TABLE bellabeat.temp CHANGE newDate Date DATE;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.march_daily_activity (Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
    VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
    FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date)
SELECT Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
    VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
    FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE march_daily_activity.Id = th.Id;

DROP TABLE IF EXISTS bellabeat.temp;
CREATE TABLE IF NOT EXISTS bellabeat.april_daily_activity
    (Id BIGINT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
    LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
    LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
    FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
    Calories INT, Date DATE,
    PRIMARY KEY(Id, Date));

CREATE TABLE bellabeat.temp (
    Id BIGINT, Date TEXT, TotalSteps INT, TotalDistance DOUBLE, TrackerDistance DOUBLE,
    LoggedActivityDistance DOUBLE, VeryActiveDistance DOUBLE, ModeratelyActiveDistance DOUBLE,
    LightActiveDistance DOUBLE, SedentaryActiveDistance DOUBLE, VeryActiveMinutes INT,
    FairlyActiveMinutes INT, LightlyActiveMinutes INT, SedentaryActiveMinutes INT,
    Calories INT
);

LOAD DATA INFILE '.April_dailyActivity_merged.csv'
INTO TABLE bellabeat.temp
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

ALTER TABLE bellabeat.temp ADD newDate DATE;
SET SQL_SAFE_UPDATES = 0;
UPDATE bellabeat.temp SET newDate = STR_TO_DATE(Date, '%c/%e/%Y');
ALTER TABLE bellabeat.temp DROP COLUMN Date;
ALTER TABLE bellabeat.temp CHANGE newDate Date DATE;
SET SQL_SAFE_UPDATES = 1;

INSERT INTO bellabeat.april_daily_activity (Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
    VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
    FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date)
SELECT Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivityDistance,
    VeryActiveDistance, ModeratelyActiveDistance, LightActiveDistance, SedentaryActiveDistance, VeryActiveMinutes,
    FairlyActiveMinutes, LightlyActiveMinutes, SedentaryActiveMinutes, Calories, Date
FROM bellabeat.temp AS th
ON DUPLICATE KEY UPDATE april_daily_activity.Id = th.Id;

Building users table:

DROP TABLE IF EXISTS bellabeat.users;
CREATE TABLE IF NOT EXISTS bellabeat.users(id BIGINT PRIMARY KEY, activity VARCHAR(5),
    weekday_activity DOUBLE, weekend_activity DOUBLE, step INT, step_rank VARCHAR(4),
    intensity DOUBLE, int_rank VARCHAR(4), daily_use DOUBLE, daily_rank VARCHAR(4),
    percent_days DOUBLE, day_rank VARCHAR(4), mets DOUBLE, mets_rank VARCHAR(4));

#LOAD IDS
INSERT INTO bellabeat.users(Id)
(SELECT DISTINCT Id FROM bellabeat.steps);

#LOAD STEPS
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
    SELECT Id, AVG(step) AS avg_step,
           NTILE(3) OVER (ORDER BY AVG(step)) AS quartile
    FROM (
        SELECT Id, DATE(Time) AS day, SUM(Steps) AS step
        FROM bellabeat.steps
        GROUP BY Id, DATE(Time)
    ) AS daily_avg
    GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET 
    u.step = r.avg_step,
    u.step_rank = CASE
                    WHEN r.quartile = 1 THEN 'low'
                    WHEN r.quartile = 2 THEN 'avg'
                    WHEN r.quartile = 3 THEN 'high'
                END;
SET SQL_SAFE_UPDATES = 1;

#LOAD METS
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
    SELECT Id, 
           AVG(met) AS avg_mets, 
           NTILE(3) OVER (ORDER BY AVG(met)) AS quartile
    FROM (
        SELECT Id, DATE(Time) AS day, AVG(Mets) AS met
        FROM bellabeat.mets
        GROUP BY Id, DATE(Time)
    ) AS daily_avg
    GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET 
    u.mets = r.avg_mets,
    u.mets_rank = CASE
                    WHEN r.quartile = 1 THEN 'low'
                    WHEN r.quartile = 2 THEN 'avg'
                    WHEN r.quartile = 3 THEN 'high'
                END;
SET SQL_SAFE_UPDATES = 1;


#UPDATE INTENSITY
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
    SELECT Id, 
               AVG(intent) AS avg_int, 
               NTILE(3) OVER (ORDER BY AVG(intent)) AS quartile
        FROM (
            SELECT Id, DATE(Time) AS day, AVG(Intensity) AS intent
            FROM bellabeat.intensities
            GROUP BY Id, DATE(Time)
        ) AS daily_avg
        GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET 
    u.intensity = r.avg_int,
    u.int_rank = CASE
                    WHEN r.quartile = 1 THEN 'low'
                    WHEN r.quartile = 2 THEN 'avg'
                    WHEN r.quartile = 3 THEN 'high'
                END;
SET SQL_SAFE_UPDATES = 1;


#UPDATE INTENSITY
SET SQL_SAFE_UPDATES = 0;
WITH ranked_avg AS (
    SELECT Id, 
               AVG(intent) AS avg_int, 
               NTILE(3) OVER (ORDER BY AVG(intent)) AS quartile
        FROM (
            SELECT Id, DATE(Time) AS day, AVG(Intensity) AS intent
            FROM bellabeat.intensities
            GROUP BY Id, DATE(Time)
        ) AS daily_avg
        GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET 
    u.intensity = r.avg_int,
    u.int_rank = CASE
                    WHEN r.quartile = 1 THEN 'low'
                    WHEN r.quartile = 2 THEN 'avg'
                    WHEN r.quartile = 3 THEN 'high'
                END;
SET SQL_SAFE_UPDATES = 1;

#UPDATE ACTIVITY
SET SQL_SAFE_UPDATES = 0;

WITH ranked_avg AS(
    WITH av AS (
        SELECT 
            AVG(VeryActiveMinutes) AS avg_very,
            AVG(FairlyActiveMinutes) AS avg_fair,
            AVG(LightlyActiveMinutes) AS avg_light,
            AVG(SedentaryActiveMinutes) AS avg_sedentary
        FROM bellabeat.activity
    ),
    daily_avg AS (
        SELECT 
            Id, 
            Date AS day, 
            (VeryActiveMinutes + FairlyActiveMinutes + LightlyActiveMinutes + 
              SedentaryActiveMinutes) AS distance,
            AVG(VeryActiveMinutes) AS very,
            AVG(FairlyActiveMinutes) AS fair,
            AVG(LightlyActiveMinutes) AS light,
            AVG(SedentaryActiveMinutes) AS sedentary
        FROM bellabeat.activity
        GROUP BY Id, Date
    ),
    diffs AS (
        SELECT 
            da.Id,
            AVG(da.distance) AS avg_minute,
            AVG(da.very) AS avg_very,
            AVG(da.fair) AS avg_fair, 
            AVG(da.light) AS avg_light,
            AVG(da.sedentary) AS avg_sedentary,
            -- Calculate the difference from the overall averages
            AVG(da.very) - (SELECT avg_very FROM av) AS diff_very,
            AVG(da.fair) - (SELECT avg_fair FROM av) AS diff_fair,
            AVG(da.light) - (SELECT avg_light FROM av) AS diff_light,
            AVG(da.sedentary) - (SELECT avg_sedentary FROM av) AS diff_sedentary
        FROM daily_avg da
        CROSS JOIN av
        GROUP BY da.Id
    )
    SELECT 
        d.Id,
        d.diff_very, d.diff_fair, d.diff_light, d.diff_sedentary,
        CASE
            WHEN d.diff_very > 0 THEN -- very or fairly active
                CASE
                    WHEN d.diff_fair > d.diff_light OR 
                      d.diff_very > d.diff_light THEN 'very'
                    ELSE "fair"
                END
            WHEN d.diff_fair > 0 THEN -- fairly or lightly active
                CASE
                    WHEN d.diff_light > 0 THEN 'fair'
                    ELSE "light"
                END
            ELSE "light"
        END AS ranking,
        d.avg_very, d.avg_fair, d.avg_light, d.avg_sedentary
    FROM diffs d
    ORDER BY FIELD(ranking, "very", "fair", "light"), avg_very DESC
    )
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r ON u.Id = r.Id
SET 
    u.activity = r.ranking;

SET SQL_SAFE_UPDATES = 1;
SELECT * FROM bellabeat.users ORDER BY FIELD(activity, "very", "fair", "light");

App / Device Usage, including by day of week:

#UPDATE USAGE BY DAY OF WEEK
SET SQL_SAFE_UPDATES = 0;

WITH weeks AS(
    WITH dow AS(
        SELECT DISTINCT(Date),
            CASE
                WHEN WEEKDAY(Date) < 5 THEN "weekday"
                ELSE "weekend"
            END AS dow
        FROM bellabeat.activity
    )
    SELECT Id,
        AVG(
            CASE
                WHEN dow.dow = 'weekday'
                THEN LightlyActiveMinutes + FairlyActiveMinutes + VeryActiveMinutes END
            ) AS weekday_activity,
        AVG(
            CASE
                WHEN dow.dow = 'weekend'
                THEN LightlyActiveMinutes + FairlyActiveMinutes + VeryActiveMinutes END
            ) AS weekend_activity
    FROM bellabeat.activity
    JOIN dow USING(Date)
    GROUP BY Id
    ORDER BY Id
)
UPDATE bellabeat.users AS u
JOIN weeks AS w USING(Id)
SET u.weekday_activity = w.weekday_activity, u.weekend_activity = w.weekend_activity;

SET SQL_SAFE_UPDATES = 1;

#UPDATE DAILY USAGE
SET SQL_SAFE_UPDATES = 0;

WITH ranked_avg AS (
    SELECT Id, per_day,
           NTILE(3) OVER (ORDER BY per_day) AS quartile
    FROM (
        SELECT Id, count(Date) / (SELECT count(distinct(Date)) 
          FROM bellabeat.activity) AS per_day
        FROM bellabeat.activity
        GROUP BY Id
    ) AS daily_use
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r USING(Id)
SET
    u.percent_days = r.per_day,
    u.day_rank = 
            CASE
                WHEN r.quartile = 1 THEN "low"
                WHEN r.quartile = 2 THEN "avg"
                WHEN r.quartile = 3 THEN "high"
            END;

SET SQL_SAFE_UPDATES = 1;

#UPDATE USAGE BY DAY OF WEEK
SET SQL_SAFE_UPDATES = 0;

WITH ranked_avg AS (
    SELECT Id, AVG(min) AS avg_min,
           NTILE(3) OVER (ORDER BY AVG(min)) AS quartile
    FROM (
        SELECT Id, Date, VeryActiveMinutes + FairlyActiveMinutes +
                          LightlyActiveMinutes + SedentaryActiveMinutes AS min
        FROM bellabeat.activity
        GROUP BY Id, Date
    ) AS daily_avg
    GROUP BY Id
)
UPDATE bellabeat.users AS u
JOIN ranked_avg AS r USING(Id)
SET 
    u.daily_use = r.avg_min,
    u.daily_rank = CASE
                    WHEN r.quartile = 1 THEN 'low'
                    WHEN r.quartile = 2 THEN 'avg'
                    WHEN r.quartile = 3 THEN 'high'
                END;
SET SQL_SAFE_UPDATES = 1;

SELECT * FROM bellabeat.users ORDER BY FIELD(activity, "very", "fair", "light");

Visualizations Using R and GGPlot

Loading the 4 Main Dataframes

user_db <- read.csv(paste0(workdir, "users.csv"))
steps <- read.csv(paste0(workdir, "steps.csv"))
heartrate <- read.csv(paste0(workdir, "heartrate.csv"))
calories <- read.csv(paste0(workdir, "calories.csv"))

Cleaning the Tables for R Processing

# Convert ID columns to lower case to match user_db
names(steps)[names(steps)=="Id"] <- "id"
names(heartrate)[names(heartrate)=="Id"] <- "id"
names(calories)[names(calories)=="Id"] <- "id"

# Convert the 'Time's to 'date's (removing the time component)
steps$date <- as.Date(steps$Time)
heartrate$date <- as.Date(heartrate$Time)
calories$date <- as.Date(calories$Time)

# Group by 'date' and 'id', then summarize for each day
daily_steps <- steps %>%
  group_by(id, date) %>%
  summarize(total_steps = sum(Steps, na.rm = TRUE))
daily_heartrate <- heartrate %>% 
  group_by(id, date) %>% 
  summarize(avg_rate = mean(Value, na.rm = TRUE)
            , min_rate = min(Value, na.rm = TRUE),
            .groups = 'drop')
daily_cals <- calories %>%
  group_by(id, date) %>%
  summarize(total_cals = sum(Calories, na.rm = TRUE))

# Join the dataframes
user_steps <- merge(user_db, daily_steps, by="id", all=TRUE)
user_heart <- merge(user_db, daily_heartrate, by="id", all=TRUE)
heart_steps <- merge(daily_heartrate, daily_steps, by=c("id", "date"))
heart_step_cal <- merge(heart_steps, daily_cals, by=c("id", "date"))
user_all <- merge(heart_step_cal, user_db, by="id")

# Convert activity levels to ordered factors
user_all <- user_all %>%
  mutate(activity = factor(activity, levels = c("very", "fair", "light")))
# Convert columns named list for loops later
categories <- list("step_rank"="Steps", 
                   "int_rank"="Intensity", 
                   "daily_rank"="Daily Usage", 
                   "day_rank"="Days Used", 
                   "mets_rank"="METs",
                   "activity"="Activity Level")

# Convert all the other categories to ordered factors of high, avg, low
for (key in names(categories)) {
  if (key != "activity"){
    user_all <- user_all %>%
      mutate(!!sym(key) := factor(!!sym(key), levels = c("high", "avg", "low")))
  }
}


# Summarize average usages (daily and total) by activity levels
average_daily_usage <- user_all %>%
  group_by(activity) %>%
  summarize(average_daily_use = mean(daily_use, na.rm = TRUE)) %>% 
  ungroup()
average_usage <- user_all %>%
  group_by(activity) %>%
  summarize(average_usage = mean(percent_days, na.rm = TRUE)) %>% 
  ungroup()

# Capitalized labels:
cap_labels = c("Very", "Moderate", "Light")
cap_categories = c("High", "Average", "Low")