Preliminary Setup

This code configures knitr code chunk options

Code
knitr::opts_chunk$set(
    echo = TRUE, message = FALSE, warning = FALSE, error = FALSE,
    comment = NA, cache = TRUE, code_folding = TRUE,
    R.options = list(width = 220, digits = 3),
    fig.align = "center",
    out.width = "75%", fig.asp = .75
)

This code loads the r packages necessary for this example

Code
library(tidyverse)
library(readxl)
library(gtsummary)
library(DT)
library(patchwork)
library(dlookr)
library(kableExtra)
library(knitr)
library(openxlsx)
library(psych)
library(janitor)
library(stringr)
library(tidyr)
library(htmltools)
library(plotly)

This code loads the dataframe

Code
# Define the clean_variable_names function
clean_variable_names <- function(df) {
  cleaned_names <- df %>%
    names() %>%
    str_replace_all("[^[:alnum:]_]", "_") %>%
    str_replace_all("__+", "_") %>%
    str_replace_all("[\r\n\t]", "") %>%
    str_trim() %>%
    str_remove("_$") %>%
    tolower()
  
  cleaned_names <- make.unique(cleaned_names, sep = "_")
  
  names(df) <- cleaned_names
  return(df)
}

# Example usage:
# Read the CSV file
merged <- read_csv("/Users/shawes/PA_JCMS/data/Merged_Files_NoRecid.csv", col_types = cols(.default = "c"))

# Clean the column names
merged_clean <- clean_variable_names(merged)

# Verify the cleaned names
#print(names(merged_clean))

# Select the specified variables to create the new dataframe
variables_to_keep <- c(
  "unique_id", "youth_num_in_cohort", "gender", "race", "ethnicity", "racnicity", 
  "age_at_service_start", "age_at_first_referral_date", "assessment_type", "total_risk_desc", 
  "totalscore", "priorscore", "familyscore", "educationscore", "peerscore", "substancescore", 
  "leisurescore", "personalityscore", "attitudesscore", "spep_id", "time", "total_weeks_of_service", 
  "total_hours_of_service", "was_there_service_interruption_of_30_days_or_more", 
  "was_youth_discharged_earlier_than_anticipated", "anticipated_of_youth", "total_of_youth", 
  "service_type", "target_dosage", "target_duration", "setting", "service_type_1", 
  "primary_therapeutic_category", "total_of_youth_in_cohort_amount_of_service", 
  "number_of_youth_scoring_low_on_the_yls", "number_of_youth_scoring_moderate_on_the_yls", 
  "number_of_youth_scoring_high_on_the_yls", "number_of_youth_scoring_very_high_on_the_yls", 
  "points_received_primary_service_type", "points_received_supplemental_service_provided", 
  "points_received_quality_of_service_delivery", "points_received_amount_of_service_duration", 
  "points_received_amount_of_service_contact_hours", 
  "points_received_risk_level_of_youth_youth_scoring_above_low_on_the_yls", 
  "points_received_risk_level_of_youth_youth_scoring_above_moderate_on_the_yls", 
  "basic_score_total_raw_points_earned", "maximum_total_points_possible_for_service_type_denominator_for_pop_score", 
  "pop_score_basic_score_divided_by_max_pts_possible_for_service_type", 
  "quality_measures_received_organizational_response_to_drift", "total_quality_measures_received", 
  "quality_of_service_delivery", "total_pts_amount_of_service", "total_pts_risk_level", 
  "basic_score_description_pa_validation_study_2020", "pop_score_description_pa_validation_study_2020", 
  "target_weeks", "target_hours"
)

# Create the new dataframe with the specified variables
merged_clean <- merged_clean %>%
  select(all_of(variables_to_keep))

# Convert appropriate columns to numeric and factor
numeric_vars <- c("age_at_service_start", "age_at_first_referral_date", "totalscore", "priorscore",
                  "familyscore", "educationscore", "peerscore", "substancescore", "leisurescore",
                  "personalityscore", "attitudesscore", "total_weeks_of_service", "total_hours_of_service",
                  "anticipated_of_youth", "total_of_youth", "target_dosage", "target_duration",
                  "total_of_youth_in_cohort_amount_of_service", "number_of_youth_scoring_low_on_the_yls",
                  "number_of_youth_scoring_moderate_on_the_yls", "number_of_youth_scoring_high_on_the_yls",
                  "number_of_youth_scoring_very_high_on_the_yls", "points_received_primary_service_type",
                  "points_received_supplemental_service_provided", "points_received_quality_of_service_delivery",
                  "points_received_amount_of_service_duration", "points_received_amount_of_service_contact_hours",
                  "points_received_risk_level_of_youth_youth_scoring_above_low_on_the_yls",
                  "points_received_risk_level_of_youth_youth_scoring_above_moderate_on_the_yls",
                  "basic_score_total_raw_points_earned", "maximum_total_points_possible_for_service_type_denominator_for_pop_score",
                  "pop_score_basic_score_divided_by_max_pts_possible_for_service_type",
                  "quality_measures_received_organizational_response_to_drift", "total_quality_measures_received",
                  "total_pts_amount_of_service", "total_pts_risk_level", "target_weeks", "target_hours")

factor_vars <- c("unique_id", "youth_num_in_cohort", "gender", "race", "ethnicity", "racnicity",
                 "assessment_type", "total_risk_desc", "spep_id", "time", "was_there_service_interruption_of_30_days_or_more",
                 "was_youth_discharged_earlier_than_anticipated", "service_type", "setting", "service_type_1",
                 "primary_therapeutic_category", "quality_of_service_delivery", "basic_score_description_pa_validation_study_2020",
                 "pop_score_description_pa_validation_study_2020")

ordered_factor_vars <- c("total_risk_desc", "time", "quality_of_service_delivery", 
                         "basic_score_description_pa_validation_study_2020", "pop_score_description_pa_validation_study_2020")

merged_clean <- merged_clean %>%
  mutate(across(all_of(numeric_vars), as.numeric)) %>%
  mutate(across(all_of(factor_vars), as.factor)) %>%
  mutate(across(all_of(ordered_factor_vars), ~ factor(.x, ordered = TRUE)))

# Verify the new dataframe
#str(merged_clean)
Code
#| echo: true
#| warning: false
#| output: true

# Split the data into numeric and factor columns
numeric_cols <- merged_clean %>% select(where(is.numeric))
unordered_factor_cols <- merged_clean %>% select(where(is.factor) & !where(is.ordered))
ordered_factor_cols <- merged_clean %>% select(where(is.ordered))

# Pivot and calculate summary statistics for numeric columns
numeric_summary <- numeric_cols %>%
  pivot_longer(cols = everything(), names_to = "variable") %>%
  group_by(variable) %>%
  summarise(
    missing = sum(is.na(value)),
    categories = n_distinct(value),
    count = NA,
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    min = min(value, na.rm = TRUE),
    max = max(value, na.rm = TRUE)
  ) %>%
  mutate(mean_sd = sprintf("%.2f (%.2f)", mean, sd)) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Pivot and calculate summary statistics for unordered factor columns
unordered_factor_summary <- unordered_factor_cols %>%
  pivot_longer(cols = everything(), names_to = "variable") %>%
  group_by(variable) %>%
  summarise(
    missing = sum(is.na(value)),
    categories = n_distinct(value),
    count = paste(names(sort(table(value), decreasing = TRUE)[1]), 
                  "(", round(max(table(value)) / sum(table(value)) * 100, 2), "%)", sep = ""),
    mean_sd = NA,
    min = NA,
    max = NA
  ) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Pivot and calculate summary statistics for each ordered factor column separately
ordered_factor_summary <- bind_rows(lapply(names(ordered_factor_cols), function(col_name) {
  ordered_factor_cols %>%
    select(!!sym(col_name)) %>%
    pivot_longer(cols = everything(), names_to = "variable") %>%
    group_by(variable) %>%
    summarise(
      missing = sum(is.na(value)),
      categories = n_distinct(value),
      count = paste(names(sort(table(value), decreasing = TRUE)[1]), 
                    "(", round(max(table(value)) / sum(table(value)) * 100, 2), "%)", sep = ""),
      mean_sd = NA,
      min = NA,
      max = NA
    ) %>%
    select(variable, mean_sd, min, max, count, missing, categories)
}))

# Combine numeric and factor summaries
descriptives_tidy <- bind_rows(numeric_summary, unordered_factor_summary, ordered_factor_summary) %>%
  rename(
    count = count,
    missing = missing,
    categories = categories
  ) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Exclude specified variables
descriptives_filtered <- descriptives_tidy %>%
  filter(!variable %in% c("unique_id", "youth_num_in_cohort", "spep_id"))

Sample Descriptives

This table shows some general info for full sample (including duplicates), will probably be removed later

Code
# Render the table using DT::datatable with tooltips and better formatting
datatable(descriptives_filtered, 
          rownames = FALSE,
          caption = 'Summary Statistics Table',
          extensions = 'Buttons',
          options = list(
            scrollX = TRUE,
            scrollY = "500px",
            paging = FALSE, # Disable pagination
            dom = 'Bfrtip',
            buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
          )) %>%
  formatRound(columns = c('min', 'max'), digits = 2) %>%
  formatStyle(
    columns = 'mean_sd',
    target = 'row',
    backgroundColor = styleInterval(1, c('white', 'lightgreen'))
  ) %>%
  formatStyle(
    columns = names(descriptives_filtered),
    whiteSpace = 'nowrap'
  )

This table shows descriptives by cohort for YLS scores

Code
# Display the formatted table
#YLS_table

# Display the HTML table
HTML(html_table)
Cohort YLS Table
Cohort Information
YLS Total
YLS Risk Levels
Index Cohort Count Mean(SD) Low Moderate High High/Very High
1 0303- 150 NA (6.43) 49 (33%) 11 (7%) 87 (58%) 1
2 0121- 113 NA (7.24) 40 (35%) 10 (9%) 56 (50%) 1
3 0135- 107 NA (6.84) 40 (37%) 11 (10%) 55 (51%) 1
4 0133- 105 NA (6.90) 40 (38%) 11 (10%) 53 (50%) 1
5 0111- 103 NA (6.43) 36 (35%) 7 (7%) 57 (55%) 1
6 0042- 95 NA (5.33) 39 (41%) 5 (5%) 50 (53%) 1
7 0140- 81 NA (7.25) 28 (35%) 8 (10%) 41 (51%) 1
8 0210- 79 NA (5.26) 11 (14%) 6 (8%) 62 (78%) 1
9 0222- 79 NA (7.67) 24 (30%) 12 (15%) 42 (53%) 1
10 0036- 78 NA (5.98) 27 (35%) 6 (8%) 44 (56%) 1
11 0304- 76 NA (7.43) 23 (30%) 18 (24%) 35 (46%) 1
12 0157- 74 NA (5.46) 28 (38%) 3 (4%) 43 (58%) 1
13 0034- 68 NA (6.07) 26 (38%) 5 (7%) 36 (53%) 1
14 0189- 62 NA (7.47) 19 (31%) 13 (21%) 30 (48%) 1
15 0091- 58 NA (5.49) 20 (34%) 4 (7%) 34 (59%) 1
16 0085- 57 NA (6.93) 19 (33%) 8 (14%) 29 (51%) 1
17 0087- 57 NA (6.93) 19 (33%) 8 (14%) 29 (51%) 1
18 0035- 55 NA (6.68) 17 (31%) 9 (16%) 28 (51%) 1
19 0272- 52 NA (4.93) 24 (46%) 2 (4%) 26 (50%) 1
20 0045- 50 NA (5.79) 13 (26%) 4 (8%) 33 (66%) 1
21 0114- 50 NA (7.46) 17 (34%) 5 (10%) 25 (50%) 1
22 0115- 50 NA (7.46) 17 (34%) 5 (10%) 25 (50%) 1
23 0141- 50 NA (7.46) 17 (34%) 5 (10%) 25 (50%) 1
24 0014- 48 NA (7.38) 18 (38%) 6 (12%) 22 (46%) 1
25 0007- 47 NA (5.73) 22 (47%) 2 (4%) 21 (45%) 1
26 0013- 47 NA (7.43) 17 (36%) 6 (13%) 22 (47%) 1
27 0112- 47 NA (6.31) 13 (28%) 4 (9%) 30 (64%) 1
28 0010- 46 NA (5.78) 22 (48%) 2 (4%) 20 (43%) 1
29 0011- 46 NA (7.57) 17 (37%) 6 (13%) 21 (46%) 1
30 0012- 46 NA (7.94) 19 (41%) 8 (17%) 17 (37%) 1
31 0108- 46 NA (6.37) 13 (28%) 4 (9%) 29 (63%) 1
32 0009- 45 NA (5.75) 22 (49%) 2 (4%) 19 (42%) 1
33 0005- 42 NA (7.35) 17 (40%) 4 (10%) 20 (48%) 1
34 0319- 42 NA (5.74) 19 (45%) 0 (0%) 22 (52%) 1
35 0004- 41 NA (7.40) 16 (39%) 4 (10%) 20 (49%) 1
36 0006- 41 NA (6.86) 17 (41%) 3 (7%) 20 (49%) 1
37 0188- 41 NA (7.98) 15 (37%) 9 (22%) 17 (41%) 1
38 0190- 41 NA (7.98) 15 (37%) 9 (22%) 17 (41%) 1
39 0191- 41 NA (7.98) 15 (37%) 9 (22%) 17 (41%) 1
40 0223- 41 NA (6.57) 10 (24%) 5 (12%) 26 (63%) 1
41 0343- 40 NA (6.30) 5 (12%) 7 (18%) 28 (70%) 1
42 0345- 40 NA (6.30) 5 (12%) 7 (18%) 28 (70%) 1
43 0003- 39 NA (6.93) 16 (41%) 3 (8%) 19 (49%) 1
44 0187- 39 NA (6.38) 14 (36%) 5 (13%) 20 (51%) 1
45 0037- 37 NA (5.42) 14 (38%) 1 (3%) 22 (59%) 1
46 0159- 36 NA (5.22) 16 (44%) 2 (6%) 17 (47%) 1
47 0320- 36 NA (4.39) 11 (31%) 1 (3%) 24 (67%) 1
48 0350- 34 NA (7.65) 4 (12%) 15 (44%) 15 (44%) 1
49 0310- 33 NA (5.32) 1 (3%) 14 (42%) 18 (55%) 1
50 0344- 33 NA (6.49) 4 (12%) 7 (21%) 22 (67%) 1
51 0302- 32 NA (6.59) 11 (34%) 2 (6%) 18 (56%) 1
52 0321- 32 NA (5.59) 1 (3%) 22 (69%) 9 (28%) 1
53 0120- 31 NA (7.02) 11 (35%) 3 (10%) 16 (52%) 1
54 0139- 31 NA (7.02) 11 (35%) 3 (10%) 16 (52%) 1
55 0067- 30 NA (6.79) 5 (17%) 8 (27%) 17 (57%) 1
56 0160- 30 NA (8.22) 4 (13%) 8 (27%) 18 (60%) 1
57 0236- 30 NA (6.58) 4 (13%) 3 (10%) 22 (73%) 1
58 0171- 29 NA (6.54) 10 (34%) 3 (10%) 16 (55%) 1
59 0172- 29 NA (6.54) 10 (34%) 3 (10%) 16 (55%) 1
60 0311- 29 NA (5.38) 1 (3%) 12 (41%) 16 (55%) 1
61 0044- 28 NA (5.79) 4 (14%) 2 (7%) 22 (79%) 1
62 0041- 27 NA (4.32) 13 (48%) 0 (0%) 14 (52%) 1
63 0024- 24 NA (6.55) 8 (33%) 3 (12%) 13 (54%) 1
64 0057- 24 NA (5.29) 8 (33%) 1 (4%) 15 (62%) 1
65 0351- 24 NA (4.43) 1 (4%) 4 (17%) 19 (79%) 1
66 0158- 22 NA (5.22) 3 (14%) 4 (18%) 15 (68%) 1
67 0339- 22 NA (6.54) 1 (5%) 13 (59%) 8 (36%) 1
68 0168- 21 NA (7.25) 1 (5%) 3 (14%) 16 (76%) 1
69 0233- 21 NA (5.04) 3 (14%) 4 (19%) 14 (67%) 1
70 0322- 21 NA (4.79) 5 (24%) 2 (10%) 14 (67%) 1
71 0113- 19 NA (5.45) 1 (5%) 2 (11%) 16 (84%) 1
72 0312- 19 NA (4.61) 6 (32%) 0 (0%) 13 (68%) 1
73 0313- 19 NA (4.61) 6 (32%) 0 (0%) 13 (68%) 1
74 0314- 19 NA (4.61) 6 (32%) 0 (0%) 13 (68%) 1
75 0316- 19 NA (4.61) 6 (32%) 0 (0%) 13 (68%) 1
76 0317- 19 NA (4.61) 6 (32%) 0 (0%) 13 (68%) 1
77 0325- 19 NA (4.42) 8 (42%) 0 (0%) 11 (58%) 1
78 0225- 18 NA (4.74) 11 (61%) 0 (0%) 7 (39%) 1
79 0244- 18 NA (7.38) 3 (17%) 3 (17%) 12 (67%) 1
80 0025- 17 NA (5.11) 0 (0%) 7 (41%) 10 (59%) 1
81 0227- 17 NA (5.94) 9 (53%) 1 (6%) 7 (41%) 1
82 0300- 17 NA (7.12) 8 (47%) 0 (0%) 9 (53%) 1
83 0349- 17 NA (5.94) 10 (59%) 1 (6%) 6 (35%) 1
84 0334- 16 NA (4.58) 2 (12%) 1 (6%) 13 (81%) 1
85 0318- 15 NA (6.15) 2 (13%) 3 (20%) 10 (67%) 1
86 0305- 14 NA (6.69) 5 (36%) 2 (14%) 7 (50%) 1
87 0307- 14 NA (6.69) 5 (36%) 2 (14%) 7 (50%) 1
88 0315- 14 NA (4.79) 5 (36%) 0 (0%) 9 (64%) 1
89 0324- 14 NA (5.22) 1 (7%) 2 (14%) 11 (79%) 1
90 0329- 14 NA (6.98) 2 (14%) 2 (14%) 10 (71%) 1
91 0347- 14 NA (6.57) 2 (14%) 2 (14%) 10 (71%) 1
92 0277- 13 NA (5.86) 3 (23%) 0 (0%) 9 (69%) 1
93 0323- 13 NA (5.04) 5 (38%) 0 (0%) 8 (62%) 1
94 0008- 12 NA (6.69) 6 (50%) 0 (0%) 5 (42%) 1
95 0099- 12 NA (7.18) 7 (58%) 2 (17%) 3 (25%) 1
96 0101- 12 NA (7.18) 7 (58%) 2 (17%) 3 (25%) 1
97 0102- 12 NA (5.59) 4 (33%) 0 (0%) 8 (67%) 1
98 0109- 12 NA (6.33) 5 (42%) 0 (0%) 7 (58%) 1
99 0332- 12 NA (8.24) 5 (42%) 1 (8%) 5 (42%) 1
100 0098- 11 NA (7.51) 7 (64%) 2 (18%) 2 (18%) 1
101 0100- 11 NA (6.99) 7 (64%) 2 (18%) 2 (18%) 1
102 0137- 11 NA (7.68) 4 (36%) 2 (18%) 5 (45%) 1
103 0151- 11 NA (6.71) 1 (9%) 4 (36%) 6 (55%) 1
104 0309- 11 NA (5.65) 4 (36%) 0 (0%) 7 (64%) 1
105 0346- 11 NA (4.31) 0 (0%) 5 (45%) 6 (55%) 1
106 0357- 11 NA (9.16) 2 (18%) 2 (18%) 6 (55%) 1
107 0235- 10 NA (3.52) 0 (0%) 0 (0%) 10 (100%) 1
108 0333- 10 NA (6.31) 1 (10%) 4 (40%) 5 (50%) 1
109 0348- 10 NA (4.24) 5 (50%) 1 (10%) 4 (40%) 1
110 0335- 9 NA (7.73) 1 (11%) 4 (44%) 4 (44%) 1
111 0337- 9 NA (7.73) 1 (11%) 4 (44%) 4 (44%) 1
112 0338- 9 NA (7.73) 1 (11%) 4 (44%) 4 (44%) 1
Note:
This table provides the YLS risk levels combined with 'High/Very High' indicating if either 'High' or 'Very High' was endorsed.

This table shows descriptives by cohort for different service types

Code
# Create and format the table using kable and kableExtra with enhancements
formatted_table_serviceType <- contingency_df_serviceType %>%
  kable("html", col.names = c("Index", "Cohort", "Count", "Serv_Type_Sum", "ART_Comm_Based", "ART_Res_Prog", "Behav_Manag", "Chall_Progs", "CBT", "Fam_Couns", "Grp_Couns", "Indiv_Couns", "Job_Train", "Voc_Couns", "Mentoring", "Mix_Couns", "MST", "Rem_Acad_Prog", "Rest_Comm_Serv", "Soc_Skills_Train"), caption = "Cohort Service Type Table") %>%
  kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
  row_spec(1:nrow(contingency_df_serviceType), background = c("#f2f2f2", "white")) %>%
  column_spec(1, bold = TRUE, color = "blue") %>%
  column_spec(3, background = "#ffebcc") %>%
  add_header_above(c("Cohort Information" = 4, "YLS Service Types" = 16)) %>%
  footnote(general = "This table provides xxxxx.") %>%
  kable_styling(latex_options = "scale_down") %>%
  column_spec(1:4, bold = TRUE, border_right = TRUE, width = "1.5cm") %>%
  column_spec(5:20, width = "2cm")

# Wrap the table in a div for scrolling
html_table_serviceType <- paste0(
  '<div style="height:500px; overflow-y:auto;">',
  as.character(formatted_table_serviceType),
  '</div>'
)

# Display the HTML table
HTML(html_table_serviceType)
Cohort Service Type Table
Cohort Information
YLS Service Types
Index Cohort Count Serv_Type_Sum ART_Comm_Based ART_Res_Prog Behav_Manag Chall_Progs CBT Fam_Couns Grp_Couns Indiv_Couns Job_Train Voc_Couns Mentoring Mix_Couns MST Rem_Acad_Prog Rest_Comm_Serv Soc_Skills_Train
1 0303- 150 300 0 0 0 0 0 0 150 0 0 0 0 0 0 0 0 0
2 0121- 113 226 0 0 0 0 0 0 0 0 0 0 0 0 0 113 0 0
3 0135- 107 214 0 0 0 0 0 0 0 0 0 107 0 0 0 0 0 0
4 0133- 105 210 0 0 0 0 105 0 0 0 0 0 0 0 0 0 0 0
5 0111- 103 206 0 0 0 0 0 0 103 0 0 0 0 0 0 0 0 0
6 0042- 95 190 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 95
7 0140- 81 162 0 0 0 0 0 0 0 81 0 0 0 0 0 0 0 0
8 0210- 79 158 0 0 0 0 0 0 0 0 0 0 79 0 0 0 0 0
9 0222- 79 158 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 79
10 0036- 78 156 0 0 0 0 0 0 0 0 0 0 0 0 0 78 0 0
11 0304- 76 152 0 0 0 0 0 0 0 0 76 0 0 0 0 0 0 0
12 0157- 74 148 0 0 0 0 0 0 74 0 0 0 0 0 0 0 0 0
13 0034- 68 136 0 0 0 0 0 0 0 0 68 0 0 0 0 0 0 0
14 0189- 62 124 0 0 0 0 0 0 0 0 0 0 0 0 0 62 0 0
15 0091- 58 116 0 0 0 0 0 0 0 58 0 0 0 0 0 0 0 0
16 0035- 55 110 0 0 0 0 55 0 0 0 0 0 0 0 0 0 0 0
17 0272- 52 104 0 0 0 52 0 0 0 0 0 0 0 0 0 0 0 0
18 0045- 50 100 0 0 0 0 0 25 0 0 0 0 0 0 25 0 0 0
19 0114- 50 100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 50
20 0115- 50 100 0 0 0 0 50 0 0 0 0 0 0 0 0 0 0 0
21 0141- 50 100 0 0 0 0 0 0 0 0 0 50 0 0 0 0 0 0
22 0014- 48 96 0 0 48 0 0 0 0 0 0 0 0 0 0 0 0 0
23 0007- 47 94 0 0 0 0 0 0 0 47 0 0 0 0 0 0 0 0
24 0013- 47 94 0 0 0 0 47 0 0 0 0 0 0 0 0 0 0 0
25 0112- 47 94 0 0 0 0 0 0 47 0 0 0 0 0 0 0 0 0
26 0010- 46 92 0 0 46 0 0 0 0 0 0 0 0 0 0 0 0 0
27 0011- 46 92 0 0 0 0 0 0 0 46 0 0 0 0 0 0 0 0
28 0012- 46 92 0 46 0 0 0 0 0 0 0 0 0 0 0 0 0 0
29 0108- 46 92 0 0 0 0 0 0 46 0 0 0 0 0 0 0 0 0
30 0009- 45 90 0 0 0 0 45 0 0 0 0 0 0 0 0 0 0 0
31 0005- 42 84 0 0 0 0 42 0 0 0 0 0 0 0 0 0 0 0
32 0319- 42 84 0 0 42 0 0 0 0 0 0 0 0 0 0 0 0 0
33 0004- 41 82 0 41 0 0 0 0 0 0 0 0 0 0 0 0 0 0
34 0006- 41 82 0 0 41 0 0 0 0 0 0 0 0 0 0 0 0 0
35 0188- 41 82 0 0 0 0 41 0 0 0 0 0 0 0 0 0 0 0
36 0190- 41 82 0 0 0 0 0 0 41 0 0 0 0 0 0 0 0 0
37 0191- 41 82 0 0 0 0 0 0 41 0 0 0 0 0 0 0 0 0
38 0223- 41 82 0 0 0 0 0 0 0 41 0 0 0 0 0 0 0 0
39 0343- 40 80 0 0 0 0 0 0 0 40 0 0 0 0 0 0 0 0
40 0345- 40 80 0 0 0 0 0 0 40 0 0 0 0 0 0 0 0 0
41 0003- 39 78 0 0 0 0 0 0 0 39 0 0 0 0 0 0 0 0
42 0187- 39 78 0 39 0 0 0 0 0 0 0 0 0 0 0 0 0 0
43 0037- 37 74 0 0 0 0 37 0 0 0 0 0 0 0 0 0 0 0
44 0159- 36 72 0 36 0 0 0 0 0 0 0 0 0 0 0 0 0 0
45 0320- 36 36 36 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
46 0350- 34 68 0 0 0 0 0 0 0 34 0 0 0 0 0 0 0 0
47 0310- 33 66 0 0 0 0 0 0 0 33 0 0 0 0 0 0 0 0
48 0344- 33 66 0 0 0 0 0 0 33 0 0 0 0 0 0 0 0 0
49 0302- 32 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0
50 0321- 32 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0
51 0120- 31 62 0 0 0 0 31 0 0 0 0 0 0 0 0 0 0 0
52 0139- 31 62 0 0 0 0 31 0 0 0 0 0 0 0 0 0 0 0
53 0067- 30 60 0 0 0 0 0 0 0 0 0 0 30 0 0 0 0 0
54 0160- 30 60 0 0 0 0 0 0 0 30 0 0 0 0 0 0 0 0
55 0236- 30 60 0 0 0 0 30 0 0 0 0 0 0 0 0 0 0 0
56 0171- 29 58 0 0 0 0 0 0 0 0 0 29 0 0 0 0 0 0
57 0172- 29 58 0 0 0 0 0 0 0 0 0 0 0 0 0 29 0 0
58 0311- 29 58 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 0
59 0044- 28 56 0 0 0 0 0 0 0 0 0 0 28 0 0 0 0 0
60 0041- 27 54 0 0 0 0 0 0 27 0 0 0 0 0 0 0 0 0
61 0024- 24 48 0 24 0 0 0 0 0 0 0 0 0 0 0 0 0 0
62 0057- 24 48 0 0 0 0 0 0 0 0 0 0 0 0 24 0 0 0
63 0351- 24 48 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24
64 0158- 22 44 0 0 0 0 0 0 0 0 0 0 22 0 0 0 0 0
65 0339- 22 44 0 0 0 0 0 0 0 0 0 0 22 0 0 0 0 0
66 0168- 21 42 0 0 0 0 0 0 0 0 0 0 0 21 0 0 0 0
67 0233- 21 21 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
68 0322- 21 42 0 0 0 0 21 0 0 0 0 0 0 0 0 0 0 0
69 0113- 19 38 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0
70 0312- 19 38 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0
71 0313- 19 38 0 0 19 0 0 0 0 0 0 0 0 0 0 0 0 0
72 0314- 19 38 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0
73 0316- 19 38 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0
74 0317- 19 38 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0
75 0325- 19 19 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
76 0225- 18 36 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0
77 0244- 18 36 0 0 0 0 0 0 0 0 0 0 0 18 0 0 0 0
78 0025- 17 34 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17
79 0227- 17 34 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0
80 0300- 17 34 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0
81 0349- 17 34 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0
82 0334- 16 32 0 0 0 0 0 0 0 0 0 0 0 16 0 0 0 0
83 0318- 15 30 0 0 0 0 0 0 0 0 0 0 0 0 15 0 0 0
84 0305- 14 28 0 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0
85 0307- 14 28 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0
86 0315- 14 28 0 0 0 0 0 0 0 0 0 0 0 0 0 14 0 0
87 0324- 14 28 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0
88 0329- 14 28 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0
89 0347- 14 28 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0
90 0277- 13 26 0 0 0 0 0 0 13 0 0 0 0 0 0 0 0 0
91 0323- 13 26 0 0 0 0 0 0 13 0 0 0 0 0 0 0 0 0
92 0008- 12 24 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0
93 0099- 12 24 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
94 0101- 12 24 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0
95 0102- 12 24 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0
96 0109- 12 24 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0
97 0332- 12 24 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0
98 0098- 11 22 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0
99 0100- 11 22 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0
100 0137- 11 22 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0
101 0151- 11 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0
102 0309- 11 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0
103 0346- 11 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
104 0357- 11 22 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0
105 0235- 10 20 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0
106 0333- 10 20 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0
107 0348- 10 20 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0
108 0335- 9 18 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0
109 0337- 9 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9
110 0338- 9 18 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0
111 0085- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
112 0087- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Note:
This table provides xxxxx.
Code
str(merged$service )
 NULL

Additional Output and Tables

This table shows descriptives by cohort for different settings

Code
# Display the HTML table in RStudio Viewer or a web browser
formatted_table_with_scroll <- htmltools::HTML(html_table_with_scroll)
formatted_table_with_scroll
Cohort Setting Table
Cohort Information
Settings
Index Cohort Count C R
1 0003- 39 0 39
2 0004- 41 0 41
3 0005- 42 0 42
4 0006- 41 0 41
5 0007- 47 0 47
6 0008- 12 0 12
7 0009- 45 0 45
8 0010- 46 0 46
9 0011- 46 0 46
10 0012- 46 0 46
11 0013- 47 0 47
12 0014- 48 0 48
13 0024- 24 0 24
14 0025- 17 17 0
15 0034- 68 0 68
16 0035- 55 0 55
17 0036- 78 0 78
18 0037- 37 0 37
19 0041- 27 27 0
20 0042- 95 95 0
21 0044- 28 28 0
22 0045- 50 50 0
23 0057- 24 24 0
24 0067- 30 30 0
25 0085- 57 0 57
26 0087- 57 0 57
27 0091- 58 0 58
28 0098- 11 0 11
29 0099- 12 0 12
30 0100- 11 0 11
31 0101- 12 0 12
32 0102- 12 12 0
33 0108- 46 0 46
34 0109- 12 0 12
35 0111- 103 0 103
36 0112- 47 0 47
37 0113- 19 0 19
38 0114- 50 0 50
39 0115- 50 0 50
40 0120- 31 0 31
41 0121- 113 0 113
42 0133- 105 0 105
43 0135- 107 0 107
44 0137- 11 0 11
45 0139- 31 0 31
46 0140- 81 0 81
47 0141- 50 0 50
48 0151- 11 11 0
49 0157- 74 0 74
50 0158- 22 22 0
51 0159- 36 0 36
52 0160- 30 0 30
53 0168- 21 0 21
54 0171- 29 0 29
55 0172- 29 0 29
56 0187- 39 0 39
57 0188- 41 0 41
58 0189- 62 0 62
59 0190- 41 0 41
60 0191- 41 0 41
61 0210- 79 79 0
62 0222- 79 0 79
63 0223- 41 0 41
64 0225- 18 0 18
65 0227- 17 0 17
66 0233- 21 21 0
67 0235- 10 0 10
68 0236- 30 0 30
69 0244- 18 18 0
70 0272- 52 0 52
71 0277- 13 13 0
72 0300- 17 0 17
73 0302- 32 0 32
74 0303- 150 0 150
75 0304- 76 0 76
76 0305- 14 0 14
77 0307- 14 0 14
78 0309- 11 0 11
79 0310- 33 33 0
80 0311- 29 29 0
81 0312- 19 0 19
82 0313- 19 0 19
83 0314- 19 0 19
84 0315- 14 0 14
85 0316- 19 0 19
86 0317- 19 0 19
87 0318- 15 15 0
88 0319- 42 0 42
89 0320- 36 36 0
90 0321- 32 32 0
91 0322- 21 21 0
92 0323- 13 13 0
93 0324- 14 14 0
94 0325- 19 19 0
95 0329- 14 0 14
96 0332- 12 12 0
97 0333- 10 10 0
98 0334- 16 16 0
99 0335- 9 0 9
100 0337- 9 0 9
101 0338- 9 0 9
102 0339- 22 22 0
103 0343- 40 0 40
104 0344- 33 0 33
105 0345- 40 0 40
106 0346- 11 11 0
107 0347- 14 14 0
108 0348- 10 0 10
109 0349- 17 0 17
110 0350- 34 34 0
111 0351- 24 24 0
112 0357- 11 0 11
Note:
This table provides a breakdown of cohorts by Setting.

This histogram shows cohort by service type

Code
# Install and load necessary packages
# install.packages("tidyr")
# install.packages("ggplot2")
# library(tidyr)
# library(ggplot2)
library(plotly)

# Reshape the data: gather service types into a single column
contingency_long <- contingency_df_serviceType %>%
  gather(key = "service_type", value = "count", -Index, -Cohort, -Count, -Service_Type_Sum)

# Filter out rows with zero counts (if needed)
contingency_long_filtered <- contingency_long %>%
  filter(count > 0)

plot_data <- contingency_long_filtered %>%
  group_by(Cohort, service_type) %>%
  summarise(total_count = sum(count)) %>%
  ungroup()

p <- plot_ly(plot_data, x = ~service_type, y = ~total_count, type = 'bar', name = ~Cohort) %>%
  layout(
    title = "Histogram of Service Types by Cohort",
    xaxis = list(title = "Service Type"),
    yaxis = list(title = "Count"),
    updatemenus = list(
      list(
        buttons = lapply(unique(plot_data$Cohort), function(cohort) {
          list(
            method = "restyle",
            args = list("transforms[0].value", cohort),
            label = cohort
          )
        }),
        direction = "down",
        x = 0.1,
        y = 1.1,
        showactive = TRUE
      )
    ),
    transforms = list(
      list(
        type = 'filter',
        target = ~Cohort,
        operation = '=',
        value = unique(plot_data$Cohort)[1]
      )
    )
  )

# Render the plot
p
Code
## Cohort Duration Table

# ::: blue
# > **This table shows descriptives by cohort for different durations**
# #| echo: true
# #| warning: false
# #| output: false
# 
# #column_names <- names(merged)
# #print(column_names)
# 
# # Assuming you find the exact column name as "Points Received: \nAmount of Service - Duration"
# exact_column_name <- "Points Received: \r\nAmount of Service - Duration"
# 
# # Create a new column 'duration' that is a copy of the original column
# merged$duration <- merged[[exact_column_name]]
# 
# # Ensure 'duration' column is factors
# merged$duration <- as.factor(merged$duration)
# 
# # Verify the new column
# #str(merged$duration)
# 
# # Step 1: Create contingency table for 'duration' by 'Cohort'
# contingency_table_duration <- table(merged$Cohort, merged$duration)
# 
# # Step 2: Convert the contingency table to a data frame
# contingency_df_duration <- as.data.frame.matrix(contingency_table_duration)
# 
# # Step 3: Add a new column with the total count of individuals in each cohort
# contingency_df_duration$Total <- rowSums(contingency_df_duration)
# 
# # Step 4: Rename the 'Total' column to 'Count'
# contingency_df_duration <- contingency_df_duration %>%
#   rownames_to_column(var = "Cohort") %>%
#   rename(Count = Total)
# 
# # Step 5: Add an index column
# contingency_df_duration <- contingency_df_duration %>%
#   mutate(Index = row_number())
# 
# # Step 6: Reorder the columns to have the index as the first column
# contingency_df_duration <- contingency_df_duration %>%
#   select(Index, Cohort, Count, everything())
# 
# # Step 7: Create and format the table using kable and kableExtra with enhancements
# formatted_table_duration <- contingency_df_duration %>%
#   kable("html", col.names = names(contingency_df_duration), caption = "Cohort Duration Table") %>%
#   kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
#   row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
#   row_spec(1:nrow(contingency_df_duration), background = c("#f2f2f2", "white")) %>%
#   column_spec(1, bold = TRUE, color = "blue") %>%
#   column_spec(3, background = "#ffebcc") %>%
#   add_header_above(c("Cohort Information" = 3, "Durations" = ncol(contingency_df_duration) - 3)) %>%
#   footnote(general = "This table provides a breakdown of cohorts by Duration.")
Code
# Display the formatted table for Setting
##formatted_table_duration