SPEP Revalidation

Preliminary Setup

This code configures knitr code chunk options

Code

knitr::opts_chunk$set(
    echo = TRUE, message = FALSE, warning = FALSE, error = FALSE,
    comment = NA, cache = TRUE, code_folding = TRUE,
    R.options = list(width = 220, digits = 3),
    fig.align = "center",
    out.width = "75%", fig.asp = .75
)

This code loads the r packages necessary for this example

Code

library(tidyverse)
library(readxl)
library(gtsummary)
library(DT)
library(patchwork)
library(dlookr)
library(kableExtra)
library(knitr)
library(openxlsx)
library(psych)
library(janitor)
library(stringr)
library(tidyr)
library(htmltools)
library(plotly)

This code loads the dataframe

Code

# Define the clean_variable_names function
clean_variable_names <- function(df) {
  cleaned_names <- df %>%
    names() %>%
    str_replace_all("[^[:alnum:]_]", "_") %>%
    str_replace_all("__+", "_") %>%
    str_replace_all("[\r\n\t]", "") %>%
    str_trim() %>%
    str_remove("_$") %>%
    tolower()
  
  cleaned_names <- make.unique(cleaned_names, sep = "_")
  
  names(df) <- cleaned_names
  return(df)
}

# Example usage:
# Read the CSV file
merged <- read_csv("/Users/shawes/PA_JCMS/data/Merged_Files_NoRecid.csv", col_types = cols(.default = "c"))

# Clean the column names
merged_clean <- clean_variable_names(merged)

# Verify the cleaned names
#print(names(merged_clean))

# Select the specified variables to create the new dataframe
variables_to_keep <- c(
  "unique_id", "youth_num_in_cohort", "gender", "race", "ethnicity", "racnicity", 
  "age_at_service_start", "age_at_first_referral_date", "assessment_type", "total_risk_desc", 
  "totalscore", "priorscore", "familyscore", "educationscore", "peerscore", "substancescore", 
  "leisurescore", "personalityscore", "attitudesscore", "spep_id", "time", "total_weeks_of_service", 
  "total_hours_of_service", "was_there_service_interruption_of_30_days_or_more", 
  "was_youth_discharged_earlier_than_anticipated", "anticipated_of_youth", "total_of_youth", 
  "service_type", "target_dosage", "target_duration", "setting", "service_type_1", 
  "primary_therapeutic_category", "total_of_youth_in_cohort_amount_of_service", 
  "number_of_youth_scoring_low_on_the_yls", "number_of_youth_scoring_moderate_on_the_yls", 
  "number_of_youth_scoring_high_on_the_yls", "number_of_youth_scoring_very_high_on_the_yls", 
  "points_received_primary_service_type", "points_received_supplemental_service_provided", 
  "points_received_quality_of_service_delivery", "points_received_amount_of_service_duration", 
  "points_received_amount_of_service_contact_hours", 
  "points_received_risk_level_of_youth_youth_scoring_above_low_on_the_yls", 
  "points_received_risk_level_of_youth_youth_scoring_above_moderate_on_the_yls", 
  "basic_score_total_raw_points_earned", "maximum_total_points_possible_for_service_type_denominator_for_pop_score", 
  "pop_score_basic_score_divided_by_max_pts_possible_for_service_type", 
  "quality_measures_received_organizational_response_to_drift", "total_quality_measures_received", 
  "quality_of_service_delivery", "total_pts_amount_of_service", "total_pts_risk_level", 
  "basic_score_description_pa_validation_study_2020", "pop_score_description_pa_validation_study_2020", 
  "target_weeks", "target_hours"
)

# Create the new dataframe with the specified variables
merged_clean <- merged_clean %>%
  select(all_of(variables_to_keep))

# Convert appropriate columns to numeric and factor
numeric_vars <- c("age_at_service_start", "age_at_first_referral_date", "totalscore", "priorscore",
                  "familyscore", "educationscore", "peerscore", "substancescore", "leisurescore",
                  "personalityscore", "attitudesscore", "total_weeks_of_service", "total_hours_of_service",
                  "anticipated_of_youth", "total_of_youth", "target_dosage", "target_duration",
                  "total_of_youth_in_cohort_amount_of_service", "number_of_youth_scoring_low_on_the_yls",
                  "number_of_youth_scoring_moderate_on_the_yls", "number_of_youth_scoring_high_on_the_yls",
                  "number_of_youth_scoring_very_high_on_the_yls", "points_received_primary_service_type",
                  "points_received_supplemental_service_provided", "points_received_quality_of_service_delivery",
                  "points_received_amount_of_service_duration", "points_received_amount_of_service_contact_hours",
                  "points_received_risk_level_of_youth_youth_scoring_above_low_on_the_yls",
                  "points_received_risk_level_of_youth_youth_scoring_above_moderate_on_the_yls",
                  "basic_score_total_raw_points_earned", "maximum_total_points_possible_for_service_type_denominator_for_pop_score",
                  "pop_score_basic_score_divided_by_max_pts_possible_for_service_type",
                  "quality_measures_received_organizational_response_to_drift", "total_quality_measures_received",
                  "total_pts_amount_of_service", "total_pts_risk_level", "target_weeks", "target_hours")

factor_vars <- c("unique_id", "youth_num_in_cohort", "gender", "race", "ethnicity", "racnicity",
                 "assessment_type", "total_risk_desc", "spep_id", "time", "was_there_service_interruption_of_30_days_or_more",
                 "was_youth_discharged_earlier_than_anticipated", "service_type", "setting", "service_type_1",
                 "primary_therapeutic_category", "quality_of_service_delivery", "basic_score_description_pa_validation_study_2020",
                 "pop_score_description_pa_validation_study_2020")

ordered_factor_vars <- c("total_risk_desc", "time", "quality_of_service_delivery", 
                         "basic_score_description_pa_validation_study_2020", "pop_score_description_pa_validation_study_2020")

merged_clean <- merged_clean %>%
  mutate(across(all_of(numeric_vars), as.numeric)) %>%
  mutate(across(all_of(factor_vars), as.factor)) %>%
  mutate(across(all_of(ordered_factor_vars), ~ factor(.x, ordered = TRUE)))

# Verify the new dataframe
#str(merged_clean)

Code

#| echo: true
#| warning: false
#| output: true

# Split the data into numeric and factor columns
numeric_cols <- merged_clean %>% select(where(is.numeric))
unordered_factor_cols <- merged_clean %>% select(where(is.factor) & !where(is.ordered))
ordered_factor_cols <- merged_clean %>% select(where(is.ordered))

# Pivot and calculate summary statistics for numeric columns
numeric_summary <- numeric_cols %>%
  pivot_longer(cols = everything(), names_to = "variable") %>%
  group_by(variable) %>%
  summarise(
    missing = sum(is.na(value)),
    categories = n_distinct(value),
    count = NA,
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    min = min(value, na.rm = TRUE),
    max = max(value, na.rm = TRUE)
  ) %>%
  mutate(mean_sd = sprintf("%.2f (%.2f)", mean, sd)) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Pivot and calculate summary statistics for unordered factor columns
unordered_factor_summary <- unordered_factor_cols %>%
  pivot_longer(cols = everything(), names_to = "variable") %>%
  group_by(variable) %>%
  summarise(
    missing = sum(is.na(value)),
    categories = n_distinct(value),
    count = paste(names(sort(table(value), decreasing = TRUE)[1]), 
                  "(", round(max(table(value)) / sum(table(value)) * 100, 2), "%)", sep = ""),
    mean_sd = NA,
    min = NA,
    max = NA
  ) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Pivot and calculate summary statistics for each ordered factor column separately
ordered_factor_summary <- bind_rows(lapply(names(ordered_factor_cols), function(col_name) {
  ordered_factor_cols %>%
    select(!!sym(col_name)) %>%
    pivot_longer(cols = everything(), names_to = "variable") %>%
    group_by(variable) %>%
    summarise(
      missing = sum(is.na(value)),
      categories = n_distinct(value),
      count = paste(names(sort(table(value), decreasing = TRUE)[1]), 
                    "(", round(max(table(value)) / sum(table(value)) * 100, 2), "%)", sep = ""),
      mean_sd = NA,
      min = NA,
      max = NA
    ) %>%
    select(variable, mean_sd, min, max, count, missing, categories)
}))

# Combine numeric and factor summaries
descriptives_tidy <- bind_rows(numeric_summary, unordered_factor_summary, ordered_factor_summary) %>%
  rename(
    count = count,
    missing = missing,
    categories = categories
  ) %>%
  select(variable, mean_sd, min, max, count, missing, categories)

# Exclude specified variables
descriptives_filtered <- descriptives_tidy %>%
  filter(!variable %in% c("unique_id", "youth_num_in_cohort", "spep_id"))

Sample Descriptives

This table shows some general info for full sample (including duplicates), will probably be removed later

Code

# Render the table using DT::datatable with tooltips and better formatting
datatable(descriptives_filtered, 
          rownames = FALSE,
          caption = 'Summary Statistics Table',
          extensions = 'Buttons',
          options = list(
            scrollX = TRUE,
            scrollY = "500px",
            paging = FALSE, # Disable pagination
            dom = 'Bfrtip',
            buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
          )) %>%
  formatRound(columns = c('min', 'max'), digits = 2) %>%
  formatStyle(
    columns = 'mean_sd',
    target = 'row',
    backgroundColor = styleInterval(1, c('white', 'lightgreen'))
  ) %>%
  formatStyle(
    columns = names(descriptives_filtered),
    whiteSpace = 'nowrap'
  )

This table shows descriptives by cohort for YLS scores

Code

# Display the formatted table
#YLS_table

# Display the HTML table
HTML(html_table)

Cohort YLS Table
Cohort Information			YLS Total	YLS Risk Levels
Index	Cohort	Count	Mean(SD)	Low	Moderate	High	High/Very High
1	0303-	150	NA (6.43)	49 (33%)	11 (7%)	87 (58%)	1
2	0121-	113	NA (7.24)	40 (35%)	10 (9%)	56 (50%)	1
3	0135-	107	NA (6.84)	40 (37%)	11 (10%)	55 (51%)	1
4	0133-	105	NA (6.90)	40 (38%)	11 (10%)	53 (50%)	1
5	0111-	103	NA (6.43)	36 (35%)	7 (7%)	57 (55%)	1
6	0042-	95	NA (5.33)	39 (41%)	5 (5%)	50 (53%)	1
7	0140-	81	NA (7.25)	28 (35%)	8 (10%)	41 (51%)	1
8	0210-	79	NA (5.26)	11 (14%)	6 (8%)	62 (78%)	1
9	0222-	79	NA (7.67)	24 (30%)	12 (15%)	42 (53%)	1
10	0036-	78	NA (5.98)	27 (35%)	6 (8%)	44 (56%)	1
11	0304-	76	NA (7.43)	23 (30%)	18 (24%)	35 (46%)	1
12	0157-	74	NA (5.46)	28 (38%)	3 (4%)	43 (58%)	1
13	0034-	68	NA (6.07)	26 (38%)	5 (7%)	36 (53%)	1
14	0189-	62	NA (7.47)	19 (31%)	13 (21%)	30 (48%)	1
15	0091-	58	NA (5.49)	20 (34%)	4 (7%)	34 (59%)	1
16	0085-	57	NA (6.93)	19 (33%)	8 (14%)	29 (51%)	1
17	0087-	57	NA (6.93)	19 (33%)	8 (14%)	29 (51%)	1
18	0035-	55	NA (6.68)	17 (31%)	9 (16%)	28 (51%)	1
19	0272-	52	NA (4.93)	24 (46%)	2 (4%)	26 (50%)	1
20	0045-	50	NA (5.79)	13 (26%)	4 (8%)	33 (66%)	1
21	0114-	50	NA (7.46)	17 (34%)	5 (10%)	25 (50%)	1
22	0115-	50	NA (7.46)	17 (34%)	5 (10%)	25 (50%)	1
23	0141-	50	NA (7.46)	17 (34%)	5 (10%)	25 (50%)	1
24	0014-	48	NA (7.38)	18 (38%)	6 (12%)	22 (46%)	1
25	0007-	47	NA (5.73)	22 (47%)	2 (4%)	21 (45%)	1
26	0013-	47	NA (7.43)	17 (36%)	6 (13%)	22 (47%)	1
27	0112-	47	NA (6.31)	13 (28%)	4 (9%)	30 (64%)	1
28	0010-	46	NA (5.78)	22 (48%)	2 (4%)	20 (43%)	1
29	0011-	46	NA (7.57)	17 (37%)	6 (13%)	21 (46%)	1
30	0012-	46	NA (7.94)	19 (41%)	8 (17%)	17 (37%)	1
31	0108-	46	NA (6.37)	13 (28%)	4 (9%)	29 (63%)	1
32	0009-	45	NA (5.75)	22 (49%)	2 (4%)	19 (42%)	1
33	0005-	42	NA (7.35)	17 (40%)	4 (10%)	20 (48%)	1
34	0319-	42	NA (5.74)	19 (45%)	0 (0%)	22 (52%)	1
35	0004-	41	NA (7.40)	16 (39%)	4 (10%)	20 (49%)	1
36	0006-	41	NA (6.86)	17 (41%)	3 (7%)	20 (49%)	1
37	0188-	41	NA (7.98)	15 (37%)	9 (22%)	17 (41%)	1
38	0190-	41	NA (7.98)	15 (37%)	9 (22%)	17 (41%)	1
39	0191-	41	NA (7.98)	15 (37%)	9 (22%)	17 (41%)	1
40	0223-	41	NA (6.57)	10 (24%)	5 (12%)	26 (63%)	1
41	0343-	40	NA (6.30)	5 (12%)	7 (18%)	28 (70%)	1
42	0345-	40	NA (6.30)	5 (12%)	7 (18%)	28 (70%)	1
43	0003-	39	NA (6.93)	16 (41%)	3 (8%)	19 (49%)	1
44	0187-	39	NA (6.38)	14 (36%)	5 (13%)	20 (51%)	1
45	0037-	37	NA (5.42)	14 (38%)	1 (3%)	22 (59%)	1
46	0159-	36	NA (5.22)	16 (44%)	2 (6%)	17 (47%)	1
47	0320-	36	NA (4.39)	11 (31%)	1 (3%)	24 (67%)	1
48	0350-	34	NA (7.65)	4 (12%)	15 (44%)	15 (44%)	1
49	0310-	33	NA (5.32)	1 (3%)	14 (42%)	18 (55%)	1
50	0344-	33	NA (6.49)	4 (12%)	7 (21%)	22 (67%)	1
51	0302-	32	NA (6.59)	11 (34%)	2 (6%)	18 (56%)	1
52	0321-	32	NA (5.59)	1 (3%)	22 (69%)	9 (28%)	1
53	0120-	31	NA (7.02)	11 (35%)	3 (10%)	16 (52%)	1
54	0139-	31	NA (7.02)	11 (35%)	3 (10%)	16 (52%)	1
55	0067-	30	NA (6.79)	5 (17%)	8 (27%)	17 (57%)	1
56	0160-	30	NA (8.22)	4 (13%)	8 (27%)	18 (60%)	1
57	0236-	30	NA (6.58)	4 (13%)	3 (10%)	22 (73%)	1
58	0171-	29	NA (6.54)	10 (34%)	3 (10%)	16 (55%)	1
59	0172-	29	NA (6.54)	10 (34%)	3 (10%)	16 (55%)	1
60	0311-	29	NA (5.38)	1 (3%)	12 (41%)	16 (55%)	1
61	0044-	28	NA (5.79)	4 (14%)	2 (7%)	22 (79%)	1
62	0041-	27	NA (4.32)	13 (48%)	0 (0%)	14 (52%)	1
63	0024-	24	NA (6.55)	8 (33%)	3 (12%)	13 (54%)	1
64	0057-	24	NA (5.29)	8 (33%)	1 (4%)	15 (62%)	1
65	0351-	24	NA (4.43)	1 (4%)	4 (17%)	19 (79%)	1
66	0158-	22	NA (5.22)	3 (14%)	4 (18%)	15 (68%)	1
67	0339-	22	NA (6.54)	1 (5%)	13 (59%)	8 (36%)	1
68	0168-	21	NA (7.25)	1 (5%)	3 (14%)	16 (76%)	1
69	0233-	21	NA (5.04)	3 (14%)	4 (19%)	14 (67%)	1
70	0322-	21	NA (4.79)	5 (24%)	2 (10%)	14 (67%)	1
71	0113-	19	NA (5.45)	1 (5%)	2 (11%)	16 (84%)	1
72	0312-	19	NA (4.61)	6 (32%)	0 (0%)	13 (68%)	1
73	0313-	19	NA (4.61)	6 (32%)	0 (0%)	13 (68%)	1
74	0314-	19	NA (4.61)	6 (32%)	0 (0%)	13 (68%)	1
75	0316-	19	NA (4.61)	6 (32%)	0 (0%)	13 (68%)	1
76	0317-	19	NA (4.61)	6 (32%)	0 (0%)	13 (68%)	1
77	0325-	19	NA (4.42)	8 (42%)	0 (0%)	11 (58%)	1
78	0225-	18	NA (4.74)	11 (61%)	0 (0%)	7 (39%)	1
79	0244-	18	NA (7.38)	3 (17%)	3 (17%)	12 (67%)	1
80	0025-	17	NA (5.11)	0 (0%)	7 (41%)	10 (59%)	1
81	0227-	17	NA (5.94)	9 (53%)	1 (6%)	7 (41%)	1
82	0300-	17	NA (7.12)	8 (47%)	0 (0%)	9 (53%)	1
83	0349-	17	NA (5.94)	10 (59%)	1 (6%)	6 (35%)	1
84	0334-	16	NA (4.58)	2 (12%)	1 (6%)	13 (81%)	1
85	0318-	15	NA (6.15)	2 (13%)	3 (20%)	10 (67%)	1
86	0305-	14	NA (6.69)	5 (36%)	2 (14%)	7 (50%)	1
87	0307-	14	NA (6.69)	5 (36%)	2 (14%)	7 (50%)	1
88	0315-	14	NA (4.79)	5 (36%)	0 (0%)	9 (64%)	1
89	0324-	14	NA (5.22)	1 (7%)	2 (14%)	11 (79%)	1
90	0329-	14	NA (6.98)	2 (14%)	2 (14%)	10 (71%)	1
91	0347-	14	NA (6.57)	2 (14%)	2 (14%)	10 (71%)	1
92	0277-	13	NA (5.86)	3 (23%)	0 (0%)	9 (69%)	1
93	0323-	13	NA (5.04)	5 (38%)	0 (0%)	8 (62%)	1
94	0008-	12	NA (6.69)	6 (50%)	0 (0%)	5 (42%)	1
95	0099-	12	NA (7.18)	7 (58%)	2 (17%)	3 (25%)	1
96	0101-	12	NA (7.18)	7 (58%)	2 (17%)	3 (25%)	1
97	0102-	12	NA (5.59)	4 (33%)	0 (0%)	8 (67%)	1
98	0109-	12	NA (6.33)	5 (42%)	0 (0%)	7 (58%)	1
99	0332-	12	NA (8.24)	5 (42%)	1 (8%)	5 (42%)	1
100	0098-	11	NA (7.51)	7 (64%)	2 (18%)	2 (18%)	1
101	0100-	11	NA (6.99)	7 (64%)	2 (18%)	2 (18%)	1
102	0137-	11	NA (7.68)	4 (36%)	2 (18%)	5 (45%)	1
103	0151-	11	NA (6.71)	1 (9%)	4 (36%)	6 (55%)	1
104	0309-	11	NA (5.65)	4 (36%)	0 (0%)	7 (64%)	1
105	0346-	11	NA (4.31)	0 (0%)	5 (45%)	6 (55%)	1
106	0357-	11	NA (9.16)	2 (18%)	2 (18%)	6 (55%)	1
107	0235-	10	NA (3.52)	0 (0%)	0 (0%)	10 (100%)	1
108	0333-	10	NA (6.31)	1 (10%)	4 (40%)	5 (50%)	1
109	0348-	10	NA (4.24)	5 (50%)	1 (10%)	4 (40%)	1
110	0335-	9	NA (7.73)	1 (11%)	4 (44%)	4 (44%)	1
111	0337-	9	NA (7.73)	1 (11%)	4 (44%)	4 (44%)	1
112	0338-	9	NA (7.73)	1 (11%)	4 (44%)	4 (44%)	1
Note:
This table provides the YLS risk levels combined with 'High/Very High' indicating if either 'High' or 'Very High' was endorsed.

This table shows descriptives by cohort for different service types

Code

# Create and format the table using kable and kableExtra with enhancements
formatted_table_serviceType <- contingency_df_serviceType %>%
  kable("html", col.names = c("Index", "Cohort", "Count", "Serv_Type_Sum", "ART_Comm_Based", "ART_Res_Prog", "Behav_Manag", "Chall_Progs", "CBT", "Fam_Couns", "Grp_Couns", "Indiv_Couns", "Job_Train", "Voc_Couns", "Mentoring", "Mix_Couns", "MST", "Rem_Acad_Prog", "Rest_Comm_Serv", "Soc_Skills_Train"), caption = "Cohort Service Type Table") %>%
  kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
  row_spec(1:nrow(contingency_df_serviceType), background = c("#f2f2f2", "white")) %>%
  column_spec(1, bold = TRUE, color = "blue") %>%
  column_spec(3, background = "#ffebcc") %>%
  add_header_above(c("Cohort Information" = 4, "YLS Service Types" = 16)) %>%
  footnote(general = "This table provides xxxxx.") %>%
  kable_styling(latex_options = "scale_down") %>%
  column_spec(1:4, bold = TRUE, border_right = TRUE, width = "1.5cm") %>%
  column_spec(5:20, width = "2cm")

# Wrap the table in a div for scrolling
html_table_serviceType <- paste0(
  '<div style="height:500px; overflow-y:auto;">',
  as.character(formatted_table_serviceType),
  '</div>'
)

# Display the HTML table
HTML(html_table_serviceType)

Cohort Service Type Table
Cohort Information				YLS Service Types
Index	Cohort	Count	Serv_Type_Sum	ART_Comm_Based	ART_Res_Prog	Behav_Manag	Chall_Progs	CBT	Fam_Couns	Grp_Couns	Indiv_Couns	Job_Train	Voc_Couns	Mentoring	Mix_Couns	MST	Rem_Acad_Prog	Rest_Comm_Serv	Soc_Skills_Train
1	0303-	150	300	0	0	0	0	0	0	150	0	0	0	0	0	0	0	0	0
2	0121-	113	226	0	0	0	0	0	0	0	0	0	0	0	0	0	113	0	0
3	0135-	107	214	0	0	0	0	0	0	0	0	0	107	0	0	0	0	0	0
4	0133-	105	210	0	0	0	0	105	0	0	0	0	0	0	0	0	0	0	0
5	0111-	103	206	0	0	0	0	0	0	103	0	0	0	0	0	0	0	0	0
6	0042-	95	190	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	95
7	0140-	81	162	0	0	0	0	0	0	0	81	0	0	0	0	0	0	0	0
8	0210-	79	158	0	0	0	0	0	0	0	0	0	0	79	0	0	0	0	0
9	0222-	79	158	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	79
10	0036-	78	156	0	0	0	0	0	0	0	0	0	0	0	0	0	78	0	0
11	0304-	76	152	0	0	0	0	0	0	0	0	76	0	0	0	0	0	0	0
12	0157-	74	148	0	0	0	0	0	0	74	0	0	0	0	0	0	0	0	0
13	0034-	68	136	0	0	0	0	0	0	0	0	68	0	0	0	0	0	0	0
14	0189-	62	124	0	0	0	0	0	0	0	0	0	0	0	0	0	62	0	0
15	0091-	58	116	0	0	0	0	0	0	0	58	0	0	0	0	0	0	0	0
16	0035-	55	110	0	0	0	0	55	0	0	0	0	0	0	0	0	0	0	0
17	0272-	52	104	0	0	0	52	0	0	0	0	0	0	0	0	0	0	0	0
18	0045-	50	100	0	0	0	0	0	25	0	0	0	0	0	0	25	0	0	0
19	0114-	50	100	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	50
20	0115-	50	100	0	0	0	0	50	0	0	0	0	0	0	0	0	0	0	0
21	0141-	50	100	0	0	0	0	0	0	0	0	0	50	0	0	0	0	0	0
22	0014-	48	96	0	0	48	0	0	0	0	0	0	0	0	0	0	0	0	0
23	0007-	47	94	0	0	0	0	0	0	0	47	0	0	0	0	0	0	0	0
24	0013-	47	94	0	0	0	0	47	0	0	0	0	0	0	0	0	0	0	0
25	0112-	47	94	0	0	0	0	0	0	47	0	0	0	0	0	0	0	0	0
26	0010-	46	92	0	0	46	0	0	0	0	0	0	0	0	0	0	0	0	0
27	0011-	46	92	0	0	0	0	0	0	0	46	0	0	0	0	0	0	0	0
28	0012-	46	92	0	46	0	0	0	0	0	0	0	0	0	0	0	0	0	0
29	0108-	46	92	0	0	0	0	0	0	46	0	0	0	0	0	0	0	0	0
30	0009-	45	90	0	0	0	0	45	0	0	0	0	0	0	0	0	0	0	0
31	0005-	42	84	0	0	0	0	42	0	0	0	0	0	0	0	0	0	0	0
32	0319-	42	84	0	0	42	0	0	0	0	0	0	0	0	0	0	0	0	0
33	0004-	41	82	0	41	0	0	0	0	0	0	0	0	0	0	0	0	0	0
34	0006-	41	82	0	0	41	0	0	0	0	0	0	0	0	0	0	0	0	0
35	0188-	41	82	0	0	0	0	41	0	0	0	0	0	0	0	0	0	0	0
36	0190-	41	82	0	0	0	0	0	0	41	0	0	0	0	0	0	0	0	0
37	0191-	41	82	0	0	0	0	0	0	41	0	0	0	0	0	0	0	0	0
38	0223-	41	82	0	0	0	0	0	0	0	41	0	0	0	0	0	0	0	0
39	0343-	40	80	0	0	0	0	0	0	0	40	0	0	0	0	0	0	0	0
40	0345-	40	80	0	0	0	0	0	0	40	0	0	0	0	0	0	0	0	0
41	0003-	39	78	0	0	0	0	0	0	0	39	0	0	0	0	0	0	0	0
42	0187-	39	78	0	39	0	0	0	0	0	0	0	0	0	0	0	0	0	0
43	0037-	37	74	0	0	0	0	37	0	0	0	0	0	0	0	0	0	0	0
44	0159-	36	72	0	36	0	0	0	0	0	0	0	0	0	0	0	0	0	0
45	0320-	36	36	36	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
46	0350-	34	68	0	0	0	0	0	0	0	34	0	0	0	0	0	0	0	0
47	0310-	33	66	0	0	0	0	0	0	0	33	0	0	0	0	0	0	0	0
48	0344-	33	66	0	0	0	0	0	0	33	0	0	0	0	0	0	0	0	0
49	0302-	32	64	0	0	0	0	0	0	0	0	0	0	0	0	0	0	32	0
50	0321-	32	64	0	0	0	0	0	0	0	0	0	0	0	0	0	0	32	0
51	0120-	31	62	0	0	0	0	31	0	0	0	0	0	0	0	0	0	0	0
52	0139-	31	62	0	0	0	0	31	0	0	0	0	0	0	0	0	0	0	0
53	0067-	30	60	0	0	0	0	0	0	0	0	0	0	30	0	0	0	0	0
54	0160-	30	60	0	0	0	0	0	0	0	30	0	0	0	0	0	0	0	0
55	0236-	30	60	0	0	0	0	30	0	0	0	0	0	0	0	0	0	0	0
56	0171-	29	58	0	0	0	0	0	0	0	0	0	29	0	0	0	0	0	0
57	0172-	29	58	0	0	0	0	0	0	0	0	0	0	0	0	0	29	0	0
58	0311-	29	58	0	0	0	0	0	0	0	0	0	0	0	0	0	0	29	0
59	0044-	28	56	0	0	0	0	0	0	0	0	0	0	28	0	0	0	0	0
60	0041-	27	54	0	0	0	0	0	0	27	0	0	0	0	0	0	0	0	0
61	0024-	24	48	0	24	0	0	0	0	0	0	0	0	0	0	0	0	0	0
62	0057-	24	48	0	0	0	0	0	0	0	0	0	0	0	0	24	0	0	0
63	0351-	24	48	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	24
64	0158-	22	44	0	0	0	0	0	0	0	0	0	0	22	0	0	0	0	0
65	0339-	22	44	0	0	0	0	0	0	0	0	0	0	22	0	0	0	0	0
66	0168-	21	42	0	0	0	0	0	0	0	0	0	0	0	21	0	0	0	0
67	0233-	21	21	21	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
68	0322-	21	42	0	0	0	0	21	0	0	0	0	0	0	0	0	0	0	0
69	0113-	19	38	0	0	0	0	0	0	0	0	19	0	0	0	0	0	0	0
70	0312-	19	38	0	0	0	0	0	0	0	19	0	0	0	0	0	0	0	0
71	0313-	19	38	0	0	19	0	0	0	0	0	0	0	0	0	0	0	0	0
72	0314-	19	38	0	0	0	0	0	0	19	0	0	0	0	0	0	0	0	0
73	0316-	19	38	0	0	0	0	0	0	0	0	19	0	0	0	0	0	0	0
74	0317-	19	38	0	0	0	0	0	0	19	0	0	0	0	0	0	0	0	0
75	0325-	19	19	19	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
76	0225-	18	36	0	0	0	0	18	0	0	0	0	0	0	0	0	0	0	0
77	0244-	18	36	0	0	0	0	0	0	0	0	0	0	0	18	0	0	0	0
78	0025-	17	34	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	17
79	0227-	17	34	0	0	0	0	0	0	17	0	0	0	0	0	0	0	0	0
80	0300-	17	34	0	0	0	0	17	0	0	0	0	0	0	0	0	0	0	0
81	0349-	17	34	0	0	0	0	0	0	0	17	0	0	0	0	0	0	0	0
82	0334-	16	32	0	0	0	0	0	0	0	0	0	0	0	16	0	0	0	0
83	0318-	15	30	0	0	0	0	0	0	0	0	0	0	0	0	15	0	0	0
84	0305-	14	28	0	14	0	0	0	0	0	0	0	0	0	0	0	0	0	0
85	0307-	14	28	0	0	0	0	0	0	14	0	0	0	0	0	0	0	0	0
86	0315-	14	28	0	0	0	0	0	0	0	0	0	0	0	0	0	14	0	0
87	0324-	14	28	0	0	0	0	0	0	14	0	0	0	0	0	0	0	0	0
88	0329-	14	28	0	0	0	0	0	0	0	14	0	0	0	0	0	0	0	0
89	0347-	14	28	0	0	0	0	0	0	0	14	0	0	0	0	0	0	0	0
90	0277-	13	26	0	0	0	0	0	0	13	0	0	0	0	0	0	0	0	0
91	0323-	13	26	0	0	0	0	0	0	13	0	0	0	0	0	0	0	0	0
92	0008-	12	24	0	12	0	0	0	0	0	0	0	0	0	0	0	0	0	0
93	0099-	12	24	0	0	0	0	12	0	0	0	0	0	0	0	0	0	0	0
94	0101-	12	24	0	0	12	0	0	0	0	0	0	0	0	0	0	0	0	0
95	0102-	12	24	0	0	0	0	0	12	0	0	0	0	0	0	0	0	0	0
96	0109-	12	24	0	12	0	0	0	0	0	0	0	0	0	0	0	0	0	0
97	0332-	12	24	0	0	0	0	0	0	0	0	0	0	12	0	0	0	0	0
98	0098-	11	22	0	11	0	0	0	0	0	0	0	0	0	0	0	0	0	0
99	0100-	11	22	0	0	0	0	0	0	0	11	0	0	0	0	0	0	0	0
100	0137-	11	22	0	0	0	0	11	0	0	0	0	0	0	0	0	0	0	0
101	0151-	11	22	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11	0
102	0309-	11	22	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11	0
103	0346-	11	22	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11
104	0357-	11	22	0	0	0	0	0	0	11	0	0	0	0	0	0	0	0	0
105	0235-	10	20	0	0	0	0	0	0	10	0	0	0	0	0	0	0	0	0
106	0333-	10	20	0	0	0	0	10	0	0	0	0	0	0	0	0	0	0	0
107	0348-	10	20	0	0	0	0	0	0	10	0	0	0	0	0	0	0	0	0
108	0335-	9	18	0	0	0	0	9	0	0	0	0	0	0	0	0	0	0	0
109	0337-	9	18	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	9
110	0338-	9	18	0	0	0	0	0	0	0	9	0	0	0	0	0	0	0	0
111	0085-	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
112	0087-	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
Note:
This table provides xxxxx.

Code

str(merged$service )

 NULL

Additional Output and Tables

Settings Table
Service Type Histograms by Cohort (Temp Table)

This table shows descriptives by cohort for different settings

Code

# Display the HTML table in RStudio Viewer or a web browser
formatted_table_with_scroll <- htmltools::HTML(html_table_with_scroll)
formatted_table_with_scroll

Cohort Setting Table
Cohort Information			Settings
Index	Cohort	Count	C	R
1	0003-	39	0	39
2	0004-	41	0	41
3	0005-	42	0	42
4	0006-	41	0	41
5	0007-	47	0	47
6	0008-	12	0	12
7	0009-	45	0	45
8	0010-	46	0	46
9	0011-	46	0	46
10	0012-	46	0	46
11	0013-	47	0	47
12	0014-	48	0	48
13	0024-	24	0	24
14	0025-	17	17	0
15	0034-	68	0	68
16	0035-	55	0	55
17	0036-	78	0	78
18	0037-	37	0	37
19	0041-	27	27	0
20	0042-	95	95	0
21	0044-	28	28	0
22	0045-	50	50	0
23	0057-	24	24	0
24	0067-	30	30	0
25	0085-	57	0	57
26	0087-	57	0	57
27	0091-	58	0	58
28	0098-	11	0	11
29	0099-	12	0	12
30	0100-	11	0	11
31	0101-	12	0	12
32	0102-	12	12	0
33	0108-	46	0	46
34	0109-	12	0	12
35	0111-	103	0	103
36	0112-	47	0	47
37	0113-	19	0	19
38	0114-	50	0	50
39	0115-	50	0	50
40	0120-	31	0	31
41	0121-	113	0	113
42	0133-	105	0	105
43	0135-	107	0	107
44	0137-	11	0	11
45	0139-	31	0	31
46	0140-	81	0	81
47	0141-	50	0	50
48	0151-	11	11	0
49	0157-	74	0	74
50	0158-	22	22	0
51	0159-	36	0	36
52	0160-	30	0	30
53	0168-	21	0	21
54	0171-	29	0	29
55	0172-	29	0	29
56	0187-	39	0	39
57	0188-	41	0	41
58	0189-	62	0	62
59	0190-	41	0	41
60	0191-	41	0	41
61	0210-	79	79	0
62	0222-	79	0	79
63	0223-	41	0	41
64	0225-	18	0	18
65	0227-	17	0	17
66	0233-	21	21	0
67	0235-	10	0	10
68	0236-	30	0	30
69	0244-	18	18	0
70	0272-	52	0	52
71	0277-	13	13	0
72	0300-	17	0	17
73	0302-	32	0	32
74	0303-	150	0	150
75	0304-	76	0	76
76	0305-	14	0	14
77	0307-	14	0	14
78	0309-	11	0	11
79	0310-	33	33	0
80	0311-	29	29	0
81	0312-	19	0	19
82	0313-	19	0	19
83	0314-	19	0	19
84	0315-	14	0	14
85	0316-	19	0	19
86	0317-	19	0	19
87	0318-	15	15	0
88	0319-	42	0	42
89	0320-	36	36	0
90	0321-	32	32	0
91	0322-	21	21	0
92	0323-	13	13	0
93	0324-	14	14	0
94	0325-	19	19	0
95	0329-	14	0	14
96	0332-	12	12	0
97	0333-	10	10	0
98	0334-	16	16	0
99	0335-	9	0	9
100	0337-	9	0	9
101	0338-	9	0	9
102	0339-	22	22	0
103	0343-	40	0	40
104	0344-	33	0	33
105	0345-	40	0	40
106	0346-	11	11	0
107	0347-	14	14	0
108	0348-	10	0	10
109	0349-	17	0	17
110	0350-	34	34	0
111	0351-	24	24	0
112	0357-	11	0	11
Note:
This table provides a breakdown of cohorts by Setting.

This histogram shows cohort by service type

Code

# Install and load necessary packages
# install.packages("tidyr")
# install.packages("ggplot2")
# library(tidyr)
# library(ggplot2)
library(plotly)

# Reshape the data: gather service types into a single column
contingency_long <- contingency_df_serviceType %>%
  gather(key = "service_type", value = "count", -Index, -Cohort, -Count, -Service_Type_Sum)

# Filter out rows with zero counts (if needed)
contingency_long_filtered <- contingency_long %>%
  filter(count > 0)

plot_data <- contingency_long_filtered %>%
  group_by(Cohort, service_type) %>%
  summarise(total_count = sum(count)) %>%
  ungroup()

p <- plot_ly(plot_data, x = ~service_type, y = ~total_count, type = 'bar', name = ~Cohort) %>%
  layout(
    title = "Histogram of Service Types by Cohort",
    xaxis = list(title = "Service Type"),
    yaxis = list(title = "Count"),
    updatemenus = list(
      list(
        buttons = lapply(unique(plot_data$Cohort), function(cohort) {
          list(
            method = "restyle",
            args = list("transforms[0].value", cohort),
            label = cohort
          )
        }),
        direction = "down",
        x = 0.1,
        y = 1.1,
        showactive = TRUE
      )
    ),
    transforms = list(
      list(
        type = 'filter',
        target = ~Cohort,
        operation = '=',
        value = unique(plot_data$Cohort)[1]
      )
    )
  )

# Render the plot
p

Code

## Cohort Duration Table

# ::: blue
# > **This table shows descriptives by cohort for different durations**
# #| echo: true
# #| warning: false
# #| output: false
# 
# #column_names <- names(merged)
# #print(column_names)
# 
# # Assuming you find the exact column name as "Points Received: \nAmount of Service - Duration"
# exact_column_name <- "Points Received: \r\nAmount of Service - Duration"
# 
# # Create a new column 'duration' that is a copy of the original column
# merged$duration <- merged[[exact_column_name]]
# 
# # Ensure 'duration' column is factors
# merged$duration <- as.factor(merged$duration)
# 
# # Verify the new column
# #str(merged$duration)
# 
# # Step 1: Create contingency table for 'duration' by 'Cohort'
# contingency_table_duration <- table(merged$Cohort, merged$duration)
# 
# # Step 2: Convert the contingency table to a data frame
# contingency_df_duration <- as.data.frame.matrix(contingency_table_duration)
# 
# # Step 3: Add a new column with the total count of individuals in each cohort
# contingency_df_duration$Total <- rowSums(contingency_df_duration)
# 
# # Step 4: Rename the 'Total' column to 'Count'
# contingency_df_duration <- contingency_df_duration %>%
#   rownames_to_column(var = "Cohort") %>%
#   rename(Count = Total)
# 
# # Step 5: Add an index column
# contingency_df_duration <- contingency_df_duration %>%
#   mutate(Index = row_number())
# 
# # Step 6: Reorder the columns to have the index as the first column
# contingency_df_duration <- contingency_df_duration %>%
#   select(Index, Cohort, Count, everything())
# 
# # Step 7: Create and format the table using kable and kableExtra with enhancements
# formatted_table_duration <- contingency_df_duration %>%
#   kable("html", col.names = names(contingency_df_duration), caption = "Cohort Duration Table") %>%
#   kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
#   row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
#   row_spec(1:nrow(contingency_df_duration), background = c("#f2f2f2", "white")) %>%
#   column_spec(1, bold = TRUE, color = "blue") %>%
#   column_spec(3, background = "#ffebcc") %>%
#   add_header_above(c("Cohort Information" = 3, "Durations" = ncol(contingency_df_duration) - 3)) %>%
#   footnote(general = "This table provides a breakdown of cohorts by Duration.")

Code

# Display the formatted table for Setting
##formatted_table_duration