This notebook provides a running example for predicting the following for a set of currently hospitalized patients:
SEVERE
in code).We also show how you could test different future scenarios by loading a set of future hospitalizations you defined.
model = readRDS('./cache/israeli_model.Rds')
# the number of simulated runs per patient, 10K is our recommendation
N_MONTE_CARLO_RUNS = 10*1000
# the maximum number of transitions in the Multi-State model
# this is used to exclude outlier paths with more transitions than
# the maximal number of transitions observed in the data
MAX_PATH_LENGTH = 9
T
: the day from which we predictT = Sys.Date() # use the date of today
PREDICT_N_DAYS_AHEAD = 30 # predict one month ahead
In this example we generate random stub data, but you can use your own data by reading a dataframe which complies with the following conventions:
sex
: MALE (1), FEMALE (0)age
: in yearsstate_at_hospitalization
: either MILD
(2), MODERATE
(3) or SEVERE
(4). Note, these correspond with the states Moderate, Severe, Critical which are used in the paperdate_of_hospitalization
: date in the format ‘yyyy-mm-dd’current_state
: same values as 3 abovedays_spent_in_current_state
: non-negative integerwas_severe
: Binary indicator. Visited severe state at least once before entering the current state (1), or not (0)Note: The data generated here is not intended to reflect any true distribution of patients but only serves as an example of the needed data format. Do not base predictions on this stub data.
hospitalized_patients_df = generate_random_hospitalized_patients_df(n_hospitalized_patients = 30,
T = T,
PREDICT_N_DAYS_AHEAD = PREDICT_N_DAYS_AHEAD)
# alternatively:
# hospitalized_patients_df = read.csv('sourasky_arrival_df.csv')
formattable(head(hospitalized_patients_df, n=5))
sex | age | state_at_hospitalization | date_of_hospitalization | current_state | days_spent_in_current_state | was_severe |
---|---|---|---|---|---|---|
1 | 69 | 3 | 2020-10-15 | 3 | 5 | FALSE |
1 | 75 | 3 | 2020-10-16 | 4 | 2 | FALSE |
1 | 41 | 2 | 2020-10-14 | 2 | 3 | FALSE |
1 | 65 | 3 | 2020-10-18 | 3 | 2 | FALSE |
0 | 10 | 2 | 2020-10-17 | 2 | 3 | FALSE |
hospitalized_patients = lapply(1:nrow(hospitalized_patients_df), function(row_idx) {
row = hospitalized_patients_df[row_idx,]
construct_patient_object(
sex = row$sex,
age = row$age,
state_at_hospitalization = row$state_at_hospitalization,
date_of_hospitalization = row$date_of_hospitalization,
current_state = row$current_state,
days_spent_in_current_state = row$days_spent_in_current_state,
was_severe = row$was_severe,
T=T
)
})
Note: The stub data generated here is not intended to reflect any true distribution of patients but only serves as an example of the needed data format. Do not base predictions on this stub data.
future_arrival_patients_df = generate_future_arrival_patients_df(n_future_arrivals = 100,
T = T,
PREDICT_N_DAYS_AHEAD = PREDICT_N_DAYS_AHEAD)
# alternatively:
# future_arrival_patients_df = read.csv('PATH TO YOUR FUTURE ARRIVAL SCENARIO CSV')
formattable(head(future_arrival_patients_df, n=5))
sex | age | state_at_hospitalization | date_of_hospitalization |
---|---|---|---|
1 | 62 | 2 | 2020-11-12 |
1 | 84 | 4 | 2020-11-06 |
1 | 39 | 3 | 2020-11-02 |
1 | 78 | 3 | 2020-11-14 |
0 | 34 | 4 | 2020-10-29 |
future_arrival_patients = lapply(1:nrow(future_arrival_patients_df), function(row_idx) {
row = future_arrival_patients_df[row_idx, ]
construct_future_arrival_patient_object(
sex = row$sex,
age = row$age,
state_at_hospitalization = row$state_at_hospitalization,
date_of_hospitalization = row$date_of_hospitalization
)
})
INCLUDE_FUTURE_ARRIVAL_PATIENTS = TRUE
all_patients = hospitalized_patients
if (INCLUDE_FUTURE_ARRIVAL_PATIENTS) all_patients = append(all_patients,
future_arrival_patients)
all_patients = run_monte_carlo_per_patient(all_patients,
N_MONTE_CARLO_RUNS,
MAX_PATH_LENGTH)
The plots below displays the mean number of hospitalized patients, over all monte carlo simulations.
For each day, we plot:
The plot below is simialr to that above, except it displays the mean number of hospitalized patients in a Critical state (SEVERE
)
plot_expected_number_of_patients_each_day(all_patients,
first_date = T,
last_date = T + PREDICT_N_DAYS_AHEAD,
states_to_include = c(SEVERE))
Below you can see the expected proportion of deaths by days since hospitalization. That is, we are counting deaths among all patients 5 days since hospitalization, 10 days hospitalization etc.; averaging over all sampled monte carlo paths.
This analysis includes future arrival patients if INCLUDE_FUTURE_ARRIVAL_PATIENTS = TRUE
Note: deaths occuring after T + PREDICT_N_DAYS_AHEAD
are excluded from the displayed statistics.
formattable(expected_deaths_by_days_since_hospitalization(all_patients,
T,
PREDICT_N_DAYS_AHEAD))
days_since_hospitalization | n_expected_deaths | expected_proportion_of_deaths |
---|---|---|
5 | 2.9932 | 0.02302462 |
10 | 6.6030 | 0.05079231 |
15 | 8.0962 | 0.06227846 |
20 | 8.9931 | 0.06917769 |
25 | 9.6024 | 0.07386462 |
30 | 9.9620 | 0.07663077 |