library(ggplot2)
library(dplyr)
library(glue)
options(repr.plot.width=8, repr.plot.height=3, repr.plot.res=300)

Warning message:
"package 'ggplot2' was built under R version 3.6.3"Warning message:
"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Warning message:
"package 'glue' was built under R version 3.6.3"
Attaching package: 'glue'

The following object is masked from 'package:dplyr':

    collapse


rds_file <- "C:/RMV2.0 Workshop/something/Project_02.19.rds"
Project <- readRDS(rds_file)
cat("Your project has the following models.",fill=T)
print(names(Project$model_obj_list$models_list))

Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo

Your project has the following models.
[1] "Data_pre_2_2.csv"


# Uncomment to open a graphical View to browse the data structure.
#View(Project)
# This only works in RStudio, so I'm just including screenshots below.


cat("Project file had the following structure:",fill=T)
print(names(Project))
# Typical for screening analysis
# [1] "files_path_sc"      "fahrenheit"         "Data_pre_summary_0" "Data_pre_summary"   "files_names"        "model_obj_list"
# [7] "p_name_sc"          "Data_pre"           "pre_dir_sc"         "files_names_mod"    "Model"
# Typical for savings analysis
# [1] "Data_pre_summary_0"  "load"                "files_names"         "post_names_sav"      "pre_names_sav"
# [6] "Data_pre"            "files_names_mod"     "p_name_sav"          "post_path_sav"       "Data_pre_summary"
# [11] "fahrenheit"          "post_dir_sav"        "model_obj_list"      "pre_path_sav"        "Data_post"
# [16] "Data_post_summary"   "nre_done"            "Data_post_summary_0" "pre_dir_sav"         "Model"

Project file had the following structure:
 [1] "files_path_sc"      "fahrenheit"         "Data_pre_summary_0"
 [4] "Data_pre_summary"   "files_names"        "model_obj_list"    
 [7] "p_name_sc"          "Data_pre"           "pre_dir_sc"        
[10] "files_names_mod"    "Model"


cat("Project file has Data_post? (is savings analysis?)",fill=T)
print("Data_post" %in% names(Project))
isSavings = "Data_post" %in% names(Project)
projectType <- if (isSavings) {"savings!"} else {"screenings!"}

cat("Type of models used?",fill=T)
print(Project$Model)
if (Project$Model != "TOWT") {
  cat("Warning: this script only understands TOWT models.",fill=T)
}

Project file has Data_post? (is savings analysis?)
[1] FALSE
Type of models used?
[1] "TOWT"


# For the next part, need to pick a model from the models_list by number.
# There is only one data file in the example project, so only one option.
i <- 1
# The model is stored as a list with these named elements.
print(names(Project$model_obj_list$models_list[[i]]$towt_model))
# For convenience, attach the model into the workspace.
# Ignore the warnings about objects in the environment being masked.
attach(Project$model_obj_list$models_list[[i]]$towt_model,name="towt_model")

[1] "timeVec"       "Baseline"      "PredMatrix"    "WeightMatrix" 
[5] "trainTime"     "trainBaseline" "regOutList"    "timescaleDays"
[9] "pointlist"


dataTime <- if (isSavings) {timeVec} else {trainTime}
npoints = length(dataTime)
t0 = min(dataTime,na.rm=T)
t1 = max(dataTime,na.rm=T)
deltaT = as.numeric(difftime(t1,t0,units="days"))
nsegments = max(1,ceiling(deltaT/timescaleDays))
segmentwidth = (npoints-1)/nsegments
segmentDeltaT = deltaT / nsegments
# This is store in the project file, but just to show how it is computed:
pointlist = floor(sort(npoints-segmentwidth*(0:nsegments))+0.001)
# This is not diretly stored in the project file
tCenters = trainTime[pointlist]


npoints


t0
t1
deltaT

[1] "2006-01-01 01:00:00 MST"

[1] "2006-12-31 23:00:00 MST"


# The hyperparameter timescaleDays
timescaleDays


# We'll make at least this many segments
deltaT/timescaleDays


# Rounded up
nsegments


# The indices of timestamps to be the "center" of each segment.
# Because of (0:nsegments), we get nsegments+1 values.
# It's like rounding up, plus a little extra.
pointlist


# The timestamps used for weights
tCenters

[1] "2006-01-01 01:00:00 MST" "2006-03-15 01:00:00 MST"
[3] "2006-05-27 00:00:00 MST" "2006-08-08 00:00:00 MST"
[5] "2006-10-19 23:00:00 MST" "2006-12-31 23:00:00 MST"


length(regOutList)
print(names(regOutList[[1]]))

[1] "training"    "predictions" "amod"        "bmod"        "tempKnots"  
[6] "occVec"      "okocc"


nModelRuns <- length(regOutList)
cat(paste("There are ",nModelRuns,"model blocks"),fill=T)
for (irun in 1:nModelRuns) {
  cat(paste("Model block ", irun),fill=T)
  regOut <- regOutList[[irun]]

# Explain how coefficients are linked to input variables in model
#print(regOut$amod$terms)
cat("The formula for the regression:",fill=T)
print(deparse(regOut$amod$terms))
cat("The type of each variable:",fill=T)
print(attr(regOut$amod$terms,"dataClasses"))
cat("The temperature knots used to define temperature variables X1, X2, ...:",fill=T)
print(regOut$tempKnots)

cat(paste("Number of parameters in occupied period model (excluding tempKnots)  :",regOut$amod$rank),fill=T)
cat(paste("Number of parameters in unoccupied period model (excluding tempKnots):",regOut$bmod$rank),fill=T)
cat(fill=T)

}

There are  6 model blocks
Model block  1
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98

Model block  2
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98

Model block  3
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98

Model block  4
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98

Model block  5
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98

Model block  6
The formula for the regression:
[1] "loadVec ~ ftow + X1 + X2 + X3 + X4 + X5 + 0"
The type of each variable:
  loadVec      ftow        X1        X2        X3        X4        X5 (weights) 
"numeric"  "factor" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
The temperature knots used to define temperature variables X1, X2, ...:
[1]  4.444444 12.777778 18.333333 26.666667
Number of parameters in occupied period model (excluding tempKnots)  : 80
Number of parameters in unoccupied period model (excluding tempKnots): 98


# Show some or all of the coefficients in a table
cat("Regression fit coefficients for occupied periods:")
#print(head(regOut$amod$coefficients))
#print(tail(regOut$amod$coefficients))
print(regOut$amod)
# Result: e.g. ftow33 ... ftow158, X1 ... X5
# Not all ftow factors appear

cat("Regression fit coefficients for unoccupied periods:")
#print(head(regOut$bmod$coefficients))
#print(tail(regOut$bmod$coefficients))
print(regOut$bmod)
# Result: e.g. ftow1 ... ftow168, X1 ... X5

Regression fit coefficients for occupied periods:
Call:
lm(formula = loadVec ~ . + 0, data = dframe, subset = okocc, 
    weights = weightvec, na.action = na.exclude)

Coefficients:
  ftow33    ftow34    ftow35    ftow36    ftow37    ftow38    ftow39    ftow40  
 889.513  1129.579  1083.434  1055.898  1030.532  1032.293  1053.327  1055.209  
  ftow41    ftow42    ftow43    ftow44    ftow45    ftow57    ftow58    ftow59  
1086.492  1052.536   890.060   781.562   687.989   956.480  1239.358  1193.083  
  ftow60    ftow61    ftow62    ftow63    ftow64    ftow65    ftow66    ftow67  
1166.313  1141.000  1148.110  1175.762  1181.462  1211.249  1164.571   973.030  
  ftow68    ftow69    ftow70    ftow81    ftow82    ftow83    ftow84    ftow85  
 850.676   746.600   652.988   965.799  1251.020  1207.634  1182.726  1152.299  
  ftow86    ftow87    ftow88    ftow89    ftow90    ftow91    ftow92    ftow93  
1162.257  1190.022  1188.593  1217.781  1172.343   980.118   861.779   752.962  
  ftow94   ftow105   ftow106   ftow107   ftow108   ftow109   ftow110   ftow111  
 651.490   954.417  1203.931  1160.355  1132.017  1107.869  1115.496  1140.647  
 ftow112   ftow113   ftow114   ftow115   ftow116   ftow117   ftow118   ftow129  
1143.676  1169.399  1124.270   939.357   831.399   719.134   631.937   970.610  
 ftow130   ftow131   ftow132   ftow133   ftow134   ftow135   ftow136   ftow137  
1249.137  1209.257  1187.430  1162.131  1171.442  1198.762  1200.905  1228.215  
 ftow138   ftow139   ftow140   ftow141   ftow142   ftow153   ftow154   ftow155  
1183.935   985.538   866.343   751.631   653.014   624.374   733.561   685.567  
 ftow156   ftow157   ftow158        X1        X2        X3        X4        X5  
 663.234   661.950   662.021     1.851     4.336    27.944     7.310    45.556  

Regression fit coefficients for unoccupied periods:
Call:
lm(formula = loadVec ~ . + 0, data = dframe, subset = !okocc, 
    weights = weightvec, na.action = na.exclude)

Coefficients:
   ftow1     ftow2     ftow3     ftow4     ftow5     ftow6     ftow7     ftow8  
273.1739  270.1190  271.7262  272.8517  279.2212  293.7085  321.3417  352.5974  
   ftow9    ftow10    ftow11    ftow12    ftow13    ftow14    ftow15    ftow16  
350.7285  316.0362  268.5449  239.1591  234.6808  246.7670  244.2578  236.3076  
  ftow17    ftow18    ftow19    ftow20    ftow21    ftow22    ftow23    ftow24  
271.9386  338.8820  365.6205  358.6873  350.0431  338.1211  318.8483  295.9443  
  ftow25    ftow26    ftow27    ftow28    ftow29    ftow30    ftow31    ftow32  
279.7471  319.8969  322.2569  322.8331  330.4514  362.2822  445.7943  547.8207  
  ftow46    ftow47    ftow48    ftow49    ftow50    ftow51    ftow52    ftow53  
659.3880  481.4336  347.8056  317.5771  319.1767  319.8283  320.9749  327.9593  
  ftow54    ftow55    ftow56    ftow71    ftow72    ftow73    ftow74    ftow75  
361.2094  447.8091  568.7063  511.9503  355.3213  321.6388  318.0751  319.8035  
  ftow76    ftow77    ftow78    ftow79    ftow80    ftow95    ftow96    ftow97  
320.1643  327.0424  361.1173  449.3805  569.4319  506.8513  355.8040  322.2515  
  ftow98    ftow99   ftow100   ftow101   ftow102   ftow103   ftow104   ftow119  
314.3528  315.8488  315.6444  322.8590  358.7039  441.9594  563.9345  491.8609  
 ftow120   ftow121   ftow122   ftow123   ftow124   ftow125   ftow126   ftow127  
352.5433  320.4745  318.0097  317.8134  318.6468  324.3257  358.6189  444.9453  
 ftow128   ftow143   ftow144   ftow145   ftow146   ftow147   ftow148   ftow149  
569.6498  504.8645  353.6033  321.0042  266.5064  267.2594  268.3067  275.8720  
 ftow150   ftow151   ftow152   ftow159   ftow160   ftow161   ftow162   ftow163  
291.1315  412.7272  488.8743  566.0680  426.6568  453.0773  457.9925  372.8548  
 ftow164   ftow165   ftow166   ftow167   ftow168        X1        X2        X3  
350.2605  345.8815  333.8005  311.7801  290.0373   -1.0110   -0.8148    3.1848  
      X4        X5  
 22.2031    7.6776


# Build dataframe from timeVec (shape [n]) and WeightMatrix (shape [26,n]), transposed
df <- data.frame(
  day = timeVec,
  t_weights=t(WeightMatrix)
)
# "Melt" from wide table to long table (day, variable, value) for the plot
df2 <- reshape2::melt(data=df,id.vars="day")

#library(IRdisplay)
#display(head(df))
#display(tail(df))
head(df)
tail(df)


## Plot training weights, overlaid together
figtitle0 <- if (isSavings) {
  "Raw weights for weighted average of baseline models in post-period\n(Denominator is sum of weights)"
  } else {
    "Weights used to generate baseline models\n(Weighted least squares linear regression)"
  }
figtitle <- glue("{figtitle0}
                  deltaT = {format(deltaT,digits=4)} days, \\
                  timescaleDays = {timescaleDays}, \\
                  segmentDays = {format(segmentDeltaT, digits=4)}")
fig <- ggplot(data = df2, aes(x = day, y = value, colour = variable)) + geom_line() +
  theme(aspect.ratio=0.3) +
  ggtitle(figtitle)
print(fig)


t0 <- as.POSIXct("2007-01-01 00:00:00")
predTime = seq(t0, by="hour", length.out=8760)

myweights=numeric()
for (irun in 1:nModelRuns) {
    tcenter = dataTime[pointlist[irun]]
    tDiffPred = as.numeric(difftime(tcenter,predTime,units="days"))
    # Statistical weight for prediction period
    weightvecPred = timescaleDays^2/(timescaleDays^2 + tDiffPred^2)
    # The extra parentheses to avoid storing column name 'weightvecPred'
    myweights=cbind(myweights,(weightvecPred))
}

myweights.df = data.frame(
  day = predTime,
  p_weights=myweights
)
head(myweights.df)
tail(myweights.df)


figtitle <- glue("Raw weights for weighted average of baseline models in post-period \n\\
(Denominator is sum of weights) \n\\
deltaT = {format(deltaT,digits=4)} days, \\
timescaleDays = {timescaleDays}, \\
segmentDays = {format(segmentDeltaT, digits=4)}")
fig <- ggplot(data = reshape2::melt(data=myweights.df,id.vars="day"),
              aes(x = day, y = value, colour = variable)) + geom_line() +
  theme(aspect.ratio=0.3) +
  ggtitle(figtitle)
print(fig)


myweights.df2 = data.frame(
  day = predTime,
  weights=myweights/apply(myweights,1,sum)
)
head(myweights.df2)


fig <- ggplot(data = reshape2::melt(data=myweights.df2,id.vars="day"),
              aes(x = day, y = value, fill = variable)) +
    geom_area(alpha=0.6 , size=1, colour="black") +
    theme(aspect.ratio=0.3) +
    ggtitle(figtitle)

print(fig)


## Clean up: detach scope
detach("towt_model")


## Clean up: remove global variables from workspace
rm(rds_file, isSavings, projectType, i, df, df2, tCenters, nModelRuns, irun, regOut)
rm(Project)

day	t_weights.1	t_weights.2	t_weights.3	t_weights.4	t_weights.5	t_weights.6
2006-01-01 01:00:00	1.0000000	0.6031722	0.2754743	0.1445325	0.08680245	0.05733938
2006-01-01 02:00:00	0.9999998	0.6034455	0.2755883	0.1445796	0.08682508	0.05735172
2006-01-01 03:00:00	0.9999991	0.6037189	0.2757023	0.1446267	0.08684772	0.05736407
2006-01-01 04:00:00	0.9999981	0.6039924	0.2758164	0.1446738	0.08687037	0.05737643
2006-01-01 05:00:00	0.9999966	0.6042659	0.2759306	0.1447209	0.08689303	0.05738878
2006-01-01 06:00:00	0.9999946	0.6045396	0.2760448	0.1447681	0.08691570	0.05740114

	day	t_weights.1	t_weights.2	t_weights.3	t_weights.4	t_weights.5	t_weights.6
8753	2006-12-31 18:00:00	0.05740114	0.08691570	0.1447681	0.2760448	0.6045396	0.9999946
8754	2006-12-31 19:00:00	0.05738878	0.08689303	0.1447209	0.2759306	0.6042659	0.9999966
8755	2006-12-31 20:00:00	0.05737643	0.08687037	0.1446738	0.2758164	0.6039924	0.9999981
8756	2006-12-31 21:00:00	0.05736407	0.08684772	0.1446267	0.2757023	0.6037189	0.9999991
8757	2006-12-31 22:00:00	0.05735172	0.08682508	0.1445796	0.2755883	0.6034455	0.9999998
8758	2006-12-31 23:00:00	0.05733938	0.08680245	0.1445325	0.2754743	0.6031722	1.0000000

day	p_weights.1	p_weights.2	p_weights.3	p_weights.4	p_weights.5	p_weights.6
2007-01-01 00:00:00	0.05732704	0.08677982	0.1444855	0.2753603	0.6028990	0.9999998
2007-01-01 01:00:00	0.05731470	0.08675721	0.1444384	0.2752465	0.6026259	0.9999991
2007-01-01 02:00:00	0.05730237	0.08673460	0.1443914	0.2751327	0.6023529	0.9999981
2007-01-01 03:00:00	0.05729004	0.08671200	0.1443445	0.2750190	0.6020800	0.9999966
2007-01-01 04:00:00	0.05727771	0.08668941	0.1442975	0.2749053	0.6018072	0.9999946
2007-01-01 05:00:00	0.05726539	0.08666683	0.1442506	0.2747917	0.6015345	0.9999923

	day	p_weights.1	p_weights.2	p_weights.3	p_weights.4	p_weights.5	p_weights.6
8755	2007-12-31 18:00:00	0.01498407	0.01843566	0.02321821	0.03011538	0.04054835	0.05737643
8756	2007-12-31 19:00:00	0.01498238	0.01843337	0.02321498	0.03011061	0.04054094	0.05736407
8757	2007-12-31 20:00:00	0.01498069	0.01843107	0.02321174	0.03010585	0.04053354	0.05735172
8758	2007-12-31 21:00:00	0.01497901	0.01842878	0.02320850	0.03010109	0.04052614	0.05733938
8759	2007-12-31 22:00:00	0.01497733	0.01842648	0.02320527	0.03009633	0.04051874	0.05732704
8760	2007-12-31 23:00:00	0.01497564	0.01842419	0.02320203	0.03009157	0.04051134	0.05731470

day	weights.1	weights.2	weights.3	weights.4	weights.5	weights.6
2007-01-01 00:00:00	0.02645638	0.04004881	0.06667991	0.1270785	0.2782374	0.4614990
2007-01-01 01:00:00	0.02645642	0.04004705	0.06667266	0.1270535	0.2781716	0.4615987
2007-01-01 02:00:00	0.02645646	0.04004530	0.06666542	0.1270286	0.2781059	0.4616984
2007-01-01 03:00:00	0.02645651	0.04004356	0.06665820	0.1270036	0.2780402	0.4617979
2007-01-01 04:00:00	0.02645656	0.04004182	0.06665099	0.1269787	0.2779746	0.4618973
2007-01-01 05:00:00	0.02645662	0.04004009	0.06664379	0.1269538	0.2779090	0.4619966

Understand model¶

Executive summary¶

Getting started¶

Load libraries¶

Read the "project.rds" file stored by RMV2.0¶

Explore the project file¶

These commands will just print text info summary of the project file structure.¶

The good stuff¶

Number of segments in segmented model¶

The TOWT model object¶

Coefficients for the first segment¶

Weights for the segmented model¶

Training weights for weighted linear regression¶

Note¶

Prediction weights applied in the performance period¶

End of supplement 1¶