Retrieving a trained model from Google CloudML in R

Help! I trained a model on CloudML using cloudml_train("model.R", master_type = "complex_model_m_p100"), and now I need the trained model. I didn't specify anything in my script to save the model; I assumed job_collect() would return the weights after the final epoch.

job_collect() does return the training input jobDir: gs://project/r-cloudml/staging

Is there any way to get the model weights? Or can I set up the script with a callback that will work with Google Cloud? Here's the script:

library(keras)

# Restore the training (sspr.*) and validation (yspr.*) tensors from their
# .RData files, in the same order as before.
# `envir = environment()` is evaluated here, at the top level of the script,
# so the objects are restored into the script's own environment (which is
# where a bare load() call would put them) rather than into lapply()'s frame.
invisible(lapply(
  c(
    "sspr.ndvi.tensor.RData",
    "sspr.highdem.tensor.RData",
    "sspr.lowdem.tensor.RData",
    "yspr.ndvi.tensor.RData",
    "yspr.highdem.tensor.RData",
    "yspr.lowdem.tensor.RData"
  ),
  load,
  envir = environment()
))

# Model definition ----
# Two single-channel 51 x 51 inputs. The layer names given here are the keys
# used for the `x` list when the model is fitted.
highres.crop.input <- layer_input(
  name = "highres.crop_input",
  shape = c(51, 51, 1)
)
lowdem.input <- layer_input(
  name = "lowdem.input",
  shape = c(51, 51, 1)
)

# Low-resolution DEM branch: Gaussian dropout, a 14-filter 3 x 3 convolution,
# 3 x 3 max pooling, a 16-filter 3 x 3 convolution, batch normalisation,
# 17 x 17 average pooling and finally upsampling by a factor of 51.
# NOTE(review): assuming the pooling layers use their default strides, the
# spatial size goes 51 -> 17 -> 1 and the upsampling restores 51 x 51 so the
# branch can be added to the high-resolution features -- confirm in summary().
# The filter count is passed as `filters` (the full argument name) so the call
# does not depend on R's partial argument matching.
lowdem_output <- lowdem.input %>%
  layer_gaussian_dropout(rate = 0.35) %>%
  layer_conv_2d(
    filters = 14, kernel_size = c(3, 3), strides = 1,
    activation = "relu", padding = "same",
    data_format = "channels_last"
  ) %>%
  layer_max_pooling_2d(pool_size = c(3, 3)) %>%
  layer_conv_2d(
    filters = 16, kernel_size = c(3, 3), strides = 1,
    activation = "relu", padding = "same",
    data_format = "channels_last"
  ) %>%
  layer_batch_normalization() %>%
  layer_average_pooling_2d(pool_size = c(17, 17)) %>%
  layer_upsampling_2d(size = c(51, 51), name = "lowdem_output")

# Stage 0 stem on the high-resolution crop: Gaussian dropout, a 16-filter
# 3 x 3 convolution, a 16-filter 2 x 2 convolution and batch normalisation.
# The filter count is passed as `filters` (the full argument name) so the call
# does not depend on R's partial argument matching.
inception_input0 <- highres.crop.input %>%
  layer_gaussian_dropout(rate = 0.35) %>%
  layer_conv_2d(
    filters = 16, kernel_size = c(3, 3), strides = 1,
    activation = "relu", padding = "same",
    data_format = "channels_last"
  ) %>%
  layer_conv_2d(
    filters = 16, kernel_size = c(2, 2),
    activation = "relu", padding = "same"
  ) %>%
  layer_batch_normalization(name = "inception_input0")

# Stage 0 inception-style path: a 1 x 1 convolution down to one channel,
# 3 x 3 max pooling, a factorised 7 x 7 convolution (1 x 7 followed by 7 x 1,
# 16 filters each) and nearest-neighbour upsampling by 3.
inception_output0 <- inception_input0 %>%
  layer_conv_2d(
    filters = 1,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_max_pooling_2d(pool_size = c(3, 3)) %>%
  layer_conv_2d(
    filters = 16,
    kernel_size = c(1, 7),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 16,
    kernel_size = c(7, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_upsampling_2d(
    size = c(3, 3),
    interpolation = "nearest",
    name = "inception_output0"
  )

# Merge the stage 0 stem, the inception path and the low-resolution DEM branch
# by element-wise addition, then apply a factorised 5 x 5 convolution
# (1 x 5 followed by 5 x 1, 6 filters each).
cnn_inter_output0 <-
  layer_add(c(inception_input0, inception_output0, lowdem_output)) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(1, 5),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(5, 1),
    padding = "same",
    activation = "relu",
    name = "cnn_inter_output0"
  )

# Add the raw high-resolution input back in and mix with a 4-filter 1 x 1
# convolution; this is the input to stage 1.
added_inception_highres0 <-
  layer_add(c(highres.crop.input, cnn_inter_output0)) %>%
  layer_conv_2d(
    filters = 4,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu",
    name = "added_inception_highres0"
  )
#### 1 ####
# Stage 1 stem: Gaussian dropout, a 16-filter and then an 8-filter 3 x 3
# convolution, followed by batch normalisation.
# The filter count is passed as `filters` (the full argument name) so the call
# does not depend on R's partial argument matching.
inception_input1 <- added_inception_highres0 %>%
  layer_gaussian_dropout(rate = 0.35) %>%
  layer_conv_2d(
    filters = 16, kernel_size = c(3, 3), strides = 1,
    activation = "relu", padding = "same",
    data_format = "channels_last"
  ) %>%
  layer_conv_2d(
    filters = 8, kernel_size = c(3, 3),
    activation = "relu", padding = "same"
  ) %>%
  layer_batch_normalization(name = "inception_input1")

# Stage 1 inception-style path: a 1 x 1 convolution down to one channel,
# 3 x 3 max pooling, a factorised 7 x 7 convolution (1 x 7 followed by 7 x 1,
# 8 filters each) and nearest-neighbour upsampling by 3.
inception_output1 <- inception_input1 %>%
  layer_conv_2d(
    filters = 1,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_max_pooling_2d(pool_size = c(3, 3)) %>%
  layer_conv_2d(
    filters = 8,
    kernel_size = c(1, 7),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 8,
    kernel_size = c(7, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_upsampling_2d(
    size = c(3, 3),
    interpolation = "nearest",
    name = "inception_output1"
  )

# Merge the stage 1 stem with its inception path by element-wise addition,
# then apply a factorised 5 x 5 convolution (1 x 5 followed by 5 x 1,
# 6 filters each).
cnn_inter_output1 <-
  layer_add(c(inception_input1, inception_output1)) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(1, 5),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(5, 1),
    padding = "same",
    activation = "relu",
    name = "cnn_inter_output1"
  )

# Reduce to 2 channels with a 1 x 1 convolution; this is the input to stage 2.
added_inception_highres1 <- cnn_inter_output1 %>%
  layer_conv_2d(
    filters = 2,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu",
    name = "added_inception_highres1"
  )
#### 2 ####
# Stage 2 stem: a 16-filter and then an 8-filter 3 x 3 convolution, followed
# by batch normalisation (no dropout in this stage).
# The filter count is passed as `filters` (the full argument name) so the call
# does not depend on R's partial argument matching.
inception_input2 <- added_inception_highres1 %>%
  layer_conv_2d(
    filters = 16, kernel_size = c(3, 3), strides = 1,
    activation = "relu", padding = "same",
    data_format = "channels_last"
  ) %>%
  layer_conv_2d(
    filters = 8, kernel_size = c(3, 3),
    activation = "relu", padding = "same"
  ) %>%
  layer_batch_normalization(name = "inception_input2")

# Stage 2 inception-style path: a 1 x 1 convolution down to one channel,
# 3 x 3 max pooling, a factorised 7 x 7 convolution (1 x 7 followed by 7 x 1,
# 8 filters each) and nearest-neighbour upsampling by 3.
inception_output2 <- inception_input2 %>%
  layer_conv_2d(
    filters = 1,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_max_pooling_2d(pool_size = c(3, 3)) %>%
  layer_conv_2d(
    filters = 8,
    kernel_size = c(1, 7),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 8,
    kernel_size = c(7, 1),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_upsampling_2d(
    size = c(3, 3),
    interpolation = "nearest",
    name = "inception_output2"
  )

# Merge the stage 2 stem with its inception path by element-wise addition,
# then apply a factorised 5 x 5 convolution (1 x 5 followed by 5 x 1,
# 6 filters each).
cnn_inter_output2 <-
  layer_add(c(inception_input2, inception_output2)) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(1, 5),
    padding = "same",
    activation = "relu"
  ) %>%
  layer_conv_2d(
    filters = 6,
    kernel_size = c(5, 1),
    padding = "same",
    activation = "relu",
    name = "cnn_inter_output2"
  )

# Final 1 x 1 convolution down to a single channel. This layer is the model
# output, and its name is the key used for the target list in fit().
added_inception_highres2 <- cnn_inter_output2 %>%
  layer_conv_2d(
    filters = 1,
    kernel_size = c(1, 1),
    padding = "same",
    activation = "relu",
    name = "added_inception_highres2"
  )


# Assemble the two-input, single-output model and print its layer summary.
incept_dual <- keras_model(
  inputs = c(highres.crop.input, lowdem.input),
  outputs = added_inception_highres2
)

summary(incept_dual)

# Configure training: mean squared error as the loss and as the reported
# metric, optimised with Nadam.
# The metric is passed through `metrics`, the full argument name, so the call
# does not depend on R's partial argument matching.
incept_dual %>% compile(
  loss = "mse",
  optimizer = "Nadam",
  metrics = "mse"
)


# Train for 1000 epochs on the sspr.* tensors and validate on the yspr.*
# tensors. Training inputs and target are keyed by layer name; the validation
# inputs are passed positionally, in the same order as the model inputs.
# Note that nothing in this call saves weights or the model to disk.
fit(
  incept_dual,
  x = list(
    highres.crop_input = sspr.highdem.tensor,
    lowdem.input = sspr.lowdem.tensor
  ),
  y = list(added_inception_highres2 = sspr.ndvi.tensor),
  batch_size = 32,
  epochs = 1000,
  shuffle = TRUE,
  validation_data = list(
    list(yspr.highdem.tensor, yspr.lowdem.tensor),
    yspr.ndvi.tensor
  )
)

Solution

The answer was to save to filenames that have no parent path (bare file names rather than directory paths) in the script:


# Periodic "last ditch" checkpoint of the model weights. The path is a bare
# file name with no parent directory.
# NOTE(review): `save_freq` and `period` are both set, presumably to cover
# different TensorFlow versions. An integer `save_freq` is not counted in
# epochs (check the callback_model_checkpoint() documentation for the
# installed version), so confirm this really checkpoints every 5 epochs and
# not every few batches.
checkpoint_path <- "five_epoch_checkpoint.ckpt"
lastditch_callback <- callback_model_checkpoint(
  filepath = checkpoint_path,
  save_weights_only = TRUE,
  save_best_only = FALSE,
  save_freq = 5,
  period = 5,
  verbose = 0
)
# Checkpoint that keeps only the best weights seen so far, judged on the
# callback's default monitored quantity (no `monitor` is passed here). The
# path is a bare file name with no parent directory.
best_path <- "best.ckpt"
bestmod_callback <- callback_model_checkpoint(
  filepath = best_path,
  save_weights_only = TRUE,
  save_best_only = TRUE,
  mode = "auto",
  verbose = 0
)



# Train for 50 epochs with both checkpoint callbacks attached, validating on
# the yspr.* tensors.
# NOTE(review): the target is keyed `prediction`. A named target is matched to
# an output layer by name, so confirm the model being fitted has an output
# layer called "prediction".
fit(
  incept_dual,
  x = list(
    highres.crop_input = sspr.highdem.tensor,
    lowdem.input = sspr.lowdem.tensor
  ),
  y = list(prediction = sspr.ndvi.tensor),
  batch_size = 32,
  epochs = 50,
  shuffle = TRUE,
  callbacks = list(lastditch_callback, bestmod_callback),
  validation_data = list(
    list(yspr.highdem.tensor, yspr.lowdem.tensor),
    yspr.ndvi.tensor
  )
)

# Save the final trained model as an HDF5 file; again a bare file name with
# no parent directory.
incept_dual %>% save_model_hdf5(filepath = "incept_dual.h5")

five_epoch_checkpoint.ckpt, best.ckpt, and incept_dual.h5 will all be present in the Google Cloud Storage bucket that the model results are automatically saved to. Unfortunately, I couldn't retrieve the model from the original run, but I can now save checkpoints and the final model in future runs.