Tags: tensorflow, object-detection-api, tensorflow-slim

TensorFlow Object Detection gives different results in two evaluations of the same model checkpoint


I'm new to TensorFlow and am currently working with the Object Detection API.

I chose ssd_resnet50_fpn to get started and downloaded the pretrained model from the TensorFlow model zoo to do transfer learning on my own dataset, which has only 1 class (person). The training configuration is defined in pipeline.config, which I adapted from the one shipped with the pretrained model, and I trained the model with the legacy train.py script.

The training process went fine and the loss decreased as expected, and I exported my evaluation images with bounding boxes while running the evaluation (via the legacy eval.py). Inference looked fine and those exported images were as expected.

However, I noticed something weird: I got different evaluation results on the same model checkpoint. When I ran eval.py twice with the same parameters, the bounding boxes produced by inference differed on the same image.

Here are the evaluation results produced by eval.py (coco_detection_metrics):

First run: [screenshot of COCO detection metrics]

Second run: [screenshot of COCO detection metrics]

I don't fully understand mAP yet, but the two results were slightly different.

And here's one of the images exported during evaluation; the left one is from the first evaluation and the right one is from the second.

[Exported image during evaluation]

It seems like the model weights change during inference. How can I track down the problem? Is there some configuration I've missed?

I'm using TensorFlow 1.10.1 with Python 3.5.2, and I cloned the Object Detection API from https://github.com/tensorflow/models without modification.
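
In case it helps with debugging, one way to confirm that the weights stored in the checkpoint are identical across runs would be something like the following sketch (the checkpoint path is the one from my config below). If the digest is the same before both evaluations, the randomness must come from the inference graph itself rather than from changing weights:

    import hashlib
    import tensorflow as tf

    # Fingerprint every variable stored in the checkpoint.
    ckpt = "/tf-object-detection-training/models/ssd_resnet50/saved/model.ckpt-652123"
    reader = tf.train.NewCheckpointReader(ckpt)
    digest = hashlib.md5()
    for name in sorted(reader.get_variable_to_shape_map()):
        digest.update(reader.get_tensor(name).tobytes())
    print(digest.hexdigest())  # same string => same weights on disk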

Here's my pipeline.config:

model {
  ssd {
    num_classes: 1
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    feature_extractor {
      type: "ssd_resnet50_v1_fpn"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 0.000399999989895
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.0299999993294
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.996999979019
          scale: true
          epsilon: 0.0010000000475
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 0.000399999989895
            }
          }
          initializer {
            random_normal_initializer {
              mean: 0.0
              stddev: 0.00999999977648
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.996999979019
            scale: true
            epsilon: 0.0010000000475
          }
        }
        use_dropout: true
        dropout_keep_probability: 0.7
        depth: 256
        num_layers_before_predictor: 4
        kernel_size: 3
        class_prediction_bias_init: -4.59999990463
      }
    }
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        scales_per_octave: 2
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 0.300000011921
        iou_threshold: 0.600000023842
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid_focal {
          gamma: 2.0
          alpha: 0.25
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    encode_background_as_zeros: true
    normalize_loc_loss_by_codesize: true
    inplace_batchnorm_update: true
    freeze_batchnorm: false
  }
}
train_config {
  batch_size: 8
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  sync_replicas: false
  optimizer {
    adam_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.0001
          decay_steps: 5000
          decay_factor: 0.9
        }
      }
    }
    use_moving_average: false
  }
  fine_tune_checkpoint: "/tf-object-detection-training/models/ssd_resnet50/saved/model.ckpt-652123"
  num_steps: 2000000
  from_detection_checkpoint: true
  load_all_detection_checkpoint_vars: true
  startup_delay_steps: 0.0
  replicas_to_aggregate: 8
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}
train_input_reader {
  label_map_path: "/tf-object-detection-training/dataset_VOC/label.pbtxt"
  tf_record_input_reader {
    input_path: "/tf-object-detection-training/dataset_VOC/person_train.record-?????-of-00010"
  }
}
eval_config {
  num_examples: 10000
  num_visualizations: 100
  eval_interval_secs: 60
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  min_score_threshold: 0.5
  retain_original_images: false
  keep_image_id_for_visualization_export: true
  visualization_export_dir: "/tf-object-detection-training/models/ssd_resnet50/eval_detections/"
}
eval_input_reader {
  label_map_path: "/tf-object-detection-training/dataset_VOC/label.pbtxt"
  shuffle: false
  num_readers: 1
  tf_record_input_reader {
    input_path: "/tf-object-detection-training/dataset_VOC/person_val.record-?????-of-00010"
  }
}

Thanks for any advice.


Solution

  • After tracing the code for a long time, I found the answer: it's the 'use_dropout' flag set in pipeline.config. The dropout op is apparently not removed at inference time, so both eval.py and the frozen_inference_graph still apply dropout, which makes inference non-deterministic.
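
    You can reproduce the effect with a tiny standalone snippet (a minimal sketch in plain TensorFlow 1.x, not the Object Detection API's actual predictor code; keep_prob mirrors the dropout_keep_probability of 0.7 in the config above). The same graph with the same constant weights prints different outputs on every run, because tf.nn.dropout samples a fresh random mask each time it is executed:

        import numpy as np
        import tensorflow as tf

        # Fixed "weights": nothing here changes between runs.
        x = tf.constant(np.ones((1, 4), dtype=np.float32))
        w = tf.constant(np.full((4, 2), 0.5, dtype=np.float32))
        logits = tf.matmul(x, w)

        # Dropout left enabled at "inference", as with use_dropout: true.
        out = tf.nn.dropout(logits, keep_prob=0.7)

        with tf.Session() as sess:
            # Each run samples a new dropout mask: elements are randomly
            # zeroed and the survivors are rescaled by 1/keep_prob.
            print(sess.run(out))
            print(sess.run(out))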

    To solve this, simply remove 'use_dropout' from pipeline.config (or set it to false).
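
    With that change, the relevant part of the box_predictor block above would look like this (a sketch showing only the affected lines; the rest stays unchanged):

        box_predictor {
          weight_shared_convolutional_box_predictor {
            ...
            use_dropout: false  # or delete this line entirely
            dropout_keep_probability: 0.7  # ignored once dropout is off
            ...
          }
        }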