Fix problem with fp32 and int8 flags
wozna committed Oct 14, 2020
1 parent 6bbb6e7 commit 52b2922
Showing 2 changed files with 21 additions and 15 deletions.
4 changes: 4 additions & 0 deletions paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -66,6 +66,7 @@ function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir
           --infer_data=${data_path}
           --warmup_batch_size=${WARMUP_BATCH_SIZE}
           --batch_size=50
+          --enable_int8=true
           --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
           --iterations=2)
 endfunction()
@@ -81,6 +82,7 @@ function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary model_
      ARGS --infer_model=${model_dir}/model
           --infer_data=${data_path}
           --batch_size=50
+          --enable_bf16=true
           --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
           --iterations=2)
 endfunction()
@@ -92,6 +94,7 @@ function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_bi
           --infer_data=${data_path}
           --warmup_batch_size=10
           --batch_size=300
+          --enable_int8=true
           --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
           --iterations=1)
 endfunction()
@@ -115,6 +118,7 @@ function(inference_analysis_api_quant_test_run TARGET_NAME test_binary fp32_mode
           --int8_model=${int8_model_dir}
           --infer_data=${data_path}
           --batch_size=50
+          --enable_int8=true
           --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
           --with_accuracy_layer=false
           --iterations=2)
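
These CMake helpers only append arguments to each test's command line; the new --enable_int8=true and --enable_bf16=true arguments matter because the corresponding gflags defaults are flipped to false in tester_helper.h below. As a rough, hypothetical sketch (binary name and flag handling assumed, not taken from this commit), a test binary picks such arguments up like this:

// Hypothetical standalone sketch of how a test binary consumes the ARGS
// that the helpers above append; names here are illustrative only.
#include <gflags/gflags.h>
#include <iostream>

DEFINE_bool(enable_int8, false, "Enable INT8 type prediction");
DEFINE_int32(batch_size, 1, "batch size");

int main(int argc, char **argv) {
  // e.g. invoked as: ./sample_tester --batch_size=50 --enable_int8=true
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  std::cout << "INT8 enabled: " << FLAGS_enable_int8
            << ", batch size: " << FLAGS_batch_size << std::endl;
  return 0;
}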
32 changes: 17 additions & 15 deletions paddle/fluid/inference/tests/api/tester_helper.h
@@ -50,8 +50,8 @@ DEFINE_bool(ernie_large, false, "Test ernie large");
 DEFINE_bool(with_accuracy_layer, true,
             "Calculate the accuracy while label is in the input");
 DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
-DEFINE_bool(enable_bf16, true, "Enable BF16 type prediction");
-DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
+DEFINE_bool(enable_bf16, false, "Enable BF16 type prediction");
+DEFINE_bool(enable_int8, false, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
 // setting iterations to 0 means processing the whole dataset
 DEFINE_int32(iterations, 0, "number of batches to process");
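
With these defaults, a bare run of a tester binary now exercises only the FP32 path; BF16 and INT8 runs must be requested explicitly, which is exactly what the CMakeLists.txt changes above do. A minimal sketch of the selection logic these defaults imply (plain local stand-ins, not the real FLAGS_ globals):

#include <iostream>
#include <string>
#include <vector>

// Stand-ins for FLAGS_enable_fp32 / FLAGS_enable_bf16 / FLAGS_enable_int8.
bool enable_fp32 = true;   // FP32 stays on by default
bool enable_bf16 = false;  // off unless --enable_bf16=true is passed
bool enable_int8 = false;  // off unless --enable_int8=true is passed

int main() {
  std::vector<std::string> precisions;
  if (enable_fp32) precisions.push_back("FP32");
  if (enable_bf16) precisions.push_back("BF16");
  if (enable_int8) precisions.push_back("INT8");
  for (const auto &p : precisions) std::cout << "testing " << p << "\n";
  // default output: "testing FP32" only
}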
@@ -639,8 +639,9 @@ void TestPrediction(const PaddlePredictor::Config *config,
   }
 }
 
-void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
-                       int compared_idx) {
+void SummarizeAccuracy(float avg_acc_ref, float avg_acc, int compared_idx) {
+  std::string data_type_name = "INT8";
+  if (FLAGS_enable_bf16) data_type_name = "BF16";
   PADDLE_ENFORCE_LE(
       compared_idx, 2,
       platform::errors::InvalidArgument(
@@ -659,12 +660,12 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
   LOG(INFO) << "--- Accuracy summary --- ";
   LOG(INFO) << "Accepted " << prefix
             << "drop threshold: " << FLAGS_quantized_accuracy
-            << ". (condition: (FP32_" << prefix << " - INT8_" << prefix
-            << ") <= threshold)";
+            << ". (condition: (FP32_" << prefix << " - " << data_type_name
+            << "_" << prefix << ") <= threshold)";
   LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_fp32;
-  LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_int8;
+            << std::setprecision(4) << avg_acc_ref;
+  LOG(INFO) << data_type_name << ": avg " << prefix << std::fixed
+            << std::setw(6) << std::setprecision(4) << avg_acc;
 }
 
 void SummarizePerformance(const char *title, float sample) {
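
The renamed parameters (avg_acc_ref/avg_acc) plus the data_type_name lookup let one function report either the INT8 or the BF16 comparison. A minimal standalone approximation (assumed label text and sample numbers; the real code logs via LOG(INFO) and validates compared_idx with PADDLE_ENFORCE_LE):

#include <iomanip>
#include <iostream>
#include <string>

// Simplified stand-in for the generalized SummarizeAccuracy above.
void SummarizeAccuracySketch(float avg_acc_ref, float avg_acc,
                             bool enable_bf16) {
  const std::string data_type_name = enable_bf16 ? "BF16" : "INT8";
  std::cout << "FP32: avg accuracy " << std::fixed << std::setw(6)
            << std::setprecision(4) << avg_acc_ref << "\n"
            << data_type_name << ": avg accuracy " << std::fixed
            << std::setw(6) << std::setprecision(4) << avg_acc << "\n";
}

int main() {
  SummarizeAccuracySketch(0.7612f, 0.7598f, /*enable_bf16=*/true);
  // prints:
  // FP32: avg accuracy 0.7612
  // BF16: avg accuracy 0.7598
}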
@@ -677,8 +678,9 @@ void SummarizePerformance(const char *title, float sample) {
 
 void SummarizePerformance(const char *title_fp32, float sample_latency_fp32,
                           const char *title, float sample_latency) {
-  SummarizePerformance(title_fp32, sample_latency_fp32);
-  SummarizePerformance(title, sample_latency);
+  if (FLAGS_enable_fp32) SummarizePerformance(title_fp32, sample_latency_fp32);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
+    SummarizePerformance(title, sample_latency);
 }
 
 float CompareAccuracyOne(
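
Previously both performance lines were printed unconditionally, so a run with only one precision enabled still logged a meaningless latency for the other. A small sketch of the gated behavior (simplified; flag stand-ins assumed rather than the real FLAGS_ globals):

#include <iostream>

// Simplified stand-ins for the flag checks and the single-result overload.
void PrintLatency(const char *title, float ms) {
  std::cout << title << ": avg latency " << ms << " ms\n";
}

void SummarizePerformanceSketch(bool fp32_ran, float fp32_ms,
                                bool quant_ran, float quant_ms) {
  if (fp32_ran) PrintLatency("FP32", fp32_ms);
  if (quant_ran) PrintLatency("quantized", quant_ms);
}

int main() {
  // Only the FP32 line appears when the quantized path was skipped.
  SummarizePerformanceSketch(true, 12.3f, false, 0.0f);
}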
@@ -733,15 +735,15 @@ void CompareAccuracy(
     const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
     const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
     int compared_idx) {
-  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
+  if ((FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16)) &&
       (output_slots_quant.size() == 0 || output_slots_ref.size()) == 0)
     throw std::invalid_argument(
         "CompareAccuracy: output_slots vector is empty.");
 
   float avg_acc_quant = 0.0;
   float avg_acc_ref = 0.0;
 
-  if (FLAGS_enable_int8)
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
     avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);
 
   if (FLAGS_enable_fp32)
@@ -751,9 +753,9 @@
 
   if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);
 
-  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16) CHECK_GT(avg_acc_quant, 0.0);
 
-  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
+  if (FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16))
     CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
 }
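
One caveat worth noting: the emptiness guard kept as context above, `(output_slots_quant.size() == 0 || output_slots_ref.size()) == 0`, appears to misplace a closing parenthesis, so it compares the OR result to zero rather than testing each vector. A sketch of the check the code seems to intend (hypothetical helper, not part of this commit):

#include <stdexcept>
#include <vector>

// Hypothetical corrected guard: throw when either result vector is empty.
template <typename T>
void CheckOutputsNotEmpty(const std::vector<T> &quant,
                          const std::vector<T> &ref) {
  if (quant.empty() || ref.empty())
    throw std::invalid_argument(
        "CompareAccuracy: output_slots vector is empty.");
}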


1 comment on commit 52b2922

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can now ask reviewer(s) to approve and merge. 🎉
