BEGIN 1777953626.8779757 EXEC /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build /usr/local/therock-tools/bin/cmake -E env --unset=ROCM_PATH --unset=ROCM_DIR --unset=HIP_PATH --unset=HIP_DIR -- /usr/local/therock-tools/bin/cmake --build /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build 31.7 [1/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 32.3 [2/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 33.6 [3/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 35.0 [4/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 35.9 [5/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 36.3 [6/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 37.2 [7/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 39.7 [8/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 41.3 [9/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 41.6 [10/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 41.9 [11/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 42.0 [12/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 42.4 [13/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 43.0 [14/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 43.5 [15/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 43.8 [16/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 45.7 [17/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 45.9 [18/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 46.4 [19/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 46.7 [20/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 47.6 [21/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 48.2 [22/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 48.2 [23/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 48.4 [24/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 50.0 [25/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 51.5 [26/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 53.2 [27/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 53.9 [28/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 86.7 [29/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 87.2 [30/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 88.9 [31/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 90.4 [32/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 91.1 [33/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 91.2 [34/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 91.4 [35/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 94.1 [36/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 96.5 [37/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 96.8 [38/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 97.8 [39/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 102.8 [40/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 136.1 [41/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 141.9 [42/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 148.5 [43/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 148.8 [44/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 149.8 [45/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 150.9 [46/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 155.4 [47/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 156.3 [48/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 156.8 [49/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 157.3 [50/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 179.9 [51/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 182.4 [52/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 186.4 [53/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 191.0 [54/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 191.5 [55/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 191.7 [56/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 191.9 [57/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 193.5 [58/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 193.7 [59/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 193.7 [60/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 196.3 [61/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 197.9 [62/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 199.5 [63/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 201.0 [64/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 203.1 [65/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 205.9 [66/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 207.0 [67/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 219.1 [68/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 222.6 [69/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 224.1 [70/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 224.6 [71/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 225.8 [72/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 226.2 [73/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 227.8 [74/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 231.4 [75/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 234.5 [76/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 234.8 [77/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 235.8 [78/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 236.6 [79/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 236.8 [80/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 237.5 [81/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 237.8 [82/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 238.6 [83/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 239.2 [84/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 239.7 [85/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 242.2 [86/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 246.7 [87/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 247.5 [88/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 247.5 [89/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 254.5 [90/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 260.2 [91/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 262.7 [92/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 264.0 [93/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 264.8 [94/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 275.5 [95/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 276.9 [96/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 277.8 [97/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 278.4 [98/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 279.1 [99/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 283.1 [100/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 283.7 [101/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 288.3 [102/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 288.4 [103/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 288.8 [104/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 289.4 [105/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 289.5 [106/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 290.4 [107/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 290.5 [108/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 291.8 [109/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 292.7 [110/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 295.1 [111/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 296.0 [112/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 300.1 [113/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 304.0 [114/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 304.4 [115/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 304.6 [116/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 307.1 [117/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 307.7 [118/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 308.7 [119/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 309.6 [120/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 310.3 [121/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 312.9 [122/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.o 313.3 [123/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.o 316.0 [124/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 316.3 [125/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 317.1 [126/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.o 317.9 [127/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.o 318.6 [128/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 320.2 [129/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 328.9 [130/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 330.1 [131/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 333.5 [132/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 336.8 [133/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 337.1 [134/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 337.7 [135/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.o 337.8 [136/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 340.0 [137/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 341.5 [138/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.o 342.3 [139/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_f32_bf16_instance.cpp.o 342.9 [140/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 343.2 [141/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 344.7 [142/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 344.9 [143/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 345.0 [144/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.o 345.6 [145/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 347.1 [146/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.o 347.8 [147/1682] Building CXX object library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeFiles/device_elementwise_normalization_instance.dir/device_elementwise_normalization_f16_instance.cpp.o 353.3 [148/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 355.1 [149/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.o 357.7 [150/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.o 357.8 [151/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 361.3 [152/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_16_16_instance.cpp.o 362.3 [153/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 363.6 [154/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 365.1 [155/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.o 370.2 [156/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 371.3 [157/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 371.7 [158/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 371.9 [159/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeFiles/device_conv2d_fwd_bias_relu_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.o 372.4 [160/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeFiles/device_conv2d_fwd_bias_relu_add_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.o 372.8 [161/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 374.5 [162/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_f32_bf16_instance.cpp.o 378.2 [163/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 378.5 [164/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_optimized_loads_instance.cpp.o 381.0 [165/1682] Building CXX object library/src/tensor_operation_instance/gpu/elementwise/CMakeFiles/device_elementwise_instance.dir/device_normalize_instance.cpp.o 385.7 [166/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_pad0_pipev1_instance.cpp.o 385.9 [167/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_default_pipev1_instance.cpp.o 386.5 [168/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 386.5 [169/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 386.7 [170/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 388.0 [171/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 389.5 [172/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_optimized_loads_instance.cpp.o 389.7 [173/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 389.8 [174/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 390.9 [175/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_optimized_loads_instance.cpp.o 391.8 [176/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 391.9 [177/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_16_16_instance.cpp.o 392.1 [178/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 394.9 [179/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 395.1 [180/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 395.3 [181/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 397.9 [182/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_pad0_pipev1_instance.cpp.o 398.0 [183/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 399.8 [184/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 400.3 [185/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 402.3 [186/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 403.5 [187/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_default_pipev1_instance.cpp.o 407.2 [188/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 412.6 [189/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load.cpp.o 412.6 [190/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load.cpp.o 412.6 [191/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev2_instance.cpp.o 412.9 [192/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev5_instance.cpp.o 412.9 [193/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 413.3 [194/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev2_instance.cpp.o 414.6 [195/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev5_instance.cpp.o 416.7 [196/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 416.9 [197/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev2_instance.cpp.o 417.4 [198/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev5_instance.cpp.o 419.0 [199/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instance.cpp.o 420.6 [200/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev2_instance.cpp.o 420.6 [201/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev5_instance.cpp.o 420.8 [202/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 421.1 [203/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 421.4 [204/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev2_instance.cpp.o 421.8 [205/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev2_instance.cpp.o 422.3 [206/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_16_16_instance.cpp.o 423.0 [207/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 423.6 [208/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 425.3 [209/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev5_instance.cpp.o 426.4 [210/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev5_instance.cpp.o 430.1 [211/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_16_16_instance.cpp.o 430.2 [212/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 431.2 [213/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 432.9 [214/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 454.7 [215/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_f16_pipev1_instance.cpp.o 455.7 [216/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_bf16_pipev1_instance.cpp.o 458.1 [217/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instance.cpp.o 458.1 [218/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 458.2 [219/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 460.3 [220/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 479.8 [221/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 485.6 [222/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 486.4 [223/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 486.6 [224/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp.o 486.7 [225/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp.o 488.6 [226/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp.o 489.1 [227/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp.o 490.3 [228/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 492.0 [229/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 495.9 [230/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp.o 497.1 [231/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 497.2 [232/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 497.7 [233/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 498.2 [234/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 498.7 [235/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 502.9 [236/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 503.7 [237/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 507.7 [238/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 508.5 [239/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 508.5 [240/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 517.4 [241/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 531.8 [242/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 533.0 [243/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 561.2 [244/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 564.9 [245/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 588.3 [246/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_part2_instance.cpp.o 617.3 [247/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_part2_instance.cpp.o 622.8 [248/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instance.cpp.o 626.3 [249/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 644.0 [250/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 665.5 [251/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 671.7 [252/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_vec_transpose_instance.cpp.o 694.5 [253/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_vec_transpose_instance.cpp.o 702.0 [254/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 709.5 [255/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instance.cpp.o 725.0 [256/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 737.3 [257/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_vec_transpose_instance.cpp.o 738.8 [258/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 743.1 [259/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 748.6 [260/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 749.9 [261/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_irregular_instance.cpp.o 762.5 [262/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_part2_instance.cpp.o 772.2 [263/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_part2_instance.cpp.o 776.3 [264/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_part2_instance.cpp.o 779.4 [265/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_part2_instance.cpp.o 781.0 [266/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_irregular_instance.cpp.o 782.9 [267/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_irregular_instance.cpp.o 786.2 [268/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_irregular_instance.cpp.o 788.7 [269/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_f16_instance.cpp.o 794.2 [270/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 821.8 [271/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instance.cpp.o 827.8 [272/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 831.6 [273/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 832.7 [274/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_bf16_instance.cpp.o 834.6 [275/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 838.4 [276/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 839.3 [277/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 841.5 [278/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 844.0 [279/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev2_instance.cpp.o 845.7 [280/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev2_instance.cpp.o 859.0 [281/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev5_instance.cpp.o 869.3 [282/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp.o 870.9 [283/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_instance.cpp.o 877.0 [284/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 879.6 [285/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 891.1 [286/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_instance.cpp.o 896.2 [287/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_instance.cpp.o 921.2 [288/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp.o 922.0 [289/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev5_instance.cpp.o 926.7 [290/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_comp_instance.cpp.o 929.7 [291/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_instance.cpp.o 938.3 [292/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 952.1 [293/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_2x_instance.cpp.o 973.2 [294/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_2x_instance.cpp.o 980.5 [295/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp.o 1034.7 [296/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp.o 1072.9 [297/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_1.cpp.o 1089.6 [298/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 1126.5 [299/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 1169.2 [300/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 1186.5 [301/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 1213.2 [302/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1236.4 [303/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_10.cpp.o 1244.5 [304/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_3.cpp.o 1261.0 [305/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 1267.9 [306/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_2.cpp.o 1272.1 [307/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.o 1272.2 [308/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_8.cpp.o 1273.4 [309/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_12.cpp.o 1277.3 [310/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1277.5 [311/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances.cpp.o 1282.0 [312/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_5.cpp.o 1298.2 [313/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1301.6 [314/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_intra_instance.cpp.o 1305.7 [315/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_7.cpp.o 1313.2 [316/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_14.cpp.o 1317.5 [317/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 1318.9 [318/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_6.cpp.o 1321.3 [319/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1324.3 [320/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.o 1341.5 [321/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1353.5 [322/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 1358.0 [323/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances.cpp.o 1365.5 [324/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_9.cpp.o 1372.8 [325/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 1380.8 [326/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.o 1386.5 [327/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_inter_instance.cpp.o 1393.2 [328/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_11.cpp.o 1404.4 [329/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_1.cpp.o 1408.3 [330/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_10.cpp.o 1411.3 [331/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_14.cpp.o 1414.7 [332/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.o 1417.6 [333/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_12.cpp.o 1421.8 [334/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_intra_instance.cpp.o 1433.7 [335/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_13.cpp.o 1440.7 [336/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_11.cpp.o 1441.5 [337/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_15.cpp.o 1441.8 [338/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 1443.2 [339/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_13.cpp.o 1444.3 [340/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_12.cpp.o 1448.7 [341/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_16x16_instance.cpp.o 1450.0 [342/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp.o 1455.6 [343/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 1456.1 [344/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_15.cpp.o 1457.0 [345/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_14.cpp.o 1458.8 [346/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_5.cpp.o 1464.7 [347/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_6.cpp.o 1466.1 [348/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances.cpp.o 1470.9 [349/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_3.cpp.o 1474.3 [350/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_0.cpp.o 1476.2 [351/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_8.cpp.o 1485.1 [352/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_2.cpp.o 1485.4 [353/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_8.cpp.o 1488.2 [354/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_0.cpp.o 1488.8 [355/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_1.cpp.o 1490.2 [356/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_15.cpp.o 1490.5 [357/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_3.cpp.o 1493.3 [358/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_4.cpp.o 1497.7 [359/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_13.cpp.o 1497.8 [360/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_10.cpp.o 1497.9 [361/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_8.cpp.o 1502.3 [362/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_7.cpp.o 1504.5 [363/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_6.cpp.o 1505.2 [364/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_9.cpp.o 1508.5 [365/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_11.cpp.o 1509.4 [366/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_10.cpp.o 1510.0 [367/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_13.cpp.o 1513.0 [368/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_20.cpp.o 1515.7 [369/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_2.cpp.o 1515.7 [370/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_12.cpp.o 1515.9 [371/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances.cpp.o 1516.7 [372/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 1516.7 [373/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.o 1520.5 [374/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_2.cpp.o 1521.6 [375/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 1522.2 [376/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_14.cpp.o 1522.9 [377/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_19.cpp.o 1523.4 [378/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.o 1524.6 [379/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_15.cpp.o 1525.5 [380/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_17.cpp.o 1526.8 [381/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_inter_instance.cpp.o 1526.9 [382/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_1.cpp.o 1528.2 [383/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_16.cpp.o 1529.6 [384/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_18.cpp.o 1529.8 [385/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp.o 1530.8 [386/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_1.cpp.o 1530.9 [387/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_11.cpp.o 1533.2 [388/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_0.cpp.o 1535.9 [389/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_3.cpp.o 1536.2 [390/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_2.cpp.o 1541.4 [391/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 1542.1 [392/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_20.cpp.o 1545.1 [393/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances.cpp.o 1545.7 [394/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_4.cpp.o 1548.4 [395/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_8.cpp.o 1551.1 [396/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_11.cpp.o 1551.4 [397/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_6.cpp.o 1551.5 [398/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_7.cpp.o 1551.6 [399/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_13.cpp.o 1552.8 [400/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_10.cpp.o 1553.3 [401/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_12.cpp.o 1556.7 [402/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_15.cpp.o 1556.9 [403/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_18.cpp.o 1557.5 [404/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_14.cpp.o 1558.2 [405/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1559.7 [406/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_19.cpp.o 1559.9 [407/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 1560.0 [408/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_16.cpp.o 1561.8 [409/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_17.cpp.o 1562.8 [410/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1563.0 [411/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1563.1 [412/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_3.cpp.o 1564.4 [413/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_4.cpp.o 1564.5 [414/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 1564.6 [415/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_6.cpp.o 1571.1 [416/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_6.cpp.o 1572.1 [417/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_7.cpp.o 1573.8 [418/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_5.cpp.o 1576.2 [419/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1576.5 [420/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_9.cpp.o 1577.1 [421/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_8.cpp.o 1577.3 [422/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_9.cpp.o 1577.8 [423/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_16x16_instance.cpp.o 1580.6 [424/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_10.cpp.o 1581.1 [425/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_11.cpp.o 1581.9 [426/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_12.cpp.o 1583.5 [427/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_13.cpp.o 1584.4 [428/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1585.2 [429/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_15.cpp.o 1585.4 [430/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1585.8 [431/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_14.cpp.o 1587.0 [432/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_4.cpp.o 1587.6 [433/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1589.5 [434/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.o 1590.4 [435/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_5.cpp.o 1591.4 [436/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1591.6 [437/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_8.cpp.o 1591.9 [438/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_7.cpp.o 1592.2 [439/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_6.cpp.o 1592.2 [440/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_3.cpp.o 1592.3 [441/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_9.cpp.o 1593.7 [442/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_12.cpp.o 1594.7 [443/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_11.cpp.o 1596.4 [444/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_10.cpp.o 1598.8 [445/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_13.cpp.o 1600.3 [446/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_14.cpp.o 1601.1 [447/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.o 1601.1 [448/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_15.cpp.o 1602.7 [449/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp.o 1603.3 [450/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1603.9 [451/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1605.7 [452/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1606.0 [453/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_3.cpp.o 1606.1 [454/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_5.cpp.o 1606.5 [455/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_0.cpp.o 1607.1 [456/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 1608.7 [457/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_5.cpp.o 1609.3 [458/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1609.4 [459/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_6.cpp.o 1609.5 [460/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances.cpp.o 1609.6 [461/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_4.cpp.o 1610.6 [462/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_7.cpp.o 1612.0 [463/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_8.cpp.o 1612.2 [464/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1612.3 [465/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_11.cpp.o 1613.9 [466/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_9.cpp.o 1614.0 [467/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_10.cpp.o 1615.1 [468/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_12.cpp.o 1615.7 [469/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances.cpp.o 1617.1 [470/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1617.6 [471/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_13.cpp.o 1618.2 [472/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances.cpp.o 1618.3 [473/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1619.7 [474/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_14.cpp.o 1620.8 [475/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1621.1 [476/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_15.cpp.o 1623.0 [477/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1625.5 [478/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1626.4 [479/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1627.1 [480/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1628.1 [481/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1628.2 [482/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_0.cpp.o 1628.4 [483/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1628.4 [484/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1628.8 [485/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1629.2 [486/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_1.cpp.o 1629.6 [487/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_2.cpp.o 1630.1 [488/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1630.5 [489/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1632.5 [490/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1632.6 [491/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1632.9 [492/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1633.2 [493/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_5.cpp.o 1635.0 [494/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1640.0 [495/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_2.cpp.o 1641.0 [496/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_2.cpp.o 1641.4 [497/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_3.cpp.o 1642.1 [498/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_0.cpp.o 1642.7 [499/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_3.cpp.o 1642.9 [500/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_1.cpp.o 1646.0 [501/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_1.cpp.o 1647.8 [502/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_0.cpp.o 1649.9 [503/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_0.cpp.o 1650.3 [504/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1650.5 [505/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1650.8 [506/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 1651.1 [507/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_1.cpp.o 1651.9 [508/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_2.cpp.o 1652.2 [509/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances.cpp.o 1654.3 [510/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_3.cpp.o 1658.0 [511/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_12.cpp.o 1658.3 [512/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_11.cpp.o 1659.1 [513/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_4.cpp.o 1659.4 [514/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_8.cpp.o 1659.5 [515/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_6.cpp.o 1661.6 [516/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_10.cpp.o 1662.2 [517/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_14.cpp.o 1662.6 [518/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_16.cpp.o 1662.8 [519/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_15.cpp.o 1662.9 [520/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_13.cpp.o 1664.4 [521/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_17.cpp.o 1664.7 [522/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_19.cpp.o 1665.7 [523/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_18.cpp.o 1666.5 [524/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances.cpp.o 1667.4 [525/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_1.cpp.o 1668.9 [526/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1670.9 [527/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_2.cpp.o 1672.6 [528/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_3.cpp.o 1672.6 [529/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_4.cpp.o 1672.9 [530/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_6.cpp.o 1673.0 [531/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_7.cpp.o 1674.3 [532/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_5.cpp.o 1675.6 [533/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_10.cpp.o 1678.0 [534/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_9.cpp.o 1679.4 [535/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_0.cpp.o 1681.8 [536/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_1.cpp.o 1682.8 [537/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_8.cpp.o 1684.5 [538/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_11.cpp.o 1688.0 [539/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_0.cpp.o 1694.8 [540/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_2.cpp.o 1703.3 [541/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances.cpp.o 1703.6 [542/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_3.cpp.o 1704.7 [543/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_6.cpp.o 1704.9 [544/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_11.cpp.o 1705.7 [545/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_8.cpp.o 1706.2 [546/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_4.cpp.o 1707.3 [547/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_17.cpp.o 1707.4 [548/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_12.cpp.o 1708.9 [549/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_10.cpp.o 1709.2 [550/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1710.0 [551/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_1.cpp.o 1710.7 [552/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_13.cpp.o 1710.9 [553/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1711.3 [554/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_14.cpp.o 1711.8 [555/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_7.cpp.o 1711.9 [556/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_15.cpp.o 1712.1 [557/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_16.cpp.o 1713.4 [558/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1713.5 [559/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_18.cpp.o 1714.1 [560/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_4.cpp.o 1715.9 [561/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1716.4 [562/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_19.cpp.o 1716.6 [563/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_6.cpp.o 1717.8 [564/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_9.cpp.o 1717.9 [565/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_2.cpp.o 1718.8 [566/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_8.cpp.o 1719.0 [567/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_10.cpp.o 1720.0 [568/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_3.cpp.o 1720.5 [569/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_5.cpp.o 1720.8 [570/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances.cpp.o 1722.4 [571/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_0.cpp.o 1723.9 [572/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1724.1 [573/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_4.cpp.o 1728.8 [574/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1730.1 [575/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances.cpp.o 1731.7 [576/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_11.cpp.o 1734.0 [577/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_0.cpp.o 1734.1 [578/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_1.cpp.o 1735.2 [579/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_2.cpp.o 1736.2 [580/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_3.cpp.o 1736.6 [581/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_7.cpp.o 1737.2 [582/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_6.cpp.o 1738.9 [583/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_4.cpp.o 1740.4 [584/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_9.cpp.o 1741.6 [585/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_10.cpp.o 1742.5 [586/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_5.cpp.o 1742.8 [587/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_11.cpp.o 1743.4 [588/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_8.cpp.o 1743.4 [589/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_13.cpp.o 1743.6 [590/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_12.cpp.o 1744.2 [591/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_2.cpp.o 1744.3 [592/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_1.cpp.o 1745.3 [593/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_14.cpp.o 1745.7 [594/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_15.cpp.o 1746.5 [595/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_0.cpp.o 1748.2 [596/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_3.cpp.o 1748.5 [597/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1749.5 [598/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_11.cpp.o 1750.9 [599/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_8.cpp.o 1751.2 [600/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_12.cpp.o 1751.5 [601/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_4.cpp.o 1751.6 [602/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_10.cpp.o 1754.7 [603/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances.cpp.o 1756.3 [604/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_6.cpp.o 1757.3 [605/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_14.cpp.o 1759.2 [606/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_13.cpp.o 1766.3 [607/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_19.cpp.o 1766.9 [608/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_15.cpp.o 1768.1 [609/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_0.cpp.o 1770.2 [610/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_16.cpp.o 1773.8 [611/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_2.cpp.o 1775.7 [612/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_18.cpp.o 1777.4 [613/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_17.cpp.o 1779.1 [614/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1780.3 [615/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances.cpp.o 1784.1 [616/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_1.cpp.o 1786.1 [617/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_3.cpp.o 1789.9 [618/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_14.cpp.o 1793.4 [619/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_4.cpp.o 1793.9 [620/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_11.cpp.o 1795.1 [621/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_8.cpp.o 1797.8 [622/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_9.cpp.o 1798.6 [623/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_7.cpp.o 1798.9 [624/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_13.cpp.o 1799.6 [625/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_16.cpp.o 1800.6 [626/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_18.cpp.o 1800.7 [627/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_10.cpp.o 1802.0 [628/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_1.cpp.o 1803.1 [629/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_17.cpp.o 1803.9 [630/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_6.cpp.o 1804.7 [631/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_12.cpp.o 1805.3 [632/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_19.cpp.o 1806.2 [633/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_7.cpp.o 1806.3 [634/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_15.cpp.o 1810.6 [635/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_5.cpp.o 1811.4 [636/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_2.cpp.o 1815.6 [637/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_0.cpp.o 1818.1 [638/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_3.cpp.o 1818.9 [639/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_4.cpp.o 1819.2 [640/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances.cpp.o 1820.6 [641/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_10.cpp.o 1823.5 [642/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_7.cpp.o 1825.0 [643/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_9.cpp.o 1825.5 [644/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_7.cpp.o 1831.6 [645/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances.cpp.o 1832.9 [646/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_6.cpp.o 1833.2 [647/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_11.cpp.o 1833.3 [648/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances.cpp.o 1835.6 [649/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_0.cpp.o 1836.6 [650/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_8.cpp.o 1836.8 [651/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_14.cpp.o 1838.1 [652/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_12.cpp.o 1840.0 [653/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_13.cpp.o 1841.7 [654/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances.cpp.o 1843.1 [655/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_15.cpp.o 1846.7 [656/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_9.cpp.o 1847.0 [657/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_2.cpp.o 1848.2 [658/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances.cpp.o 1849.4 [659/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_1.cpp.o 1851.2 [660/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances.cpp.o 1857.8 [661/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_10.cpp.o 1858.4 [662/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_4.cpp.o 1865.0 [663/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_8.cpp.o 1865.3 [664/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp.o 1866.1 [665/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_5.cpp.o 1866.7 [666/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_7.cpp.o 1866.8 [667/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances.cpp.o 1866.8 [668/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_6.cpp.o 1867.1 [669/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_3.cpp.o 1867.4 [670/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_9.cpp.o 1868.5 [671/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances_shard_0.cpp.o 1868.6 [672/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_1.cpp.o 1872.0 [673/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_0.cpp.o 1873.8 [674/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_2.cpp.o 1877.5 [675/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances_shard_0.cpp.o 1883.0 [676/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances_shard_0.cpp.o 1886.8 [677/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_3.cpp.o 1891.0 [678/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_0.cpp.o 1896.8 [679/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances.cpp.o 1897.3 [680/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_1.cpp.o 1899.8 [681/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_3.cpp.o 1901.3 [682/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_1.cpp.o 1909.1 [683/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_2.cpp.o 1912.1 [684/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_4.cpp.o 1922.9 [685/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_3.cpp.o 1923.2 [686/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_5.cpp.o 1923.3 [687/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_7.cpp.o 1923.6 [688/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_5.cpp.o 1929.1 [689/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_11.cpp.o 1931.3 [690/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_10.cpp.o 1931.9 [691/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_9.cpp.o 1935.7 [692/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_8.cpp.o 1937.1 [693/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1944.3 [694/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_5.cpp.o 1950.1 [695/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1954.7 [696/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1956.1 [697/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_7.cpp.o 1958.7 [698/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_7.cpp.o 1962.4 [699/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 1966.0 [700/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1967.2 [701/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 1979.9 [702/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 1993.3 [703/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_7.cpp.o 2008.7 [704/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2014.8 [705/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_5.cpp.o 2042.4 [706/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2042.4 [707/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 2044.6 [708/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 2061.3 [709/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2063.6 [710/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_5.cpp.o 2063.7 [711/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 2072.1 [712/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 2072.7 [713/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_0.cpp.o 2077.1 [714/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2079.2 [715/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 2086.4 [716/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_9.cpp.o 2086.8 [717/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_5.cpp.o 2092.9 [718/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 2095.8 [719/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 2097.2 [720/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 2099.5 [721/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_9.cpp.o 2128.5 [722/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2135.9 [723/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_3.cpp.o 2136.7 [724/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_4.cpp.o 2136.8 [725/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 2140.5 [726/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2141.4 [727/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2146.8 [728/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 2149.0 [729/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 2158.0 [730/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 2158.3 [731/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2166.8 [732/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 2171.4 [733/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2172.8 [734/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 2174.5 [735/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2176.0 [736/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 2203.2 [737/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_9.cpp.o 2214.3 [738/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp.o 2223.7 [739/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 2225.6 [740/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 2245.8 [741/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 2249.7 [742/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2257.7 [743/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2258.4 [744/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2266.2 [745/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 2268.0 [746/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 2273.7 [747/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 2293.4 [748/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2296.1 [749/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 2301.2 [750/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2305.3 [751/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2308.9 [752/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 2313.6 [753/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 2328.3 [754/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_9.cpp.o 2341.8 [755/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 2363.0 [756/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 2371.1 [757/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2378.0 [758/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 2397.6 [759/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2405.1 [760/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 2431.5 [761/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 2441.3 [762/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2442.2 [763/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 2454.6 [764/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16_16_instance.cpp.o 2463.6 [765/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2468.3 [766/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2468.9 [767/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 2470.8 [768/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2480.7 [769/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_optimized_loads_instance.cpp.o 2498.3 [770/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 2504.0 [771/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_optimized_loads_instance.cpp.o 2504.4 [772/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 2504.9 [773/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 2506.6 [774/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_optimized_loads_instance.cpp.o 2511.1 [775/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 2520.2 [776/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_4.cpp.o 2522.5 [777/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 2525.4 [778/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_6.cpp.o 2527.3 [779/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2529.7 [780/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_4.cpp.o 2530.7 [781/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2535.4 [782/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_0.cpp.o 2550.7 [783/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f16_instance.cpp.o 2552.2 [784/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_instance.cpp.o 2556.5 [785/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2561.0 [786/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2571.3 [787/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 2580.3 [788/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 2582.5 [789/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 2585.4 [790/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 2589.8 [791/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_2.cpp.o 2591.1 [792/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 2604.3 [793/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_16_16_instance.cpp.o 2605.3 [794/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 2607.0 [795/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 2625.5 [796/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 2635.2 [797/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 2637.5 [798/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2644.4 [799/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 2645.3 [800/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_f32_bf16_instance.cpp.o 2647.5 [801/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16_16_instance.cpp.o 2660.7 [802/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2662.9 [803/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_16_16_instance.cpp.o 2676.1 [804/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 2696.1 [805/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 2704.5 [806/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2724.5 [807/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2727.2 [808/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev2_instance.cpp.o 2731.0 [809/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2732.5 [810/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2733.3 [811/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev2_instance.cpp.o 2737.7 [812/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2748.2 [813/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev5_instance.cpp.o 2750.1 [814/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 2754.0 [815/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev5_instance.cpp.o 2756.0 [816/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev2_instance.cpp.o 2763.9 [817/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev2_instance.cpp.o 2764.3 [818/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev5_instance.cpp.o 2767.2 [819/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instance.cpp.o 2770.9 [820/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev2_instance.cpp.o 2777.6 [821/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev5_instance.cpp.o 2777.8 [822/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instance.cpp.o 2781.0 [823/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.o 2781.8 [824/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instance.cpp.o 2795.4 [825/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2795.8 [826/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2797.6 [827/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 2798.1 [828/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2807.1 [829/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 2808.9 [830/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 2810.4 [831/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2813.0 [832/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 2854.6 [833/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 2858.5 [834/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_f16_pipev1_instance.cpp.o 2863.9 [835/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_pipev1_instance.cpp.o 2870.3 [836/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 2870.5 [837/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 2877.3 [838/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 2883.2 [839/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 2884.1 [840/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 2929.7 [841/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp.o 2935.2 [842/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 2951.9 [843/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp.o 2952.7 [844/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp.o 2967.8 [845/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp.o 2975.5 [846/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 2978.8 [847/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 2985.6 [848/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_part2_instance.cpp.o 3011.8 [849/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_vec_transpose_instance.cpp.o 3016.1 [850/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_vec_transpose_instance.cpp.o 3022.4 [851/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 3031.5 [852/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 3041.9 [853/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 3042.5 [854/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3045.6 [855/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 3058.5 [856/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_vec_transpose_instance.cpp.o 3071.7 [857/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3086.9 [858/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3111.8 [859/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 3123.3 [860/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_part2_instance.cpp.o 3149.1 [861/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 3157.9 [862/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 3173.9 [863/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3193.0 [864/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 3194.8 [865/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3199.2 [866/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 3199.5 [867/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 3203.9 [868/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 3209.9 [869/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.o 3222.7 [870/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_irregular_instance.cpp.o 3233.7 [871/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 3252.5 [872/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 3259.2 [873/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp.o 3260.9 [874/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 3275.2 [875/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3279.2 [876/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3290.1 [877/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 3291.6 [878/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_part2_instance.cpp.o 3296.3 [879/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3296.8 [880/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_irregular_instance.cpp.o 3300.3 [881/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp.o 3308.6 [882/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_part2_instance.cpp.o 3308.6 [883/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_irregular_instance.cpp.o 3315.8 [884/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_irregular_instance.cpp.o 3327.2 [885/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3330.4 [886/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 3332.2 [887/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3338.4 [888/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3347.0 [889/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_instance.cpp.o 3348.5 [890/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3362.4 [891/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 3367.4 [892/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 3369.2 [893/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_part2_instance.cpp.o 3378.6 [894/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_part2_instance.cpp.o 3378.7 [895/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev2_instance.cpp.o 3390.0 [896/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3391.2 [897/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp.o 3396.5 [898/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 3397.5 [899/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev5_instance.cpp.o 3409.3 [900/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3417.2 [901/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 3420.3 [902/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3420.6 [903/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev5_instance.cpp.o 3422.1 [904/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev2_instance.cpp.o 3432.7 [905/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3440.5 [906/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3452.3 [907/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3472.3 [908/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_instance.cpp.o 3477.2 [909/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 3485.4 [910/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 3492.5 [911/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 3501.1 [912/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 3507.1 [913/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 3510.0 [914/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 3512.9 [915/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3521.5 [916/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_2x_instance.cpp.o 3538.3 [917/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 3584.0 [918/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_2x_instance.cpp.o 3586.4 [919/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cpp.o 3586.9 [920/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3590.9 [921/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 3594.5 [922/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3621.1 [923/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 3626.4 [924/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 3640.4 [925/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 3644.9 [926/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 3671.7 [927/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 3674.9 [928/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances.cpp.o 3675.1 [929/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp.o 3710.8 [930/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp.o 3727.8 [931/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 3730.4 [932/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 3738.3 [933/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 3751.1 [934/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp.o 3755.2 [935/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3756.1 [936/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 3761.1 [937/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_3.cpp.o 3761.1 [938/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 3765.2 [939/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3768.9 [940/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances.cpp.o 3774.4 [941/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 3775.9 [942/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_5.cpp.o 3786.9 [943/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_7.cpp.o 3797.2 [944/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3801.2 [945/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_1.cpp.o 3802.6 [946/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.o 3805.4 [947/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_0.cpp.o 3806.1 [948/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3806.5 [949/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_2.cpp.o 3806.6 [950/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.o 3807.3 [951/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 3810.6 [952/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_3.cpp.o 3811.2 [953/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 3812.8 [954/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_2.cpp.o 3817.4 [955/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances.cpp.o 3822.8 [956/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.o 3825.8 [957/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_6.cpp.o 3841.9 [958/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 3851.8 [959/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3854.5 [960/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.o 3855.5 [961/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances.cpp.o 3856.8 [962/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_0.cpp.o 3861.7 [963/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_3.cpp.o 3868.6 [964/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_1.cpp.o 3877.5 [965/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_4.cpp.o 3880.8 [966/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_2.cpp.o 3883.9 [967/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_1.cpp.o 3887.1 [968/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_3.cpp.o 3888.8 [969/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 3895.5 [970/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_7.cpp.o 3909.7 [971/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_2.cpp.o 3909.9 [972/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 3914.4 [973/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances.cpp.o 3917.8 [974/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_0.cpp.o 3918.6 [975/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 3923.2 [976/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 3933.7 [977/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_5.cpp.o 3934.9 [978/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_4.cpp.o 3941.7 [979/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_6.cpp.o 3944.2 [980/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_6.cpp.o 3944.5 [981/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 3945.7 [982/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_1.cpp.o 3949.4 [983/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_1.cpp.o 3952.0 [984/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.o 3958.0 [985/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_0.cpp.o 3961.4 [986/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_8.cpp.o 3962.0 [987/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_9.cpp.o 3967.2 [988/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances.cpp.o 3979.0 [989/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_1.cpp.o 3981.5 [990/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_3.cpp.o 3992.9 [991/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_2.cpp.o 3999.7 [992/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_3.cpp.o 4007.4 [993/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances.cpp.o 4022.3 [994/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_2.cpp.o 4029.2 [995/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_3.cpp.o 4029.6 [996/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_0.cpp.o 4030.0 [997/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 4036.3 [998/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 4040.5 [999/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_2.cpp.o 4041.1 [1000/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_6.cpp.o 4045.8 [1001/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_1.cpp.o 4046.4 [1002/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_5.cpp.o 4048.9 [1003/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_5.cpp.o 4051.1 [1004/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4060.6 [1005/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 4064.2 [1006/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_4.cpp.o 4065.0 [1007/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_7.cpp.o 4067.5 [1008/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_1.cpp.o 4074.6 [1009/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances.cpp.o 4075.1 [1010/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_8.cpp.o 4079.6 [1011/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.o 4086.7 [1012/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_0.cpp.o 4094.1 [1013/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_4.cpp.o 4094.2 [1014/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 4097.9 [1015/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_0.cpp.o 4117.0 [1016/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_3.cpp.o 4118.4 [1017/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_8.cpp.o 4119.2 [1018/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.o 4120.1 [1019/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_6.cpp.o 4122.9 [1020/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_11.cpp.o 4125.2 [1021/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_6.cpp.o 4128.2 [1022/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_9.cpp.o 4128.3 [1023/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_8.cpp.o 4130.2 [1024/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 4137.2 [1025/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.o 4139.9 [1026/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_2.cpp.o 4157.3 [1027/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_16x16_instance.cpp.o 4168.7 [1028/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 4173.1 [1029/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_5.cpp.o 4175.7 [1030/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_0.cpp.o 4180.9 [1031/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 4191.3 [1032/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_5.cpp.o 4202.0 [1033/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_4.cpp.o 4208.4 [1034/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 4212.7 [1035/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_7.cpp.o 4219.7 [1036/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_8.cpp.o 4221.4 [1037/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_7.cpp.o 4221.6 [1038/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 4232.4 [1039/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_6.cpp.o 4237.6 [1040/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_11.cpp.o 4242.0 [1041/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_5.cpp.o 4243.6 [1042/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4248.9 [1043/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_6.cpp.o 4249.3 [1044/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances.cpp.o 4250.4 [1045/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_8.cpp.o 4252.2 [1046/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_7.cpp.o 4254.9 [1047/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 4255.4 [1048/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_7.cpp.o 4257.7 [1049/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_1.cpp.o 4258.3 [1050/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_2.cpp.o 4262.2 [1051/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_1.cpp.o 4264.2 [1052/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_7.cpp.o 4264.6 [1053/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_4.cpp.o 4265.0 [1054/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_3.cpp.o 4265.7 [1055/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 4266.0 [1056/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_5.cpp.o 4268.6 [1057/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_2.cpp.o 4269.1 [1058/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_6.cpp.o 4270.4 [1059/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_4.cpp.o 4270.6 [1060/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 4270.7 [1061/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_7.cpp.o 4272.1 [1062/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_8.cpp.o 4274.1 [1063/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_9.cpp.o 4274.7 [1064/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_10.cpp.o 4277.9 [1065/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_11.cpp.o 4279.8 [1066/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_8.cpp.o 4279.8 [1067/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_9.cpp.o 4280.3 [1068/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances.cpp.o 4281.6 [1069/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_10.cpp.o 4283.2 [1070/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_9.cpp.o 4285.1 [1071/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_10.cpp.o 4290.7 [1072/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_1.cpp.o 4292.8 [1073/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_9.cpp.o 4298.6 [1074/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_11.cpp.o 4300.4 [1075/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 4301.4 [1076/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 4302.2 [1077/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 4303.0 [1078/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_5.cpp.o 4303.3 [1079/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_7.cpp.o 4304.4 [1080/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 4305.2 [1081/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 4306.7 [1082/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_3.cpp.o 4308.0 [1083/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp.o 4308.9 [1084/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_5.cpp.o 4309.9 [1085/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_4.cpp.o 4310.0 [1086/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_6.cpp.o 4310.6 [1087/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_4.cpp.o 4310.8 [1088/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_8.cpp.o 4310.8 [1089/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_7.cpp.o 4316.1 [1090/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_9.cpp.o 4319.3 [1091/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_10.cpp.o 4319.7 [1092/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_11.cpp.o 4319.9 [1093/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 4320.9 [1094/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_2.cpp.o 4329.8 [1095/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_12.cpp.o 4330.7 [1096/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_13.cpp.o 4330.8 [1097/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_15.cpp.o 4331.7 [1098/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_6.cpp.o 4332.1 [1099/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_14.cpp.o 4333.6 [1100/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_bf8_instance.cpp.o 4334.1 [1101/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 4334.4 [1102/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 4336.0 [1103/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_5.cpp.o 4337.1 [1104/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_instance.cpp.o 4337.4 [1105/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_6.cpp.o 4337.8 [1106/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.o 4338.2 [1107/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 4338.4 [1108/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_5.cpp.o 4340.2 [1109/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_3.cpp.o 4340.6 [1110/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp.o 4341.4 [1111/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_4.cpp.o 4342.0 [1112/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 4343.0 [1113/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4343.7 [1114/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_10.cpp.o 4343.8 [1115/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_fp8_instance.cpp.o 4344.4 [1116/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_9.cpp.o 4346.5 [1117/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_8.cpp.o 4346.8 [1118/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_7.cpp.o 4347.3 [1119/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4347.7 [1120/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_9.cpp.o 4349.2 [1121/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_9.cpp.o 4350.0 [1122/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_11.cpp.o 4350.4 [1123/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 4352.5 [1124/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_7.cpp.o 4354.4 [1125/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_14.cpp.o 4355.2 [1126/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_13.cpp.o 4357.7 [1127/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_0.cpp.o 4359.5 [1128/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_15.cpp.o 4359.9 [1129/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_6.cpp.o 4360.1 [1130/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_12.cpp.o 4360.9 [1131/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_4.cpp.o 4363.3 [1132/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 4367.0 [1133/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4368.6 [1134/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_6.cpp.o 4369.4 [1135/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4369.6 [1136/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances.cpp.o 4369.6 [1137/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_3.cpp.o 4369.8 [1138/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 4370.9 [1139/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 4372.0 [1140/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_5.cpp.o 4372.7 [1141/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_4.cpp.o 4373.5 [1142/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_7.cpp.o 4373.8 [1143/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_9.cpp.o 4375.3 [1144/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances.cpp.o 4375.3 [1145/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_8.cpp.o 4375.5 [1146/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances.cpp.o 4376.2 [1147/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 4377.2 [1148/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_9.cpp.o 4377.6 [1149/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 4380.8 [1150/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_13.cpp.o 4380.9 [1151/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_10.cpp.o 4381.3 [1152/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_11.cpp.o 4381.6 [1153/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_0.cpp.o 4382.9 [1154/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_15.cpp.o 4382.9 [1155/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_12.cpp.o 4384.5 [1156/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 4384.8 [1157/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_14.cpp.o 4385.9 [1158/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 4389.7 [1159/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 4390.0 [1160/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 4391.8 [1161/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 4391.9 [1162/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 4392.7 [1163/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 4392.8 [1164/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 4394.4 [1165/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_1.cpp.o 4394.6 [1166/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_2.cpp.o 4395.8 [1167/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 4397.4 [1168/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 4397.9 [1169/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 4400.3 [1170/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 4401.4 [1171/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 4403.1 [1172/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_8.cpp.o 4407.3 [1173/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_3.cpp.o 4407.7 [1174/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_3.cpp.o 4408.8 [1175/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_2.cpp.o 4410.2 [1176/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp.o 4410.9 [1177/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_1.cpp.o 4410.9 [1178/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_2.cpp.o 4412.4 [1179/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 4414.0 [1180/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_1.cpp.o 4414.8 [1181/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_0.cpp.o 4415.9 [1182/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 4415.9 [1183/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_2.cpp.o 4418.1 [1184/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances.cpp.o 4418.3 [1185/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_3.cpp.o 4418.8 [1186/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 4418.8 [1187/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_0.cpp.o 4419.5 [1188/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_0.cpp.o 4419.5 [1189/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_1.cpp.o 4420.4 [1190/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_8.cpp.o 4422.7 [1191/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_4.cpp.o 4423.4 [1192/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_6.cpp.o 4423.8 [1193/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_10.cpp.o 4424.7 [1194/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_11.cpp.o 4427.0 [1195/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_12.cpp.o 4427.3 [1196/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_14.cpp.o 4428.9 [1197/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_16.cpp.o 4429.0 [1198/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 4430.4 [1199/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_15.cpp.o 4430.5 [1200/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_17.cpp.o 4431.0 [1201/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_19.cpp.o 4431.3 [1202/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_13.cpp.o 4432.2 [1203/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_18.cpp.o 4434.0 [1204/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_1.cpp.o 4435.0 [1205/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_0.cpp.o 4436.2 [1206/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_2.cpp.o 4438.1 [1207/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances.cpp.o 4439.3 [1208/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_3.cpp.o 4442.7 [1209/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_8.cpp.o 4443.3 [1210/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_6.cpp.o 4444.9 [1211/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_4.cpp.o 4445.2 [1212/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_12.cpp.o 4446.6 [1213/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_11.cpp.o 4447.6 [1214/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_10.cpp.o 4448.8 [1215/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_14.cpp.o 4449.9 [1216/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_13.cpp.o 4450.0 [1217/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_15.cpp.o 4450.4 [1218/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_17.cpp.o 4451.2 [1219/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_19.cpp.o 4452.5 [1220/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_1.cpp.o 4452.6 [1221/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_16.cpp.o 4453.3 [1222/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_18.cpp.o 4454.7 [1223/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 4454.9 [1224/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_2.cpp.o 4455.3 [1225/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances.cpp.o 4457.3 [1226/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_0.cpp.o 4457.6 [1227/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_3.cpp.o 4460.1 [1228/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_10.cpp.o 4460.1 [1229/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_4.cpp.o 4460.6 [1230/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_9.cpp.o 4462.1 [1231/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_6.cpp.o 4463.1 [1232/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_8.cpp.o 4464.3 [1233/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 4464.7 [1234/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_11.cpp.o 4467.1 [1235/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_13.cpp.o 4467.2 [1236/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_12.cpp.o 4467.3 [1237/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_14.cpp.o 4468.4 [1238/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 4468.9 [1239/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_15.cpp.o 4471.9 [1240/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 4472.4 [1241/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 4473.6 [1242/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_1.cpp.o 4474.0 [1243/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_2.cpp.o 4474.1 [1244/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_0.cpp.o 4476.3 [1245/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances.cpp.o 4476.6 [1246/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_3.cpp.o 4480.4 [1247/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_4.cpp.o 4481.6 [1248/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_8.cpp.o 4481.7 [1249/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_6.cpp.o 4482.1 [1250/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_12.cpp.o 4482.5 [1251/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_11.cpp.o 4482.7 [1252/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 4485.0 [1253/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_10.cpp.o 4485.7 [1254/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_16.cpp.o 4486.4 [1255/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_13.cpp.o 4486.4 [1256/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_14.cpp.o 4486.9 [1257/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_15.cpp.o 4487.4 [1258/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 4488.9 [1259/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 4489.7 [1260/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_18.cpp.o 4490.7 [1261/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_17.cpp.o 4491.3 [1262/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_19.cpp.o 4492.4 [1263/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_1.cpp.o 4492.9 [1264/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_0.cpp.o 4494.0 [1265/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_2.cpp.o 4494.9 [1266/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_5.cpp.o 4495.1 [1267/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_4.cpp.o 4496.9 [1268/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_7.cpp.o 4497.4 [1269/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 4498.1 [1270/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_3.cpp.o 4498.7 [1271/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances.cpp.o 4500.2 [1272/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_6.cpp.o 4500.3 [1273/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_8.cpp.o 4501.1 [1274/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_11.cpp.o 4503.8 [1275/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_12.cpp.o 4504.0 [1276/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_10.cpp.o 4504.7 [1277/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_14.cpp.o 4505.4 [1278/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_13.cpp.o 4506.7 [1279/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_15.cpp.o 4509.4 [1280/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_16.cpp.o 4509.7 [1281/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_7.cpp.o 4510.4 [1282/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_7.cpp.o 4511.2 [1283/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_5.cpp.o 4511.2 [1284/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances.cpp.o 4512.0 [1285/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_19.cpp.o 4512.1 [1286/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_1.cpp.o 4512.7 [1287/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_17.cpp.o 4513.9 [1288/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_0.cpp.o 4514.0 [1289/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_18.cpp.o 4515.6 [1290/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_2.cpp.o 4517.1 [1291/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_3.cpp.o 4518.8 [1292/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_6.cpp.o 4519.9 [1293/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_9.cpp.o 4521.0 [1294/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_4.cpp.o 4521.4 [1295/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_8.cpp.o 4521.5 [1296/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_10.cpp.o 4522.2 [1297/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 4522.5 [1298/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_13.cpp.o 4523.9 [1299/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_11.cpp.o 4525.4 [1300/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_12.cpp.o 4525.6 [1301/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 4525.7 [1302/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_15.cpp.o 4525.8 [1303/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 4526.4 [1304/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_14.cpp.o 4526.6 [1305/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 4526.7 [1306/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 4527.6 [1307/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances.cpp.o 4528.7 [1308/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_9.cpp.o 4530.1 [1309/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances.cpp.o 4530.3 [1310/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 4531.0 [1311/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances.cpp.o 4532.3 [1312/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 4534.0 [1313/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 4534.2 [1314/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 4534.2 [1315/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 4535.0 [1316/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 4535.1 [1317/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 4535.4 [1318/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 4535.7 [1319/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 4537.3 [1320/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 4537.4 [1321/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances.cpp.o 4538.3 [1322/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_0.cpp.o 4541.0 [1323/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances_shard_0.cpp.o 4542.3 [1324/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances_shard_0.cpp.o 4542.4 [1325/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_3.cpp.o 4542.7 [1326/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_1.cpp.o 4542.8 [1327/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_2.cpp.o 4544.7 [1328/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_5.cpp.o 4544.9 [1329/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances.cpp.o 4545.7 [1330/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_9.cpp.o 4549.5 [1331/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_1.cpp.o 4550.9 [1332/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_0.cpp.o 4551.4 [1333/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_1.cpp.o 4552.1 [1334/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_3.cpp.o 4552.3 [1335/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_4.cpp.o 4553.0 [1336/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_2.cpp.o 4554.9 [1337/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_3.cpp.o 4556.9 [1338/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4557.0 [1339/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 4557.2 [1340/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_7.cpp.o 4557.9 [1341/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_5.cpp.o 4558.3 [1342/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_8.cpp.o 4558.3 [1343/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_3.cpp.o 4560.6 [1344/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4561.1 [1345/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_10.cpp.o 4561.7 [1346/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_5.cpp.o 4561.7 [1347/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_9.cpp.o 4561.8 [1348/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_11.cpp.o 4566.0 [1349/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4566.6 [1350/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_7.cpp.o 4572.2 [1351/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 4572.7 [1352/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_7.cpp.o 4575.2 [1353/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 4576.6 [1354/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 4579.6 [1355/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4581.6 [1356/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 4582.0 [1357/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_5.cpp.o 4583.9 [1358/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_7.cpp.o 4586.2 [1359/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 4593.8 [1360/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4596.7 [1361/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4599.3 [1362/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4599.5 [1363/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 4599.7 [1364/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_4.cpp.o 4600.0 [1365/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 4602.1 [1366/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 4602.5 [1367/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 4603.5 [1368/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_9.cpp.o 4606.1 [1369/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4606.8 [1370/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4614.1 [1371/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4616.1 [1372/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_5.cpp.o 4621.2 [1373/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 4621.2 [1374/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 4622.3 [1375/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp.o 4623.0 [1376/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4624.2 [1377/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_9.cpp.o 4628.0 [1378/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4630.6 [1379/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4633.8 [1380/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4633.8 [1381/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4637.3 [1382/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4638.5 [1383/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 4643.0 [1384/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4646.1 [1385/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4646.2 [1386/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_3.cpp.o 4649.1 [1387/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 4652.2 [1388/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4660.0 [1389/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4663.5 [1390/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 4665.0 [1391/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_4.cpp.o 4665.2 [1392/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 4665.8 [1393/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 4668.4 [1394/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_0.cpp.o 4675.1 [1395/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_5.cpp.o 4676.6 [1396/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 4695.8 [1397/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 4705.6 [1398/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 4706.1 [1399/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 4708.9 [1400/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4709.2 [1401/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4710.2 [1402/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 4718.0 [1403/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 4722.5 [1404/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4728.7 [1405/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 4729.0 [1406/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 4730.0 [1407/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 4732.0 [1408/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4732.3 [1409/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 4732.9 [1410/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4732.9 [1411/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 4733.1 [1412/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4734.3 [1413/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 4739.3 [1414/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 4741.5 [1415/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 4742.0 [1416/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 4742.0 [1417/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 4747.8 [1418/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 4762.7 [1419/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4773.6 [1420/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_4.cpp.o 4775.0 [1421/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_0.cpp.o 4777.8 [1422/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4778.3 [1423/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4785.0 [1424/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4789.6 [1425/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4792.5 [1426/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4792.6 [1427/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4793.9 [1428/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4794.6 [1429/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4801.8 [1430/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4805.9 [1431/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4808.3 [1432/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_2.cpp.o 4809.6 [1433/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_6.cpp.o 4810.8 [1434/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 4817.8 [1435/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 4825.7 [1436/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 4827.4 [1437/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 4829.9 [1438/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4830.5 [1439/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4830.7 [1440/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4840.7 [1441/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4867.8 [1442/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 4868.5 [1443/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 4876.1 [1444/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 4880.1 [1445/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 4881.3 [1446/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4882.2 [1447/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 4883.2 [1448/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 4884.4 [1449/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4886.1 [1450/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4889.2 [1451/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4892.3 [1452/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 4893.5 [1453/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4900.9 [1454/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 4901.1 [1455/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 4915.2 [1456/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 4915.5 [1457/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 4924.0 [1458/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_default_instance.cpp.o 4926.2 [1459/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 4931.9 [1460/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 4942.1 [1461/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 5001.0 [1462/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5015.5 [1463/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5019.2 [1464/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5035.7 [1465/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5046.0 [1466/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.o 5047.4 [1467/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5051.5 [1468/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp.o 5051.6 [1469/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5055.8 [1470/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.o 5089.8 [1471/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp.o 5092.5 [1472/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_m_instance.cpp.o 5096.3 [1473/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp.o 5100.4 [1474/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5109.0 [1475/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_default_instance.cpp.o 5115.0 [1476/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.o 5118.1 [1477/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp.o 5122.1 [1478/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp.o 5125.5 [1479/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.o 5127.8 [1480/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_m_instance.cpp.o 5129.4 [1481/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.o 5129.7 [1482/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.o 5131.0 [1483/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 5135.3 [1484/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.o 5136.3 [1485/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.o 5136.3 [1486/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_default_instance.cpp.o 5140.1 [1487/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp.o 5140.1 [1488/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_n_instance.cpp.o 5140.4 [1489/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5141.0 [1490/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5146.2 [1491/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.o 5167.4 [1492/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.o 5171.0 [1493/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.o 5179.1 [1494/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.o 5181.9 [1495/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.o 5182.5 [1496/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5185.5 [1497/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_n_instance.cpp.o 5188.6 [1498/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.o 5190.8 [1499/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_mn_instance.cpp.o 5193.4 [1500/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5195.7 [1501/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.o 5199.5 [1502/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.o 5202.4 [1503/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.o 5206.0 [1504/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.o 5206.7 [1505/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.o 5208.8 [1506/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.o 5210.9 [1507/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.o 5211.3 [1508/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5211.6 [1509/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.o 5216.1 [1510/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.o 5217.3 [1511/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.o 5224.4 [1512/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5224.9 [1513/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.o 5228.9 [1514/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.o 5229.7 [1515/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.o 5232.9 [1516/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.o 5234.0 [1517/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.o 5234.5 [1518/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.o 5235.4 [1519/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.o 5235.7 [1520/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.o 5237.9 [1521/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.o 5238.5 [1522/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.o 5240.0 [1523/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 5242.2 [1524/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5244.4 [1525/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.o 5245.1 [1526/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.o 5246.6 [1527/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.o 5246.9 [1528/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.o 5247.5 [1529/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.o 5252.1 [1530/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.o 5252.3 [1531/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.o 5252.5 [1532/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.o 5254.4 [1533/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.o 5254.5 [1534/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.o 5259.3 [1535/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.o 5261.1 [1536/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.o 5262.1 [1537/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.o 5263.8 [1538/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5265.1 [1539/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.o 5265.3 [1540/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5266.7 [1541/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.o 5266.8 [1542/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.o 5269.9 [1543/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5270.7 [1544/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.o 5275.5 [1545/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.o 5276.3 [1546/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.o 5278.0 [1547/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.o 5280.0 [1548/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5280.0 [1549/1682] cd /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build && /usr/local/therock-tools/bin/cmake -E rm -f /__w/rockrel/rockrel/build/ml-libs/composable_kernel/stamp/stage.stamp 5282.7 [1550/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/device_memory.cpp.o 5285.0 [1551/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5285.6 [1552/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/host_tensor.cpp.o 5286.3 [1553/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/convolution_parameter.cpp.o 5292.2 [1554/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5294.7 [1555/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 5300.0 [1556/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce2.cpp.o 5300.4 [1557/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce1.cpp.o 5301.1 [1558/1682] Linking CXX static library lib/libdevice_contraction_operations.a 5301.2 [1559/1682] Linking CXX static library lib/libdevice_other_operations.a 5301.2 [1560/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce1.cpp.o 5302.5 [1561/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5304.1 [1562/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 5304.6 [1563/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 5305.1 [1564/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce3.cpp.o 5309.0 [1565/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 5309.0 [1566/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 5310.2 [1567/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce2.cpp.o 5310.7 [1568/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce3.cpp.o 5311.8 [1569/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce1.cpp.o 5314.2 [1570/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce2.cpp.o 5315.7 [1571/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce4.cpp.o 5315.8 [1572/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce3.cpp.o 5316.2 [1573/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce1.cpp.o 5318.1 [1574/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce2.cpp.o 5320.3 [1575/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce3.cpp.o 5324.3 [1576/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 5324.3 [1577/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 5325.8 [1578/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 5327.5 [1579/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 5327.7 [1580/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce4.cpp.o 5331.5 [1581/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.o 5331.8 [1582/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.o 5333.3 [1583/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.o 5334.6 [1584/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 5335.0 [1585/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 5335.5 [1586/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 5338.4 [1587/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 5340.0 [1588/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.o 5340.8 [1589/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5341.9 [1590/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 5347.0 [1591/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.o 5347.5 [1592/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.o 5353.5 [1593/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.o 5353.5 [1594/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.o 5353.6 [1595/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.o 5354.9 [1596/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5356.0 [1597/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.o 5358.5 [1598/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.o 5359.2 [1599/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_odd_mn_instance.cpp.o 5359.6 [1600/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.o 5360.8 [1601/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.o 5361.5 [1602/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.o 5362.4 [1603/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.o 5362.7 [1604/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.o 5364.7 [1605/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.o 5364.8 [1606/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.o 5365.5 [1607/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5373.3 [1608/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.o 5375.8 [1609/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5378.1 [1610/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5381.3 [1611/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5395.6 [1612/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5407.1 [1613/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5414.0 [1614/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_default_instance.cpp.o 5414.1 [1615/1682] Linking CXX static library lib/libutility.a 5415.3 [1616/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 5415.9 [1617/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5419.1 [1618/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 5420.1 [1619/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 5421.4 [1620/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 5425.8 [1621/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 5429.0 [1622/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5430.0 [1623/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5435.9 [1624/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5445.7 [1625/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.o 5446.0 [1626/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5452.0 [1627/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.o 5454.0 [1628/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5460.4 [1629/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 5462.9 [1630/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5464.1 [1631/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 5465.1 [1632/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 5465.9 [1633/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 5467.1 [1634/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 5468.8 [1635/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 5468.9 [1636/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 5472.0 [1637/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5473.7 [1638/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 5474.5 [1639/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnpadding_instance.cpp.o 5477.8 [1640/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_kpadding_instance.cpp.o 5478.0 [1641/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.o 5478.8 [1642/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5481.8 [1643/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 5485.1 [1644/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 5488.7 [1645/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.o 5492.1 [1646/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5492.4 [1647/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.o 5492.8 [1648/1682] Linking CXX static library lib/libdevice_reduction_operations.a 5495.9 [1649/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5502.2 [1650/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_mnkpadding_instance.cpp.o 5502.4 [1651/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_kpadding_instance.cpp.o 5503.2 [1652/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 5503.4 [1653/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_default_instance.cpp.o 5512.3 [1654/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 5515.2 [1655/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 5515.7 [1656/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 5516.0 [1657/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_default_instance.cpp.o 5518.4 [1658/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 5519.5 [1659/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 5532.6 [1660/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 5539.5 [1661/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 5544.0 [1662/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 5551.0 [1663/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5553.6 [1664/1682] Linking CXX static library lib/libdevice_conv_operations.a 5582.2 [1665/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 5598.7 [1666/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 5612.0 [1667/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 5615.6 [1668/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 5634.8 [1669/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 5637.5 [1670/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 5640.2 [1671/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 5642.3 [1672/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 5648.7 [1673/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 5694.9 [1674/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 5703.2 [1675/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 5712.9 [1676/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 5722.7 [1677/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 5725.0 [1678/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 5730.8 [1679/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 7116.2 [1680/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_bf16_bf16/device_gemm_wmma_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 7129.3 [1681/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_f16_f16_f16/device_gemm_wmma_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 7284.5 [1682/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_i8_bf16/device_gemm_wmma_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o END 1777960911.3626592 7284.484683513641 0