BEGIN 1777953915.9665227 EXEC /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build /usr/local/therock-tools/bin/cmake -E env --unset=ROCM_PATH --unset=ROCM_DIR --unset=HIP_PATH --unset=HIP_DIR -- /usr/local/therock-tools/bin/cmake --build /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build 38.0 [1/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 39.7 [2/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 40.1 [3/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 40.2 [4/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 40.4 [5/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 40.7 [6/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 41.2 [7/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 43.3 [8/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 48.2 [9/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 48.9 [10/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 50.3 [11/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 51.8 [12/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 52.0 [13/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 52.6 [14/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 53.0 [15/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 53.2 [16/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 53.7 [17/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 54.1 [18/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 54.2 [19/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 54.3 [20/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 55.0 [21/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 56.1 [22/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 56.1 [23/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 56.2 [24/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 58.4 [25/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 59.4 [26/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 59.8 [27/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 60.5 [28/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 92.9 [29/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 93.5 [30/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 93.5 [31/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 95.0 [32/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 95.0 [33/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 95.4 [34/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 95.7 [35/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 96.9 [36/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 98.3 [37/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 99.8 [38/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 100.0 [39/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 101.4 [40/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 146.1 [41/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 159.3 [42/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 167.8 [43/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 171.8 [44/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 178.4 [45/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 184.3 [46/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 274.9 [47/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 303.8 [48/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 303.9 [49/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 308.4 [50/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 315.0 [51/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 321.8 [52/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 322.9 [53/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 341.7 [54/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 352.0 [55/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 360.0 [56/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 389.9 [57/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 393.5 [58/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 394.4 [59/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 417.4 [60/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 503.3 [61/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 504.0 [62/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 510.2 [63/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 520.9 [64/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 522.9 [65/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 527.2 [66/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 530.0 [67/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 547.6 [68/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 555.9 [69/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 588.3 [70/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 593.4 [71/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 593.9 [72/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 599.2 [73/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 617.7 [74/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 620.0 [75/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 623.4 [76/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 623.7 [77/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 636.3 [78/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 637.7 [79/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 648.1 [80/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 653.2 [81/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 653.6 [82/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 659.1 [83/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 667.6 [84/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 674.9 [85/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 688.1 [86/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 688.7 [87/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 704.1 [88/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 726.2 [89/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 727.2 [90/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 737.7 [91/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 737.8 [92/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 745.5 [93/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 748.4 [94/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 750.4 [95/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 750.6 [96/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 752.3 [97/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 756.1 [98/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 756.4 [99/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 763.3 [100/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 768.7 [101/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 770.5 [102/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 774.6 [103/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 792.9 [104/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 795.9 [105/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 820.5 [106/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 832.5 [107/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 841.2 [108/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 848.1 [109/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 865.2 [110/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 869.7 [111/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.o 879.6 [112/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.o 881.3 [113/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 883.5 [114/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.o 891.9 [115/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 893.2 [116/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.o 893.9 [117/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 898.3 [118/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 900.9 [119/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 901.8 [120/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 911.7 [121/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 917.8 [122/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 919.7 [123/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 931.1 [124/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 932.2 [125/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 936.4 [126/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 956.0 [127/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.o 958.0 [128/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 958.7 [129/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.o 960.7 [130/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 960.9 [131/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_f32_bf16_instance.cpp.o 962.2 [132/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 965.9 [133/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 968.4 [134/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 977.2 [135/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 982.9 [136/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 987.5 [137/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 991.7 [138/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 991.9 [139/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.o 999.7 [140/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 1011.7 [141/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.o 1018.7 [142/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 1022.3 [143/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.o 1028.6 [144/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 1032.9 [145/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 1045.9 [146/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 1049.9 [147/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 1058.8 [148/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 1067.0 [149/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 1073.1 [150/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.o 1084.2 [151/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeFiles/device_conv2d_fwd_bias_relu_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.o 1086.0 [152/1691] Building CXX object library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeFiles/device_elementwise_normalization_instance.dir/device_elementwise_normalization_f16_instance.cpp.o 1086.2 [153/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.o 1087.8 [154/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_16_16_instance.cpp.o 1130.5 [155/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 1142.8 [156/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 1146.3 [157/1691] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeFiles/device_conv2d_fwd_bias_relu_add_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.o 1147.5 [158/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 1148.1 [159/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_f32_bf16_instance.cpp.o 1151.7 [160/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_optimized_loads_instance.cpp.o 1155.2 [161/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 1160.3 [162/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1177.8 [163/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1187.7 [164/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_default_pipev1_instance.cpp.o 1187.7 [165/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 1191.7 [166/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_pad0_pipev1_instance.cpp.o 1202.3 [167/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_optimized_loads_instance.cpp.o 1206.0 [168/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 1208.3 [169/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_optimized_loads_instance.cpp.o 1216.0 [170/1691] Building CXX object library/src/tensor_operation_instance/gpu/elementwise/CMakeFiles/device_elementwise_instance.dir/device_normalize_instance.cpp.o 1227.4 [171/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 1248.2 [172/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 1251.1 [173/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1253.6 [174/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_pad0_pipev1_instance.cpp.o 1264.2 [175/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_default_pipev1_instance.cpp.o 1268.8 [176/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1277.5 [177/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 1284.4 [178/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instance.cpp.o 1286.2 [179/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 1294.3 [180/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1294.5 [181/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev5_instance.cpp.o 1296.8 [182/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_16_16_instance.cpp.o 1301.2 [183/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 1303.0 [184/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 1304.5 [185/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 1305.3 [186/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 1305.6 [187/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev2_instance.cpp.o 1306.3 [188/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev2_instance.cpp.o 1306.7 [189/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1311.6 [190/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load.cpp.o 1313.0 [191/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 1319.1 [192/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 1325.8 [193/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev5_instance.cpp.o 1328.5 [194/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1332.5 [195/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev2_instance.cpp.o 1340.1 [196/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev2_instance.cpp.o 1341.3 [197/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load.cpp.o 1344.1 [198/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 1348.5 [199/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1350.6 [200/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev2_instance.cpp.o 1352.7 [201/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev5_instance.cpp.o 1358.8 [202/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev5_instance.cpp.o 1369.3 [203/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev2_instance.cpp.o 1372.2 [204/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 1374.0 [205/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 1390.8 [206/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev5_instance.cpp.o 1393.9 [207/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 1402.8 [208/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev5_instance.cpp.o 1455.4 [209/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_16_16_instance.cpp.o 1477.1 [210/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_16_16_instance.cpp.o 1510.3 [211/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_f16_pipev1_instance.cpp.o 1512.6 [212/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_bf16_pipev1_instance.cpp.o 1519.1 [213/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 1520.4 [214/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 1523.4 [215/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 1530.4 [216/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 1533.1 [217/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 1601.0 [218/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instance.cpp.o 1621.0 [219/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp.o 1631.1 [220/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 1636.0 [221/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 1644.5 [222/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 1668.8 [223/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp.o 1669.7 [224/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 1680.7 [225/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 1691.8 [226/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 1693.4 [227/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 1700.1 [228/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1710.9 [229/1691] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 1714.0 [230/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp.o 1724.1 [231/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 1724.6 [232/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1731.8 [233/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp.o 1744.8 [234/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1746.9 [235/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 1766.3 [236/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp.o 1766.7 [237/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1801.7 [238/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 1802.0 [239/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1812.4 [240/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 1824.7 [241/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1826.6 [242/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 1828.7 [243/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1925.2 [244/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 2026.3 [245/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 2034.1 [246/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2041.6 [247/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_part2_instance.cpp.o 2054.0 [248/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instance.cpp.o 2058.3 [249/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2099.9 [250/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_part2_instance.cpp.o 2162.5 [251/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 2168.7 [252/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 2223.5 [253/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instance.cpp.o 2234.9 [254/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_vec_transpose_instance.cpp.o 2240.3 [255/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_vec_transpose_instance.cpp.o 2278.7 [256/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2285.6 [257/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 2311.5 [258/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2333.9 [259/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 2341.5 [260/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_part2_instance.cpp.o 2373.5 [261/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_part2_instance.cpp.o 2376.2 [262/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_vec_transpose_instance.cpp.o 2392.8 [263/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_part2_instance.cpp.o 2409.1 [264/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 2439.1 [265/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_f16_instance.cpp.o 2447.3 [266/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_bf16_instance.cpp.o 2453.6 [267/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_irregular_instance.cpp.o 2457.7 [268/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_part2_instance.cpp.o 2473.7 [269/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 2473.8 [270/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_irregular_instance.cpp.o 2474.6 [271/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instance.cpp.o 2480.5 [272/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2492.5 [273/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2511.3 [274/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2540.9 [275/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp.o 2581.3 [276/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_irregular_instance.cpp.o 2583.9 [277/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp.o 2648.6 [278/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev2_instance.cpp.o 2663.2 [279/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_irregular_instance.cpp.o 2665.0 [280/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_instance.cpp.o 2665.5 [281/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev2_instance.cpp.o 2666.4 [282/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp.o 2696.0 [283/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_instance.cpp.o 2711.6 [284/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp.o 2719.3 [285/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 2723.4 [286/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_instance.cpp.o 2732.4 [287/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev5_instance.cpp.o 2749.2 [288/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev5_instance.cpp.o 2759.6 [289/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_instance.cpp.o 2761.3 [290/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 2772.0 [291/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_comp_instance.cpp.o 2780.6 [292/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_2x_instance.cpp.o 2792.1 [293/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 2798.8 [294/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_2x_instance.cpp.o 2844.9 [295/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2924.4 [296/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 2941.0 [297/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 3034.5 [298/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_1.cpp.o 3061.5 [299/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 3067.7 [300/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 3082.0 [301/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 3083.6 [302/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 3119.8 [303/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_10.cpp.o 3129.9 [304/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3160.5 [305/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_2.cpp.o 3166.6 [306/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances.cpp.o 3167.4 [307/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3193.6 [308/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_3.cpp.o 3194.0 [309/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_5.cpp.o 3212.7 [310/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3215.8 [311/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 3217.8 [312/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_8.cpp.o 3220.3 [313/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_6.cpp.o 3222.9 [314/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_12.cpp.o 3233.1 [315/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 3236.0 [316/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_7.cpp.o 3237.2 [317/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 3238.2 [318/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_14.cpp.o 3301.7 [319/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.o 3312.9 [320/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_intra_instance.cpp.o 3320.6 [321/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_inter_instance.cpp.o 3330.0 [322/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_9.cpp.o 3334.4 [323/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 3337.3 [324/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.o 3343.2 [325/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 3351.9 [326/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.o 3356.5 [327/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances.cpp.o 3358.6 [328/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 3368.5 [329/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.o 3375.0 [330/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 3389.8 [331/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_10.cpp.o 3395.0 [332/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_12.cpp.o 3395.6 [333/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_14.cpp.o 3398.8 [334/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_11.cpp.o 3401.1 [335/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_1.cpp.o 3415.2 [336/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp.o 3420.7 [337/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3433.5 [338/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_11.cpp.o 3434.1 [339/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_12.cpp.o 3443.6 [340/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_13.cpp.o 3445.9 [341/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_13.cpp.o 3450.7 [342/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_14.cpp.o 3462.8 [343/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_15.cpp.o 3463.7 [344/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_15.cpp.o 3464.9 [345/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances.cpp.o 3482.1 [346/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_intra_instance.cpp.o 3489.4 [347/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_3.cpp.o 3500.9 [348/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_10.cpp.o 3516.1 [349/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_1.cpp.o 3517.1 [350/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_2.cpp.o 3518.9 [351/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_6.cpp.o 3521.1 [352/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_0.cpp.o 3524.8 [353/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_8.cpp.o 3529.8 [354/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_3.cpp.o 3532.0 [355/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 3540.6 [356/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_4.cpp.o 3545.0 [357/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_7.cpp.o 3547.0 [358/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp.o 3548.0 [359/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3553.0 [360/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_8.cpp.o 3553.1 [361/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_6.cpp.o 3560.2 [362/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.o 3563.7 [363/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_8.cpp.o 3565.6 [364/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_inter_instance.cpp.o 3575.1 [365/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 3577.5 [366/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_5.cpp.o 3578.7 [367/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.o 3584.8 [368/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_20.cpp.o 3590.2 [369/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_10.cpp.o 3592.3 [370/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances.cpp.o 3593.2 [371/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_13.cpp.o 3594.8 [372/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 3594.8 [373/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_2.cpp.o 3598.4 [374/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_11.cpp.o 3598.4 [375/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_12.cpp.o 3603.3 [376/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_13.cpp.o 3604.4 [377/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_14.cpp.o 3606.2 [378/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_9.cpp.o 3609.4 [379/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_15.cpp.o 3609.8 [380/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_16.cpp.o 3619.8 [381/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_17.cpp.o 3620.0 [382/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_11.cpp.o 3621.6 [383/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 3622.9 [384/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_19.cpp.o 3624.6 [385/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_1.cpp.o 3626.4 [386/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.o 3627.1 [387/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_18.cpp.o 3628.4 [388/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_2.cpp.o 3634.7 [389/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_0.cpp.o 3636.2 [390/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_15.cpp.o 3637.5 [391/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_2.cpp.o 3640.2 [392/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_4.cpp.o 3640.5 [393/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_3.cpp.o 3643.0 [394/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_20.cpp.o 3643.2 [395/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_16x16_instance.cpp.o 3646.4 [396/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances.cpp.o 3649.6 [397/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_6.cpp.o 3651.8 [398/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_11.cpp.o 3652.9 [399/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_12.cpp.o 3653.1 [400/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_13.cpp.o 3653.2 [401/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_8.cpp.o 3654.1 [402/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_10.cpp.o 3654.8 [403/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.o 3655.8 [404/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 3656.5 [405/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_14.cpp.o 3658.2 [406/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_1.cpp.o 3658.3 [407/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_15.cpp.o 3659.3 [408/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_16.cpp.o 3659.6 [409/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_0.cpp.o 3661.2 [410/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_17.cpp.o 3661.2 [411/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_18.cpp.o 3666.5 [412/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3666.8 [413/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3667.1 [414/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3667.6 [415/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_19.cpp.o 3675.3 [416/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_3.cpp.o 3675.5 [417/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_4.cpp.o 3677.3 [418/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3678.4 [419/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_5.cpp.o 3679.1 [420/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_6.cpp.o 3679.5 [421/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_7.cpp.o 3679.9 [422/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_7.cpp.o 3681.0 [423/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_8.cpp.o 3681.4 [424/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_6.cpp.o 3683.6 [425/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_9.cpp.o 3684.6 [426/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_10.cpp.o 3686.7 [427/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_9.cpp.o 3687.1 [428/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_11.cpp.o 3688.9 [429/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_13.cpp.o 3692.3 [430/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_12.cpp.o 3693.2 [431/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_14.cpp.o 3693.5 [432/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_15.cpp.o 3693.9 [433/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 3694.3 [434/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3696.6 [435/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3697.0 [436/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_6.cpp.o 3699.3 [437/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_5.cpp.o 3699.9 [438/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_8.cpp.o 3700.0 [439/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_4.cpp.o 3700.3 [440/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_3.cpp.o 3701.0 [441/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_7.cpp.o 3701.8 [442/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_5.cpp.o 3703.0 [443/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3704.6 [444/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_10.cpp.o 3704.9 [445/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_12.cpp.o 3705.6 [446/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_11.cpp.o 3707.6 [447/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_9.cpp.o 3712.8 [448/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_13.cpp.o 3714.9 [449/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_14.cpp.o 3715.2 [450/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_15.cpp.o 3715.8 [451/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 3717.8 [452/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3718.9 [453/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_3.cpp.o 3720.0 [454/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_4.cpp.o 3720.0 [455/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3721.3 [456/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_6.cpp.o 3722.2 [457/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3722.3 [458/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances.cpp.o 3722.5 [459/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_5.cpp.o 3724.7 [460/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_8.cpp.o 3725.6 [461/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_7.cpp.o 3726.9 [462/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_9.cpp.o 3728.4 [463/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances.cpp.o 3729.2 [464/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_13.cpp.o 3730.9 [465/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_11.cpp.o 3731.0 [466/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_12.cpp.o 3731.3 [467/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_10.cpp.o 3732.2 [468/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3732.9 [469/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3733.4 [470/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_14.cpp.o 3734.5 [471/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_15.cpp.o 3737.0 [472/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3738.2 [473/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances.cpp.o 3739.2 [474/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3742.1 [475/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3744.3 [476/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3744.6 [477/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_0.cpp.o 3747.2 [478/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3748.9 [479/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 3749.9 [480/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3750.0 [481/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_1.cpp.o 3750.2 [482/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3751.5 [483/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3752.0 [484/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_2.cpp.o 3752.1 [485/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3753.4 [486/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_5.cpp.o 3754.0 [487/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3755.9 [488/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3756.4 [489/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3758.9 [490/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3761.6 [491/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3762.2 [492/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3768.6 [493/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_3.cpp.o 3768.7 [494/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp.o 3770.5 [495/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_3.cpp.o 3771.4 [496/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_2.cpp.o 3774.5 [497/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_2.cpp.o 3777.4 [498/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3777.7 [499/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_1.cpp.o 3778.1 [500/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3780.8 [501/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_1.cpp.o 3781.3 [502/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_1.cpp.o 3781.9 [503/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_0.cpp.o 3782.0 [504/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_0.cpp.o 3782.6 [505/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 3783.4 [506/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_0.cpp.o 3788.4 [507/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_2.cpp.o 3788.9 [508/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_3.cpp.o 3790.5 [509/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3790.8 [510/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_8.cpp.o 3792.6 [511/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_11.cpp.o 3793.0 [512/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_4.cpp.o 3793.1 [513/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances.cpp.o 3793.5 [514/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_10.cpp.o 3795.6 [515/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_12.cpp.o 3796.3 [516/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_6.cpp.o 3796.8 [517/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_15.cpp.o 3798.1 [518/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_13.cpp.o 3802.1 [519/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_14.cpp.o 3802.6 [520/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_17.cpp.o 3802.9 [521/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_16.cpp.o 3803.5 [522/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_18.cpp.o 3804.3 [523/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 3807.0 [524/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_0.cpp.o 3809.2 [525/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_1.cpp.o 3811.0 [526/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances.cpp.o 3811.9 [527/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_19.cpp.o 3816.1 [528/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_7.cpp.o 3816.4 [529/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_2.cpp.o 3817.4 [530/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_16x16_instance.cpp.o 3818.5 [531/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_6.cpp.o 3819.6 [532/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_4.cpp.o 3819.7 [533/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_9.cpp.o 3820.1 [534/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_10.cpp.o 3821.0 [535/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_3.cpp.o 3821.3 [536/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_5.cpp.o 3821.4 [537/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_8.cpp.o 3823.2 [538/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_0.cpp.o 3825.8 [539/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_11.cpp.o 3826.7 [540/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp.o 3830.8 [541/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_0.cpp.o 3831.2 [542/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_1.cpp.o 3832.8 [543/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_2.cpp.o 3836.4 [544/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_3.cpp.o 3837.5 [545/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_6.cpp.o 3839.4 [546/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_8.cpp.o 3840.4 [547/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances.cpp.o 3840.5 [548/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_4.cpp.o 3841.0 [549/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_10.cpp.o 3841.8 [550/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_12.cpp.o 3844.2 [551/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_11.cpp.o 3846.0 [552/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_13.cpp.o 3846.6 [553/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_15.cpp.o 3847.3 [554/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_14.cpp.o 3849.4 [555/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_16.cpp.o 3851.5 [556/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_17.cpp.o 3854.3 [557/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_1.cpp.o 3854.3 [558/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances.cpp.o 3854.9 [559/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_18.cpp.o 3856.3 [560/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_6.cpp.o 3856.6 [561/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_7.cpp.o 3856.8 [562/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_19.cpp.o 3859.6 [563/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_9.cpp.o 3860.4 [564/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_8.cpp.o 3861.8 [565/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_2.cpp.o 3862.5 [566/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_4.cpp.o 3862.9 [567/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3863.6 [568/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3864.0 [569/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_10.cpp.o 3864.2 [570/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_3.cpp.o 3865.0 [571/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_5.cpp.o 3865.1 [572/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3866.9 [573/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3867.0 [574/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_0.cpp.o 3868.8 [575/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_11.cpp.o 3873.5 [576/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_0.cpp.o 3874.1 [577/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_1.cpp.o 3876.5 [578/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_2.cpp.o 3877.8 [579/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances.cpp.o 3880.3 [580/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_3.cpp.o 3880.4 [581/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_4.cpp.o 3884.3 [582/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_6.cpp.o 3885.5 [583/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_8.cpp.o 3886.1 [584/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_9.cpp.o 3887.9 [585/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_10.cpp.o 3890.5 [586/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_11.cpp.o 3891.8 [587/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_12.cpp.o 3892.4 [588/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp.o 3892.9 [589/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_13.cpp.o 3893.8 [590/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 3894.9 [591/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_7.cpp.o 3895.1 [592/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_14.cpp.o 3896.1 [593/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 3897.0 [594/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_5.cpp.o 3897.9 [595/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_1.cpp.o 3898.0 [596/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_2.cpp.o 3898.1 [597/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3898.7 [598/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_15.cpp.o 3899.1 [599/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_3.cpp.o 3899.9 [600/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_7.cpp.o 3900.5 [601/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_0.cpp.o 3901.7 [602/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances.cpp.o 3905.6 [603/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_11.cpp.o 3906.3 [604/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_8.cpp.o 3906.4 [605/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_12.cpp.o 3906.8 [606/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_10.cpp.o 3906.9 [607/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_4.cpp.o 3907.5 [608/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_13.cpp.o 3908.1 [609/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_6.cpp.o 3910.8 [610/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_15.cpp.o 3912.4 [611/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_16.cpp.o 3912.5 [612/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_14.cpp.o 3913.9 [613/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_7.cpp.o 3914.8 [614/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 3917.4 [615/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_17.cpp.o 3919.1 [616/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_18.cpp.o 3921.1 [617/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_1.cpp.o 3922.0 [618/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 3922.2 [619/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_19.cpp.o 3923.3 [620/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_2.cpp.o 3924.1 [621/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_0.cpp.o 3924.7 [622/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances.cpp.o 3929.1 [623/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_3.cpp.o 3931.3 [624/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_4.cpp.o 3934.8 [625/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_8.cpp.o 3935.9 [626/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_6.cpp.o 3936.1 [627/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_12.cpp.o 3936.7 [628/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_11.cpp.o 3938.0 [629/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_5.cpp.o 3938.0 [630/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_10.cpp.o 3938.8 [631/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_9.cpp.o 3939.6 [632/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_14.cpp.o 3940.6 [633/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_7.cpp.o 3941.3 [634/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_13.cpp.o 3941.8 [635/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_17.cpp.o 3942.0 [636/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_16.cpp.o 3942.2 [637/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_15.cpp.o 3942.6 [638/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_0.cpp.o 3942.8 [639/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_1.cpp.o 3943.1 [640/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_18.cpp.o 3944.1 [641/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances.cpp.o 3945.3 [642/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_19.cpp.o 3947.7 [643/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_2.cpp.o 3950.0 [644/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_6.cpp.o 3950.9 [645/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_3.cpp.o 3952.9 [646/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_4.cpp.o 3953.0 [647/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_10.cpp.o 3953.6 [648/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_9.cpp.o 3953.6 [649/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_8.cpp.o 3957.9 [650/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_11.cpp.o 3958.1 [651/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_13.cpp.o 3959.3 [652/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_12.cpp.o 3961.5 [653/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_7.cpp.o 3961.6 [654/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances.cpp.o 3961.7 [655/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_14.cpp.o 3961.9 [656/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_0.cpp.o 3962.3 [657/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances.cpp.o 3963.3 [658/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_15.cpp.o 3963.7 [659/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_1.cpp.o 3963.8 [660/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_2.cpp.o 3965.2 [661/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances.cpp.o 3965.8 [662/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances.cpp.o 3966.8 [663/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances.cpp.o 3968.2 [664/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_3.cpp.o 3969.1 [665/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_4.cpp.o 3975.8 [666/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_5.cpp.o 3976.4 [667/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances.cpp.o 3977.3 [668/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_10.cpp.o 3978.1 [669/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_6.cpp.o 3978.8 [670/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances_shard_0.cpp.o 3979.0 [671/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_7.cpp.o 3980.1 [672/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_9.cpp.o 3980.9 [673/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_0.cpp.o 3981.8 [674/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_8.cpp.o 3981.8 [675/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances_shard_0.cpp.o 3982.4 [676/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_9.cpp.o 3983.5 [677/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_1.cpp.o 3984.6 [678/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances_shard_0.cpp.o 3986.3 [679/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_2.cpp.o 3987.5 [680/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_3.cpp.o 3988.4 [681/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances.cpp.o 3988.8 [682/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_1.cpp.o 3990.9 [683/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 3992.7 [684/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_0.cpp.o 3993.3 [685/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_3.cpp.o 3994.2 [686/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_4.cpp.o 3994.3 [687/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_1.cpp.o 3995.8 [688/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_2.cpp.o 3999.9 [689/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_3.cpp.o 4001.8 [690/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4004.4 [691/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_7.cpp.o 4005.2 [692/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_8.cpp.o 4005.3 [693/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_11.cpp.o 4005.6 [694/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_5.cpp.o 4008.5 [695/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_5.cpp.o 4009.3 [696/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_10.cpp.o 4009.8 [697/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_9.cpp.o 4013.3 [698/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 4014.6 [699/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4015.2 [700/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_9.cpp.o 4017.0 [701/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 4018.8 [702/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_5.cpp.o 4021.6 [703/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4022.6 [704/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 4023.5 [705/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_7.cpp.o 4025.3 [706/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 4031.4 [707/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 4033.0 [708/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_7.cpp.o 4033.9 [709/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 4037.4 [710/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4044.0 [711/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_7.cpp.o 4048.8 [712/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_5.cpp.o 4049.5 [713/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 4051.9 [714/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4054.6 [715/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_9.cpp.o 4057.5 [716/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4066.1 [717/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 4068.3 [718/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 4071.7 [719/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 4072.4 [720/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4072.5 [721/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 4073.3 [722/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 4076.5 [723/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 4076.7 [724/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_9.cpp.o 4079.5 [725/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4085.0 [726/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4085.9 [727/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 4086.2 [728/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_5.cpp.o 4095.1 [729/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 4103.5 [730/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_9.cpp.o 4104.3 [731/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4107.2 [732/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4107.4 [733/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 4107.7 [734/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 4109.6 [735/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 4110.6 [736/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 4114.0 [737/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 4114.2 [738/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 4116.9 [739/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 4120.0 [740/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 4121.3 [741/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4124.7 [742/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 4125.2 [743/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4126.1 [744/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4135.5 [745/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 4138.0 [746/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4139.8 [747/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 4145.1 [748/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 4151.9 [749/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4153.1 [750/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4155.8 [751/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 4157.1 [752/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 4160.7 [753/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 4164.5 [754/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4195.4 [755/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 4196.9 [756/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_4.cpp.o 4207.0 [757/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 4211.8 [758/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 4216.4 [759/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 4230.0 [760/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 4233.7 [761/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4233.9 [762/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4239.9 [763/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16_16_instance.cpp.o 4264.9 [764/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 4265.9 [765/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4266.2 [766/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4275.8 [767/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 4276.0 [768/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_optimized_loads_instance.cpp.o 4284.0 [769/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_optimized_loads_instance.cpp.o 4287.7 [770/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4288.6 [771/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4290.4 [772/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4290.4 [773/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_optimized_loads_instance.cpp.o 4296.3 [774/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 4308.5 [775/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 4316.0 [776/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4318.3 [777/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4321.9 [778/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4326.5 [779/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 4333.6 [780/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 4338.7 [781/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f16_instance.cpp.o 4344.4 [782/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 4344.9 [783/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4348.5 [784/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_instance.cpp.o 4350.0 [785/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 4359.7 [786/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 4361.2 [787/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_16_16_instance.cpp.o 4380.2 [788/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 4383.7 [789/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 4385.5 [790/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_4.cpp.o 4389.2 [791/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_0.cpp.o 4392.0 [792/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_6.cpp.o 4401.4 [793/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_2.cpp.o 4412.3 [794/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 4422.6 [795/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 4422.7 [796/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_16_16_instance.cpp.o 4430.3 [797/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 4431.3 [798/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_0.cpp.o 4431.7 [799/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 4435.8 [800/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 4439.8 [801/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 4440.3 [802/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16_16_instance.cpp.o 4440.3 [803/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_f32_bf16_instance.cpp.o 4442.0 [804/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_4.cpp.o 4442.0 [805/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_3.cpp.o 4450.1 [806/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 4451.8 [807/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_5.cpp.o 4460.4 [808/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 4467.1 [809/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 4475.8 [810/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp.o 4478.6 [811/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.o 4479.1 [812/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4481.9 [813/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4485.1 [814/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 4486.6 [815/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4486.8 [816/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4489.9 [817/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4492.5 [818/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instance.cpp.o 4493.8 [819/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev2_instance.cpp.o 4495.6 [820/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 4495.7 [821/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4496.9 [822/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instance.cpp.o 4499.2 [823/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4499.4 [824/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev2_instance.cpp.o 4499.8 [825/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 4500.1 [826/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev2_instance.cpp.o 4501.2 [827/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev2_instance.cpp.o 4501.8 [828/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev5_instance.cpp.o 4502.3 [829/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev5_instance.cpp.o 4503.0 [830/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instance.cpp.o 4505.1 [831/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4506.3 [832/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev5_instance.cpp.o 4508.6 [833/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 4509.7 [834/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev2_instance.cpp.o 4510.3 [835/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev5_instance.cpp.o 4513.6 [836/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4559.9 [837/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_pipev1_instance.cpp.o 4561.7 [838/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_f16_pipev1_instance.cpp.o 4575.9 [839/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 4582.2 [840/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 4585.6 [841/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_4.cpp.o 4602.0 [842/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 4627.7 [843/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp.o 4627.9 [844/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp.o 4630.1 [845/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp.o 4635.9 [846/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 4637.1 [847/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp.o 4637.7 [848/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 4657.0 [849/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 4682.8 [850/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 4698.2 [851/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 4706.5 [852/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4719.1 [853/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_vec_transpose_instance.cpp.o 4727.2 [854/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_vec_transpose_instance.cpp.o 4728.1 [855/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 4736.5 [856/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 4759.7 [857/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4774.5 [858/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_vec_transpose_instance.cpp.o 4776.1 [859/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_part2_instance.cpp.o 4786.6 [860/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_part2_instance.cpp.o 4805.7 [861/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 4806.3 [862/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4819.2 [863/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4825.0 [864/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 4825.0 [865/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 4833.1 [866/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 4834.0 [867/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 4839.5 [868/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 4841.8 [869/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4855.3 [870/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 4863.8 [871/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 4865.4 [872/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.o 4867.4 [873/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 4867.8 [874/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp.o 4869.2 [875/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 4877.7 [876/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 4895.7 [877/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4897.6 [878/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4897.9 [879/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4905.1 [880/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_part2_instance.cpp.o 4905.9 [881/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 4908.2 [882/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_irregular_instance.cpp.o 4915.0 [883/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 4916.7 [884/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4920.6 [885/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4921.9 [886/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_part2_instance.cpp.o 4922.9 [887/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 4925.9 [888/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4926.8 [889/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_irregular_instance.cpp.o 4929.9 [890/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp.o 4933.6 [891/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 4933.7 [892/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_irregular_instance.cpp.o 4936.3 [893/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 4945.2 [894/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 4945.6 [895/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 4947.2 [896/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 4948.4 [897/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 4951.3 [898/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_part2_instance.cpp.o 4953.2 [899/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_part2_instance.cpp.o 4966.3 [900/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_irregular_instance.cpp.o 4968.5 [901/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 4969.7 [902/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4969.9 [903/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 4971.8 [904/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 4977.8 [905/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp.o 4992.1 [906/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5006.8 [907/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev2_instance.cpp.o 5009.0 [908/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5016.8 [909/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5020.0 [910/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 5021.6 [911/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev5_instance.cpp.o 5022.5 [912/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5025.3 [913/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5039.4 [914/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_instance.cpp.o 5049.6 [915/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_instance.cpp.o 5050.4 [916/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev2_instance.cpp.o 5065.2 [917/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 5067.2 [918/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev5_instance.cpp.o 5078.5 [919/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 5090.1 [920/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5102.5 [921/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 5120.8 [922/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 5127.9 [923/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 5140.8 [924/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 5142.4 [925/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 5188.5 [926/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances.cpp.o 5223.3 [927/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 5248.4 [928/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_2x_instance.cpp.o 5268.6 [929/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5271.7 [930/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 5276.7 [931/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5278.0 [932/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cpp.o 5278.1 [933/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_2x_instance.cpp.o 5294.4 [934/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 5297.8 [935/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5306.8 [936/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 5313.6 [937/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 5316.8 [938/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances.cpp.o 5344.9 [939/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp.o 5356.1 [940/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_0.cpp.o 5362.0 [941/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5364.3 [942/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp.o 5365.6 [943/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_1.cpp.o 5371.3 [944/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 5378.5 [945/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_2.cpp.o 5384.2 [946/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_3.cpp.o 5392.0 [947/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 5397.0 [948/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_3.cpp.o 5400.9 [949/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5402.8 [950/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances.cpp.o 5405.7 [951/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5408.3 [952/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_5.cpp.o 5425.6 [953/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_7.cpp.o 5437.8 [954/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5439.2 [955/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.o 5440.7 [956/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_2.cpp.o 5446.0 [957/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_0.cpp.o 5446.3 [958/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5469.6 [959/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_1.cpp.o 5471.4 [960/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_6.cpp.o 5476.6 [961/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 5478.2 [962/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_1.cpp.o 5481.9 [963/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_2.cpp.o 5484.0 [964/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_3.cpp.o 5486.2 [965/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances.cpp.o 5487.2 [966/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_4.cpp.o 5491.4 [967/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 5491.6 [968/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.o 5492.0 [969/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_5.cpp.o 5495.0 [970/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_3.cpp.o 5495.6 [971/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 5508.7 [972/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_7.cpp.o 5510.4 [973/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_2.cpp.o 5514.3 [974/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances.cpp.o 5524.9 [975/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_1.cpp.o 5529.1 [976/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_4.cpp.o 5530.6 [977/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_6.cpp.o 5538.0 [978/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_0.cpp.o 5541.5 [979/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_6.cpp.o 5542.7 [980/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_8.cpp.o 5544.7 [981/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.o 5546.5 [982/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_9.cpp.o 5549.1 [983/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.o 5550.7 [984/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_0.cpp.o 5551.2 [985/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_1.cpp.o 5555.8 [986/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_2.cpp.o 5557.7 [987/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_3.cpp.o 5559.1 [988/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_1.cpp.o 5559.7 [989/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 5561.7 [990/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances.cpp.o 5574.4 [991/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_3.cpp.o 5574.5 [992/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5584.0 [993/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances.cpp.o 5585.3 [994/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_2.cpp.o 5592.1 [995/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.o 5595.3 [996/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_0.cpp.o 5596.5 [997/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_5.cpp.o 5605.7 [998/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_1.cpp.o 5609.2 [999/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_2.cpp.o 5610.4 [1000/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_3.cpp.o 5621.6 [1001/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_4.cpp.o 5623.7 [1002/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_7.cpp.o 5631.3 [1003/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_0.cpp.o 5634.7 [1004/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances.cpp.o 5636.7 [1005/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 5638.7 [1006/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_6.cpp.o 5639.5 [1007/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.o 5643.3 [1008/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_1.cpp.o 5644.4 [1009/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_8.cpp.o 5650.3 [1010/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_5.cpp.o 5650.9 [1011/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 5656.4 [1012/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_6.cpp.o 5656.9 [1013/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 5660.8 [1014/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_8.cpp.o 5663.0 [1015/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_4.cpp.o 5665.3 [1016/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 5666.4 [1017/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_6.cpp.o 5668.8 [1018/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_9.cpp.o 5673.1 [1019/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.o 5679.4 [1020/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_11.cpp.o 5679.8 [1021/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 5680.6 [1022/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_2.cpp.o 5688.1 [1023/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_3.cpp.o 5694.8 [1024/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.o 5700.1 [1025/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_8.cpp.o 5709.1 [1026/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_5.cpp.o 5719.2 [1027/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_5.cpp.o 5726.5 [1028/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_7.cpp.o 5730.3 [1029/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_7.cpp.o 5733.2 [1030/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_4.cpp.o 5734.6 [1031/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 5739.4 [1032/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 5743.8 [1033/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 5752.1 [1034/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_5.cpp.o 5753.3 [1035/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 5759.4 [1036/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_6.cpp.o 5772.7 [1037/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_8.cpp.o 5777.4 [1038/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 5777.5 [1039/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 5780.7 [1040/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_9.cpp.o 5781.7 [1041/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_7.cpp.o 5781.8 [1042/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_1.cpp.o 5783.0 [1043/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_0.cpp.o 5783.5 [1044/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 5797.2 [1045/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_8.cpp.o 5804.5 [1046/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_7.cpp.o 5811.6 [1047/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_6.cpp.o 5815.5 [1048/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_11.cpp.o 5815.8 [1049/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances.cpp.o 5826.6 [1050/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_3.cpp.o 5829.1 [1051/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_4.cpp.o 5830.6 [1052/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_2.cpp.o 5830.6 [1053/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_1.cpp.o 5833.7 [1054/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_2.cpp.o 5840.1 [1055/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_5.cpp.o 5845.1 [1056/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 5856.3 [1057/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_6.cpp.o 5857.7 [1058/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_7.cpp.o 5859.3 [1059/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_8.cpp.o 5859.8 [1060/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_9.cpp.o 5867.5 [1061/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5882.6 [1062/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_11.cpp.o 5887.0 [1063/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_10.cpp.o 5889.8 [1064/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 5892.8 [1065/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_4.cpp.o 5896.3 [1066/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_9.cpp.o 5899.2 [1067/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances.cpp.o 5908.5 [1068/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_11.cpp.o 5917.3 [1069/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_10.cpp.o 5920.6 [1070/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_4.cpp.o 5923.9 [1071/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_8.cpp.o 5932.0 [1072/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_5.cpp.o 5941.9 [1073/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_7.cpp.o 5946.8 [1074/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_9.cpp.o 5959.2 [1075/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_10.cpp.o 5963.5 [1076/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_0.cpp.o 5991.6 [1077/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 6003.6 [1078/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 6005.7 [1079/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_7.cpp.o 6016.9 [1080/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_6.cpp.o 6019.8 [1081/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_3.cpp.o 6020.3 [1082/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_4.cpp.o 6020.8 [1083/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 6022.6 [1084/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_5.cpp.o 6024.0 [1085/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_7.cpp.o 6041.8 [1086/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_9.cpp.o 6054.8 [1087/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_8.cpp.o 6056.3 [1088/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_11.cpp.o 6058.6 [1089/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_10.cpp.o 6065.1 [1090/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 6070.2 [1091/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_13.cpp.o 6071.3 [1092/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_12.cpp.o 6086.7 [1093/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_14.cpp.o 6096.1 [1094/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_9.cpp.o 6096.1 [1095/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_16x16_instance.cpp.o 6102.6 [1096/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 6102.7 [1097/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 6105.9 [1098/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.o 6113.6 [1099/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_instance.cpp.o 6116.9 [1100/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 6117.3 [1101/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_fp8_instance.cpp.o 6119.0 [1102/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_15.cpp.o 6126.3 [1103/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 6132.7 [1104/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 6140.1 [1105/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_bf8_instance.cpp.o 6144.2 [1106/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_1.cpp.o 6153.8 [1107/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_5.cpp.o 6154.1 [1108/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_4.cpp.o 6157.2 [1109/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_7.cpp.o 6157.9 [1110/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_6.cpp.o 6158.3 [1111/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_3.cpp.o 6164.1 [1112/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 6167.9 [1113/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_13.cpp.o 6180.9 [1114/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_8.cpp.o 6181.4 [1115/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_2.cpp.o 6181.4 [1116/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_10.cpp.o 6184.0 [1117/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_9.cpp.o 6196.8 [1118/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_8.cpp.o 6198.4 [1119/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_15.cpp.o 6199.6 [1120/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 6202.5 [1121/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_14.cpp.o 6205.6 [1122/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_12.cpp.o 6206.5 [1123/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_5.cpp.o 6211.2 [1124/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_11.cpp.o 6212.5 [1125/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_6.cpp.o 6215.9 [1126/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 6217.5 [1127/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 6220.6 [1128/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 6229.2 [1129/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_3.cpp.o 6230.9 [1130/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_9.cpp.o 6231.6 [1131/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 6234.8 [1132/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6237.5 [1133/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_5.cpp.o 6242.2 [1134/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_4.cpp.o 6245.6 [1135/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_9.cpp.o 6246.6 [1136/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_6.cpp.o 6248.4 [1137/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances.cpp.o 6252.9 [1138/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_7.cpp.o 6253.2 [1139/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_8.cpp.o 6253.8 [1140/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_15.cpp.o 6254.2 [1141/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6254.9 [1142/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_6.cpp.o 6266.8 [1143/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_7.cpp.o 6270.1 [1144/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 6273.6 [1145/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_14.cpp.o 6276.2 [1146/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_13.cpp.o 6279.5 [1147/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_12.cpp.o 6280.2 [1148/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_10.cpp.o 6280.5 [1149/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_11.cpp.o 6280.7 [1150/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances.cpp.o 6281.7 [1151/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances.cpp.o 6285.3 [1152/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 6290.4 [1153/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 6296.8 [1154/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 6300.3 [1155/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 6306.4 [1156/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_1.cpp.o 6307.0 [1157/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 6312.5 [1158/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 6313.4 [1159/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_9.cpp.o 6315.9 [1160/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 6319.3 [1161/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 6319.8 [1162/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 6321.2 [1163/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 6325.6 [1164/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 6327.6 [1165/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 6335.3 [1166/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_0.cpp.o 6338.9 [1167/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 6342.5 [1168/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 6346.3 [1169/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 6346.7 [1170/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_2.cpp.o 6355.7 [1171/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 6398.2 [1172/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_2.cpp.o 6403.4 [1173/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_0.cpp.o 6405.9 [1174/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances.cpp.o 6413.3 [1175/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_1.cpp.o 6420.4 [1176/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_1.cpp.o 6421.4 [1177/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_3.cpp.o 6425.9 [1178/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_1.cpp.o 6428.5 [1179/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_2.cpp.o 6428.8 [1180/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_0.cpp.o 6432.2 [1181/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_12.cpp.o 6433.0 [1182/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_3.cpp.o 6433.8 [1183/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 6434.4 [1184/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_2.cpp.o 6435.7 [1185/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 6441.4 [1186/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_3.cpp.o 6441.9 [1187/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 6441.9 [1188/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_11.cpp.o 6442.5 [1189/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_0.cpp.o 6446.0 [1190/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp.o 6446.1 [1191/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_10.cpp.o 6449.3 [1192/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_13.cpp.o 6456.2 [1193/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_8.cpp.o 6457.7 [1194/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_14.cpp.o 6459.5 [1195/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_0.cpp.o 6460.0 [1196/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_15.cpp.o 6460.6 [1197/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_4.cpp.o 6464.8 [1198/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_6.cpp.o 6472.2 [1199/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 6472.7 [1200/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_16.cpp.o 6475.9 [1201/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_18.cpp.o 6487.9 [1202/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_0.cpp.o 6488.8 [1203/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_17.cpp.o 6495.5 [1204/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_19.cpp.o 6497.4 [1205/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 6508.4 [1206/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 6515.6 [1207/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp.o 6522.8 [1208/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances.cpp.o 6526.4 [1209/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_1.cpp.o 6534.1 [1210/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_10.cpp.o 6534.7 [1211/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_2.cpp.o 6540.6 [1212/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_18.cpp.o 6542.4 [1213/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_6.cpp.o 6542.7 [1214/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_12.cpp.o 6551.5 [1215/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_3.cpp.o 6555.2 [1216/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_4.cpp.o 6561.9 [1217/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_19.cpp.o 6566.9 [1218/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_15.cpp.o 6568.8 [1219/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_8.cpp.o 6572.6 [1220/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_2.cpp.o 6573.6 [1221/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_11.cpp.o 6575.3 [1222/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_1.cpp.o 6581.7 [1223/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_13.cpp.o 6583.1 [1224/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_17.cpp.o 6583.2 [1225/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_14.cpp.o 6586.8 [1226/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances.cpp.o 6588.2 [1227/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_0.cpp.o 6588.5 [1228/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_16.cpp.o 6589.4 [1229/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_10.cpp.o 6600.3 [1230/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_9.cpp.o 6600.4 [1231/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_3.cpp.o 6607.1 [1232/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_4.cpp.o 6609.1 [1233/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_15.cpp.o 6609.3 [1234/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_8.cpp.o 6612.1 [1235/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_6.cpp.o 6613.7 [1236/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_11.cpp.o 6623.1 [1237/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_1.cpp.o 6623.9 [1238/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_13.cpp.o 6628.7 [1239/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 6631.3 [1240/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_14.cpp.o 6633.2 [1241/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_12.cpp.o 6635.4 [1242/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 6640.4 [1243/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_2.cpp.o 6641.5 [1244/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_0.cpp.o 6644.0 [1245/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances.cpp.o 6653.8 [1246/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_13.cpp.o 6655.9 [1247/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_8.cpp.o 6660.1 [1248/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_3.cpp.o 6662.8 [1249/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_4.cpp.o 6663.2 [1250/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 6667.7 [1251/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_18.cpp.o 6668.2 [1252/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 6672.8 [1253/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_0.cpp.o 6675.3 [1254/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_12.cpp.o 6675.4 [1255/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_10.cpp.o 6675.6 [1256/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_11.cpp.o 6675.8 [1257/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_16.cpp.o 6678.1 [1258/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_6.cpp.o 6681.2 [1259/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_14.cpp.o 6684.3 [1260/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_3.cpp.o 6686.1 [1261/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_2.cpp.o 6686.7 [1262/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_19.cpp.o 6690.4 [1263/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 6693.7 [1264/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_15.cpp.o 6695.6 [1265/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 6697.3 [1266/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_5.cpp.o 6700.7 [1267/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_17.cpp.o 6702.4 [1268/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 6707.7 [1269/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_7.cpp.o 6710.5 [1270/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances.cpp.o 6711.8 [1271/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_10.cpp.o 6712.3 [1272/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_1.cpp.o 6716.5 [1273/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_3.cpp.o 6717.1 [1274/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_11.cpp.o 6722.6 [1275/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_12.cpp.o 6723.9 [1276/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp.o 6725.4 [1277/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_6.cpp.o 6727.6 [1278/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_8.cpp.o 6730.6 [1279/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 6730.9 [1280/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_15.cpp.o 6731.3 [1281/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_13.cpp.o 6731.3 [1282/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_4.cpp.o 6736.3 [1283/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_18.cpp.o 6743.1 [1284/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 6743.9 [1285/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances.cpp.o 6745.9 [1286/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_14.cpp.o 6747.0 [1287/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_8.cpp.o 6749.0 [1288/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 6753.2 [1289/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 6755.9 [1290/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_16.cpp.o 6756.5 [1291/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_17.cpp.o 6760.8 [1292/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_1.cpp.o 6762.1 [1293/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_19.cpp.o 6762.6 [1294/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_2.cpp.o 6765.3 [1295/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 6768.8 [1296/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_7.cpp.o 6772.2 [1297/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_0.cpp.o 6775.4 [1298/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_15.cpp.o 6777.7 [1299/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 6779.8 [1300/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 6781.4 [1301/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_3.cpp.o 6782.3 [1302/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_14.cpp.o 6783.0 [1303/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_7.cpp.o 6784.0 [1304/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_6.cpp.o 6785.4 [1305/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_4.cpp.o 6785.4 [1306/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_11.cpp.o 6785.9 [1307/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 6785.9 [1308/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_9.cpp.o 6790.3 [1309/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_13.cpp.o 6791.6 [1310/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_10.cpp.o 6792.9 [1311/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances.cpp.o 6794.9 [1312/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_5.cpp.o 6796.0 [1313/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances.cpp.o 6798.6 [1314/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 6798.7 [1315/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 6799.9 [1316/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_12.cpp.o 6801.4 [1317/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances.cpp.o 6801.8 [1318/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 6807.5 [1319/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 6810.6 [1320/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 6814.6 [1321/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 6817.7 [1322/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 6819.0 [1323/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_9.cpp.o 6820.5 [1324/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 6823.5 [1325/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_0.cpp.o 6823.6 [1326/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances.cpp.o 6824.3 [1327/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 6825.3 [1328/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 6828.4 [1329/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 6830.5 [1330/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 6838.3 [1331/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_1.cpp.o 6841.2 [1332/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances_shard_0.cpp.o 6843.3 [1333/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances.cpp.o 6846.8 [1334/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances_shard_0.cpp.o 6850.4 [1335/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_2.cpp.o 6850.5 [1336/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_2.cpp.o 6850.7 [1337/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_3.cpp.o 6858.0 [1338/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_4.cpp.o 6865.1 [1339/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_5.cpp.o 6867.7 [1340/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_1.cpp.o 6869.5 [1341/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_3.cpp.o 6869.5 [1342/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6871.5 [1343/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_0.cpp.o 6872.8 [1344/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_1.cpp.o 6875.7 [1345/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_9.cpp.o 6876.4 [1346/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp.o 6877.5 [1347/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_5.cpp.o 6882.1 [1348/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_3.cpp.o 6882.2 [1349/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_8.cpp.o 6885.4 [1350/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_7.cpp.o 6887.0 [1351/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 6892.3 [1352/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_5.cpp.o 6892.4 [1353/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 6895.5 [1354/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_11.cpp.o 6897.5 [1355/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_10.cpp.o 6909.0 [1356/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_9.cpp.o 6928.3 [1357/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 6928.3 [1358/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 6936.8 [1359/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6942.7 [1360/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_7.cpp.o 6957.1 [1361/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 6971.8 [1362/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_5.cpp.o 6977.4 [1363/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6979.3 [1364/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 7000.2 [1365/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_7.cpp.o 7002.4 [1366/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7006.4 [1367/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_7.cpp.o 7027.3 [1368/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 7028.2 [1369/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 7031.7 [1370/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 7042.7 [1371/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 7050.2 [1372/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 7050.4 [1373/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 7051.0 [1374/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7056.8 [1375/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_5.cpp.o 7071.9 [1376/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7074.3 [1377/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 7079.9 [1378/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 7085.0 [1379/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_9.cpp.o 7088.5 [1380/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 7091.1 [1381/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7107.3 [1382/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 7109.6 [1383/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_4.cpp.o 7114.0 [1384/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 7116.4 [1385/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 7127.9 [1386/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_9.cpp.o 7129.1 [1387/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7131.6 [1388/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 7148.5 [1389/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 7178.3 [1390/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 7179.4 [1391/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 7186.8 [1392/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 7203.7 [1393/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 7225.6 [1394/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 7291.9 [1395/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 7311.7 [1396/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7335.7 [1397/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 7346.6 [1398/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 7359.7 [1399/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 7364.5 [1400/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7367.4 [1401/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 7384.4 [1402/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7392.6 [1403/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 7393.0 [1404/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7395.3 [1405/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7401.2 [1406/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 7414.6 [1407/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7435.5 [1408/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 7478.1 [1409/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7497.1 [1410/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7513.6 [1411/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7516.8 [1412/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 7529.0 [1413/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7546.5 [1414/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 7549.8 [1415/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 7551.0 [1416/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7557.7 [1417/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 7562.7 [1418/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 7569.5 [1419/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7585.9 [1420/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 7597.2 [1421/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7601.5 [1422/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 7603.0 [1423/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 7620.0 [1424/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 7639.7 [1425/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_4.cpp.o 7659.8 [1426/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7664.1 [1427/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7675.2 [1428/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 7681.9 [1429/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 7683.1 [1430/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 7697.7 [1431/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_0.cpp.o 7721.3 [1432/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_4.cpp.o 7723.0 [1433/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 7724.0 [1434/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7725.3 [1435/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7725.3 [1436/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 7726.0 [1437/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_6.cpp.o 7741.8 [1438/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_2.cpp.o 7756.4 [1439/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7761.6 [1440/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 7761.6 [1441/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 7775.1 [1442/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 7791.0 [1443/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7792.5 [1444/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 7822.8 [1445/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7834.6 [1446/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 7837.4 [1447/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7842.9 [1448/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 7854.3 [1449/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 7865.5 [1450/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 7872.6 [1451/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 7902.6 [1452/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 7902.8 [1453/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 7907.1 [1454/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 7940.3 [1455/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 7942.2 [1456/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 7961.2 [1457/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 7968.5 [1458/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 7994.8 [1459/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 8011.2 [1460/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 8012.1 [1461/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_0.cpp.o 8020.6 [1462/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 8023.4 [1463/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 8028.2 [1464/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 8035.7 [1465/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_4.cpp.o 8057.2 [1466/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_default_instance.cpp.o 8059.6 [1467/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 8081.0 [1468/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_5.cpp.o 8094.6 [1469/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 8119.1 [1470/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_3.cpp.o 8137.0 [1471/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 8165.9 [1472/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 8183.7 [1473/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 8207.6 [1474/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 8339.6 [1475/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 8420.5 [1476/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.o 8430.0 [1477/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.o 8439.5 [1478/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.o 8521.2 [1479/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 8527.5 [1480/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.o 8529.3 [1481/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.o 8552.4 [1482/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.o 8579.8 [1483/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 8580.6 [1484/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 8591.7 [1485/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 8663.1 [1486/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_default_instance.cpp.o 8679.7 [1487/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 8702.7 [1488/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp.o 8707.2 [1489/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.o 8712.2 [1490/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 8728.6 [1491/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 8735.1 [1492/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.o 8735.9 [1493/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 8736.8 [1494/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp.o 8743.6 [1495/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 8743.7 [1496/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 8757.3 [1497/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 8785.5 [1498/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp.o 8800.2 [1499/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.o 8814.0 [1500/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.o 8816.7 [1501/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 8819.7 [1502/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 8835.9 [1503/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_m_instance.cpp.o 8842.2 [1504/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.o 8844.7 [1505/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 8847.6 [1506/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp.o 8860.1 [1507/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 8860.2 [1508/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp.o 8869.0 [1509/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.o 8871.1 [1510/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 8883.6 [1511/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_default_instance.cpp.o 8897.9 [1512/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.o 8897.9 [1513/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 8902.4 [1514/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.o 8905.5 [1515/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.o 8914.0 [1516/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp.o 8916.3 [1517/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.o 8919.6 [1518/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.o 8923.6 [1519/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_n_instance.cpp.o 8924.7 [1520/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.o 8926.9 [1521/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.o 8927.7 [1522/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.o 8930.1 [1523/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.o 8931.4 [1524/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.o 8943.3 [1525/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.o 8949.2 [1526/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.o 8955.5 [1527/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.o 8969.9 [1528/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_m_instance.cpp.o 8973.3 [1529/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.o 8973.8 [1530/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.o 8974.2 [1531/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.o 8974.2 [1532/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.o 8976.8 [1533/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.o 8986.6 [1534/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.o 8987.4 [1535/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.o 8988.3 [1536/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 8989.0 [1537/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.o 8994.8 [1538/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.o 9003.9 [1539/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 9006.0 [1540/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.o 9006.2 [1541/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.o 9009.4 [1542/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.o 9017.8 [1543/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.o 9019.7 [1544/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.o 9023.9 [1545/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.o 9025.0 [1546/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.o 9025.5 [1547/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.o 9029.6 [1548/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.o 9030.1 [1549/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 9032.0 [1550/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.o 9034.2 [1551/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.o 9038.5 [1552/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 9042.5 [1553/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.o 9044.6 [1554/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.o 9047.7 [1555/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.o 9049.1 [1556/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.o 9050.1 [1557/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.o 9050.1 [1558/1691] cd /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build && /usr/local/therock-tools/bin/cmake -E rm -f /__w/rockrel/rockrel/build/ml-libs/composable_kernel/stamp/stage.stamp 9055.8 [1559/1691] Building CXX object library/src/utility/CMakeFiles/utility.dir/device_memory.cpp.o 9057.7 [1560/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.o 9058.9 [1561/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 9059.7 [1562/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.o 9061.1 [1563/1691] Building CXX object library/src/utility/CMakeFiles/utility.dir/host_tensor.cpp.o 9062.7 [1564/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_n_instance.cpp.o 9062.9 [1565/1691] Building CXX object library/src/utility/CMakeFiles/utility.dir/convolution_parameter.cpp.o 9067.9 [1566/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.o 9068.6 [1567/1691] Linking CXX static library lib/libdevice_contraction_operations.a 9068.7 [1568/1691] Linking CXX static library lib/libdevice_other_operations.a 9074.1 [1569/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.o 9093.2 [1570/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 9098.5 [1571/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 9098.6 [1572/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 9099.4 [1573/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 9099.7 [1574/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 9103.1 [1575/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce1.cpp.o 9106.1 [1576/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 9107.7 [1577/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 9109.5 [1578/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 9109.7 [1579/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce1.cpp.o 9109.8 [1580/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 9111.2 [1581/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_mn_instance.cpp.o 9119.4 [1582/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce2.cpp.o 9121.0 [1583/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 9122.1 [1584/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce1.cpp.o 9124.3 [1585/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 9125.7 [1586/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce3.cpp.o 9130.0 [1587/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce3.cpp.o 9134.0 [1588/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 9136.0 [1589/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce1.cpp.o 9136.5 [1590/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce2.cpp.o 9138.0 [1591/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 9142.8 [1592/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce2.cpp.o 9143.4 [1593/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce4.cpp.o 9144.9 [1594/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 9145.6 [1595/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce2.cpp.o 9149.6 [1596/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 9151.3 [1597/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 9153.3 [1598/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce3.cpp.o 9153.9 [1599/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 9155.7 [1600/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce3.cpp.o 9156.3 [1601/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.o 9169.0 [1602/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.o 9179.2 [1603/1691] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce4.cpp.o 9187.5 [1604/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_odd_mn_instance.cpp.o 9191.9 [1605/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.o 9193.8 [1606/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.o 9198.0 [1607/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 9201.7 [1608/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.o 9210.8 [1609/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.o 9210.9 [1610/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.o 9218.3 [1611/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.o 9219.6 [1612/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.o 9219.6 [1613/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.o 9220.2 [1614/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 9220.5 [1615/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 9220.9 [1616/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.o 9222.0 [1617/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 9231.5 [1618/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.o 9235.2 [1619/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.o 9236.4 [1620/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.o 9238.2 [1621/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 9240.9 [1622/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.o 9241.6 [1623/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.o 9241.7 [1624/1691] Linking CXX static library lib/libutility.a 9250.6 [1625/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.o 9252.1 [1626/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.o 9275.9 [1627/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.o 9277.0 [1628/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 9277.7 [1629/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 9292.2 [1630/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 9298.4 [1631/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 9299.6 [1632/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 9302.0 [1633/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 9303.0 [1634/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 9315.0 [1635/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 9342.3 [1636/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.o 9351.8 [1637/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.o 9355.2 [1638/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 9356.0 [1639/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 9356.2 [1640/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 9356.8 [1641/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 9362.1 [1642/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 9363.0 [1643/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 9364.5 [1644/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 9372.8 [1645/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 9375.2 [1646/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnpadding_instance.cpp.o 9381.3 [1647/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 9381.5 [1648/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 9386.2 [1649/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 9387.1 [1650/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 9387.7 [1651/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_kpadding_instance.cpp.o 9391.1 [1652/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.o 9391.8 [1653/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.o 9401.2 [1654/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 9402.8 [1655/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 9416.4 [1656/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_default_instance.cpp.o 9430.9 [1657/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_kpadding_instance.cpp.o 9435.9 [1658/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 9441.2 [1659/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_mnkpadding_instance.cpp.o 9444.0 [1660/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 9445.7 [1661/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_default_instance.cpp.o 9447.5 [1662/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 9448.9 [1663/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 9451.9 [1664/1691] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.o 9452.4 [1665/1691] Linking CXX static library lib/libdevice_reduction_operations.a 9456.1 [1666/1691] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 9456.7 [1667/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_default_instance.cpp.o 9458.8 [1668/1691] Linking CXX static library lib/libdevice_conv_operations.a 9472.6 [1669/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 9477.8 [1670/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 9478.2 [1671/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 9479.7 [1672/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 9509.2 [1673/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 9632.9 [1674/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 9692.2 [1675/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 9701.4 [1676/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 9705.1 [1677/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 9752.6 [1678/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 9754.1 [1679/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 9794.0 [1680/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 9794.8 [1681/1691] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 9823.5 [1682/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 9851.3 [1683/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 9865.3 [1684/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 9900.4 [1685/1691] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 9914.7 [1686/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 9925.1 [1687/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 9941.7 [1688/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 11115.3 [1689/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_bf16_bf16/device_gemm_wmma_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 11158.5 [1690/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_f16_f16_f16/device_gemm_wmma_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 11207.9 [1691/1691] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_i8_bf16/device_gemm_wmma_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o END 1777965123.8992968 11207.932774066925 0