BEGIN 1777953503.527669 EXEC /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build /usr/local/therock-tools/bin/cmake -E env --unset=ROCM_PATH --unset=ROCM_DIR --unset=HIP_PATH --unset=HIP_DIR -- /usr/local/therock-tools/bin/cmake --build /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build 34.9 [1/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 35.3 [2/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 35.6 [3/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 36.1 [4/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 37.7 [5/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 37.9 [6/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 39.0 [7/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 39.2 [8/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 39.4 [9/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 41.6 [10/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 42.0 [11/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 42.3 [12/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 42.3 [13/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 42.5 [14/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 43.2 [15/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 43.7 [16/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 44.4 [17/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 44.6 [18/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 46.0 [19/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 46.1 [20/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 46.5 [21/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 47.0 [22/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 48.3 [23/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 48.4 [24/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 48.7 [25/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 48.7 [26/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 49.4 [27/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 50.0 [28/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 67.1 [29/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 68.7 [30/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 69.8 [31/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 70.8 [32/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 71.0 [33/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 71.7 [34/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 71.8 [35/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 71.9 [36/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 72.6 [37/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 73.7 [38/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 75.6 [39/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 75.8 [40/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 101.7 [41/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 103.7 [42/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 105.2 [43/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 106.6 [44/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 108.3 [45/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 108.6 [46/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 112.5 [47/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 116.2 [48/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 118.3 [49/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 119.4 [50/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 131.2 [51/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 131.4 [52/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 131.8 [53/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 132.0 [54/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 132.7 [55/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 135.2 [56/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 135.7 [57/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 138.5 [58/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 138.7 [59/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 140.1 [60/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 141.0 [61/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 151.2 [62/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 151.2 [63/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 155.0 [64/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 155.5 [65/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 156.7 [66/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 158.6 [67/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 161.4 [68/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 161.9 [69/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 162.3 [70/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 163.5 [71/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 165.3 [72/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 165.4 [73/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 165.5 [74/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 171.1 [75/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 173.1 [76/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 173.1 [77/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 173.8 [78/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 173.9 [79/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 174.4 [80/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 175.2 [81/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 176.2 [82/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 178.5 [83/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 180.3 [84/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 180.7 [85/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 181.1 [86/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 182.8 [87/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 183.8 [88/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 183.8 [89/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 184.3 [90/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 189.4 [91/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 190.7 [92/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 191.5 [93/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 193.5 [94/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 196.0 [95/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 199.1 [96/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 199.3 [97/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 199.7 [98/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 199.8 [99/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 199.9 [100/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 200.1 [101/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 206.8 [102/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 206.9 [103/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 209.6 [104/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 210.1 [105/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 211.8 [106/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 211.8 [107/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 212.2 [108/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 213.9 [109/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 214.5 [110/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 219.3 [111/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 221.0 [112/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 221.4 [113/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 222.2 [114/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 222.8 [115/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 226.0 [116/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 226.0 [117/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 228.7 [118/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.o 228.9 [119/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 229.9 [120/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.o 230.9 [121/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.o 233.0 [122/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.o 234.3 [123/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 234.6 [124/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 235.1 [125/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 235.1 [126/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 236.1 [127/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 236.3 [128/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 237.2 [129/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 238.2 [130/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 238.2 [131/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 239.9 [132/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 240.3 [133/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.o 243.6 [134/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 244.9 [135/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 245.1 [136/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 246.9 [137/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 247.7 [138/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeFiles/device_grouped_conv1d_bwd_weight_instance.dir/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_f32_bf16_instance.cpp.o 248.2 [139/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.o 248.8 [140/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.o 249.1 [141/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 249.2 [142/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 249.7 [143/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 251.7 [144/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 252.2 [145/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.o 252.7 [146/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 253.0 [147/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 254.7 [148/1682] Building CXX object library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeFiles/device_elementwise_normalization_instance.dir/device_elementwise_normalization_f16_instance.cpp.o 256.7 [149/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.o 257.7 [150/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 259.2 [151/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.o 259.7 [152/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.o 262.2 [153/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeFiles/device_grouped_conv1d_fwd_instance.dir/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.o 262.6 [154/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_16_16_instance.cpp.o 263.6 [155/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.o 267.3 [156/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeFiles/device_conv2d_fwd_bias_relu_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.o 267.9 [157/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 270.8 [158/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 273.3 [159/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 273.7 [160/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 273.8 [161/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_f32_bf16_instance.cpp.o 276.1 [162/1682] Building CXX object library/src/tensor_operation_instance/gpu/elementwise/CMakeFiles/device_elementwise_instance.dir/device_normalize_instance.cpp.o 276.3 [163/1682] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeFiles/device_conv2d_fwd_bias_relu_add_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.o 276.8 [164/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_pad0_pipev1_instance.cpp.o 277.4 [165/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 277.4 [166/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_default_pipev1_instance.cpp.o 277.9 [167/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 278.1 [168/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_optimized_loads_instance.cpp.o 279.1 [169/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 282.6 [170/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_optimized_loads_instance.cpp.o 285.0 [171/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 285.2 [172/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 286.1 [173/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_optimized_loads_instance.cpp.o 286.3 [174/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 287.1 [175/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 287.6 [176/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 288.0 [177/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 288.0 [178/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 288.7 [179/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 289.2 [180/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_16_16_instance.cpp.o 289.8 [181/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_default_pipev1_instance.cpp.o 290.7 [182/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/gnhwc_gkyxc_gnhwk/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_pad0_pipev1_instance.cpp.o 290.7 [183/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 292.0 [184/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 292.2 [185/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 292.4 [186/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 293.6 [187/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 296.3 [188/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev2_instance.cpp.o 296.8 [189/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev5_instance.cpp.o 297.3 [190/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev2_instance.cpp.o 297.7 [191/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 297.8 [192/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev5_instance.cpp.o 298.0 [193/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 299.0 [194/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev2_instance.cpp.o 299.6 [195/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 301.7 [196/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev5_instance.cpp.o 302.0 [197/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev2_instance.cpp.o 302.4 [198/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load.cpp.o 302.7 [199/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load.cpp.o 303.0 [200/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 303.5 [201/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instance.cpp.o 304.9 [202/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 306.5 [203/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 306.8 [204/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev5_instance.cpp.o 309.1 [205/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev2_instance.cpp.o 310.6 [206/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev2_instance.cpp.o 310.9 [207/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev5_instance.cpp.o 311.4 [208/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev5_instance.cpp.o 313.2 [209/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_16_16_instance.cpp.o 314.7 [210/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 316.0 [211/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_16_16_instance.cpp.o 317.9 [212/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 324.2 [213/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 326.1 [214/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_f16_pipev1_instance.cpp.o 327.2 [215/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 327.5 [216/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_bf16_pipev1_instance.cpp.o 331.2 [217/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkyxc_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 336.3 [218/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instance.cpp.o 339.0 [219/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 342.9 [220/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 349.4 [221/1682] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 352.5 [222/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp.o 355.1 [223/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp.o 356.4 [224/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 356.8 [225/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp.o 357.4 [226/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 357.5 [227/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp.o 357.8 [228/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 358.0 [229/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp.o 359.1 [230/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 359.7 [231/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 361.5 [232/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.o 361.7 [233/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.o 362.4 [234/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 363.9 [235/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 366.2 [236/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 368.7 [237/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 369.6 [238/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp.o 370.3 [239/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 370.6 [240/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 371.7 [241/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 379.4 [242/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 380.6 [243/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 392.4 [244/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 395.5 [245/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_part2_instance.cpp.o 397.4 [246/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 399.8 [247/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_part2_instance.cpp.o 406.7 [248/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 412.9 [249/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 422.9 [250/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instance.cpp.o 429.5 [251/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_vec_transpose_instance.cpp.o 448.2 [252/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 450.6 [253/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_vec_transpose_instance.cpp.o 452.3 [254/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instance.cpp.o 453.8 [255/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 467.1 [256/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 470.3 [257/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_vec_transpose_instance.cpp.o 479.3 [258/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 488.5 [259/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 488.8 [260/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 489.0 [261/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_part2_instance.cpp.o 496.3 [262/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_part2_instance.cpp.o 496.4 [263/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_irregular_instance.cpp.o 501.5 [264/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_part2_instance.cpp.o 512.6 [265/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_part2_instance.cpp.o 513.9 [266/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_bf16_instance.cpp.o 517.6 [267/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_f16_instance.cpp.o 524.6 [268/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_irregular_instance.cpp.o 534.9 [269/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 537.3 [270/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_irregular_instance.cpp.o 541.1 [271/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 541.3 [272/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_irregular_instance.cpp.o 543.8 [273/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 548.9 [274/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 549.0 [275/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_bf16_instance.cpp.o 553.6 [276/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 560.8 [277/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 564.3 [278/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instance.cpp.o 571.0 [279/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev2_instance.cpp.o 589.5 [280/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev5_instance.cpp.o 591.1 [281/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev1_instance.cpp.o 593.6 [282/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp.o 595.2 [283/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev5_instance.cpp.o 599.8 [284/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_f16_pipev2_instance.cpp.o 601.7 [285/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_instance.cpp.o 604.8 [286/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkcyx_ngkhw_bf16_pipev1_instance.cpp.o 605.9 [287/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp.o 608.6 [288/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_2x_instance.cpp.o 611.4 [289/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 621.1 [290/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_instance.cpp.o 632.8 [291/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_comp_instance.cpp.o 633.9 [292/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_2x_instance.cpp.o 634.5 [293/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f16_instance.cpp.o 657.0 [294/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp.o 658.6 [295/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 672.4 [296/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp.o 717.9 [297/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp.o 723.5 [298/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 727.9 [299/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp.o 735.0 [300/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/xdl/ngchw_gkcyx_ngkhw/device_grouped_conv2d_bwd_weight_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp.o 739.8 [301/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_1.cpp.o 768.9 [302/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 774.5 [303/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_2.cpp.o 786.0 [304/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_12.cpp.o 793.3 [305/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_10.cpp.o 793.9 [306/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances.cpp.o 795.1 [307/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 798.6 [308/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_3.cpp.o 801.3 [309/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.o 803.1 [310/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_5.cpp.o 809.2 [311/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_14.cpp.o 809.4 [312/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.o 810.3 [313/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_7.cpp.o 818.5 [314/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_6.cpp.o 825.5 [315/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 834.2 [316/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 836.6 [317/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.o 839.9 [318/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_8.cpp.o 845.5 [319/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_9.cpp.o 848.2 [320/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 863.5 [321/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.o 866.3 [322/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances.cpp.o 875.6 [323/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.o 878.0 [324/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.o 880.7 [325/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_intra_instance.cpp.o 887.5 [326/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_1.cpp.o 891.5 [327/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 893.4 [328/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_11.cpp.o 894.7 [329/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_13.cpp.o 896.7 [330/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_10.cpp.o 904.1 [331/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_12.cpp.o 911.5 [332/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_14.cpp.o 926.3 [333/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_mem_inter_instance.cpp.o 927.0 [334/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp.o 929.4 [335/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_11.cpp.o 932.1 [336/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_15.cpp.o 942.0 [337/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_intra_instance.cpp.o 942.7 [338/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 945.4 [339/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_13.cpp.o 945.4 [340/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 945.6 [341/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_16x16_instance.cpp.o 945.7 [342/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_14.cpp.o 947.1 [343/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_12.cpp.o 947.1 [344/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_15.cpp.o 947.5 [345/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_5.cpp.o 950.8 [346/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.o 956.4 [347/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances.cpp.o 957.0 [348/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_9.cpp.o 957.8 [349/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_6.cpp.o 961.2 [350/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_8.cpp.o 970.7 [351/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_8.cpp.o 970.8 [352/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_3.cpp.o 971.8 [353/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_inter_instance.cpp.o 975.2 [354/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_2.cpp.o 979.9 [355/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_0.cpp.o 981.2 [356/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 981.6 [357/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_10.cpp.o 984.5 [358/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_0.cpp.o 987.8 [359/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_4.cpp.o 989.0 [360/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_2.cpp.o 994.2 [361/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_3.cpp.o 995.3 [362/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_1.cpp.o 996.9 [363/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_7.cpp.o 998.6 [364/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.o 1002.4 [365/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_6.cpp.o 1004.9 [366/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeFiles/device_grouped_conv2d_bwd_data_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 1006.3 [367/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_1.cpp.o 1008.7 [368/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_8.cpp.o 1010.9 [369/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_13.cpp.o 1012.4 [370/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_20.cpp.o 1012.6 [371/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances.cpp.o 1012.7 [372/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_11.cpp.o 1012.7 [373/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_12.cpp.o 1016.5 [374/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_10.cpp.o 1019.0 [375/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_16.cpp.o 1020.9 [376/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp.o 1023.0 [377/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_14.cpp.o 1023.9 [378/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_15.cpp.o 1030.5 [379/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_13.cpp.o 1031.5 [380/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_15.cpp.o 1034.7 [381/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_17.cpp.o 1036.2 [382/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_0.cpp.o 1040.1 [383/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 1041.7 [384/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_19.cpp.o 1042.4 [385/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.o 1043.6 [386/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_18.cpp.o 1046.4 [387/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_1.cpp.o 1047.0 [388/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_3.cpp.o 1048.4 [389/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_11.cpp.o 1052.9 [390/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_2.cpp.o 1055.0 [391/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_11.cpp.o 1055.3 [392/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_10.cpp.o 1055.4 [393/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_20.cpp.o 1056.1 [394/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances.cpp.o 1060.3 [395/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_4.cpp.o 1060.3 [396/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_6.cpp.o 1060.7 [397/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_2.cpp.o 1061.7 [398/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_8.cpp.o 1064.1 [399/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_12.cpp.o 1065.0 [400/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_14.cpp.o 1065.8 [401/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_17.cpp.o 1066.8 [402/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_13.cpp.o 1067.5 [403/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_9.cpp.o 1071.3 [404/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_16.cpp.o 1071.6 [405/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_15.cpp.o 1076.4 [406/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_7.cpp.o 1076.9 [407/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_19.cpp.o 1077.1 [408/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_18.cpp.o 1081.6 [409/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1087.0 [410/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_6.cpp.o 1087.6 [411/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1089.3 [412/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.o 1089.5 [413/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1090.3 [414/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1090.4 [415/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_3.cpp.o 1095.4 [416/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_8.cpp.o 1096.4 [417/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_4.cpp.o 1096.6 [418/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_6.cpp.o 1096.7 [419/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_5.cpp.o 1098.1 [420/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_10.cpp.o 1098.8 [421/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_12.cpp.o 1099.2 [422/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_9.cpp.o 1100.1 [423/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_11.cpp.o 1100.2 [424/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_7.cpp.o 1101.9 [425/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_13.cpp.o 1103.4 [426/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_15.cpp.o 1105.4 [427/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1108.6 [428/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_14.cpp.o 1112.9 [429/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1113.4 [430/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1114.6 [431/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_5.cpp.o 1114.8 [432/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_4.cpp.o 1115.2 [433/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_3.cpp.o 1117.7 [434/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1121.2 [435/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_6.cpp.o 1124.6 [436/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_8.cpp.o 1124.8 [437/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_7.cpp.o 1125.2 [438/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_9.cpp.o 1126.0 [439/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp.o 1128.9 [440/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_13.cpp.o 1129.0 [441/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_11.cpp.o 1129.4 [442/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.o 1130.0 [443/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_12.cpp.o 1130.0 [444/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 1130.3 [445/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_10.cpp.o 1130.6 [446/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_15.cpp.o 1130.8 [447/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1131.8 [448/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_6.cpp.o 1132.1 [449/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_3.cpp.o 1132.4 [450/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_14.cpp.o 1133.0 [451/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_4.cpp.o 1133.5 [452/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_5.cpp.o 1134.4 [453/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.o 1135.3 [454/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_11.cpp.o 1137.1 [455/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1137.1 [456/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_5.cpp.o 1137.2 [457/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_13.cpp.o 1137.9 [458/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_7.cpp.o 1138.6 [459/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_9.cpp.o 1139.1 [460/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1139.4 [461/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_10.cpp.o 1140.7 [462/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1141.2 [463/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 1142.2 [464/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1142.3 [465/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_12.cpp.o 1142.9 [466/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_8.cpp.o 1144.9 [467/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_0.cpp.o 1145.5 [468/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_14.cpp.o 1146.2 [469/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances.cpp.o 1146.5 [470/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1146.8 [471/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances.cpp.o 1147.5 [472/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_15.cpp.o 1149.0 [473/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1149.2 [474/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1150.1 [475/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1150.3 [476/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances.cpp.o 1150.5 [477/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1151.8 [478/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1152.9 [479/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_16x16_instance.cpp.o 1155.0 [480/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_2.cpp.o 1155.5 [481/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1156.1 [482/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1156.3 [483/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1157.0 [484/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1158.3 [485/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1158.4 [486/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1158.9 [487/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_1.cpp.o 1158.9 [488/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1159.9 [489/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1159.9 [490/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_0.cpp.o 1160.0 [491/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1163.4 [492/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1168.0 [493/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_5.cpp.o 1168.6 [494/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_2.cpp.o 1169.5 [495/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_3.cpp.o 1169.9 [496/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_3.cpp.o 1171.1 [497/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_2.cpp.o 1171.7 [498/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_0.cpp.o 1171.9 [499/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1173.0 [500/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_1.cpp.o 1173.3 [501/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1173.3 [502/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1173.6 [503/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_3.cpp.o 1173.9 [504/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 1174.1 [505/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_1.cpp.o 1174.2 [506/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_2.cpp.o 1175.2 [507/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_1.cpp.o 1175.7 [508/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_0.cpp.o 1176.7 [509/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_0.cpp.o 1180.3 [510/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_4.cpp.o 1180.7 [511/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_8.cpp.o 1181.0 [512/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_11.cpp.o 1181.7 [513/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_15.cpp.o 1182.2 [514/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances.cpp.o 1182.7 [515/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_10.cpp.o 1183.5 [516/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_12.cpp.o 1183.8 [517/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_14.cpp.o 1184.4 [518/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_6.cpp.o 1184.4 [519/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_13.cpp.o 1184.9 [520/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_16.cpp.o 1186.9 [521/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_17.cpp.o 1187.1 [522/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances.cpp.o 1189.3 [523/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_18.cpp.o 1190.5 [524/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_19.cpp.o 1191.9 [525/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_1.cpp.o 1192.6 [526/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_9.cpp.o 1192.9 [527/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_6.cpp.o 1193.3 [528/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_10.cpp.o 1193.4 [529/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_5.cpp.o 1194.2 [530/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_2.cpp.o 1194.4 [531/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_4.cpp.o 1195.0 [532/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_7.cpp.o 1195.3 [533/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_8.cpp.o 1196.2 [534/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_11.cpp.o 1196.8 [535/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_0.cpp.o 1197.3 [536/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_3.cpp.o 1198.6 [537/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_0.cpp.o 1198.8 [538/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1199.6 [539/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_1.cpp.o 1202.2 [540/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_2.cpp.o 1202.5 [541/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1203.1 [542/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_3.cpp.o 1204.3 [543/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances.cpp.o 1204.5 [544/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_4.cpp.o 1204.7 [545/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_8.cpp.o 1205.2 [546/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_11.cpp.o 1205.5 [547/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_12.cpp.o 1206.4 [548/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_6.cpp.o 1209.2 [549/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_13.cpp.o 1209.9 [550/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard_4.cpp.o 1210.1 [551/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_14.cpp.o 1210.3 [552/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_10.cpp.o 1211.7 [553/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_16.cpp.o 1212.0 [554/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_15.cpp.o 1212.2 [555/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1212.7 [556/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1213.6 [557/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances.cpp.o 1213.8 [558/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_1.cpp.o 1213.8 [559/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_17.cpp.o 1214.5 [560/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1214.9 [561/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_6.cpp.o 1215.7 [562/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_7.cpp.o 1215.9 [563/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_19.cpp.o 1216.1 [564/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_5.cpp.o 1216.4 [565/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_18.cpp.o 1218.1 [566/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_2.cpp.o 1219.1 [567/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_3.cpp.o 1219.2 [568/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1219.4 [569/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_8.cpp.o 1219.6 [570/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_10.cpp.o 1219.8 [571/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_4.cpp.o 1220.2 [572/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_0.cpp.o 1220.5 [573/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_9.cpp.o 1226.2 [574/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1228.4 [575/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_2.cpp.o 1233.1 [576/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeFiles/device_grouped_conv2d_bwd_weight_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1233.9 [577/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_4.cpp.o 1234.0 [578/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances.cpp.o 1235.4 [579/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_0.cpp.o 1235.8 [580/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_11.cpp.o 1237.8 [581/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_6.cpp.o 1238.2 [582/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_3.cpp.o 1238.5 [583/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_1.cpp.o 1240.2 [584/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_9.cpp.o 1241.4 [585/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_8.cpp.o 1242.5 [586/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_2.cpp.o 1242.7 [587/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_11.cpp.o 1242.9 [588/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_12.cpp.o 1243.8 [589/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_13.cpp.o 1243.9 [590/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_15.cpp.o 1245.6 [591/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_10.cpp.o 1246.5 [592/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_0.cpp.o 1246.7 [593/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_5.cpp.o 1247.2 [594/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_7.cpp.o 1247.5 [595/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_12.cpp.o 1248.3 [596/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_3.cpp.o 1248.4 [597/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_11.cpp.o 1249.4 [598/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_6.cpp.o 1249.8 [599/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_14.cpp.o 1250.3 [600/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_4.cpp.o 1250.8 [601/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_14.cpp.o 1251.4 [602/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances.cpp.o 1251.5 [603/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_1.cpp.o 1253.2 [604/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_10.cpp.o 1253.4 [605/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_13.cpp.o 1255.2 [606/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_8.cpp.o 1255.6 [607/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_15.cpp.o 1256.0 [608/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_16.cpp.o 1258.0 [609/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_18.cpp.o 1258.1 [610/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_19.cpp.o 1258.4 [611/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_17.cpp.o 1259.2 [612/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_2.cpp.o 1259.9 [613/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_3.cpp.o 1259.9 [614/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_1.cpp.o 1260.8 [615/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_0.cpp.o 1261.2 [616/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances.cpp.o 1264.5 [617/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_4.cpp.o 1264.8 [618/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_5.cpp.o 1265.2 [619/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_9.cpp.o 1266.0 [620/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_8.cpp.o 1266.3 [621/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_10.cpp.o 1267.0 [622/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_6.cpp.o 1267.5 [623/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_12.cpp.o 1268.0 [624/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_7.cpp.o 1268.1 [625/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_11.cpp.o 1268.6 [626/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_15.cpp.o 1268.6 [627/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_13.cpp.o 1269.7 [628/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_14.cpp.o 1270.7 [629/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_2.cpp.o 1271.1 [630/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_16.cpp.o 1271.6 [631/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_1.cpp.o 1271.9 [632/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_18.cpp.o 1272.2 [633/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_17.cpp.o 1272.8 [634/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_0.cpp.o 1272.8 [635/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_19.cpp.o 1275.3 [636/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances.cpp.o 1275.4 [637/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_3.cpp.o 1276.6 [638/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_4.cpp.o 1277.3 [639/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_6.cpp.o 1277.8 [640/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_9.cpp.o 1278.0 [641/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_10.cpp.o 1278.2 [642/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_7.cpp.o 1279.5 [643/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_8.cpp.o 1280.0 [644/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_13.cpp.o 1281.2 [645/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_11.cpp.o 1281.9 [646/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_12.cpp.o 1282.2 [647/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances.cpp.o 1283.1 [648/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances.cpp.o 1283.4 [649/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_14.cpp.o 1283.7 [650/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_15.cpp.o 1285.6 [651/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_1.cpp.o 1286.0 [652/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_0.cpp.o 1286.3 [653/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_2.cpp.o 1287.6 [654/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_3.cpp.o 1288.0 [655/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances.cpp.o 1288.6 [656/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_7.cpp.o 1289.6 [657/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances.cpp.o 1289.8 [658/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp.o 1290.0 [659/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_5.cpp.o 1290.4 [660/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_10.cpp.o 1290.8 [661/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances.cpp.o 1291.4 [662/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_4.cpp.o 1291.6 [663/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances_shard_0.cpp.o 1293.8 [664/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_7.cpp.o 1294.5 [665/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_9.cpp.o 1295.4 [666/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_8.cpp.o 1296.9 [667/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_0.cpp.o 1297.1 [668/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_6.cpp.o 1297.2 [669/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_5.cpp.o 1298.9 [670/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_3.cpp.o 1299.2 [671/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_7.cpp.o 1299.4 [672/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1300.0 [673/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_9.cpp.o 1300.5 [674/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances.cpp.o 1304.8 [675/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_1.cpp.o 1305.6 [676/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances_shard_0.cpp.o 1309.2 [677/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances_shard_0.cpp.o 1309.7 [678/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_2.cpp.o 1311.5 [679/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances.cpp.o 1313.9 [680/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_1.cpp.o 1317.1 [681/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_3.cpp.o 1319.3 [682/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_7.cpp.o 1320.5 [683/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_1.cpp.o 1320.9 [684/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_0.cpp.o 1322.3 [685/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_2.cpp.o 1322.9 [686/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_3.cpp.o 1324.7 [687/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1324.9 [688/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_4.cpp.o 1326.4 [689/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_5.cpp.o 1331.4 [690/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_8.cpp.o 1332.8 [691/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_7.cpp.o 1333.0 [692/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_5.cpp.o 1335.2 [693/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_10.cpp.o 1335.5 [694/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1336.0 [695/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_11.cpp.o 1336.6 [696/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_9.cpp.o 1341.2 [697/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1345.1 [698/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1345.5 [699/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_7.cpp.o 1347.7 [700/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 1350.6 [701/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_5.cpp.o 1351.1 [702/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 1359.3 [703/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 1364.8 [704/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_7.cpp.o 1369.8 [705/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1377.8 [706/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 1387.8 [707/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_9.cpp.o 1389.9 [708/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1393.6 [709/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1403.6 [710/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1407.0 [711/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 1407.9 [712/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1412.4 [713/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 1413.0 [714/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_3.cpp.o 1418.6 [715/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 1426.6 [716/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 1426.7 [717/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_5.cpp.o 1428.2 [718/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_4.cpp.o 1437.6 [719/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1442.1 [720/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_9.cpp.o 1447.2 [721/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 1448.8 [722/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1454.6 [723/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1455.9 [724/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1465.4 [725/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1468.8 [726/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1469.4 [727/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 1469.7 [728/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_9.cpp.o 1470.1 [729/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 1474.4 [730/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_0.cpp.o 1480.7 [731/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 1486.6 [732/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1488.1 [733/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 1488.4 [734/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 1488.7 [735/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1495.3 [736/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard_5.cpp.o 1507.5 [737/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 1515.3 [738/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1519.6 [739/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1521.9 [740/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 1524.2 [741/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 1528.0 [742/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp.o 1532.0 [743/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 1535.8 [744/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1539.8 [745/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 1544.1 [746/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1548.3 [747/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_9.cpp.o 1564.1 [748/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1566.3 [749/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 1568.3 [750/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1572.0 [751/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1574.6 [752/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1602.6 [753/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1603.1 [754/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1609.9 [755/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 1614.7 [756/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 1626.8 [757/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 1657.1 [758/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1674.0 [759/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 1676.5 [760/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1694.4 [761/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/generated/xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard_4.cpp.o 1694.4 [762/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16_16_instance.cpp.o 1713.6 [763/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1715.7 [764/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 1718.4 [765/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 1722.6 [766/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 1725.6 [767/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 1727.0 [768/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1736.0 [769/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 1742.5 [770/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_optimized_loads_instance.cpp.o 1743.6 [771/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_optimized_loads_instance.cpp.o 1749.7 [772/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 1751.6 [773/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 1752.4 [774/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_optimized_loads_instance.cpp.o 1760.8 [775/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 1761.7 [776/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 1764.9 [777/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 1767.1 [778/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 1770.2 [779/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 1772.7 [780/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 1773.0 [781/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f16_instance.cpp.o 1778.1 [782/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_instance.cpp.o 1789.2 [783/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1791.8 [784/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_4.cpp.o 1792.6 [785/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1793.2 [786/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 1797.3 [787/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_0.cpp.o 1802.2 [788/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 1816.3 [789/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1820.4 [790/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_2.cpp.o 1821.3 [791/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 1824.5 [792/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_16_16_instance.cpp.o 1840.8 [793/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_6.cpp.o 1863.6 [794/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1863.6 [795/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1863.7 [796/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 1866.7 [797/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 1868.7 [798/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_16_16_instance.cpp.o 1875.8 [799/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16_16_instance.cpp.o 1875.9 [800/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/gndhwc_gkzyxc_gndhwk/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_f32_bf16_instance.cpp.o 1879.6 [801/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 1900.6 [802/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1913.0 [803/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1927.4 [804/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1928.0 [805/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 1929.8 [806/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 1930.7 [807/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 1931.3 [808/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev2_instance.cpp.o 1932.1 [809/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 1934.8 [810/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev2_instance.cpp.o 1941.3 [811/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.o 1942.3 [812/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 1942.8 [813/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev5_instance.cpp.o 1943.7 [814/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 1945.9 [815/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 1946.4 [816/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 1947.1 [817/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 1949.6 [818/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 1951.7 [819/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 1953.4 [820/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev5_instance.cpp.o 1953.9 [821/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1956.0 [822/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 1956.6 [823/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 1963.0 [824/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instance.cpp.o 1968.2 [825/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instance.cpp.o 1970.0 [826/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instance.cpp.o 1970.2 [827/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev2_instance.cpp.o 1970.5 [828/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev2_instance.cpp.o 1976.0 [829/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 1979.1 [830/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1984.4 [831/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev2_instance.cpp.o 1991.2 [832/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev5_instance.cpp.o 1995.4 [833/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev5_instance.cpp.o 2005.8 [834/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 2014.4 [835/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkzyxc_ngkdhw_f32_instance.cpp.o 2017.4 [836/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_f16_pipev1_instance.cpp.o 2018.1 [837/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 2024.4 [838/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 2024.5 [839/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkzyxc_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_pipev1_instance.cpp.o 2052.6 [840/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 2068.1 [841/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp.o 2075.6 [842/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 2084.5 [843/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp.o 2095.0 [844/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp.o 2096.4 [845/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 2097.6 [846/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp.o 2114.9 [847/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_vec_transpose_instance.cpp.o 2124.2 [848/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 2131.6 [849/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 2136.7 [850/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 2144.3 [851/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2163.1 [852/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_vec_transpose_instance.cpp.o 2174.5 [853/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_vec_transpose_instance.cpp.o 2175.4 [854/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 2181.1 [855/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_part2_instance.cpp.o 2189.2 [856/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2197.4 [857/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_part2_instance.cpp.o 2197.5 [858/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 2207.0 [859/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2226.4 [860/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 2234.3 [861/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 2273.2 [862/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 2280.3 [863/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2281.9 [864/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.o 2281.9 [865/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 2290.5 [866/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.o 2294.8 [867/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_irregular_instance.cpp.o 2295.3 [868/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2296.1 [869/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.o 2306.3 [870/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_irregular_instance.cpp.o 2316.0 [871/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 2316.2 [872/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 2326.3 [873/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp.o 2327.3 [874/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 2328.8 [875/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 2330.0 [876/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_irregular_instance.cpp.o 2332.8 [877/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_part2_instance.cpp.o 2334.5 [878/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2337.3 [879/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2338.2 [880/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2349.5 [881/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 2349.9 [882/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 2350.5 [883/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2353.6 [884/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_irregular_instance.cpp.o 2375.0 [885/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_part2_instance.cpp.o 2375.3 [886/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp.o 2379.2 [887/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2380.7 [888/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 2381.3 [889/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 2387.8 [890/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev2_instance.cpp.o 2388.8 [891/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 2402.1 [892/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_part2_instance.cpp.o 2403.8 [893/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev1_instance.cpp.o 2406.8 [894/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_part2_instance.cpp.o 2409.5 [895/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev5_instance.cpp.o 2411.2 [896/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2415.3 [897/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev1_instance.cpp.o 2415.5 [898/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 2422.4 [899/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp.o 2423.5 [900/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_f16_pipev2_instance.cpp.o 2424.6 [901/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2425.9 [902/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2434.6 [903/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkczyx_ngkdhw_bf16_pipev5_instance.cpp.o 2435.7 [904/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2436.0 [905/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2437.2 [906/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2465.5 [907/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2469.2 [908/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 2474.7 [909/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 2488.4 [910/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 2489.9 [911/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2498.5 [912/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_2x_instance.cpp.o 2504.0 [913/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp.o 2506.4 [914/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp.o 2510.4 [915/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 2514.0 [916/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 2517.8 [917/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 2536.2 [918/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2547.3 [919/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 2559.9 [920/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_2x_instance.cpp.o 2564.7 [921/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cpp.o 2568.9 [922/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2575.9 [923/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp.o 2585.0 [924/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeFiles/device_grouped_conv2d_fwd_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp.o 2591.6 [925/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 2593.2 [926/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 2607.1 [927/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/xdl/ngcdhw_gkczyx_ngkdhw/device_grouped_conv3d_bwd_weight_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 2609.4 [928/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances.cpp.o 2630.9 [929/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 2642.9 [930/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 2650.1 [931/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 2653.6 [932/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2657.5 [933/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 2659.1 [934/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp.o 2659.1 [935/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeFiles/device_grouped_conv3d_bwd_data_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 2661.0 [936/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 2670.2 [937/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances.cpp.o 2672.4 [938/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 2677.6 [939/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeFiles/device_grouped_conv3d_bwd_data_scale_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2679.7 [940/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_3.cpp.o 2684.5 [941/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_2.cpp.o 2684.7 [942/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.o 2685.1 [943/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 2685.7 [944/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 2686.7 [945/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_5.cpp.o 2688.0 [946/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.o 2692.3 [947/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_0.cpp.o 2695.2 [948/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2697.2 [949/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_1.cpp.o 2698.9 [950/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances.cpp.o 2701.3 [951/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_2.cpp.o 2701.5 [952/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2706.3 [953/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_3.cpp.o 2707.1 [954/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.o 2709.5 [955/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.o 2709.8 [956/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_7.cpp.o 2709.9 [957/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.o 2712.0 [958/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 2717.9 [959/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.o 2719.8 [960/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_0.cpp.o 2720.4 [961/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp.o 2722.4 [962/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances.cpp.o 2724.5 [963/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 2725.1 [964/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_6.cpp.o 2725.7 [965/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_1.cpp.o 2726.1 [966/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_1.cpp.o 2727.3 [967/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_2.cpp.o 2732.4 [968/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_3.cpp.o 2733.8 [969/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_4.cpp.o 2738.6 [970/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances.cpp.o 2739.5 [971/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_3.cpp.o 2740.8 [972/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2742.3 [973/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 2746.6 [974/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_1.cpp.o 2748.2 [975/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_0.cpp.o 2748.5 [976/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_2.cpp.o 2749.2 [977/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_7.cpp.o 2749.5 [978/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeFiles/device_grouped_conv2d_fwd_clamp_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 2750.7 [979/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_1.cpp.o 2753.2 [980/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_5.cpp.o 2755.9 [981/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_6.cpp.o 2756.8 [982/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_9.cpp.o 2759.4 [983/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_4.cpp.o 2760.6 [984/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_8.cpp.o 2770.5 [985/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances.cpp.o 2770.7 [986/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_6.cpp.o 2773.5 [987/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_0.cpp.o 2777.0 [988/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_1.cpp.o 2779.6 [989/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_2.cpp.o 2781.2 [990/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_3.cpp.o 2789.8 [991/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_2.cpp.o 2794.0 [992/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_3.cpp.o 2800.6 [993/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.o 2800.7 [994/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances.cpp.o 2809.1 [995/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_0.cpp.o 2817.9 [996/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_1.cpp.o 2820.3 [997/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_2.cpp.o 2823.2 [998/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_3.cpp.o 2824.6 [999/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.o 2827.4 [1000/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 2831.0 [1001/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.o 2836.0 [1002/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_4.cpp.o 2836.7 [1003/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_7.cpp.o 2839.0 [1004/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_8.cpp.o 2842.8 [1005/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 2849.3 [1006/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances.cpp.o 2852.7 [1007/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_0.cpp.o 2853.1 [1008/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.o 2853.8 [1009/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_1.cpp.o 2860.4 [1010/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_5.cpp.o 2861.7 [1011/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_6.cpp.o 2864.3 [1012/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_4.cpp.o 2870.8 [1013/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_6.cpp.o 2878.2 [1014/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_8.cpp.o 2885.8 [1015/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_9.cpp.o 2887.1 [1016/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.o 2887.3 [1017/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_11.cpp.o 2889.7 [1018/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_5.cpp.o 2890.2 [1019/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_3.cpp.o 2890.8 [1020/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_6.cpp.o 2892.2 [1021/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_2.cpp.o 2892.3 [1022/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2892.9 [1023/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 2903.0 [1024/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_8.cpp.o 2903.3 [1025/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.o 2920.5 [1026/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_4.cpp.o 2931.3 [1027/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 2937.3 [1028/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_0.cpp.o 2937.8 [1029/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_5.cpp.o 2938.1 [1030/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 2939.1 [1031/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_7.cpp.o 2940.8 [1032/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 2942.5 [1033/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_5.cpp.o 2949.6 [1034/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_6.cpp.o 2949.7 [1035/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_5.cpp.o 2957.7 [1036/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_7.cpp.o 2958.1 [1037/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_16x16_instance.cpp.o 2967.2 [1038/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 2985.8 [1039/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_7.cpp.o 2988.4 [1040/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_11.cpp.o 2989.9 [1041/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard_5.cpp.o 2992.4 [1042/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_8.cpp.o 3001.3 [1043/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 3005.0 [1044/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances.cpp.o 3006.8 [1045/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 3007.3 [1046/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 3008.5 [1047/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_1.cpp.o 3010.5 [1048/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_0.cpp.o 3012.1 [1049/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_2.cpp.o 3013.3 [1050/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_8.cpp.o 3013.5 [1051/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_6.cpp.o 3015.3 [1052/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_7.cpp.o 3015.9 [1053/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_1.cpp.o 3017.1 [1054/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_4.cpp.o 3019.3 [1055/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard_9.cpp.o 3023.5 [1056/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_3.cpp.o 3024.2 [1057/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_4.cpp.o 3024.4 [1058/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_5.cpp.o 3026.5 [1059/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_6.cpp.o 3026.7 [1060/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_8.cpp.o 3027.4 [1061/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_7.cpp.o 3027.9 [1062/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_8.cpp.o 3029.5 [1063/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_9.cpp.o 3029.5 [1064/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_10.cpp.o 3030.5 [1065/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_11.cpp.o 3031.1 [1066/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances.cpp.o 3031.1 [1067/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_10.cpp.o 3033.1 [1068/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_7.cpp.o 3036.1 [1069/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_2.cpp.o 3040.0 [1070/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_9.cpp.o 3040.4 [1071/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_7.cpp.o 3040.6 [1072/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_10.cpp.o 3040.7 [1073/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_11.cpp.o 3044.0 [1074/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 3048.8 [1075/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 3048.9 [1076/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_4.cpp.o 3049.8 [1077/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 3050.2 [1078/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard_9.cpp.o 3050.3 [1079/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 3050.7 [1080/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_3.cpp.o 3050.8 [1081/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 3051.5 [1082/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_5.cpp.o 3051.6 [1083/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_4.cpp.o 3053.1 [1084/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_6.cpp.o 3054.5 [1085/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeFiles/device_grouped_conv3d_bwd_weight_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3056.9 [1086/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_7.cpp.o 3059.9 [1087/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_10.cpp.o 3060.0 [1088/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_8.cpp.o 3060.1 [1089/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_9.cpp.o 3060.4 [1090/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 3062.0 [1091/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_11.cpp.o 3062.4 [1092/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard_5.cpp.o 3063.9 [1093/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_12.cpp.o 3065.5 [1094/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_1.cpp.o 3067.5 [1095/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_14.cpp.o 3067.8 [1096/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_7.cpp.o 3067.9 [1097/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp.o 3067.9 [1098/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_15.cpp.o 3068.7 [1099/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_instance.cpp.o 3069.0 [1100/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_13.cpp.o 3069.1 [1101/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_2.cpp.o 3070.0 [1102/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 3070.3 [1103/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 3070.5 [1104/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 3071.0 [1105/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.o 3071.1 [1106/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_4.cpp.o 3071.4 [1107/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_bf8_instance.cpp.o 3072.2 [1108/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_3.cpp.o 3072.4 [1109/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_6.cpp.o 3072.7 [1110/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard_9.cpp.o 3073.0 [1111/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_5.cpp.o 3073.6 [1112/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_6.cpp.o 3074.2 [1113/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_fp8_instance.cpp.o 3075.4 [1114/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 3076.7 [1115/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3076.9 [1116/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_7.cpp.o 3077.4 [1117/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 3077.7 [1118/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3079.0 [1119/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_9.cpp.o 3079.6 [1120/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_11.cpp.o 3079.7 [1121/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_8.cpp.o 3080.0 [1122/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_12.cpp.o 3080.4 [1123/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_9.cpp.o 3081.5 [1124/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_13.cpp.o 3082.4 [1125/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_10.cpp.o 3082.6 [1126/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard_9.cpp.o 3083.5 [1127/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_14.cpp.o 3084.7 [1128/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 3084.8 [1129/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_15.cpp.o 3087.2 [1130/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 3087.3 [1131/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 3087.8 [1132/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard_4.cpp.o 3088.2 [1133/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_4.cpp.o 3088.4 [1134/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_5.cpp.o 3088.5 [1135/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances.cpp.o 3088.7 [1136/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_3.cpp.o 3089.5 [1137/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_10.cpp.o 3089.6 [1138/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_6.cpp.o 3090.1 [1139/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 3090.4 [1140/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_9.cpp.o 3090.9 [1141/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances.cpp.o 3090.9 [1142/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_8.cpp.o 3091.0 [1143/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_11.cpp.o 3091.7 [1144/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3091.7 [1145/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_15.cpp.o 3091.8 [1146/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_14.cpp.o 3092.2 [1147/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_12.cpp.o 3092.4 [1148/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_7.cpp.o 3092.6 [1149/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3092.6 [1150/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_13.cpp.o 3093.2 [1151/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances.cpp.o 3094.3 [1152/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard_0.cpp.o 3095.4 [1153/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 3096.0 [1154/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_6.cpp.o 3096.4 [1155/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 3098.3 [1156/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 3099.2 [1157/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 3099.9 [1158/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp.o 3101.2 [1159/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 3101.3 [1160/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_0.cpp.o 3101.5 [1161/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 3101.8 [1162/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_1.cpp.o 3103.1 [1163/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 3103.6 [1164/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 3104.0 [1165/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 3104.0 [1166/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 3104.2 [1167/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 3104.3 [1168/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 3104.4 [1169/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 3104.4 [1170/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_2.cpp.o 3106.0 [1171/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 3108.1 [1172/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_3.cpp.o 3110.2 [1173/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_2.cpp.o 3110.8 [1174/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_3.cpp.o 3113.8 [1175/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_0.cpp.o 3114.0 [1176/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_1.cpp.o 3115.0 [1177/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_1.cpp.o 3115.3 [1178/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_2.cpp.o 3117.5 [1179/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_0.cpp.o 3117.8 [1180/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances.cpp.o 3118.5 [1181/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_0.cpp.o 3118.7 [1182/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 3118.7 [1183/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 3118.9 [1184/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_8.cpp.o 3119.0 [1185/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 3119.2 [1186/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_1.cpp.o 3120.7 [1187/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_2.cpp.o 3123.7 [1188/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_8.cpp.o 3123.7 [1189/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_3.cpp.o 3124.3 [1190/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_4.cpp.o 3124.9 [1191/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_10.cpp.o 3125.0 [1192/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_11.cpp.o 3125.2 [1193/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_6.cpp.o 3128.3 [1194/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_18.cpp.o 3128.9 [1195/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_12.cpp.o 3128.9 [1196/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_13.cpp.o 3130.5 [1197/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_19.cpp.o 3130.8 [1198/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_14.cpp.o 3131.8 [1199/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_15.cpp.o 3132.4 [1200/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 3132.5 [1201/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_17.cpp.o 3133.0 [1202/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_0.cpp.o 3134.5 [1203/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_1.cpp.o 3134.6 [1204/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_16.cpp.o 3134.9 [1205/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_2.cpp.o 3136.5 [1206/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 3137.1 [1207/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_3.cpp.o 3137.8 [1208/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances.cpp.o 3141.9 [1209/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_6.cpp.o 3142.6 [1210/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_4.cpp.o 3142.9 [1211/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_11.cpp.o 3144.5 [1212/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_15.cpp.o 3144.5 [1213/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_8.cpp.o 3144.5 [1214/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_14.cpp.o 3145.1 [1215/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_10.cpp.o 3145.2 [1216/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_12.cpp.o 3146.9 [1217/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_16.cpp.o 3147.2 [1218/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_13.cpp.o 3149.4 [1219/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_1.cpp.o 3149.5 [1220/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_17.cpp.o 3150.6 [1221/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances.cpp.o 3151.0 [1222/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_18.cpp.o 3153.3 [1223/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_19.cpp.o 3154.3 [1224/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_0.cpp.o 3155.2 [1225/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_3.cpp.o 3155.9 [1226/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_2.cpp.o 3156.6 [1227/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_4.cpp.o 3157.4 [1228/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_10.cpp.o 3157.9 [1229/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_8.cpp.o 3158.9 [1230/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_6.cpp.o 3158.9 [1231/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_9.cpp.o 3161.0 [1232/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_12.cpp.o 3161.3 [1233/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 3161.7 [1234/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 3162.7 [1235/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_11.cpp.o 3163.2 [1236/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 3163.8 [1237/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_13.cpp.o 3163.9 [1238/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_15.cpp.o 3165.1 [1239/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_2.cpp.o 3166.3 [1240/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_14.cpp.o 3167.1 [1241/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_0.cpp.o 3167.7 [1242/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 3169.4 [1243/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_1.cpp.o 3170.4 [1244/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_3.cpp.o 3171.2 [1245/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances.cpp.o 3172.6 [1246/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_11.cpp.o 3172.8 [1247/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_6.cpp.o 3174.6 [1248/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_4.cpp.o 3175.3 [1249/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 3175.4 [1250/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_8.cpp.o 3176.5 [1251/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_13.cpp.o 3176.6 [1252/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_12.cpp.o 3177.0 [1253/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_10.cpp.o 3178.3 [1254/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_15.cpp.o 3178.3 [1255/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_14.cpp.o 3181.7 [1256/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_5.cpp.o 3181.9 [1257/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 3182.4 [1258/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_16.cpp.o 3183.1 [1259/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_18.cpp.o 3183.5 [1260/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_1.cpp.o 3183.8 [1261/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_17.cpp.o 3184.1 [1262/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_19.cpp.o 3184.1 [1263/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_2.cpp.o 3184.9 [1264/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_0.cpp.o 3185.5 [1265/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3185.9 [1266/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_3.cpp.o 3186.5 [1267/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances.cpp.o 3187.5 [1268/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_7.cpp.o 3189.3 [1269/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_8.cpp.o 3190.2 [1270/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_4.cpp.o 3190.2 [1271/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_11.cpp.o 3190.3 [1272/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_6.cpp.o 3190.4 [1273/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_10.cpp.o 3191.1 [1274/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_12.cpp.o 3191.5 [1275/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp.o 3193.1 [1276/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_14.cpp.o 3193.5 [1277/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_13.cpp.o 3195.0 [1278/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_15.cpp.o 3195.1 [1279/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_18.cpp.o 3196.5 [1280/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_5.cpp.o 3196.5 [1281/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_7.cpp.o 3196.7 [1282/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_17.cpp.o 3196.8 [1283/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_1.cpp.o 3197.1 [1284/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances.cpp.o 3197.2 [1285/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_16.cpp.o 3197.7 [1286/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_0.cpp.o 3197.8 [1287/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_9.cpp.o 3197.9 [1288/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_7.cpp.o 3197.9 [1289/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_2.cpp.o 3198.1 [1290/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_19.cpp.o 3199.4 [1291/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3200.1 [1292/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_3.cpp.o 3200.9 [1293/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_4.cpp.o 3203.4 [1294/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_6.cpp.o 3204.4 [1295/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_8.cpp.o 3205.0 [1296/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_9.cpp.o 3205.6 [1297/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_10.cpp.o 3205.6 [1298/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_11.cpp.o 3206.1 [1299/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_12.cpp.o 3206.2 [1300/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 3206.8 [1301/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_13.cpp.o 3206.8 [1302/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 3207.1 [1303/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 3207.8 [1304/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_14.cpp.o 3208.4 [1305/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances.cpp.o 3208.6 [1306/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 3208.7 [1307/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_15.cpp.o 3209.1 [1308/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances.cpp.o 3209.6 [1309/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances.cpp.o 3210.5 [1310/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 3211.9 [1311/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 3212.8 [1312/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 3213.6 [1313/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 3213.9 [1314/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_5.cpp.o 3214.3 [1315/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 3214.9 [1316/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 3215.3 [1317/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 3215.4 [1318/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 3215.5 [1319/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances.cpp.o 3215.8 [1320/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 3216.2 [1321/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_0.cpp.o 3216.3 [1322/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 3216.7 [1323/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances_shard_0.cpp.o 3217.6 [1324/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances_shard_0.cpp.o 3217.6 [1325/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_9.cpp.o 3219.1 [1326/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 3219.2 [1327/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_3.cpp.o 3219.4 [1328/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_1.cpp.o 3219.5 [1329/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_2.cpp.o 3220.7 [1330/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances.cpp.o 3222.5 [1331/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_1.cpp.o 3224.6 [1332/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_0.cpp.o 3224.9 [1333/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_3.cpp.o 3226.0 [1334/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_1.cpp.o 3227.0 [1335/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_4.cpp.o 3227.9 [1336/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_2.cpp.o 3229.2 [1337/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_3.cpp.o 3229.4 [1338/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_5.cpp.o 3229.5 [1339/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3229.6 [1340/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_5.cpp.o 3230.2 [1341/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_10.cpp.o 3230.6 [1342/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_7.cpp.o 3231.4 [1343/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_8.cpp.o 3231.5 [1344/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3231.9 [1345/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_11.cpp.o 3232.5 [1346/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_7.cpp.o 3233.2 [1347/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_9.cpp.o 3235.8 [1348/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3238.2 [1349/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 3243.4 [1350/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3245.2 [1351/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard_4.cpp.o 3245.3 [1352/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_7.cpp.o 3245.7 [1353/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_5.cpp.o 3246.0 [1354/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3246.3 [1355/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_3.cpp.o 3249.9 [1356/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_7.cpp.o 3251.3 [1357/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 3257.3 [1358/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3259.3 [1359/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3260.7 [1360/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 3261.7 [1361/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 3262.8 [1362/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3263.2 [1363/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_9.cpp.o 3263.4 [1364/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3263.9 [1365/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 3268.3 [1366/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3268.7 [1367/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3269.4 [1368/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 3270.1 [1369/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_5.cpp.o 3272.9 [1370/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 3275.5 [1371/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 3280.7 [1372/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_9.cpp.o 3281.0 [1373/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3281.2 [1374/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3282.9 [1375/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3283.4 [1376/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3284.5 [1377/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3284.9 [1378/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp.o 3288.6 [1379/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3291.9 [1380/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3295.0 [1381/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 3295.0 [1382/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3295.3 [1383/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3295.7 [1384/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 3299.7 [1385/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3305.5 [1386/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3306.1 [1387/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3306.5 [1388/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3309.2 [1389/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 3310.2 [1390/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_3.cpp.o 3310.7 [1391/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 3310.9 [1392/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 3318.4 [1393/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 3321.5 [1394/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_4.cpp.o 3321.5 [1395/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_0.cpp.o 3328.2 [1396/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard_5.cpp.o 3334.8 [1397/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3336.2 [1398/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3339.3 [1399/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3339.6 [1400/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3346.9 [1401/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeFiles/device_grouped_conv3d_fwd_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 3349.0 [1402/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 3350.3 [1403/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 3351.5 [1404/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3352.8 [1405/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3353.9 [1406/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3354.8 [1407/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3355.7 [1408/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 3356.7 [1409/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 3356.9 [1410/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3357.5 [1411/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3357.6 [1412/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 3357.9 [1413/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 3364.3 [1414/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/CMakeFiles/device_grouped_conv3d_fwd_convscale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 3365.3 [1415/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3368.0 [1416/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 3371.3 [1417/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3375.9 [1418/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 3386.3 [1419/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_0.cpp.o 3387.9 [1420/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_4.cpp.o 3396.3 [1421/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3396.4 [1422/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3396.4 [1423/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3396.6 [1424/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3399.2 [1425/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3400.1 [1426/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3400.9 [1427/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3402.6 [1428/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3405.0 [1429/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3406.2 [1430/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_6.cpp.o 3410.2 [1431/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_2.cpp.o 3415.5 [1432/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3415.6 [1433/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3417.3 [1434/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 3423.0 [1435/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 3423.6 [1436/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 3426.2 [1437/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3429.2 [1438/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3430.4 [1439/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 3431.6 [1440/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3432.5 [1441/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3456.5 [1442/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 3457.1 [1443/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 3458.7 [1444/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 3460.9 [1445/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 3463.0 [1446/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 3463.3 [1447/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3469.5 [1448/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3473.4 [1449/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 3473.8 [1450/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 3473.9 [1451/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3476.0 [1452/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3476.7 [1453/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3478.5 [1454/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3482.8 [1455/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 3488.1 [1456/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 3488.3 [1457/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 3496.4 [1458/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_default_instance.cpp.o 3497.8 [1459/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 3504.1 [1460/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 3507.9 [1461/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 3553.9 [1462/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3555.3 [1463/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3569.1 [1464/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3580.6 [1465/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3587.8 [1466/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.o 3588.8 [1467/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.o 3593.2 [1468/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp.o 3594.2 [1469/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3609.5 [1470/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp.o 3618.9 [1471/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp.o 3624.1 [1472/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_m_instance.cpp.o 3624.7 [1473/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3628.2 [1474/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_default_instance.cpp.o 3631.2 [1475/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp.o 3635.0 [1476/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.o 3635.2 [1477/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.o 3636.8 [1478/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp.o 3638.8 [1479/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3641.7 [1480/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp.o 3642.4 [1481/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.o 3643.6 [1482/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 3648.3 [1483/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_default_instance.cpp.o 3648.4 [1484/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3649.0 [1485/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_m_instance.cpp.o 3649.0 [1486/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.o 3649.7 [1487/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.o 3651.1 [1488/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_n_instance.cpp.o 3652.9 [1489/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.o 3660.8 [1490/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.o 3674.5 [1491/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3676.7 [1492/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.o 3682.4 [1493/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.o 3684.0 [1494/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3686.1 [1495/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_mn_instance.cpp.o 3689.8 [1496/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.o 3690.5 [1497/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_n_instance.cpp.o 3693.7 [1498/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.o 3694.0 [1499/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3697.3 [1500/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.o 3702.8 [1501/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.o 3703.8 [1502/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3703.9 [1503/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.o 3704.1 [1504/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.o 3704.8 [1505/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.o 3706.9 [1506/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.o 3707.8 [1507/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.o 3709.8 [1508/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.o 3710.0 [1509/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.o 3710.5 [1510/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.o 3711.9 [1511/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.o 3721.7 [1512/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3722.2 [1513/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.o 3722.3 [1514/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.o 3722.7 [1515/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.o 3722.8 [1516/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.o 3723.8 [1517/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.o 3725.0 [1518/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.o 3725.3 [1519/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.o 3725.5 [1520/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.o 3726.2 [1521/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.o 3726.6 [1522/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.o 3727.4 [1523/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.o 3729.5 [1524/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.o 3731.1 [1525/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.o 3731.6 [1526/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.o 3731.7 [1527/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 3732.7 [1528/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.o 3732.7 [1529/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.o 3735.5 [1530/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.o 3737.0 [1531/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.o 3737.2 [1532/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3739.5 [1533/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.o 3741.8 [1534/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3742.0 [1535/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.o 3743.2 [1536/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.o 3743.3 [1537/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.o 3743.5 [1538/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3744.4 [1539/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.o 3745.0 [1540/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.o 3747.1 [1541/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3750.2 [1542/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.o 3751.1 [1543/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.o 3751.3 [1544/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.o 3753.6 [1545/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.o 3754.7 [1546/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.o 3755.1 [1547/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.o 3758.7 [1548/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/device_memory.cpp.o 3758.7 [1549/1682] cd /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build && /usr/local/therock-tools/bin/cmake -E rm -f /__w/rockrel/rockrel/build/ml-libs/composable_kernel/stamp/stage.stamp 3760.8 [1550/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/host_tensor.cpp.o 3761.2 [1551/1682] Building CXX object library/src/utility/CMakeFiles/utility.dir/convolution_parameter.cpp.o 3765.2 [1552/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3768.8 [1553/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3769.8 [1554/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3770.2 [1555/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce1.cpp.o 3771.6 [1556/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce2.cpp.o 3771.9 [1557/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 3772.5 [1558/1682] Linking CXX static library lib/libdevice_contraction_operations.a 3772.5 [1559/1682] Linking CXX static library lib/libdevice_other_operations.a 3775.5 [1560/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 3776.1 [1561/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce2.cpp.o 3776.1 [1562/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce1.cpp.o 3776.6 [1563/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce1.cpp.o 3777.1 [1564/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 3777.2 [1565/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce3.cpp.o 3777.8 [1566/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 3778.4 [1567/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce3.cpp.o 3779.0 [1568/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce4.cpp.o 3780.2 [1569/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce2.cpp.o 3781.7 [1570/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 3781.9 [1571/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce1.cpp.o 3781.9 [1572/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce3.cpp.o 3785.9 [1573/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce2.cpp.o 3786.6 [1574/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3787.9 [1575/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.o 3788.0 [1576/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce3.cpp.o 3789.0 [1577/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.o 3789.3 [1578/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3790.4 [1579/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3790.9 [1580/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.o 3791.4 [1581/1682] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce4.cpp.o 3792.5 [1582/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3793.4 [1583/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 3794.3 [1584/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3794.7 [1585/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3795.3 [1586/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.o 3795.9 [1587/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 3796.5 [1588/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.o 3796.5 [1589/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.o 3797.6 [1590/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3799.4 [1591/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.o 3804.0 [1592/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3805.9 [1593/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.o 3807.5 [1594/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 3809.4 [1595/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.o 3811.2 [1596/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3811.3 [1597/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.o 3811.5 [1598/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.o 3811.9 [1599/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.o 3813.6 [1600/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.o 3814.4 [1601/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3814.6 [1602/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.o 3815.1 [1603/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.o 3816.1 [1604/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_odd_mn_instance.cpp.o 3817.9 [1605/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.o 3818.6 [1606/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.o 3818.6 [1607/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.o 3821.8 [1608/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.o 3828.0 [1609/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3835.4 [1610/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3841.0 [1611/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3841.1 [1612/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3850.7 [1613/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3854.9 [1614/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 3855.0 [1615/1682] Linking CXX static library lib/libutility.a 3855.5 [1616/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 3856.8 [1617/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_default_instance.cpp.o 3858.3 [1618/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 3858.7 [1619/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 3863.3 [1620/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3864.0 [1621/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 3867.3 [1622/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3869.2 [1623/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3870.0 [1624/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeFiles/device_grouped_conv3d_fwd_clamp_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3876.5 [1625/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.o 3879.8 [1626/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.o 3881.4 [1627/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3885.0 [1628/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 3885.3 [1629/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 3885.6 [1630/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 3888.8 [1631/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 3890.3 [1632/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3891.1 [1633/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 3891.6 [1634/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_kpadding_instance.cpp.o 3892.2 [1635/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 3892.7 [1636/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 3892.9 [1637/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 3893.7 [1638/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3895.4 [1639/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnpadding_instance.cpp.o 3896.0 [1640/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 3897.1 [1641/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3900.1 [1642/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 3901.2 [1643/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.o 3902.5 [1644/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3903.7 [1645/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3905.1 [1646/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.o 3908.1 [1647/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeFiles/device_grouped_conv3d_fwd_bilinear_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3912.2 [1648/1682] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.o 3912.8 [1649/1682] Linking CXX static library lib/libdevice_reduction_operations.a 3916.8 [1650/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_kpadding_instance.cpp.o 3917.2 [1651/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3919.0 [1652/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 3919.1 [1653/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_default_instance.cpp.o 3921.3 [1654/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 3921.4 [1655/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_mnkpadding_instance.cpp.o 3926.8 [1656/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 3929.2 [1657/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_default_instance.cpp.o 3930.1 [1658/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 3930.2 [1659/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 3942.6 [1660/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 3943.1 [1661/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 3945.4 [1662/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 3957.9 [1663/1682] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeFiles/device_grouped_conv3d_fwd_scale_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3960.2 [1664/1682] Linking CXX static library lib/libdevice_conv_operations.a 3979.9 [1665/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 3994.4 [1666/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 4010.9 [1667/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 4011.3 [1668/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 4025.4 [1669/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 4028.8 [1670/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 4031.7 [1671/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 4032.8 [1672/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 4033.5 [1673/1682] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 4071.0 [1674/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 4077.6 [1675/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 4079.5 [1676/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 4089.4 [1677/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 4093.7 [1678/1682] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 4097.4 [1679/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 5272.3 [1680/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_bf16_bf16/device_gemm_wmma_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 5277.7 [1681/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_f16_f16_f16/device_gemm_wmma_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 5384.7 [1682/1682] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_i8_bf16/device_gemm_wmma_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o END 1777958888.1854005 5384.6577315330505 0