; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; RUN: igc_opt --igc-scalarize --opaque-pointers -S < %s | FileCheck %s
; REQUIRES: llvm-14-plus
; ------------------------------------------------
; ScalarizeFunction
; ------------------------------------------------
; The pass should break vector operations into many scalar operations
; ------------------------------------------------

; Baseline case: a trunc from <2 x i32> to <2 x i8> on a function argument.
; Expected output: one extractelement per lane, one scalar trunc per lane,
; then the result vector rebuilt with insertelement starting from undef.
define <2 x i8> @basic(<2 x i32> %src1) {
; CHECK-LABEL: define <2 x i8> @basic(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SRC1_SCALAR]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[SRC1_SCALAR1]] to i8
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i8> undef, i8 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i8> [[DOTASSEMBLED_VECT]], i8 [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x i8> [[DOTASSEMBLED_VECT2]]
;
  %1 = trunc <2 x i32> %src1 to <2 x i8>
  ret <2 x i8> %1
}

; Same cast as @basic, but the vector trunc carries an !any_metadata attachment.
; Expected output: both scalar truncs carry !any_metadata referring to the same
; metadata node; the extractelement/insertelement lines are matched without it.
define <2 x i8> @should_preserve_metadata(<2 x i32> %src1) {
; CHECK-LABEL: define <2 x i8> @should_preserve_metadata(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SRC1_SCALAR]] to i8, !any_metadata [[META0:![0-9]+]]
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[SRC1_SCALAR1]] to i8, !any_metadata [[META0]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i8> undef, i8 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i8> [[DOTASSEMBLED_VECT]], i8 [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x i8> [[DOTASSEMBLED_VECT2]]
;
  %1 = trunc <2 x i32> %src1 to <2 x i8>, !any_metadata !{i32 0}
  ret <2 x i8> %1
}

; Floating-point narrowing: fptrunc from <2 x double> to <2 x float>.
; Expected output: per-lane extractelement, scalar fptrunc, and insertelement.
define <2 x float> @should_work_with_different_instruction_type(<2 x double> %src1) {
; CHECK-LABEL: define <2 x float> @should_work_with_different_instruction_type(
; CHECK-SAME: <2 x double> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x double> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x double> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc double [[SRC1_SCALAR]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc double [[SRC1_SCALAR1]] to float
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x float> [[DOTASSEMBLED_VECT]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[DOTASSEMBLED_VECT2]]
;
  %1 = fptrunc <2 x double> %src1 to <2 x float>
  ret <2 x float> %1
}

; Variation on the source element type: trunc from <2 x i64> (instead of
; <2 x i32>) to <2 x i8>. Expected output has the same per-lane shape as @basic.
define <2 x i8> @should_work_with_different_value_type(<2 x i64> %src1) {
; CHECK-LABEL: define <2 x i8> @should_work_with_different_value_type(
; CHECK-SAME: <2 x i64> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i64> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i64> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[SRC1_SCALAR]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[SRC1_SCALAR1]] to i8
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i8> undef, i8 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i8> [[DOTASSEMBLED_VECT]], i8 [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x i8> [[DOTASSEMBLED_VECT2]]
;
  %1 = trunc <2 x i64> %src1 to <2 x i8>
  ret <2 x i8> %1
}

; Variation on the destination element type: trunc from <2 x i32> to <2 x i16>
; (instead of <2 x i8>). Expected output has the same per-lane shape as @basic.
define <2 x i16> @should_work_with_different_cast_type(<2 x i32> %src1) {
; CHECK-LABEL: define <2 x i16> @should_work_with_different_cast_type(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SRC1_SCALAR]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[SRC1_SCALAR1]] to i16
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i16> undef, i16 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i16> [[DOTASSEMBLED_VECT]], i16 [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x i16> [[DOTASSEMBLED_VECT2]]
;
  %1 = trunc <2 x i32> %src1 to <2 x i16>
  ret <2 x i16> %1
}

; Same-width reinterpretation: bitcast from <2 x i32> to <2 x float>.
; Expected output: per-lane extractelement, scalar bitcast i32 -> float, and
; insertelement into a <2 x float> result.
define <2 x float> @should_work_with_type_cast_type_2(<2 x i32> %src1) {
; CHECK-LABEL: define <2 x float> @should_work_with_type_cast_type_2(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[SRC1_SCALAR]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[SRC1_SCALAR1]] to float
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x float> [[DOTASSEMBLED_VECT]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[DOTASSEMBLED_VECT2]]
;
  %1 = bitcast <2 x i32> %src1 to <2 x float>
  ret <2 x float> %1
}

; Widening cast: zext from <2 x i16> to <2 x i32>.
; Expected output: per-lane extractelement, scalar zext, and insertelement.
define <2 x i32> @should_work_with_type_extension(<2 x i16> %src1) {
; CHECK-LABEL: define <2 x i32> @should_work_with_type_extension(
; CHECK-SAME: <2 x i16> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i16> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i16> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[SRC1_SCALAR]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[SRC1_SCALAR1]] to i32
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x i32> [[DOTASSEMBLED_VECT2]]
;
  %1 = zext <2 x i16> %src1 to <2 x i32>
  ret <2 x i32> %1
}

; Wider vector: trunc from <16 x i32> to <16 x i8>.
; Expected output: 16 extractelement instructions (lanes 0..15), followed by
; 16 scalar truncs, followed by a 16-step insertelement chain starting at undef.
define <16 x i8> @should_work_with_larger_vector_size(<16 x i32> %src1) {
; CHECK-LABEL: define <16 x i8> @should_work_with_larger_vector_size(
; CHECK-SAME: <16 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <16 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <16 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR2:%.*]] = extractelement <16 x i32> [[SRC1]], i32 2
; CHECK-NEXT:    [[SRC1_SCALAR3:%.*]] = extractelement <16 x i32> [[SRC1]], i32 3
; CHECK-NEXT:    [[SRC1_SCALAR4:%.*]] = extractelement <16 x i32> [[SRC1]], i32 4
; CHECK-NEXT:    [[SRC1_SCALAR5:%.*]] = extractelement <16 x i32> [[SRC1]], i32 5
; CHECK-NEXT:    [[SRC1_SCALAR6:%.*]] = extractelement <16 x i32> [[SRC1]], i32 6
; CHECK-NEXT:    [[SRC1_SCALAR7:%.*]] = extractelement <16 x i32> [[SRC1]], i32 7
; CHECK-NEXT:    [[SRC1_SCALAR8:%.*]] = extractelement <16 x i32> [[SRC1]], i32 8
; CHECK-NEXT:    [[SRC1_SCALAR9:%.*]] = extractelement <16 x i32> [[SRC1]], i32 9
; CHECK-NEXT:    [[SRC1_SCALAR10:%.*]] = extractelement <16 x i32> [[SRC1]], i32 10
; CHECK-NEXT:    [[SRC1_SCALAR11:%.*]] = extractelement <16 x i32> [[SRC1]], i32 11
; CHECK-NEXT:    [[SRC1_SCALAR12:%.*]] = extractelement <16 x i32> [[SRC1]], i32 12
; CHECK-NEXT:    [[SRC1_SCALAR13:%.*]] = extractelement <16 x i32> [[SRC1]], i32 13
; CHECK-NEXT:    [[SRC1_SCALAR14:%.*]] = extractelement <16 x i32> [[SRC1]], i32 14
; CHECK-NEXT:    [[SRC1_SCALAR15:%.*]] = extractelement <16 x i32> [[SRC1]], i32 15
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SRC1_SCALAR]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[SRC1_SCALAR1]] to i8
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[SRC1_SCALAR2]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[SRC1_SCALAR3]] to i8
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[SRC1_SCALAR4]] to i8
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[SRC1_SCALAR5]] to i8
; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[SRC1_SCALAR6]] to i8
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i32 [[SRC1_SCALAR7]] to i8
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[SRC1_SCALAR8]] to i8
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[SRC1_SCALAR9]] to i8
; CHECK-NEXT:    [[TMP11:%.*]] = trunc i32 [[SRC1_SCALAR10]] to i8
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[SRC1_SCALAR11]] to i8
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[SRC1_SCALAR12]] to i8
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i32 [[SRC1_SCALAR13]] to i8
; CHECK-NEXT:    [[TMP15:%.*]] = trunc i32 [[SRC1_SCALAR14]] to i8
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[SRC1_SCALAR15]] to i8
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT16:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT]], i8 [[TMP2]], i32 1
; CHECK-NEXT:    [[DOTASSEMBLED_VECT17:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT16]], i8 [[TMP3]], i32 2
; CHECK-NEXT:    [[DOTASSEMBLED_VECT18:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT17]], i8 [[TMP4]], i32 3
; CHECK-NEXT:    [[DOTASSEMBLED_VECT19:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT18]], i8 [[TMP5]], i32 4
; CHECK-NEXT:    [[DOTASSEMBLED_VECT20:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT19]], i8 [[TMP6]], i32 5
; CHECK-NEXT:    [[DOTASSEMBLED_VECT21:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT20]], i8 [[TMP7]], i32 6
; CHECK-NEXT:    [[DOTASSEMBLED_VECT22:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT21]], i8 [[TMP8]], i32 7
; CHECK-NEXT:    [[DOTASSEMBLED_VECT23:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT22]], i8 [[TMP9]], i32 8
; CHECK-NEXT:    [[DOTASSEMBLED_VECT24:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT23]], i8 [[TMP10]], i32 9
; CHECK-NEXT:    [[DOTASSEMBLED_VECT25:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT24]], i8 [[TMP11]], i32 10
; CHECK-NEXT:    [[DOTASSEMBLED_VECT26:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT25]], i8 [[TMP12]], i32 11
; CHECK-NEXT:    [[DOTASSEMBLED_VECT27:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT26]], i8 [[TMP13]], i32 12
; CHECK-NEXT:    [[DOTASSEMBLED_VECT28:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT27]], i8 [[TMP14]], i32 13
; CHECK-NEXT:    [[DOTASSEMBLED_VECT29:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT28]], i8 [[TMP15]], i32 14
; CHECK-NEXT:    [[DOTASSEMBLED_VECT30:%.*]] = insertelement <16 x i8> [[DOTASSEMBLED_VECT29]], i8 [[TMP16]], i32 15
; CHECK-NEXT:    ret <16 x i8> [[DOTASSEMBLED_VECT30]]
;
  %1 = trunc <16 x i32> %src1 to <16 x i8>
  ret <16 x i8> %1
}

; Integer-to-pointer cast: inttoptr from <2 x i64> to <2 x ptr> (opaque
; pointer type). Expected output: per-lane extractelement, scalar inttoptr,
; and insertelement into a <2 x ptr> result.
define <2 x ptr> @should_work_with_different_instruction_type_2(<2 x i64> %src1) {
; CHECK-LABEL: define <2 x ptr> @should_work_with_different_instruction_type_2(
; CHECK-SAME: <2 x i64> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i64> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i64> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[SRC1_SCALAR]] to ptr
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[SRC1_SCALAR1]] to ptr
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x ptr> undef, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x ptr> [[DOTASSEMBLED_VECT]], ptr [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x ptr> [[DOTASSEMBLED_VECT2]]
;
  %1 = inttoptr <2 x i64> %src1 to <2 x ptr>
  ret <2 x ptr> %1
}

; Negative case: the trunc operand is a constant vector <i32 2, i32 4> rather
; than a function argument. Expected output: the vector trunc is left exactly
; as written, with no extractelement/insertelement expansion.
define <2 x i8> @should_not_scalarize_constants() {
; CHECK-LABEL: define <2 x i8> @should_not_scalarize_constants() {
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> <i32 2, i32 4> to <2 x i8>
; CHECK-NEXT:    ret <2 x i8> [[TMP1]]
;
  %1 = trunc <2 x i32> <i32 2, i32 4> to <2 x i8>
  ret <2 x i8> %1
}

; Negative case: the trunc is already scalar (i32 constant 4 to i8).
; Expected output: the instruction is left exactly as written.
define i8 @should_not_scalarize_scalar() {
; CHECK-LABEL: define i8 @should_not_scalarize_scalar() {
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 4 to i8
; CHECK-NEXT:    ret i8 [[TMP1]]
;
  %1 = trunc i32 4 to i8
  ret i8 %1
}

; CHECK: [[META0]] = !{i32 0}