vpopcnt{d,q} support ptr_b

This commit is contained in:
MITSUNARI Shigeo 2018-01-05 14:13:58 +09:00
parent 48499eb100
commit e450f96513
3 changed files with 68 additions and 20 deletions

View file

@ -325,6 +325,22 @@ CYBOZU_TEST_AUTO(shrd)
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
@ -351,6 +367,22 @@ CYBOZU_TEST_AUTO(shrd)
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
@ -373,9 +405,17 @@ CYBOZU_TEST_AUTO(vpopcnt)
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
@ -391,9 +431,17 @@ CYBOZU_TEST_AUTO(vpopcnt)
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);