{"id":6751,"date":"2017-12-28T00:44:01","date_gmt":"2017-12-27T15:44:01","guid":{"rendered":"http:\/\/umezawa.dyndns.info\/wordpress\/?p=6751"},"modified":"2017-12-28T01:02:38","modified_gmt":"2017-12-27T16:02:38","slug":"%e8%a4%87%e6%95%b0%e3%81%ae%e3%83%99%e3%82%af%e3%82%bf%e3%83%ac%e3%82%b8%e3%82%b9%e3%82%bf%e3%82%92%e8%bf%94%e3%81%99%e6%99%ae%e9%80%9a%e3%81%ae%e9%96%a2%e6%95%b0%e3%81%ae%e3%82%a4%e3%83%b3%e3%83%a9","status":"publish","type":"post","link":"http:\/\/umezawa.dyndns.info\/wordpress\/?p=6751","title":{"rendered":"\u8907\u6570\u306e\u30d9\u30af\u30bf\u30ec\u30b8\u30b9\u30bf\u3092\u8fd4\u3059\u666e\u901a\u306e\u95a2\u6570\u306e\u30a4\u30f3\u30e9\u30a4\u30f3\u5c55\u958b"},"content":{"rendered":"<p>1\u3064\u306e\u30d9\u30af\u30bf\u30ec\u30b8\u30b9\u30bf\uff08\u305f\u3068\u3048\u3070 __m128i \u306e\u3053\u3068\uff09\u3092\u8fd4\u3059\u30e9\u30e0\u30c0\u306f\u304d\u308c\u3044\u306b\u30a4\u30f3\u30e9\u30a4\u30f3\u5c55\u958b\u3055\u308c\u308b\uff08\u3064\u307e\u308a\u3001\u5999\u306a\u4e00\u6642\u5909\u6570\u304c\u30e1\u30e2\u30ea\u4e0a\u306b\u53d6\u3089\u308c\u305f\u308a\u3057\u306a\u3044\uff09\u3053\u3068\u306f\u5206\u304b\u3063\u3066\u3044\u308b\u3093\u3067\u3059\u304c\u3001\u8907\u6570\u306e\u30d9\u30af\u30bf\u30ec\u30b8\u30b9\u30bf\u3092\u8fd4\u3059\u666e\u901a\u306e\uff08=\u30e9\u30e0\u30c0\u3067\u306f\u306a\u3044\uff09\u95a2\u6570\u304c\u30a4\u30f3\u30e9\u30a4\u30f3\u5c55\u958b\u3055\u308c\u308b\u3068\u304d\u306f\u3069\u3046\u306a\u306e\u304b\u3001\u3068\u3044\u3046\u306e\u304c\u6c17\u306b\u306a\u308a\u307e\u3057\u305f\u3002<\/p>\n<p><!--more--><\/p>\n<p>\u3068\u3044\u3046\u308f\u3051\u3067\u3053\u3093\u306a\u30b3\u30fc\u30c9\u3002<\/p>\n<blockquote>\n<pre>\r\nstatic std::tuple&lt;__m128i , __m128i, __m256i&gt; addvec(__m128i a, __m128i b, __m128i c, __m128i d, __m256i e, __m256i f)\r\n{\r\n\treturn std::make_tuple(_mm_add_epi8(a, b), _mm_add_epi8(c, d), _mm256_add_epi8(e, f));\r\n}\r\n\r\nvoid foo(__m256i* res, __m128i* a, __m128i* b, __m128i* c, __m128i* 
d, __m256i* e, __m256i* f)\r\n{\r\n\tauto x = addvec(*a, *b, *c, *d, *e, *f);\r\n\t*res = _mm256_add_epi16(_mm256_castsi128_si256(_mm_add_epi16(std::get&lt;0&gt;(x), std::get&lt;1&gt;(x))), std::get&lt;2&gt;(x));\r\n}\r\n<\/pre>\n<\/blockquote>\n<p>\u307e\u305a Linux \u306e GCC 4.8.5 \u2026\u3060\u3068\u4f55\u3084\u3089 mangling \u304c\u30a4\u30b1\u3066\u306a\u3044\u306e\u304b __m256i \u3092\u4f7f\u3046\u3068\u30b3\u30f3\u30d1\u30a4\u30eb\u30a8\u30e9\u30fc\u306b\u306a\u308a\u307e\u3059\u3002 __m128i \u3060\u3051\u306a\u3089\u901a\u3063\u3066\u304d\u308c\u3044\u306a\u5c55\u958b\u7d50\u679c\u304c\u51fa\u529b\u3055\u308c\u307e\u3059\uff08\u7d50\u679c\u306f\u7701\u7565\uff09<\/p>\n<p>\u6b21\u306b Mac \u306e Xcode 8.3.2 \u306e clang\u3002<\/p>\n<blockquote>\n<pre>\r\n[umezawa@metis:ttys002 ~]$ clang++ -std=c++11 -mavx2 -O2 -S -o- vecinline.cc\r\n\uff08\u7565\uff09\r\n__Z3fooPDv4_xPDv2_xS2_S2_S2_S0_S0_:     ## @_Z3fooPDv4_xPDv2_xS2_S2_S2_S0_S0_\r\n        .cfi_startproc\r\n## BB#0:\r\n        pushq   %rbp\r\nLtmp0:\r\n        .cfi_def_cfa_offset 16\r\nLtmp1:\r\n        .cfi_offset %rbp, -16\r\n        movq    %rsp, %rbp\r\nLtmp2:\r\n        .cfi_def_cfa_register %rbp\r\n        movq    16(%rbp), %rax\r\n        vmovdqa (%rdx), %xmm0\r\n        vmovdqa (%r8), %xmm1\r\n        vmovdqu (%rax), %ymm2\r\n        vpaddb  (%rsi), %xmm0, %xmm0\r\n        vpaddb  (%rcx), %xmm1, %xmm1\r\n        vpaddb  (%r9), %ymm2, %ymm2\r\n        vpaddw  %xmm0, %xmm1, %xmm0\r\n        vpaddw  %ymm2, %ymm0, %ymm0\r\n        vmovdqu %ymm0, (%rdi)\r\n        popq    %rbp\r\n        vzeroupper\r\n        retq\r\n        .cfi_endproc\r\n<\/pre>\n<\/blockquote>\n<p>\u3053\u308c\u3082\u304d\u308c\u3044\u306b\u5c55\u958b\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u3067\u3082\u3063\u3066 Cygwin \u306e GCC 5.0.0\u3002Windows 
\u3060\u3068\u547c\u3073\u51fa\u3057\u898f\u7d04\u306e\u95a2\u4fc2\u3067\u30e1\u30e2\u30ea\u30a2\u30af\u30bb\u30b9\u304c\u5897\u3048\u3066\u3057\u307e\u3063\u3066\u5206\u304b\u308a\u306b\u304f\u3044\u3067\u3059\u304c\u3001\u5999\u306a\u4e00\u6642\u5909\u6570\u306f\u51fa\u73fe\u305b\u305a\u3061\u3083\u3093\u3068\u30ec\u30b8\u30b9\u30bf\u3060\u3051\u3067\u8a08\u7b97\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<blockquote>\n<pre>\r\n$ gcc -mavx2 -O2 -o- -std=c++11 -S vecinline.cpp\r\n\uff08\u4e2d\u7565\uff09\r\n        movq    40(%rsp), %rax\r\n        vmovdqa (%r9), %xmm0\r\n        vpaddb  (%rax), %xmm0, %xmm1\r\n        movq    48(%rsp), %rax\r\n        vmovdqa (%rdx), %xmm0\r\n        vpaddb  (%r8), %xmm0, %xmm0\r\n        vpaddw  %xmm0, %xmm1, %xmm0\r\n        vmovdqa (%rax), %ymm1\r\n        movq    56(%rsp), %rax\r\n        vpaddb  (%rax), %ymm1, %ymm1\r\n        vpaddw  %ymm0, %ymm1, %ymm0\r\n        vmovdqa %ymm0, (%rcx)\r\n        vzeroupper\r\n        ret\r\n\uff08\u4ee5\u4e0b\u7565\uff09\r\n<\/pre>\n<\/blockquote>\n<p>\u3067\u3001\u61f8\u6848\u306e Visual C++ 2015 \u3067\u3059\u304c\u2026<\/p>\n<blockquote>\n<pre>\r\nc:\\cygwin64\\home\\umezawa>cl \/O2 \/c \/arch:AVX2 \/EHsc vecinline.cpp\r\n\uff08\u7565\uff09\r\n$ objdump -d vecinline.obj\r\n\r\nvecinline.obj:     \u30d5\u30a1\u30a4\u30eb\u5f62\u5f0f pe-x86-64\r\n\uff08\u7565\uff09\r\n\u30bb\u30af\u30b7\u30e7\u30f3 .text$mn \u306e\u9006\u30a2\u30bb\u30f3\u30d6\u30eb:\r\n\r\n0000000000000000 < ?foo@@YAXPEAT__m256i@@PEAT__m128i@@11100@Z>:\r\n   0:   40 55                   rex push %rbp\r\n   2:   48 83 ec 20             sub    $0x20,%rsp\r\n   6:   48 8d 6c 24 20          lea    0x20(%rsp),%rbp\r\n   b:   48 83 e5 e0             and    $0xffffffffffffffe0,%rbp\r\n   f:   c4 c1 7a 6f 09          vmovdqu (%r9),%xmm1\r\n  14:   48 8b 44 24 58          mov    0x58(%rsp),%rax\r\n  19:   c5 fe 6f 00             vmovdqu (%rax),%ymm0\r\n  1d:   48 8b 44 24 60          mov    0x60(%rsp),%rax\r\n  22:   c5 fd fc 
18             vpaddb (%rax),%ymm0,%ymm3\r\n  26:   c5 fa 6f 02             vmovdqu (%rdx),%xmm0\r\n  2a:   48 8b 44 24 50          mov    0x50(%rsp),%rax\r\n  2f:   c5 f1 fc 10             vpaddb (%rax),%xmm1,%xmm2\r\n  33:   c4 c1 79 fc 08          vpaddb (%r8),%xmm0,%xmm1\r\n  38:   c5 f1 fd c2             vpaddw %xmm2,%xmm1,%xmm0\r\n  3c:   c5 fd fd d3             vpaddw %ymm3,%ymm0,%ymm2\r\n  40:   c5 fe 7f 11             vmovdqu %ymm2,(%rcx)\r\n  44:   c5 f8 77                vzeroupper\r\n  47:   48 83 c4 20             add    $0x20,%rsp\r\n  4b:   5d                      pop    %rbp\r\n  4c:   c3                      retq\r\n<\/pre>\n<\/blockquote>\n<p>\u30d7\u30ed\u30ed\u30fc\u30b0\u3068\u30a8\u30d4\u30ed\u30fc\u30b0\u306b\u306a\u3093\u304b\u304f\u3063\u3064\u3044\u3066\u3044\u308b\u306e\u304c\u6c17\u306b\u306a\u308a\u307e\u3059\u304c\u3001\u809d\u5fc3\u306e\u5c55\u958b\u7d50\u679c\u306b\u95a2\u3057\u3066\u306f\u671f\u5f85\u901a\u308a\u306b\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u304c Visual C++ 2017 \u306b\u306a\u308b\u3068\u30d7\u30ed\u30ed\u30fc\u30b0\u3068\u30a8\u30d4\u30ed\u30fc\u30b0\u304c\u304d\u308c\u3044\u306b\u306a\u3063\u3066\u3001<\/p>\n<blockquote>\n<pre>\r\n0000000000000000 < ?foo@@YAXPEAT__m256i@@PEAT__m128i@@11100@Z>:\r\n   0:   c4 c1 7a 6f 09          vmovdqu (%r9),%xmm1\r\n   5:   48 8b 44 24 30          mov    0x30(%rsp),%rax\r\n   a:   c5 fe 6f 00             vmovdqu (%rax),%ymm0\r\n   e:   48 8b 44 24 38          mov    0x38(%rsp),%rax\r\n  13:   c5 fd fc 18             vpaddb (%rax),%ymm0,%ymm3\r\n  17:   c5 fa 6f 02             vmovdqu (%rdx),%xmm0\r\n  1b:   48 8b 44 24 28          mov    0x28(%rsp),%rax\r\n  20:   c5 f1 fc 10             vpaddb (%rax),%xmm1,%xmm2\r\n  24:   c4 c1 79 fc 08          vpaddb (%r8),%xmm0,%xmm1\r\n  29:   c5 f1 fd c2             vpaddw %xmm2,%xmm1,%xmm0\r\n  2d:   c5 fd fd d3             vpaddw %ymm3,%ymm0,%ymm2\r\n  31:   c5 fe 7f 11             vmovdqu %ymm2,(%rcx)\r\n  35:   
c5 f8 77                vzeroupper\r\n  38:   c3                      retq\r\n<\/pre>\n<\/blockquote>\n<p>\u3053\u3046\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u7d50\u8ad6\u3068\u3057\u3066\u3001\u3069\u306e\u30b3\u30f3\u30d1\u30a4\u30e9\u3067\u3082\u671f\u5f85\u901a\u308a\u306b\u304d\u308c\u3044\u306a\u5c55\u958b\u7d50\u679c\u306b\u306a\u308b\u306e\u3067\u3001\u5c11\u306a\u304f\u3068\u3082\u7c21\u5358\u306a\u95a2\u6570\u3067\u3042\u308c\u3070\u5b89\u5fc3\u3057\u3066\u4f7f\u3063\u3066\u3044\u3044\u3068\u3044\u3046\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>1\u3064\u306e\u30d9\u30af\u30bf\u30ec\u30b8\u30b9\u30bf\uff08\u305f\u3068\u3048\u3070 __m128i \u306e\u3053\u3068\uff09\u3092\u8fd4\u3059\u30e9\u30e0\u30c0\u306f\u304d\u308c\u3044\u306b\u30a4\u30f3\u30e9\u30a4\u30f3\u5c55\u958b\u3055\u308c\u308b\uff08\u3064\u307e\u308a\u3001\u5999\u306a\u4e00\u6642\u5909\u6570\u304c\u30e1\u30e2\u30ea\u4e0a\u306b\u53d6\u3089\u308c\u305f\u308a\u3057\u306a\u3044\uff09\u3053\u3068\u306f\u5206\u304b\u3063\u3066\u3044\u308b\u3093\u3067\u3059\u304c\u3001\u8907\u6570\u306e\u30d9\u30af\u30bf\u30ec\u30b8\u30b9\u30bf\u3092\u8fd4\u3059\u666e\u901a\u306e\uff08=\u30e9\u30e0\u30c0 
[&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[27],"tags":[],"class_list":["post-6751","post","type-post","status-publish","format-standard","hentry","category-technology"],"_links":{"self":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/6751","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=6751"}],"version-history":[{"count":5,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/6751\/revisions"}],"predecessor-version":[{"id":6756,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/6751\/revisions\/6756"}],"wp:attachment":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=6751"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=6751"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=6751"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}