{"id":3784,"date":"2013-04-02T22:01:51","date_gmt":"2013-04-02T13:01:51","guid":{"rendered":"http:\/\/umezawa.dyndns.info\/wordpress\/?p=3784"},"modified":"2013-04-07T17:06:03","modified_gmt":"2013-04-07T08:06:03","slug":"x86-%e3%81%ae%e3%83%93%e3%83%83%e3%83%88%e3%82%b9%e3%82%ad%e3%83%a3%e3%83%b3%e5%91%bd%e4%bb%a4%e3%81%ae-simd-%e5%8c%96%ef%bc%88%e3%81%9d%e3%81%ae%ef%bc%92%ef%bc%89","status":"publish","type":"post","link":"http:\/\/umezawa.dyndns.info\/wordpress\/?p=3784","title":{"rendered":"x86 \u306e\u30d3\u30c3\u30c8\u30b9\u30ad\u30e3\u30f3\u547d\u4ee4\u306e SIMD \u5316\uff08\u305d\u306e\uff12\uff09"},"content":{"rendered":"<p>\u3072\u3068\u3064\u6539\u5584\u70b9\u3092\u601d\u3044\u3064\u3044\u305f\u306e\u3068\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u3067\u3059\u3002<\/p>\n<p><!--more--><\/p>\n<h3>\u6539\u5584\u70b9<\/h3>\n<p>\u6d6e\u52d5\u5c0f\u6570\u70b9\u6570\u306b\u5909\u63db\u3057\u3066\u51e6\u7406\u3059\u308b\u5834\u5408\u3067\u30d3\u30c3\u30c8\u304c\u305f\u304f\u3055\u3093\u7acb\u3063\u3066\u3044\u308b\u5834\u5408\u306e\u30b1\u30a2\u3067\u3059\u304c\u3001\u4e00\u756a\u4e0a\u306e 1 \u306e\u30d3\u30c3\u30c8\u304b\u3089\u9023\u7d9a\u3057\u3066 25 \u30d3\u30c3\u30c8\u304c\u7acb\u3063\u3066\u3044\u308b\u6642\u306b\u306e\u307f\u4e38\u3081\u3067\u7e70\u308a\u4e0a\u304c\u308b\u306e\u3067\u3001\u4e00\u756a\u4e0a 1 \u306e\u6b21\u306e\u30d3\u30c3\u30c8\u3092 0 \u306b\u3057\u3066\u3057\u307e\u3048\u3070\u305d\u308c\u3067\u89e3\u6c7a\u3067\u3059\u3002\u7d50\u5c40\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30b3\u30fc\u30c9\u304c\u4f7f\u3048\u307e\u3059\u3002<\/p>\n<blockquote>\n<pre>    movdqa      xmm2, xmm0\r\n    psrld       xmm0, 1\r\n    pandn       xmm2, xmm0\r\n\r\n    cvtdq2ps    xmm1, xmm2\r\n    psrld       xmm1, 23\r\n    psubd       xmm1, 0000007f0000007f0000007f0000007fh\r\n<\/pre>\n<\/blockquote>\n<p>\u3053\u306e\u65b9\u6cd5\u306a\u3089 SSE2 \u306e\u7bc4\u56f2\u3067\u66f8\u3051\u307e\u3059\u3002<\/p>\n<h3>\u30d9\u30f3\u30c1\u30de\u30fc\u30af<\/h3>\n<p>Core 
i7-2600K 3.4GHz\u3001EIST \u7121\u52b9\u3001TB \u7121\u52b9\u300132bit \u30d7\u30ed\u30bb\u30b9 \u3067<strong>\u30ec\u30a4\u30c6\u30f3\u30b7\u3092<\/strong>\u8a08\u6e2c\u3057\u305f\u7d50\u679c\u3067\u3059\u3002\uff08\u4e00\u90e8\u51fa\u529b\u3092\u7701\u7565\u3057\u3066\u3042\u308a\u307e\u3059\uff09<\/p>\n<p><a href=\"http:\/\/umezawa.dyndns.info\/archive\/pbsrd-bench.zip\">\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u30d7\u30ed\u30b0\u30e9\u30e0\u306f\u3053\u3061\u3089\uff08Windows + Visual C++ + NASM\uff09<\/a><\/p>\n<blockquote>\n<pre>E:\\pbsrd>asmbench.exe\r\nalgorithm                         cycles\r\n----------------------------------------\r\ngpr_nothing                        1.000\r\ngpr_bsr                            4.000\r\n----------------------------------------\r\nsse_nothing                        1.000\r\nsse_cvtdq2ps_only                  6.000\r\nsse_cvtdq2pd_only                  6.049\r\nsse_insext_pbsrd                  12.053\r\nsse_cvtdq2ps_pbsrd_nocare          8.000\r\nsse_cvtdq2ps_pbsrd_msbcare_sse2   10.000\r\nsse_cvtdq2ps_pbsrd_msbcare_sse41   9.000\r\nsse_cvtdq2ps_pbsrd_roundcare      11.000\r\nsse_cvtdq2ps_pbsrd_bothcare_sse41 12.000\r\nsse_cvtdq2pd_pbsrd_nocare         11.969\r\n<\/pre>\n<\/blockquote>\n<p>\u5206\u304b\u308a\u3084\u3059\u304f\u3059\u308b\u3068\u4ee5\u4e0b\u306e\u8868\u306e\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<table border=\"border\">\n<tr>\n<th>\u624b\u6cd5<\/th>\n<th>\u30b5\u30a4\u30af\u30eb\u6570<\/th>\n<\/tr>\n<tr>\n<td>\u751f\u306e BSR \u547d\u4ee4<\/td>\n<td>3<\/td>\n<\/tr>\n<tr>\n<td>\u751f\u306e CVTDQ2PS \u547d\u4ee4<\/td>\n<td>5<\/td>\n<\/tr>\n<tr>\n<td>\u751f\u306e CVTDQ2PD \u547d\u4ee4<\/td>\n<td>5<\/td>\n<\/tr>\n<tr>\n<td>\u30b9\u30ab\u30e9\u30fc\u306b\u3057\u3066\u51e6\u7406<\/td>\n<td>11<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PS \u3092\u4f7f\u3046 \uff08\u5225\u6bb5\u306e\u30b1\u30a2\u7121\u3057\uff09<\/td>\n<td>7<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PS \u3092\u4f7f\u3046 
\uff08\u6700\u4e0a\u4f4d\u30d3\u30c3\u30c8\u306e\u30b1\u30a2 SSE2 \u7248\uff09<\/td>\n<td>9<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PS \u3092\u4f7f\u3046 \uff08\u6700\u4e0a\u4f4d\u30d3\u30c3\u30c8\u306e\u30b1\u30a2 SSE4.1 \u7248\uff09<\/td>\n<td>8<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PS \u3092\u4f7f\u3046 \uff08\u4e38\u3081\u306e\u30b1\u30a2 SSE2 \u7248\uff09<\/td>\n<td>10<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PS \u3092\u4f7f\u3046 \uff08\u6700\u4e0a\u4f4d\u30d3\u30c3\u30c8 SSE4.1 + \u4e38\u3081 SSE2\uff09<\/td>\n<td>11<\/td>\n<\/tr>\n<tr>\n<td>CVTDQ2PD \u3092\u4f7f\u3046 \uff08\u5225\u6bb5\u306e\u30b1\u30a2\u7121\u3057\uff09<\/td>\n<td>11<\/td>\n<\/tr>\n<\/table>\n<p>\u3053\u308c\u3092\u898b\u308b\u9650\u308a\u3001\u30b9\u30ab\u30e9\u30fc\u306b\u3057\u3066\u51e6\u7406\u3059\u308b\u304b CVTDQ2PS \u3092\u4f7f\u3046\u306e\u304c\u3088\u3055\u305d\u3046\u3067\u3059\u306d\u3002\u307e\u305f\u3001\u5c06\u6765 ymm \u30ec\u30b8\u30b9\u30bf\u306b\u5bfe\u3057\u3066\u51e6\u7406\u3059\u308b\u5834\u5408\u306f CVTDQ2PS \u3092\u4f7f\u3046\u306e\u304c\u6700\u9069\u306b\u306a\u308a\u305d\u3046\u3067\u3059\u3002<\/p>\n<p>\u3057\u304b\u3057\u3001\u4e0a\u306e\u4e38\u3081\u306e\u30b1\u30a2\u304c 3 \u30b5\u30a4\u30af\u30eb\u3082\u304b\u304b\u308b\u3053\u3068\u306b\u306a\u3063\u3066\u3044\u308b\u306e\u304c\u610f\u5916\u3067\u3059\u3002SandyBridge \u4ee5\u964d\u3067\u306f\u30ec\u30b8\u30b9\u30bf\u9593 mov \u306f\u547d\u4ee4\u30c7\u30b3\u30fc\u30c9\u6bb5\u968e\u3067\u6d88\u6ec5\u3059\u308b\u3068\u805e\u3044\u305f\u306e\u3067 2 \u30b5\u30a4\u30af\u30eb\u306b\u306a\u308b\u3068\u601d\u3063\u3066\u3044\u305f\u306e\u3067\u3059\u304c\u2026 \u3046\u307e\u304f\u5bfe\u51e6\u3067\u304d\u306a\u3044\u30b1\u30fc\u30b9\u306a\u306e\u304b\u3057\u3089\u3002<\/p>\n<h3>\u7d50\u8ad6<\/h3>\n<p>{V,}PBS{F,R}{D,Q} \u547d\u4ee4\u8ffd\u52a0\u3057\u3066\u304f\u308c\u3088 Intel 
\u3055\u3093\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u3072\u3068\u3064\u6539\u5584\u70b9\u3092\u601d\u3044\u3064\u3044\u305f\u306e\u3068\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u3067\u3059\u3002<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[27],"tags":[],"class_list":["post-3784","post","type-post","status-publish","format-standard","hentry","category-technology"],"_links":{"self":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/3784","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3784"}],"version-history":[{"count":6,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/3784\/revisions"}],"predecessor-version":[{"id":3795,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/3784\/revisions\/3795"}],"wp:attachment":[{"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3784"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3784"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/umezawa.dyndns.info\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3784"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}