ScummVM API documentation
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
dgSimd_Instrutions.h
1 /* Copyright (c) <2003-2011> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef AFX_SIMD_INTRUCTION_4563GFJK9R__INCLUDED_
23 #define AFX_SIMD_INTRUCTION_4563GFJK9R__INCLUDED_
24 
25 
26 #include "dgStdafx.h"
27 #include "dgTypes.h"
28 
29 #ifdef DG_BUILD_SIMD_CODE
30 #ifdef __ppc__
31 
32 #include <vecLib/veclib.h>
33 
34 #define simd_type vFloat
35 #define simd_char vUInt8
36 #define simd_env vUInt16
37 
38 union vFloatTuple {
39  simd_type v;
40  dgFloat32 f[4];
41 };
42 
43 
/*
 * Builds a 16-entry byte-index list from four lane selectors.
 * Each selector s contributes the four consecutive indices s*4+0 .. s*4+3;
 * x is emitted first and w last. The result is cast to vUInt8.
 *
 * Every selector is parenthesized so that an expression argument such as
 * (i + 1) is scaled as a whole instead of being split by operator
 * precedence.
 */
#define PURMUT_MASK(w, z, y, x) (vUInt8) \
	((x) * 4 + 0, (x) * 4 + 1, (x) * 4 + 2, (x) * 4 + 3, \
	 (y) * 4 + 0, (y) * 4 + 1, (y) * 4 + 2, (y) * 4 + 3, \
	 (z) * 4 + 0, (z) * 4 + 1, (z) * 4 + 2, (z) * 4 + 3, \
	 (w) * 4 + 0, (w) * 4 + 1, (w) * 4 + 2, (w) * 4 + 3)
49 
50 
/* Read and write the vector control value through vec_mfvscr / vec_mtvscr. */
#define simd_get_ctrl()       vec_mfvscr ()
#define simd_set_ctrl(env)    vec_mtvscr (env)

/*
 * ORs a simd_env constant (a single 1 in the seventh of its eight elements)
 * into the current control value and writes the result back.
 */
#define simd_set_FZ_mode()    simd_set_ctrl (vec_or ((simd_env) (0, 0, 0, 0, 0, 0, 1, 0), simd_get_ctrl()))
/*
 * Build a vector whose four elements are all (dgFloat32)(a).
 * All three macros share the same expansion in this branch.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument is converted as a whole (the cast used to bind only to its
 * first operand). Note that a is expanded four times: do not pass an
 * expression with side effects.
 */
#define simd_set1(a)       (vFloat) ((dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a))
#define simd_load_s(a)     (vFloat) ((dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a))
#define simd_load1_v(a)    (vFloat) ((dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a), (dgFloat32)(a))
57 
/* Permutation: forwards a, b and the mask to vec_perm. */
#define simd_permut_v(a,b,mask)    vec_perm ((a), (b), (mask))

/* Element-wise vector operations. */
#define simd_or_v(a,b)             vec_or ((a), (b))
#define simd_and_v(a,b)            vec_and ((a), (b))
#define simd_add_v(a,b)            vec_add ((a), (b))
#define simd_sub_v(a,b)            vec_sub ((a), (b))
#define simd_min_v(a,b)            vec_min ((a), (b))
#define simd_max_v(a,b)            vec_max ((a), (b))

/* Multiplication is expressed as a multiply-add with an all-zero addend. */
#define simd_mul_v(a,b)            vec_madd ((a), (b), (simd_type) (0.0f, 0.0f, 0.0f, 0.0f))

/* a + b * c  (a is passed as the addend of vec_madd). */
#define simd_mul_add_v(a,b,c)      vec_madd ((b), (c), (a))

/* a - b * c  (a is passed as the last operand of vec_nmsub). */
#define simd_mul_sub_v(a,b,c)      vec_nmsub ((b), (c), (a))

/*
 * Greater-than comparison. This used to expand to an undefined placeholder
 * identifier; it now uses vec_cmpgt and casts the result to simd_type,
 * the same way simd_cmpgt_s does in this branch.
 */
#define simd_cmpgt_v(a,b)          ((simd_type) vec_cmpgt ((a), (b)))

/*
 * Reciprocal square root. This used to expand to an undefined placeholder
 * identifier; it now uses the AltiVec estimate intrinsic vec_rsqrte.
 */
#define simd_rsqrt_v(a)            vec_rsqrte (a)
71 
/*
 * "Scalar" operations. In this branch they are plain aliases of the
 * corresponding full-vector macros.
 */
#define simd_add_s(a,b)            simd_add_v (a, b)
#define simd_sub_s(a,b)            simd_sub_v (a, b)
#define simd_mul_s(a,b)            simd_mul_v (a, b)
#define simd_min_s(a,b)            simd_min_v (a, b)
#define simd_max_s(a,b)            simd_max_v (a, b)
#define simd_mul_add_s(a,b,c)      simd_mul_add_v (a, b, c)
#define simd_mul_sub_s(a,b,c)      simd_mul_sub_v (a, b, c)
#define simd_cmpgt_s(a,b)          (simd_type) vec_cmpgt(a, b)

/*
 * Copies element 0 of vector x into the scalar lvalue a, going through
 * union vFloatTuple.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe as the body of an if/else); the temporary no longer uses a
 * reserved double-underscore name, and the union is named with its tag
 * so the expansion is valid in C as well as C++.
 *
 * NOTE(review): the SSE variant of simd_store_s takes (vector, pointer),
 * whereas this one takes (destination lvalue, vector) -- confirm which
 * convention callers rely on.
 */
#define simd_store_s(a,x) \
	do { \
		union vFloatTuple simd_store_tmp_; \
		simd_store_tmp_.v = (x); \
		(a) = simd_store_tmp_.f[0]; \
	} while (0)
81 
82 #else
83 
/* Non-PowerPC branch: SSE intrinsics. Basic types first. */
#define simd_type             __m128
#define simd_env              dgUnsigned32

/* Control/status register access and flush-to-zero switch. */
#define simd_get_ctrl()       _mm_getcsr ()
#define simd_set_ctrl(a)      _mm_setcsr (a)
#define simd_set_FZ_mode()    _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON)
90 
/* Vector construction from scalars. */
#define simd_set1(a)          _mm_set_ps1 (a)
/* The four arguments are handed to _mm_set_ps in reverse order (w first, x last). */
#define simd_set(x,y,z,w)     _mm_set_ps ((w), (z), (y), (x))

/* Loads: each macro takes an lvalue and passes its address to the intrinsic. */
#define simd_load_s(a)        _mm_load_ss (&a)
/* disabled: #define simd_load_is(a,i) _mm_cvtsi32_ss (a, i) */
#define simd_load1_v(a)       _mm_load1_ps (&a)
#define simd_loadu_v(a)       _mm_loadu_ps (&a)
97 
98 
/* Shuffle support: the mask is built with _MM_SHUFFLE and consumed by _mm_shuffle_ps. */
#define PURMUT_MASK(w, z, y, x)    _MM_SHUFFLE ((w), (z), (y), (x))
#define simd_permut_v(a,b,mask)    _mm_shuffle_ps ((a), (b), (mask))
101 
102 
/* Bitwise operations on packed floats. */
#define simd_or_v(a,b)             _mm_or_ps ((a), (b))
#define simd_and_v(a,b)            _mm_and_ps ((a), (b))
#define simd_xor_v(a,b)            _mm_xor_ps ((a), (b))
/* Operands are swapped on purpose: b is passed as the first argument of _mm_andnot_ps. */
#define simd_andnot_v(a,b)         _mm_andnot_ps ((b), (a))

/* Packed arithmetic. */
#define simd_add_v(a,b)            _mm_add_ps ((a), (b))
#define simd_sub_v(a,b)            _mm_sub_ps ((a), (b))
#define simd_min_v(a,b)            _mm_min_ps ((a), (b))
#define simd_max_v(a,b)            _mm_max_ps ((a), (b))
#define simd_mul_v(a,b)            _mm_mul_ps ((a), (b))
/* a + b * c, done as a separate multiply and add. */
#define simd_mul_add_v(a,b,c)      _mm_add_ps ((a), _mm_mul_ps ((b), (c)))
/* a - b * c, done as a separate multiply and subtract. */
#define simd_mul_sub_v(a,b,c)      _mm_sub_ps ((a), _mm_mul_ps ((b), (c)))

/* Packed comparisons. */
#define simd_cmpgt_v(a,b)          _mm_cmpgt_ps ((a), (b))
#define simd_cmpge_v(a,b)          _mm_cmpge_ps ((a), (b))
#define simd_cmplt_v(a,b)          _mm_cmplt_ps ((a), (b))
#define simd_cmple_v(a,b)          _mm_cmple_ps ((a), (b))

/* Division, reciprocal square root and store. */
#define simd_div_v(a,b)            _mm_div_ps ((a), (b))
#define simd_rsqrt_v(a)            _mm_rsqrt_ps (a)
/* Note the argument order: vector first, destination pointer second. */
#define simd_store_v(a,ptr)        _mm_store_ps ((ptr), (a))
121 
/* disabled: #define simd_mask_v(a) _mm_movemask_ps(a) */

/* Interleave and half-move helpers; arguments are forwarded unchanged. */
#define simd_pack_lo_v(a,b)        _mm_unpacklo_ps ((a), (b))
#define simd_pack_hi_v(a,b)        _mm_unpackhi_ps ((a), (b))
#define simd_move_lh_v(a,b)        _mm_movelh_ps ((a), (b))
#define simd_move_hl_v(a,b)        _mm_movehl_ps ((a), (b))
127 
128 
129 
130 
/* Scalar (_ss) arithmetic. */
#define simd_add_s(a,b)            _mm_add_ss ((a), (b))
#define simd_sub_s(a,b)            _mm_sub_ss ((a), (b))
#define simd_mul_s(a,b)            _mm_mul_ss ((a), (b))
#define simd_min_s(a,b)            _mm_min_ss ((a), (b))
#define simd_max_s(a,b)            _mm_max_ss ((a), (b))
/* a + b * c, done as a separate multiply and add. */
#define simd_mul_add_s(a,b,c)      _mm_add_ss ((a), _mm_mul_ss ((b), (c)))
/* a - b * c, done as a separate multiply and subtract. */
#define simd_mul_sub_s(a,b,c)      _mm_sub_ss ((a), _mm_mul_ss ((b), (c)))

/* Scalar stores. simd_store_s takes the vector first and the destination pointer second. */
#define simd_store_s(a,ptr)        _mm_store_ss ((ptr), (a))
/* Converts through _mm_cvtss_si32 and yields the integer result. */
#define simd_store_is(a)           _mm_cvtss_si32 (a)
/* disabled: #define simd_store_d(a) _mm_cvtps_pi32 (a) */
141 
142 
143 
/* Scalar (_ss) comparisons. */
#define simd_cmpgt_s(a,b)          _mm_cmpgt_ss ((a), (b))
#define simd_cmpge_s(a,b)          _mm_cmpge_ss ((a), (b))
#define simd_cmplt_s(a,b)          _mm_cmplt_ss ((a), (b))
#define simd_cmple_s(a,b)          _mm_cmple_ss ((a), (b))

/* Scalar division, reciprocal and reciprocal square root. */
#define simd_div_s(a,b)            _mm_div_ss ((a), (b))
#define simd_rcp_s(a)              _mm_rcp_ss (a)
#define simd_rsqrt_s(a)            _mm_rsqrt_ss (a)
152 
153 #endif
154 
155 #endif
156 
157 #endif
158