1 //          Copyright Jernej Krempuš 2012
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module pfft.stdsimd;
7 
8 import core.simd;
9 
10 import pfft.fft_impl;
11 
/// Packs four 2-bit field values into one immediate: a0 goes into bits 0-1,
/// a1 into bits 2-3, a2 into bits 4-5 and a3 into bits 6-7.
/// The arguments are OR-ed in unmasked, exactly as given.
template shuf_mask(int a3, int a2, int a1, int a0)
{
    enum shuf_mask = (a3 << 6) | (a2 << 4) | (a1 << 2) | a0;
}
16 
/// SIMD policy type built on core.simd's float4 and the matrix helpers
/// (float4x4, transpose) from std.simd. It exposes the vector type, the
/// scalar type, the number of lanes and a few static helper routines.
struct Vector 
{
    alias float4 vec;   /// SIMD vector type.
    alias float T;      /// Scalar element type.
    
    /// Number of scalar lanes in one vec.
    enum vec_size = 4;
    
    /// Reinterprets a float pointer as a pointer to a float4.
    /// NOTE(review): dereferencing the result presumably requires `a` to be
    /// suitably aligned for float4 - confirm against the callers.
    private static float4 * v(float * a)
    {
        return cast(float4*)a;
    }
    
    import std.simd;
    
    /// Reads a 4x4 block of floats at element offset i1 and another at
    /// element offset i2 (one float4 from each of p0, p2, p1, p3, in that
    /// row order), transposes both, and writes them back swapped: the
    /// transposed i2 block goes to offset i1 and the transposed i1 block
    /// goes to offset i2.
    static void bit_reverse_swap_16(float * p0, float * p1, float * p2, float * p3, size_t i1, size_t i2)
    {
        // Both blocks are fully loaded before anything is stored.
        // Rows are taken in p0, p2, p1, p3 order (p1 and p2 are exchanged).
        auto m1 = float4x4(*v(p0 + i1), *v(p2 + i1), *v(p1 + i1), *v(p3 + i1));
        m1 = transpose(m1);
        
        auto m2 = float4x4(*v(p0 + i2), *v(p2 + i2), *v(p1 + i2), *v(p3 + i2));
        m2 = transpose(m2);
        
        // Transposed block from i2 lands at i1 ...
        *v(p0 + i1) = m2.xRow;
        *v(p2 + i1) = m2.yRow;
        *v(p1 + i1) = m2.zRow;
        *v(p3 + i1) = m2.wRow;
        
        // ... and the transposed block from i1 lands at i2.
        *v(p0 + i2) = m1.xRow;
        *v(p2 + i2) = m1.yRow;
        *v(p1 + i2) = m1.zRow;
        *v(p3 + i2) = m1.wRow;
    }

    /// Transposes in place the 4x4 block of floats formed by one float4
    /// from each of p0, p2, p1, p3 (in that row order) at element offset i.
    static void bit_reverse_16(float * p0, float * p1, float * p2, float * p3, size_t i)
    {
        auto m1 = float4x4(*v(p0 + i), *v(p2 + i), *v(p1 + i), *v(p3 + i));
        m1 = transpose(m1);
        
        *v(p0 + i) = m1.xRow;
        *v(p2 + i) = m1.yRow;
        *v(p1 + i) = m1.zRow;
        *v(p3 + i) = m1.wRow;
    }
    
    version(GNU)
    {
        /// Returns a vector with every lane set to `a`
        /// (relies on the implicit scalar-to-vector conversion on return).
        static vec scalar_to_vector(T a)
        {
            return a;
        }
    }
    else
    {
        /// Returns a vector with every lane set to `a`.
        static vec scalar_to_vector(T a)
        {
            // Broadcast-initialise a named vector. This avoids taking the
            // address of a temporary struct and reinterpreting it as a
            // float4, which would depend on that temporary being 16-byte
            // aligned.
            vec r = a;
            return r;
        }
    }
}
83 
/// Compile-time tuning constants.
/// NOTE(review): these are presumably consumed by pfft.fft_impl; the exact
/// meaning of each value is not visible in this file - confirm there.
struct Options
{
    enum int log2_bitreverse_large_chunk_size = 5;
    enum int large_limit = 14;
    enum int log2_optimal_n = 9;
    enum int passes_per_recursive_call = 5;
    enum int log2_recursive_passes_chunk_size = 5;
}
92