1 //          Copyright Jernej Krempuš 2012
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module pfft.stdapi;
7 
8 import core.memory, std.complex, std.traits, core.bitop, std.typetuple, std.range;
9 
10 // Current GDC branch for android insists on using sinl and friends when
11 // std.math is imported, so we need to do this:
12 version(GNU) version(ARM)
13 {
14     auto generateFunctions(string fmt, string[] names)
15     {
16         auto r = "";
17         
18         foreach(name; names)
19             r ~= std.array.replace(fmt, "%s", name);
20 
21         return r; 
22     }
23   
24     enum twoArgMathL =
25     q{
26         extern(C) auto %sl(real a, real b) 
27         { 
28             return cast(real) 
29                 core.stdc.math.%s(cast(double) a, cast(double) b); 
30         }
31     };
32  
33     enum oneArgMathL =
34     q{
35         extern(C) auto %sl(real a) 
36         { 
37             return cast(real) 
38                 core.stdc.math.%s(cast(double) a); 
39         }
40     };
41     
42     mixin(generateFunctions(oneArgMathL, 
43         ["sin", "cos", "asin", "tan", "sqrt", "atan", "logb"]));
44     mixin(generateFunctions(twoArgMathL, ["remainder", "fmod", "atan2"]));
45 
46     extern(C) auto modfl(real a, real* b) 
47     {
48         double tmp = *b;
49         auto r = cast(real) core.stdc.math.modf(cast(double) a, &tmp);
50         *b = tmp;
51         return r;
52     }
53 
54     extern(C) auto llrintl(real a)
55     {
56         return core.stdc.math.llrint(cast(double) a);
57     }
58 
59     extern(C) auto remquol(real a, real b, int *i)
60     {
61        return cast(real) core.stdc.math.remquo(cast(double) a,cast(double) b, i); 
62     } 
63 }
64 
65 template st(alias a){ enum st = cast(size_t) a; } 
66 
67 auto isComplexArray(R, T)()
68 {
69     static if(
70         ((isArray!R || isPointer!R) &&
71         is(typeof(R.init[0].re) == T) && 
72         is(typeof(R.init[0].im) == T) &&
73         typeof(R.init[0]).sizeof == 2*T.sizeof))
74     {
75         typeof(R.init[0]) e;
76         return 
77             cast(typeof(e.re)*)&e == &(e.re) && 
78             &(e.re) + 1 == &(e.im);
79     }
80     else
81         return false;
82 }
83 
84 private final class TypedFft(TT)
85 {    
86     static if(is(TT == float))
87         import impl = pfft.impl_float;
88     else static if(is(TT == double))
89         import impl = pfft.impl_double;
90     else static if(is(TT == real))
91         import impl = pfft.impl_real;
92     else 
93         static assert(0, "Not implemented");
94         
95     
96     uint log2n;
97     impl.T* re;
98     impl.T* im;
99     alias Complex!(impl.T) C;
100     impl.Table table;
101     impl.RTable rtable;
102     
103     
104     this(size_t n)
105     {
106         log2n  = bsf(n);
107         re = cast(impl.T*)GC.malloc(impl.T.sizeof << log2n);
108         im = cast(impl.T*)GC.malloc(impl.T.sizeof << log2n);
109         
110         auto mem = GC.malloc( impl.table_size_bytes(log2n));
111         table = impl.fft_table(log2n, mem);
112 
113         mem = GC.malloc( impl.rtable_size_bytes(log2n + 1));
114         rtable = impl.rfft_table(log2n + 1, mem);
115     }
116 
117     private bool isAligned(impl.T* p)
118     {
119         return ((cast(size_t)p) & (impl.alignment(log2n) - 1)) == 0;
120     }    
121     
122     private bool fastInterleave(R)(R range)
123     {
124         return isComplexArray!(R, TT)() && isAligned(&(range[0].re));
125     }
126     
127     private void deinterleave_array(R)(R range)
128     {
129         static if(is(typeof(range[0].re) == impl.T))
130             if(fastInterleave(range))
131                 return impl.deinterleave_array(
132                     re, im, &(range[0].re), st!1 << log2n);
133         
134         foreach(i, e; range)
135         {
136             re[i] = e.re;
137             im[i] = e.im;
138         }
139     }
140    
141     private void interleave_array(R)(R range)
142     {
143         static if(is(typeof(range[0].re) == impl.T))
144             if(fastInterleave(range))
145                 return impl.interleave_array(
146                     re, im, &(range[0].re), st!1 << log2n);
147         
148         foreach(i, ref e; range)
149         {
150             e.re = re[i];
151             e.im = im[i];
152         }
153     }
154 
155     void fft(bool inverse, Ret, R)(R range, Ret buf)
156     {
157         deinterleave_array(range);
158         static if(inverse)
159         {
160             auto n = st!1 << log2n; 
161             impl.fft(im, re, log2n, table);
162 
163             impl.scale(re, n, (cast(TT) 1) / n);
164             impl.scale(im, n, (cast(TT) 1) / n);
165         }
166         else
167             impl.fft(re, im, log2n, table);
168         
169         interleave_array(buf);
170     }
171     
172     void rfft(Ret, R)(R range, Ret buf)
173     {
174         deinterleave_array(cast(Complex!(ElementType!R)[])range);
175         impl.rfft(re, im, log2n + 1, table, rtable);
176         interleave_array(buf);
177        
178         auto n = st!1 << log2n; 
179         buf[n] = Complex!TT(buf[0].im, 0);
180         buf[0].im = 0;
181         
182         foreach(i; 1 .. n)
183         {
184             buf[2*n - i].re = buf[i].re;
185             buf[2*n - i].im = -buf[i].im;
186         }
187     }
188     
189     C[] fft(bool inverse, R)(R range)
190     {
191         auto return_buf = cast(C*)GC.malloc(C.sizeof << log2n);
192         fft!inverse(range, return_buf);
193         return return_buf[0 .. (1 << log2n)];
194     }
195 
196     C[] rfft(R)(R range)
197     {
198         auto n = st!1 << log2n;
199         auto return_buf = cast(C*)GC.malloc(C.sizeof << (log2n + 1));
200         rfft(range, return_buf[0 .. 2 * n]);
201         return return_buf[0 .. 2 * n];
202     }
203 
204     static C[] allocate(size_t n)
205     {
206         auto r = cast(C*) GC.malloc(n * C.sizeof);
207         assert(((impl.alignment(bsr(n)) - 1) & cast(size_t) r) == 0);
208         return r[0 .. n];
209     }
210 }
211 
212 
213 /**
214 A class for calculating discrete fourier transforms using fast fourier 
215 transform. This class mimics std.numeric.Fft, but works a bit differently
216 internally. The Fft in phobos does all the initialization when the
217 constructor is called. Because pfft uses different tables for each
218 combination of type and size, it can't do all the initialization up front.
219 Instead, it calculates a table for a combination of type $(D T) and size $(I n)
220 the first time the $(D fft) method is called with a template parameter $(D T)
221 and a parameter of size $(I n) and then stores this table for later use.   
222 
223 Example:
224 ---
225 import std.stdio, std.conv, std.exception, std.complex;
226 import pfft.stdapi;
227 
228 void main(string[] args)
229 {
230     auto n = to!int(args[1]);
231     
232     enforce((n & (n-1)) == 0, "N must be a power of two.");
233 
234     auto f = new Fft(n);
235     auto data = Fft.allocate!(double)(n);
236 
237     foreach(ref e; data)
238         readf("%s %s\n", &e.re, &e.im);
239     
240     auto ft = f.fft!double(data);
241 
242     foreach(e; ft)
243         writefln("%s %s", e.re, e.im);
244 }
245 ---
246  */
247 final class Fft
248 {
249     import std.variant;
250 
251     private enum nSizes = 8 * size_t.sizeof;
252     private void*[3 * nSizes] cache;
253 
254     private auto impl(T)(size_t n)
255     {
256         static if(is(T == float))
257             auto i = bsf(n);
258         else static if(is(T == double))
259             auto i = nSizes + bsf(n);
260         else
261             auto i = 2 * nSizes + bsf(n);
262 
263         if(cache[i] )
264             return cast(TypedFft!T) cache[i];
265         else
266         {
267             auto t = new TypedFft!T(n);
268             cache[i] = cast(void*)t;
269             return t;
270         }
271     }
272    
273     auto numberOfElements(R)(R r)
274     {
275         static if(hasLength!R)
276             return r.length;
277         else static if(isForwardRange!R)
278             return walkLength(r.save());
279         else
280             static assert(false, 
281                 "Can't get the length of the range without consumming it.");
282     }
283 
284 /** 
285 Fft constructor. nmax is there just for compatibility with std.numeric.Fft.
286  */
287     this(size_t nmax = size_t.init) { }
288     
289     private Complex!(T)[] fftTemplate(bool inverse, T, R)(R r) 
290     {
291         auto n = numberOfElements(r);
292         assert((n & (n - 1)) == 0);
293         return impl!T(n).fft!inverse(r);
294     }
295 
296 /**
297 Computes  the discrete fourier transform of data in r  and returns it. Data in
298 r isn't changed. $(D R) must be a forward range or a range with a length property.
299 It must have complex or floating point elements. Here a complex type is a type
300 with assignable floating point properties $(D re) and $(D im). The number of elements
301 in r must be a power of two. $(D T) must be a floating point type. The length
302 of the returned array is the same as the number of elements in r.
303  */
304     Complex!(T)[] fft(T, R)(R r) if(!isNumeric!(ElementType!R)) 
305     {
306         return fftTemplate!false(r);
307     }
308 
309     Complex!(T)[] fft(T, R)(R r) if(isNumeric!(ElementType!R)) 
310     {
311         auto n = numberOfElements(r);
312         assert((n & (n - 1)) == 0);
313         return impl!T(n / 2).rfft(r);
314     }
315    
316     private void fftTemplate(bool inverse, R, Ret)(R r, Ret ret)
317     {
318         r = r.save();
319 
320         auto n = numberOfElements(r);
321         assert((n & (n - 1)) == 0);
322         impl!(typeof(ret[0].re))(n).fft!inverse(r, ret);
323     }
324 
325 /**
326 Computes the discrete fourier transform of data in r and stores the result in
327 the user provided buffer ret. Data in r isn't changed. $(D R) must be a forward 
328 range or a range with a length property. It must have complex or floating point 
329 elements. Here a complex type is a type with assignable floating point properties
330 $(D re) and $(D im). Ret must be an input range with complex elements. r and ret must
331 have the same number of elements and that number must be a power of two.
332  */ 
333     void fft(R, Ret)(R r, Ret ret) if(!isNumeric!(ElementType!R))
334     {
335        fftTemplate!false(r, ret); 
336     }
337 
338     auto fft(R, Ret)(R r, Ret ret) if(isNumeric!(ElementType!R))
339     {
340         r = r.save();
341         auto n = numberOfElements(r);
342         assert((n & (n - 1)) == 0);
343         impl!(typeof(ret[0].re))(r.length / 2).rfft(r, ret);
344     }
345 
346 
347 /**
348 Computes  the inverse discrete fourier transform of data in r  and returns it. 
349 Data in r isn't changed.  $(D R) must be a forward range or a range with a
350 length property. It must have complex or floating point elements. Here a complex
351 type is a type with assignable floating point properties $(D re) and $(D im). The number
352 of elements in r must be a power of two. T must be a floating point
353 type. The length of the returned array is the same as the number of elements in
354 r.
355  */
356     Complex!(T)[] inverseFft(T, R)(R r)
357     {
358         return fftTemplate!true(r);
359     }
360 
361 /**
362 Computes the inverse discrete fourier transform of data in r and stores the 
363 result in the user provided buffer ret. Data in r isn't changed. R must be a 
364 forward range with complex or floating point elements.Here a complex type is 
365 a type with assignable properties $(D re) and $(D im). Ret must be an input range with
366 complex elements. r and ret must have the same number of elements and that
367 number must be a power of two.
368  */ 
369     void inverseFft(R, Ret)(R r, Ret ret)
370     {
371        fftTemplate!true(r, ret); 
372     }
373 
374 /**
375 Allocates an array of size n aligned appropriately for use as parameters to
376 $(D fft) methods. Both fft methods will still work correctly even if the 
377 parameters are not propperly aligned (or even if they aren't arrays at all), 
378 they will just be a bit slower.
379  */
380     static T[] allocate(T)(size_t n)
381     {
382         assert((n & (n - 1)) == 0);
383         auto r = cast(T*) GC.malloc(n * T.sizeof);
384         return r[0 .. n];
385     }
386 }
387