1 //          Copyright Jernej Krempuš 2012
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module pfft.pfft;
7 import core.stdc.stdlib;
8 import core.stdc..string: memcpy;
9 import core.exception,
10        core.bitop,
11        std.array;
12 
13 template Import(TT)
14 {
15     static if(is(TT == float))
16         import impl = pfft.impl_float;
17     else static if(is(TT == double))
18         import impl = pfft.impl_double;
19     else static if(is(TT == real))
20         import impl = pfft.impl_real;
21     else
22         static assert(0, "Not implemented");
23 }
24 
25 template st(alias a){ enum st = cast(size_t) a; }
26 
27 /**
28 A class for calculating discrete fourier transform. The methods of this class
29 use split format for complex data. This means that a complex data set is
30 represented as two arrays - one for the real part and one for the imaginary
31 part. An instance of this class can only be used for transforms of one
32 particular size. The template parameter is the floating point type that the
33 methods of the class will operate on.
34 
35 Example:
36 ---
37 import std.stdio, std.conv, std.exception;
38 import pfft.pfft;
39 
40 void main(string[] args)
41 {
42     auto n = to!int(args[1]);
43     enforce((n & (n-1)) == 0, "N must be a power of two.");
44 
45     alias Fft!float F;
46 
47     F f;
48     f.initialize(n);
49 
50     auto re = F.allocate(n);
51     auto im = F.allocate(n);
52 
53     foreach(i, _; re)
54         readf("%s %s\n", &re[i], &im[i]);
55 
56     f.fft(re, im);
57 
58     foreach(i, _; re)
59         writefln("%s\t%s", re[i], im[i]);
60 }
61 ---
62  */
63 struct Fft(T)
64 {
65 public:
66 nothrow:
67 @nogc:
68     mixin Import!T;
69 
70     int log2n;
71     impl.Table table;
72 
73 /**
74 The Fft constructor. The parameter is the size of data sets that $(D fft) and
75 $(D ifft) will operate on. I will refer to this number as n in the rest of the
76 documentation for this class.Tables used in fft and ifft are calculated in the
77 constructor.
78  */
79     void initialize(size_t n)
80     {
81         assert((n & (n - 1)) == 0);
82         log2n  = bsf(n);
83         auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64);
84         table = impl.fft_table(log2n, mem);
85         assert(mem == table);
86     }
87 
88     ~this()
89     {
90         alignedFree(table, 64);
91     }
92 
93 /**
94 Calculates discrete fourier transform. $(D_PARAM re) should contain the real
95 part of the data and $(D_PARAM im) the imaginary part of the data. The method
96 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im).
97 Both arrays must be properly aligned - to obtain a properly aligned array you can
98 use $(D allocate).
99  */
100     void fft(T[] re, T[] im)
101     {
102         assert(re.length == im.length);
103         assert(re.length == (st!1 << log2n));
104         assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0);
105         assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0);
106 
107         impl.fft(re.ptr, im.ptr, log2n, table);
108     }
109 
110 /**
111 Calculates inverse discrete fourier transform scaled by n. The arguments have
112 the same role as they do in $(D fft).
113  */
114     void ifft(T[] re, T[] im)
115     {
116         fft(im, re);
117     }
118 
119 /**
120     Returns requited alignment for use with $(D fft), $(D ifft) and
121     $(D scale) methods.
122  */
123     static size_t alignment(size_t n)
124     {
125         return impl.alignment(n);
126     }
127 
128 /**
129 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and
130 $(D scale) methods.
131  */
132     static T[] allocate(size_t n)
133     {
134         T* r = cast(T*) alignedMalloc(n, alignment(n));
135         assert(((impl.alignment(n) - 1) & cast(size_t) r) == 0);
136         return r[0..n];
137     }
138 
139 /**
140 Deallocates an array allocated with `allocate`.
141 */
142     static void deallocate(T[] arr)
143     {
144         size_t n = arr.length;
145         alignedFree(arr.ptr, alignment(n));
146     }
147 
148 
149 /**
150 Scales an array data by factor k. The array must be properly aligned. To obtain
151 a properly aligned array, use $(D allocate).
152  */
153     static void scale(T[] data, T k)
154     {
155         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
156         impl.scale(data.ptr, data.length, k);
157     }
158 
159 }
160 
161 /**
162 A class for calculating real discrete fourier transform. The methods of this
163 class use split format for complex data. This means that complex data set is
164 represented as two arrays - one for the real part and one for the imaginary
165 part. An instance of this class can only be used for transforms of one
166 particular size. The template parameter is the floating point type that the
167 methods of the class will operate on.
168 
169 Example:
170 ---
171 import std.stdio, std.conv, std.exception;
172 import pfft.pfft;
173 
174 void main(string[] args)
175 {
176     auto n = to!int(args[1]);
177     enforce((n & (n-1)) == 0, "N must be a power of two.");
178 
179     alias Rfft!float F;
180 
181     F f;
182     f.initialize(n);
183 
184     auto data = F.allocate(n);
185 
186     foreach(ref e; data)
187         readf("%s\n", &e);
188 
189     f.rfft(data);
190 
191     foreach(i; 0 .. n / 2 + 1)
192         writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]);
193 }
194 ---
195  */
196 struct Rfft(T)
197 {
198 public:
199 nothrow:
200 @nogc:
201     mixin Import!T;
202 
203     int log2n;
204     Fft!T _complex;
205     impl.RTable rtable;
206     impl.ITable itable;
207 
208 /**
209 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will
210 operate on. I will refer to this number as n in the rest of the documentation
211 for this class. All tables used in rfft are calculated in the constructor.
212  */
213     void initialize(size_t n)
214     {
215         assert((n & (n - 1)) == 0);
216         log2n  = bsf(n);
217 
218         _complex.initialize(n / 2);
219 
220         auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64);
221         rtable = impl.rfft_table(log2n, mem);
222         assert(mem == rtable);
223 
224         mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64);
225         itable = impl.interleave_table(log2n, mem);
226         assert(mem == itable);
227     }
228 
229     ~this()
230     {
231         alignedFree(rtable, 64);
232         alignedFree(itable, 64);
233     }
234 
235 /**
236 Calculates discrete fourier transform of the real valued sequence in data.
237 The method operates in place. When the method completes, data contains the
238 result. First $(I n / 2 + 1) elements contain the real part of the result and
239 the rest contains the imaginary part. Imaginary parts at position 0 and
240 $(I n / 2) are known to be equal to 0 and are not stored, so the content of
241 data looks like this:
242 
243  $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1))
244 
245 
246 The elements of the result at position greater than n / 2 can be trivially
247 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds
248 because the input sequence is real.
249 
250 
251 The length of the array must be equal to n and the array must be properly
252 aligned. To obtain a properly aligned array you can use $(D allocate).
253  */
254     void rfft(T[] data)
255     {
256         assert(data.length == (st!1 << log2n));
257         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
258 
259         impl.deinterleave(data.ptr, log2n, itable);
260         impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
261     }
262 
263 /**
264 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale)
265 to normalize the result). Before the method is called, data should contain a
266 complex sequence in the same format as the result of $(D rfft). It is
267 assumed that the input sequence is a discrete fourier transform of a real
268 valued sequence, so the elements of the input sequence not stored in data
269 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method
270 completes, the array contains the real part of the inverse discrete fourier
271 transform. The imaginary part is known to be equal to 0.
272 
273 The length of the array must be equal to n and the array must be properly
274 aligned. To obtain a properly aligned array you can use $(D allocate).
275  */
276     void irfft(T[] data)
277     {
278         assert(data.length == (st!1 << log2n));
279         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
280 
281         impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
282         impl.interleave(data.ptr, log2n, itable);
283     }
284 
285 /// An alias for Fft!T.allocate
286     alias Fft!(T).allocate allocate;
287 
288 /// An alias for Fft!T.deallocate
289     alias Fft!(T).deallocate deallocate;
290 
291 /// An alias for Fft!T.scale
292     alias Fft!(T).scale scale;
293 
294     /// An alias for Fft!T.alignment
295     alias Fft!(T).alignment alignment;
296 
297     @property complex(){ return _complex; }
298 }
299 
300 
301 private:
302 
303 /// Returns: `true` if the pointer is suitably aligned.
304 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
305 {
306     assert(alignment != 0);
307     return ( cast(size_t)p & (alignment - 1) ) == 0;
308 }
309 
310 /// Allocates an aligned memory chunk.
311 /// Functionally equivalent to Visual C++ _aligned_malloc.
312 /// Do not mix allocations with different alignment.
313 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
314 {
315     assert(alignment != 0);
316 
317     // Short-cut and use the C allocator to avoid overhead if no alignment
318     if (alignment == 1)
319     {
320         // C99: 
321         // Implementation-defined behavior
322         // Whether the calloc, malloc, and realloc functions return a null pointer 
323         // or a pointer to an allocated object when the size requested is zero.
324         void* res = malloc(size);
325         if (size == 0)
326             return null;
327     }
328 
329     if (size == 0)
330         return null;
331 
332     size_t request = requestedSize(size, alignment);
333     void* raw = malloc(request);
334 
335     if (request > 0 && raw == null) // malloc(0) can validly return anything
336         onOutOfMemoryError();
337 
338     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
339 }
340 
341 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
342 /// Functionally equivalent to Visual C++ _aligned_free.
343 /// Do not mix allocations with different alignment.
344 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
345 {
346     assert(alignment != 0);
347 
348     // Short-cut and use the C allocator to avoid overhead if no alignment
349     if (alignment == 1)
350         return free(aligned);
351 
352     // support for free(NULL)
353     if (aligned is null)
354         return;
355 
356     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
357     free(*rawLocation);
358 }
359 
360 /// Returns: next pointer aligned with alignment bytes.
361 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc
362 {
363     return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
364 }
365 
366 // Returns number of bytes to actually allocate when asking
367 // for a particular alignement
368 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow
369 {
370     enum size_t pointerSize = size_t.sizeof;
371     return askedSize + alignment - 1 + pointerSize * 2;
372 }
373 
374 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
375 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow
376 {
377     enum size_t pointerSize = size_t.sizeof;
378     char* start = cast(char*)raw + pointerSize * 2;
379     void* aligned = nextAlignedPointer(start, alignment);
380     void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
381     *rawLocation = raw;
382     size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
383     *sizeLocation = size;
384     return aligned;
385 }
386 
387 // Returns: x, multiple of powerOfTwo, so that x >= n.
388 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow
389 {
390     // check power-of-two
391     assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0));
392 
393     size_t mask = ~(powerOfTwo - 1);
394     return (n + powerOfTwo - 1) & mask;
395 }
396 
397 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc.
398 /// Functionally equivalent to Visual C++ _aligned_realloc.
399 /// Do not mix allocations with different alignment.
400 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow
401 {
402     assert(isPointerAligned(aligned, alignment));
403 
404     // If you fail here, it can mean you've used an uninitialized AlignedBuffer.
405     assert(alignment != 0);
406 
407     // Short-cut and use the C allocator to avoid overhead if no alignment
408     if (alignment == 1)
409     {
410         void* res = realloc(aligned, size);
411 
412         // C99: 
413         // Implementation-defined behavior
414         // Whether the calloc, malloc, and realloc functions return a null pointer 
415         // or a pointer to an allocated object when the size requested is zero.
416         if (size == 0)
417             return null;
418 
419         return res;
420     }
421 
422     if (aligned is null)
423         return alignedMalloc(size, alignment);
424 
425     if (size == 0)
426     {
427         alignedFree(aligned, alignment);
428         return null;
429     }
430 
431     size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
432 
433 
434     void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
435     size_t request = requestedSize(size, alignment);
436 
437     // Heuristic: if a requested size is within 50% to 100% of what is already allocated
438     //            then exit with the same pointer
439     if ( (previousSize < request * 4) && (request <= previousSize) )
440         return aligned;
441 
442     void* newRaw = malloc(request);
443     static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
444     {
445         if (request > 0 && newRaw == null) // realloc(0) can validly return anything
446             onOutOfMemoryError();
447     }
448 
449     void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment);
450     size_t minSize = size < previousSize ? size : previousSize;
451     memcpy(newAligned, aligned, minSize);
452 
453     // Free previous data
454     alignedFree(aligned, alignment);
455     isPointerAligned(newAligned, alignment);
456     return newAligned;
457 }