1 //          Copyright Jernej Krempuš 2012
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module pfft.pfft;
7 import core.stdc.stdlib;
8 import core.stdc..string: memcpy;
9 import core.exception,
10        core.bitop,
11        std.array;
12 
13 template Import(TT)
14 {
15     static if(is(TT == float))
16         import impl = pfft.impl_float;
17     else static if(is(TT == double))
18         import impl = pfft.impl_double;
19     else static if(is(TT == real))
20         import impl = pfft.impl_real;
21     else
22         static assert(0, "Not implemented");
23 }
24 
25 template st(alias a){ enum st = cast(size_t) a; }
26 
27 /**
28 A class for calculating discrete fourier transform. The methods of this class
29 use split format for complex data. This means that a complex data set is
30 represented as two arrays - one for the real part and one for the imaginary
31 part. An instance of this class can only be used for transforms of one
32 particular size. The template parameter is the floating point type that the
33 methods of the class will operate on.
34 
35 Example:
36 ---
37 import std.stdio, std.conv, std.exception;
38 import pfft.pfft;
39 
40 void main(string[] args)
41 {
42     auto n = to!int(args[1]);
43     enforce((n & (n-1)) == 0, "N must be a power of two.");
44 
45     alias Fft!float F;
46 
47     F f;
48     f.initialize(n);
49 
50     auto re = F.allocate(n);
51     auto im = F.allocate(n);
52 
53     foreach(i, _; re)
54         readf("%s %s\n", &re[i], &im[i]);
55 
56     f.fft(re, im);
57 
58     foreach(i, _; re)
59         writefln("%s\t%s", re[i], im[i]);
60 }
61 ---
62  */
63 struct Fft(T)
64 {
65 public:
66 nothrow:
67 @nogc:
68     mixin Import!T;
69 
70     int log2n;
71     impl.Table table;
72 
73 /**
74 The Fft constructor. The parameter is the size of data sets that $(D fft) and
75 $(D ifft) will operate on. I will refer to this number as n in the rest of the
76 documentation for this class.Tables used in fft and ifft are calculated in the
77 constructor.
78  */
79     void initialize(size_t n)
80     {
81         assert((n & (n - 1)) == 0);
82         log2n  = bsf(n);
83         auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64);
84         table = impl.fft_table(log2n, mem);
85         assert(mem == table);
86     }
87 
88     ~this()
89     {
90         alignedFree(table, 64);
91     }
92 
93 /**
94 Calculates discrete fourier transform. $(D_PARAM re) should contain the real
95 part of the data and $(D_PARAM im) the imaginary part of the data. The method
96 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im).
97 Both arrays must be properly aligned - to obtain a properly aligned array you can
98 use $(D allocate).
99  */
100     void fft(T[] re, T[] im)
101     {
102         assert(re.length == im.length);
103         assert(re.length == (st!1 << log2n));
104         assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0);
105         assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0);
106 
107         impl.fft(re.ptr, im.ptr, log2n, table);
108     }
109 
110 /**
111 Calculates inverse discrete fourier transform scaled by n. The arguments have
112 the same role as they do in $(D fft).
113  */
114     void ifft(T[] re, T[] im)
115     {
116         fft(im, re);
117     }
118 
119 /**
120     Returns requited alignment for use with $(D fft), $(D ifft) and
121     $(D scale) methods.
122  */
123     static size_t alignment(size_t n)
124     {
125         return impl.alignment(n);
126     }
127 
128 /**
129 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and
130 $(D scale) methods.
131  */
132     static T[] allocate(size_t n)
133     {
134         size_t bytes = n * T.sizeof;
135         T* r = cast(T*) alignedMalloc(bytes, alignment(bytes));
136         assert(((impl.alignment(bytes) - 1) & cast(size_t) r) == 0);
137         return r[0..n];
138     }
139 
140 /**
141 Deallocates an array allocated with `allocate`.
142 */
143     static void deallocate(T[] arr)
144     {
145         size_t n = arr.length;
146         alignedFree(arr.ptr, alignment(n));
147     }
148 
149 
150 /**
151 Scales an array data by factor k. The array must be properly aligned. To obtain
152 a properly aligned array, use $(D allocate).
153  */
154     static void scale(T[] data, T k)
155     {
156         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
157         impl.scale(data.ptr, data.length, k);
158     }
159 
160 }
161 
162 /**
163 A class for calculating real discrete fourier transform. The methods of this
164 class use split format for complex data. This means that complex data set is
165 represented as two arrays - one for the real part and one for the imaginary
166 part. An instance of this class can only be used for transforms of one
167 particular size. The template parameter is the floating point type that the
168 methods of the class will operate on.
169 
170 Example:
171 ---
172 import std.stdio, std.conv, std.exception;
173 import pfft.pfft;
174 
175 void main(string[] args)
176 {
177     auto n = to!int(args[1]);
178     enforce((n & (n-1)) == 0, "N must be a power of two.");
179 
180     alias Rfft!float F;
181 
182     F f;
183     f.initialize(n);
184 
185     auto data = F.allocate(n);
186 
187     foreach(ref e; data)
188         readf("%s\n", &e);
189 
190     f.rfft(data);
191 
192     foreach(i; 0 .. n / 2 + 1)
193         writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]);
194 }
195 ---
196  */
197 struct Rfft(T)
198 {
199 public:
200 nothrow:
201 @nogc:
202     mixin Import!T;
203 
204     int log2n;
205     Fft!T _complex;
206     impl.RTable rtable;
207     impl.ITable itable;
208 
209 /**
210 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will
211 operate on. I will refer to this number as n in the rest of the documentation
212 for this class. All tables used in rfft are calculated in the constructor.
213  */
214     void initialize(size_t n)
215     {
216         assert((n & (n - 1)) == 0);
217         log2n  = bsf(n);
218 
219         _complex.initialize(n / 2);
220 
221         auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64);
222         rtable = impl.rfft_table(log2n, mem);
223         assert(mem == rtable);
224 
225         mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64);
226         itable = impl.interleave_table(log2n, mem);
227         assert(mem == itable);
228     }
229 
230     ~this()
231     {
232         alignedFree(rtable, 64);
233         alignedFree(itable, 64);
234     }
235 
236 /**
237 Calculates discrete fourier transform of the real valued sequence in data.
238 The method operates in place. When the method completes, data contains the
239 result. First $(I n / 2 + 1) elements contain the real part of the result and
240 the rest contains the imaginary part. Imaginary parts at position 0 and
241 $(I n / 2) are known to be equal to 0 and are not stored, so the content of
242 data looks like this:
243 
244  $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1))
245 
246 
247 The elements of the result at position greater than n / 2 can be trivially
248 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds
249 because the input sequence is real.
250 
251 
252 The length of the array must be equal to n and the array must be properly
253 aligned. To obtain a properly aligned array you can use $(D allocate).
254  */
255     void rfft(T[] data)
256     {
257         assert(data.length == (st!1 << log2n));
258         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
259 
260         impl.deinterleave(data.ptr, log2n, itable);
261         impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
262     }
263 
264 /**
265 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale)
266 to normalize the result). Before the method is called, data should contain a
267 complex sequence in the same format as the result of $(D rfft). It is
268 assumed that the input sequence is a discrete fourier transform of a real
269 valued sequence, so the elements of the input sequence not stored in data
270 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method
271 completes, the array contains the real part of the inverse discrete fourier
272 transform. The imaginary part is known to be equal to 0.
273 
274 The length of the array must be equal to n and the array must be properly
275 aligned. To obtain a properly aligned array you can use $(D allocate).
276  */
277     void irfft(T[] data)
278     {
279         assert(data.length == (st!1 << log2n));
280         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
281 
282         impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
283         impl.interleave(data.ptr, log2n, itable);
284     }
285 
286 /// An alias for Fft!T.allocate
287     alias Fft!(T).allocate allocate;
288 
289 /// An alias for Fft!T.deallocate
290     alias Fft!(T).deallocate deallocate;
291 
292 /// An alias for Fft!T.scale
293     alias Fft!(T).scale scale;
294 
295     /// An alias for Fft!T.alignment
296     alias Fft!(T).alignment alignment;
297 
298     @property complex(){ return _complex; }
299 }
300 
301 
302 private:
303 
304 /// Returns: `true` if the pointer is suitably aligned.
305 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
306 {
307     assert(alignment != 0);
308     return ( cast(size_t)p & (alignment - 1) ) == 0;
309 }
310 
311 /// Allocates an aligned memory chunk.
312 /// Functionally equivalent to Visual C++ _aligned_malloc.
313 /// Do not mix allocations with different alignment.
314 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
315 {
316     assert(alignment != 0);
317 
318     // Short-cut and use the C allocator to avoid overhead if no alignment
319     if (alignment == 1)
320     {
321         // C99: 
322         // Implementation-defined behavior
323         // Whether the calloc, malloc, and realloc functions return a null pointer 
324         // or a pointer to an allocated object when the size requested is zero.
325         void* res = malloc(size);
326         if (size == 0)
327             return null;
328     }
329 
330     if (size == 0)
331         return null;
332 
333     size_t request = requestedSize(size, alignment);
334     void* raw = malloc(request);
335 
336     if (request > 0 && raw == null) // malloc(0) can validly return anything
337         onOutOfMemoryError();
338 
339     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
340 }
341 
342 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
343 /// Functionally equivalent to Visual C++ _aligned_free.
344 /// Do not mix allocations with different alignment.
345 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
346 {
347     assert(alignment != 0);
348 
349     // Short-cut and use the C allocator to avoid overhead if no alignment
350     if (alignment == 1)
351         return free(aligned);
352 
353     // support for free(NULL)
354     if (aligned is null)
355         return;
356 
357     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
358     free(*rawLocation);
359 }
360 
361 /// Returns: next pointer aligned with alignment bytes.
362 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc
363 {
364     return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
365 }
366 
367 // Returns number of bytes to actually allocate when asking
368 // for a particular alignement
369 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow
370 {
371     enum size_t pointerSize = size_t.sizeof;
372     return askedSize + alignment - 1 + pointerSize * 2;
373 }
374 
375 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
376 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow
377 {
378     enum size_t pointerSize = size_t.sizeof;
379     char* start = cast(char*)raw + pointerSize * 2;
380     void* aligned = nextAlignedPointer(start, alignment);
381     void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
382     *rawLocation = raw;
383     size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
384     *sizeLocation = size;
385     return aligned;
386 }
387 
388 // Returns: x, multiple of powerOfTwo, so that x >= n.
389 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow
390 {
391     // check power-of-two
392     assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0));
393 
394     size_t mask = ~(powerOfTwo - 1);
395     return (n + powerOfTwo - 1) & mask;
396 }
397 
398 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc.
399 /// Functionally equivalent to Visual C++ _aligned_realloc.
400 /// Do not mix allocations with different alignment.
401 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow
402 {
403     assert(isPointerAligned(aligned, alignment));
404 
405     // If you fail here, it can mean you've used an uninitialized AlignedBuffer.
406     assert(alignment != 0);
407 
408     // Short-cut and use the C allocator to avoid overhead if no alignment
409     if (alignment == 1)
410     {
411         void* res = realloc(aligned, size);
412 
413         // C99: 
414         // Implementation-defined behavior
415         // Whether the calloc, malloc, and realloc functions return a null pointer 
416         // or a pointer to an allocated object when the size requested is zero.
417         if (size == 0)
418             return null;
419 
420         return res;
421     }
422 
423     if (aligned is null)
424         return alignedMalloc(size, alignment);
425 
426     if (size == 0)
427     {
428         alignedFree(aligned, alignment);
429         return null;
430     }
431 
432     size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
433 
434 
435     void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
436     size_t request = requestedSize(size, alignment);
437 
438     // Heuristic: if a requested size is within 50% to 100% of what is already allocated
439     //            then exit with the same pointer
440     if ( (previousSize < request * 4) && (request <= previousSize) )
441         return aligned;
442 
443     void* newRaw = malloc(request);
444     static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
445     {
446         if (request > 0 && newRaw == null) // realloc(0) can validly return anything
447             onOutOfMemoryError();
448     }
449 
450     void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment);
451     size_t minSize = size < previousSize ? size : previousSize;
452     memcpy(newAligned, aligned, minSize);
453 
454     // Free previous data
455     alignedFree(aligned, alignment);
456     isPointerAligned(newAligned, alignment);
457     return newAligned;
458 }