1 // Copyright Jernej Krempuš 2012 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 module pfft.pfft; 7 import core.stdc.stdlib; 8 import core.stdc..string: memcpy; 9 import core.exception, 10 core.bitop, 11 std.array; 12 13 template Import(TT) 14 { 15 static if(is(TT == float)) 16 import impl = pfft.impl_float; 17 else static if(is(TT == double)) 18 import impl = pfft.impl_double; 19 else static if(is(TT == real)) 20 import impl = pfft.impl_real; 21 else 22 static assert(0, "Not implemented"); 23 } 24 25 template st(alias a){ enum st = cast(size_t) a; } 26 27 /** 28 A class for calculating discrete fourier transform. The methods of this class 29 use split format for complex data. This means that a complex data set is 30 represented as two arrays - one for the real part and one for the imaginary 31 part. An instance of this class can only be used for transforms of one 32 particular size. The template parameter is the floating point type that the 33 methods of the class will operate on. 34 35 Example: 36 --- 37 import std.stdio, std.conv, std.exception; 38 import pfft.pfft; 39 40 void main(string[] args) 41 { 42 auto n = to!int(args[1]); 43 enforce((n & (n-1)) == 0, "N must be a power of two."); 44 45 alias Fft!float F; 46 47 F f; 48 f.initialize(n); 49 50 auto re = F.allocate(n); 51 auto im = F.allocate(n); 52 53 foreach(i, _; re) 54 readf("%s %s\n", &re[i], &im[i]); 55 56 f.fft(re, im); 57 58 foreach(i, _; re) 59 writefln("%s\t%s", re[i], im[i]); 60 } 61 --- 62 */ 63 struct Fft(T) 64 { 65 public: 66 nothrow: 67 @nogc: 68 mixin Import!T; 69 70 int log2n; 71 impl.Table table; 72 73 /** 74 The Fft constructor. The parameter is the size of data sets that $(D fft) and 75 $(D ifft) will operate on. I will refer to this number as n in the rest of the 76 documentation for this class.Tables used in fft and ifft are calculated in the 77 constructor. 78 */ 79 void initialize(size_t n) 80 { 81 assert((n & (n - 1)) == 0); 82 log2n = bsf(n); 83 auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64); 84 table = impl.fft_table(log2n, mem); 85 assert(mem == table); 86 } 87 88 ~this() 89 { 90 alignedFree(table, 64); 91 } 92 93 /** 94 Calculates discrete fourier transform. $(D_PARAM re) should contain the real 95 part of the data and $(D_PARAM im) the imaginary part of the data. The method 96 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im). 97 Both arrays must be properly aligned - to obtain a properly aligned array you can 98 use $(D allocate). 99 */ 100 void fft(T[] re, T[] im) 101 { 102 assert(re.length == im.length); 103 assert(re.length == (st!1 << log2n)); 104 assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0); 105 assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0); 106 107 impl.fft(re.ptr, im.ptr, log2n, table); 108 } 109 110 /** 111 Calculates inverse discrete fourier transform scaled by n. The arguments have 112 the same role as they do in $(D fft). 113 */ 114 void ifft(T[] re, T[] im) 115 { 116 fft(im, re); 117 } 118 119 /** 120 Returns requited alignment for use with $(D fft), $(D ifft) and 121 $(D scale) methods. 122 */ 123 static size_t alignment(size_t n) 124 { 125 return impl.alignment(n); 126 } 127 128 /** 129 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and 130 $(D scale) methods. 131 */ 132 static T[] allocate(size_t n) 133 { 134 T* r = cast(T*) alignedMalloc(n, alignment(n)); 135 assert(((impl.alignment(n) - 1) & cast(size_t) r) == 0); 136 return r[0..n]; 137 } 138 139 /** 140 Deallocates an array allocated with `allocate`. 141 */ 142 static void deallocate(T[] arr) 143 { 144 size_t n = arr.length; 145 alignedFree(arr.ptr, alignment(n)); 146 } 147 148 149 /** 150 Scales an array data by factor k. The array must be properly aligned. To obtain 151 a properly aligned array, use $(D allocate). 152 */ 153 static void scale(T[] data, T k) 154 { 155 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 156 impl.scale(data.ptr, data.length, k); 157 } 158 159 } 160 161 /** 162 A class for calculating real discrete fourier transform. The methods of this 163 class use split format for complex data. This means that complex data set is 164 represented as two arrays - one for the real part and one for the imaginary 165 part. An instance of this class can only be used for transforms of one 166 particular size. The template parameter is the floating point type that the 167 methods of the class will operate on. 168 169 Example: 170 --- 171 import std.stdio, std.conv, std.exception; 172 import pfft.pfft; 173 174 void main(string[] args) 175 { 176 auto n = to!int(args[1]); 177 enforce((n & (n-1)) == 0, "N must be a power of two."); 178 179 alias Rfft!float F; 180 181 F f; 182 f.initialize(n); 183 184 auto data = F.allocate(n); 185 186 foreach(ref e; data) 187 readf("%s\n", &e); 188 189 f.rfft(data); 190 191 foreach(i; 0 .. n / 2 + 1) 192 writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]); 193 } 194 --- 195 */ 196 struct Rfft(T) 197 { 198 public: 199 nothrow: 200 @nogc: 201 mixin Import!T; 202 203 int log2n; 204 Fft!T _complex; 205 impl.RTable rtable; 206 impl.ITable itable; 207 208 /** 209 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will 210 operate on. I will refer to this number as n in the rest of the documentation 211 for this class. All tables used in rfft are calculated in the constructor. 212 */ 213 void initialize(size_t n) 214 { 215 assert((n & (n - 1)) == 0); 216 log2n = bsf(n); 217 218 _complex.initialize(n / 2); 219 220 auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64); 221 rtable = impl.rfft_table(log2n, mem); 222 assert(mem == rtable); 223 224 mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64); 225 itable = impl.interleave_table(log2n, mem); 226 assert(mem == itable); 227 } 228 229 ~this() 230 { 231 alignedFree(rtable, 64); 232 alignedFree(itable, 64); 233 } 234 235 /** 236 Calculates discrete fourier transform of the real valued sequence in data. 237 The method operates in place. When the method completes, data contains the 238 result. First $(I n / 2 + 1) elements contain the real part of the result and 239 the rest contains the imaginary part. Imaginary parts at position 0 and 240 $(I n / 2) are known to be equal to 0 and are not stored, so the content of 241 data looks like this: 242 243 $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1)) 244 245 246 The elements of the result at position greater than n / 2 can be trivially 247 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds 248 because the input sequence is real. 249 250 251 The length of the array must be equal to n and the array must be properly 252 aligned. To obtain a properly aligned array you can use $(D allocate). 253 */ 254 void rfft(T[] data) 255 { 256 assert(data.length == (st!1 << log2n)); 257 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 258 259 impl.deinterleave(data.ptr, log2n, itable); 260 impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 261 } 262 263 /** 264 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale) 265 to normalize the result). Before the method is called, data should contain a 266 complex sequence in the same format as the result of $(D rfft). It is 267 assumed that the input sequence is a discrete fourier transform of a real 268 valued sequence, so the elements of the input sequence not stored in data 269 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method 270 completes, the array contains the real part of the inverse discrete fourier 271 transform. The imaginary part is known to be equal to 0. 272 273 The length of the array must be equal to n and the array must be properly 274 aligned. To obtain a properly aligned array you can use $(D allocate). 275 */ 276 void irfft(T[] data) 277 { 278 assert(data.length == (st!1 << log2n)); 279 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 280 281 impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 282 impl.interleave(data.ptr, log2n, itable); 283 } 284 285 /// An alias for Fft!T.allocate 286 alias Fft!(T).allocate allocate; 287 288 /// An alias for Fft!T.deallocate 289 alias Fft!(T).deallocate deallocate; 290 291 /// An alias for Fft!T.scale 292 alias Fft!(T).scale scale; 293 294 /// An alias for Fft!T.alignment 295 alias Fft!(T).alignment alignment; 296 297 @property complex(){ return _complex; } 298 } 299 300 301 private: 302 303 /// Returns: `true` if the pointer is suitably aligned. 304 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc 305 { 306 assert(alignment != 0); 307 return ( cast(size_t)p & (alignment - 1) ) == 0; 308 } 309 310 /// Allocates an aligned memory chunk. 311 /// Functionally equivalent to Visual C++ _aligned_malloc. 312 /// Do not mix allocations with different alignment. 313 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 314 { 315 assert(alignment != 0); 316 317 // Short-cut and use the C allocator to avoid overhead if no alignment 318 if (alignment == 1) 319 { 320 // C99: 321 // Implementation-defined behavior 322 // Whether the calloc, malloc, and realloc functions return a null pointer 323 // or a pointer to an allocated object when the size requested is zero. 324 void* res = malloc(size); 325 if (size == 0) 326 return null; 327 } 328 329 if (size == 0) 330 return null; 331 332 size_t request = requestedSize(size, alignment); 333 void* raw = malloc(request); 334 335 if (request > 0 && raw == null) // malloc(0) can validly return anything 336 onOutOfMemoryError(); 337 338 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 339 } 340 341 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 342 /// Functionally equivalent to Visual C++ _aligned_free. 343 /// Do not mix allocations with different alignment. 344 void alignedFree(void* aligned, size_t alignment) nothrow @nogc 345 { 346 assert(alignment != 0); 347 348 // Short-cut and use the C allocator to avoid overhead if no alignment 349 if (alignment == 1) 350 return free(aligned); 351 352 // support for free(NULL) 353 if (aligned is null) 354 return; 355 356 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof); 357 free(*rawLocation); 358 } 359 360 /// Returns: next pointer aligned with alignment bytes. 361 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc 362 { 363 return cast(void*)nextMultipleOf(cast(size_t)(start), alignment); 364 } 365 366 // Returns number of bytes to actually allocate when asking 367 // for a particular alignement 368 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow 369 { 370 enum size_t pointerSize = size_t.sizeof; 371 return askedSize + alignment - 1 + pointerSize * 2; 372 } 373 374 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it) 375 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow 376 { 377 enum size_t pointerSize = size_t.sizeof; 378 char* start = cast(char*)raw + pointerSize * 2; 379 void* aligned = nextAlignedPointer(start, alignment); 380 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize); 381 *rawLocation = raw; 382 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize); 383 *sizeLocation = size; 384 return aligned; 385 } 386 387 // Returns: x, multiple of powerOfTwo, so that x >= n. 388 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow 389 { 390 // check power-of-two 391 assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0)); 392 393 size_t mask = ~(powerOfTwo - 1); 394 return (n + powerOfTwo - 1) & mask; 395 } 396 397 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc. 398 /// Functionally equivalent to Visual C++ _aligned_realloc. 399 /// Do not mix allocations with different alignment. 400 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow 401 { 402 assert(isPointerAligned(aligned, alignment)); 403 404 // If you fail here, it can mean you've used an uninitialized AlignedBuffer. 405 assert(alignment != 0); 406 407 // Short-cut and use the C allocator to avoid overhead if no alignment 408 if (alignment == 1) 409 { 410 void* res = realloc(aligned, size); 411 412 // C99: 413 // Implementation-defined behavior 414 // Whether the calloc, malloc, and realloc functions return a null pointer 415 // or a pointer to an allocated object when the size requested is zero. 416 if (size == 0) 417 return null; 418 419 return res; 420 } 421 422 if (aligned is null) 423 return alignedMalloc(size, alignment); 424 425 if (size == 0) 426 { 427 alignedFree(aligned, alignment); 428 return null; 429 } 430 431 size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2); 432 433 434 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof); 435 size_t request = requestedSize(size, alignment); 436 437 // Heuristic: if a requested size is within 50% to 100% of what is already allocated 438 // then exit with the same pointer 439 if ( (previousSize < request * 4) && (request <= previousSize) ) 440 return aligned; 441 442 void* newRaw = malloc(request); 443 static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014 444 { 445 if (request > 0 && newRaw == null) // realloc(0) can validly return anything 446 onOutOfMemoryError(); 447 } 448 449 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment); 450 size_t minSize = size < previousSize ? size : previousSize; 451 memcpy(newAligned, aligned, minSize); 452 453 // Free previous data 454 alignedFree(aligned, alignment); 455 isPointerAligned(newAligned, alignment); 456 return newAligned; 457 }