1 // Copyright Jernej Krempuš 2012 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 module pfft.pfft; 7 import core.stdc.stdlib; 8 import core.stdc..string: memcpy; 9 import core.exception, 10 core.bitop, 11 std.array; 12 13 template Import(TT) 14 { 15 static if(is(TT == float)) 16 import impl = pfft.impl_float; 17 else static if(is(TT == double)) 18 import impl = pfft.impl_double; 19 else static if(is(TT == real)) 20 import impl = pfft.impl_real; 21 else 22 static assert(0, "Not implemented"); 23 } 24 25 template st(alias a){ enum st = cast(size_t) a; } 26 27 /** 28 A class for calculating discrete fourier transform. The methods of this class 29 use split format for complex data. This means that a complex data set is 30 represented as two arrays - one for the real part and one for the imaginary 31 part. An instance of this class can only be used for transforms of one 32 particular size. The template parameter is the floating point type that the 33 methods of the class will operate on. 34 35 Example: 36 --- 37 import std.stdio, std.conv, std.exception; 38 import pfft.pfft; 39 40 void main(string[] args) 41 { 42 auto n = to!int(args[1]); 43 enforce((n & (n-1)) == 0, "N must be a power of two."); 44 45 alias Fft!float F; 46 47 F f; 48 f.initialize(n); 49 50 auto re = F.allocate(n); 51 auto im = F.allocate(n); 52 53 foreach(i, _; re) 54 readf("%s %s\n", &re[i], &im[i]); 55 56 f.fft(re, im); 57 58 foreach(i, _; re) 59 writefln("%s\t%s", re[i], im[i]); 60 } 61 --- 62 */ 63 struct Fft(T) 64 { 65 public: 66 nothrow: 67 @nogc: 68 mixin Import!T; 69 70 int log2n; 71 impl.Table table; 72 73 /** 74 The Fft constructor. The parameter is the size of data sets that $(D fft) and 75 $(D ifft) will operate on. I will refer to this number as n in the rest of the 76 documentation for this class.Tables used in fft and ifft are calculated in the 77 constructor. 78 */ 79 void initialize(size_t n) 80 { 81 assert((n & (n - 1)) == 0); 82 log2n = bsf(n); 83 auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64); 84 table = impl.fft_table(log2n, mem); 85 assert(mem == table); 86 } 87 88 ~this() 89 { 90 alignedFree(table, 64); 91 } 92 93 /** 94 Calculates discrete fourier transform. $(D_PARAM re) should contain the real 95 part of the data and $(D_PARAM im) the imaginary part of the data. The method 96 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im). 97 Both arrays must be properly aligned - to obtain a properly aligned array you can 98 use $(D allocate). 99 */ 100 void fft(T[] re, T[] im) 101 { 102 assert(re.length == im.length); 103 assert(re.length == (st!1 << log2n)); 104 assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0); 105 assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0); 106 107 impl.fft(re.ptr, im.ptr, log2n, table); 108 } 109 110 /** 111 Calculates inverse discrete fourier transform scaled by n. The arguments have 112 the same role as they do in $(D fft). 113 */ 114 void ifft(T[] re, T[] im) 115 { 116 fft(im, re); 117 } 118 119 /** 120 Returns requited alignment for use with $(D fft), $(D ifft) and 121 $(D scale) methods. 122 */ 123 static size_t alignment(size_t n) 124 { 125 return impl.alignment(n); 126 } 127 128 /** 129 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and 130 $(D scale) methods. 131 */ 132 static T[] allocate(size_t n) 133 { 134 size_t bytes = n * T.sizeof; 135 T* r = cast(T*) alignedMalloc(bytes, alignment(bytes)); 136 assert(((impl.alignment(bytes) - 1) & cast(size_t) r) == 0); 137 return r[0..n]; 138 } 139 140 /** 141 Deallocates an array allocated with `allocate`. 142 */ 143 static void deallocate(T[] arr) 144 { 145 size_t n = arr.length; 146 alignedFree(arr.ptr, alignment(n)); 147 } 148 149 150 /** 151 Scales an array data by factor k. The array must be properly aligned. To obtain 152 a properly aligned array, use $(D allocate). 153 */ 154 static void scale(T[] data, T k) 155 { 156 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 157 impl.scale(data.ptr, data.length, k); 158 } 159 160 } 161 162 /** 163 A class for calculating real discrete fourier transform. The methods of this 164 class use split format for complex data. This means that complex data set is 165 represented as two arrays - one for the real part and one for the imaginary 166 part. An instance of this class can only be used for transforms of one 167 particular size. The template parameter is the floating point type that the 168 methods of the class will operate on. 169 170 Example: 171 --- 172 import std.stdio, std.conv, std.exception; 173 import pfft.pfft; 174 175 void main(string[] args) 176 { 177 auto n = to!int(args[1]); 178 enforce((n & (n-1)) == 0, "N must be a power of two."); 179 180 alias Rfft!float F; 181 182 F f; 183 f.initialize(n); 184 185 auto data = F.allocate(n); 186 187 foreach(ref e; data) 188 readf("%s\n", &e); 189 190 f.rfft(data); 191 192 foreach(i; 0 .. n / 2 + 1) 193 writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]); 194 } 195 --- 196 */ 197 struct Rfft(T) 198 { 199 public: 200 nothrow: 201 @nogc: 202 mixin Import!T; 203 204 int log2n; 205 Fft!T _complex; 206 impl.RTable rtable; 207 impl.ITable itable; 208 209 /** 210 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will 211 operate on. I will refer to this number as n in the rest of the documentation 212 for this class. All tables used in rfft are calculated in the constructor. 213 */ 214 void initialize(size_t n) 215 { 216 assert((n & (n - 1)) == 0); 217 log2n = bsf(n); 218 219 _complex.initialize(n / 2); 220 221 auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64); 222 rtable = impl.rfft_table(log2n, mem); 223 assert(mem == rtable); 224 225 mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64); 226 itable = impl.interleave_table(log2n, mem); 227 assert(mem == itable); 228 } 229 230 ~this() 231 { 232 alignedFree(rtable, 64); 233 alignedFree(itable, 64); 234 } 235 236 /** 237 Calculates discrete fourier transform of the real valued sequence in data. 238 The method operates in place. When the method completes, data contains the 239 result. First $(I n / 2 + 1) elements contain the real part of the result and 240 the rest contains the imaginary part. Imaginary parts at position 0 and 241 $(I n / 2) are known to be equal to 0 and are not stored, so the content of 242 data looks like this: 243 244 $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1)) 245 246 247 The elements of the result at position greater than n / 2 can be trivially 248 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds 249 because the input sequence is real. 250 251 252 The length of the array must be equal to n and the array must be properly 253 aligned. To obtain a properly aligned array you can use $(D allocate). 254 */ 255 void rfft(T[] data) 256 { 257 assert(data.length == (st!1 << log2n)); 258 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 259 260 impl.deinterleave(data.ptr, log2n, itable); 261 impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 262 } 263 264 /** 265 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale) 266 to normalize the result). Before the method is called, data should contain a 267 complex sequence in the same format as the result of $(D rfft). It is 268 assumed that the input sequence is a discrete fourier transform of a real 269 valued sequence, so the elements of the input sequence not stored in data 270 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method 271 completes, the array contains the real part of the inverse discrete fourier 272 transform. The imaginary part is known to be equal to 0. 273 274 The length of the array must be equal to n and the array must be properly 275 aligned. To obtain a properly aligned array you can use $(D allocate). 276 */ 277 void irfft(T[] data) 278 { 279 assert(data.length == (st!1 << log2n)); 280 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 281 282 impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 283 impl.interleave(data.ptr, log2n, itable); 284 } 285 286 /// An alias for Fft!T.allocate 287 alias Fft!(T).allocate allocate; 288 289 /// An alias for Fft!T.deallocate 290 alias Fft!(T).deallocate deallocate; 291 292 /// An alias for Fft!T.scale 293 alias Fft!(T).scale scale; 294 295 /// An alias for Fft!T.alignment 296 alias Fft!(T).alignment alignment; 297 298 @property complex(){ return _complex; } 299 } 300 301 302 private: 303 304 /// Returns: `true` if the pointer is suitably aligned. 305 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc 306 { 307 assert(alignment != 0); 308 return ( cast(size_t)p & (alignment - 1) ) == 0; 309 } 310 311 /// Allocates an aligned memory chunk. 312 /// Functionally equivalent to Visual C++ _aligned_malloc. 313 /// Do not mix allocations with different alignment. 314 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 315 { 316 assert(alignment != 0); 317 318 // Short-cut and use the C allocator to avoid overhead if no alignment 319 if (alignment == 1) 320 { 321 // C99: 322 // Implementation-defined behavior 323 // Whether the calloc, malloc, and realloc functions return a null pointer 324 // or a pointer to an allocated object when the size requested is zero. 325 void* res = malloc(size); 326 if (size == 0) 327 return null; 328 } 329 330 if (size == 0) 331 return null; 332 333 size_t request = requestedSize(size, alignment); 334 void* raw = malloc(request); 335 336 if (request > 0 && raw == null) // malloc(0) can validly return anything 337 onOutOfMemoryError(); 338 339 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 340 } 341 342 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 343 /// Functionally equivalent to Visual C++ _aligned_free. 344 /// Do not mix allocations with different alignment. 345 void alignedFree(void* aligned, size_t alignment) nothrow @nogc 346 { 347 assert(alignment != 0); 348 349 // Short-cut and use the C allocator to avoid overhead if no alignment 350 if (alignment == 1) 351 return free(aligned); 352 353 // support for free(NULL) 354 if (aligned is null) 355 return; 356 357 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof); 358 free(*rawLocation); 359 } 360 361 /// Returns: next pointer aligned with alignment bytes. 362 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc 363 { 364 return cast(void*)nextMultipleOf(cast(size_t)(start), alignment); 365 } 366 367 // Returns number of bytes to actually allocate when asking 368 // for a particular alignement 369 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow 370 { 371 enum size_t pointerSize = size_t.sizeof; 372 return askedSize + alignment - 1 + pointerSize * 2; 373 } 374 375 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it) 376 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow 377 { 378 enum size_t pointerSize = size_t.sizeof; 379 char* start = cast(char*)raw + pointerSize * 2; 380 void* aligned = nextAlignedPointer(start, alignment); 381 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize); 382 *rawLocation = raw; 383 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize); 384 *sizeLocation = size; 385 return aligned; 386 } 387 388 // Returns: x, multiple of powerOfTwo, so that x >= n. 389 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow 390 { 391 // check power-of-two 392 assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0)); 393 394 size_t mask = ~(powerOfTwo - 1); 395 return (n + powerOfTwo - 1) & mask; 396 } 397 398 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc. 399 /// Functionally equivalent to Visual C++ _aligned_realloc. 400 /// Do not mix allocations with different alignment. 401 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow 402 { 403 assert(isPointerAligned(aligned, alignment)); 404 405 // If you fail here, it can mean you've used an uninitialized AlignedBuffer. 406 assert(alignment != 0); 407 408 // Short-cut and use the C allocator to avoid overhead if no alignment 409 if (alignment == 1) 410 { 411 void* res = realloc(aligned, size); 412 413 // C99: 414 // Implementation-defined behavior 415 // Whether the calloc, malloc, and realloc functions return a null pointer 416 // or a pointer to an allocated object when the size requested is zero. 417 if (size == 0) 418 return null; 419 420 return res; 421 } 422 423 if (aligned is null) 424 return alignedMalloc(size, alignment); 425 426 if (size == 0) 427 { 428 alignedFree(aligned, alignment); 429 return null; 430 } 431 432 size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2); 433 434 435 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof); 436 size_t request = requestedSize(size, alignment); 437 438 // Heuristic: if a requested size is within 50% to 100% of what is already allocated 439 // then exit with the same pointer 440 if ( (previousSize < request * 4) && (request <= previousSize) ) 441 return aligned; 442 443 void* newRaw = malloc(request); 444 static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014 445 { 446 if (request > 0 && newRaw == null) // realloc(0) can validly return anything 447 onOutOfMemoryError(); 448 } 449 450 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment); 451 size_t minSize = size < previousSize ? size : previousSize; 452 memcpy(newAligned, aligned, minSize); 453 454 // Free previous data 455 alignedFree(aligned, alignment); 456 isPointerAligned(newAligned, alignment); 457 return newAligned; 458 }