1 // Copyright Jernej Krempuš 2012 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 module pfft.stdapi; 7 8 import core.memory, std.complex, std.traits, core.bitop, std.typetuple, std.range; 9 10 // Current GDC branch for android insists on using sinl and friends when 11 // std.math is imported, so we need to do this: 12 version(GNU) version(ARM) 13 { 14 auto generateFunctions(string fmt, string[] names) 15 { 16 auto r = ""; 17 18 foreach(name; names) 19 r ~= std.array.replace(fmt, "%s", name); 20 21 return r; 22 } 23 24 enum twoArgMathL = 25 q{ 26 extern(C) auto %sl(real a, real b) 27 { 28 return cast(real) 29 core.stdc.math.%s(cast(double) a, cast(double) b); 30 } 31 }; 32 33 enum oneArgMathL = 34 q{ 35 extern(C) auto %sl(real a) 36 { 37 return cast(real) 38 core.stdc.math.%s(cast(double) a); 39 } 40 }; 41 42 mixin(generateFunctions(oneArgMathL, 43 ["sin", "cos", "asin", "tan", "sqrt", "atan", "logb"])); 44 mixin(generateFunctions(twoArgMathL, ["remainder", "fmod", "atan2"])); 45 46 extern(C) auto modfl(real a, real* b) 47 { 48 double tmp = *b; 49 auto r = cast(real) core.stdc.math.modf(cast(double) a, &tmp); 50 *b = tmp; 51 return r; 52 } 53 54 extern(C) auto llrintl(real a) 55 { 56 return core.stdc.math.llrint(cast(double) a); 57 } 58 59 extern(C) auto remquol(real a, real b, int *i) 60 { 61 return cast(real) core.stdc.math.remquo(cast(double) a,cast(double) b, i); 62 } 63 } 64 65 template st(alias a){ enum st = cast(size_t) a; } 66 67 auto isComplexArray(R, T)() 68 { 69 static if( 70 ((isArray!R || isPointer!R) && 71 is(typeof(R.init[0].re) == T) && 72 is(typeof(R.init[0].im) == T) && 73 typeof(R.init[0]).sizeof == 2*T.sizeof)) 74 { 75 typeof(R.init[0]) e; 76 return 77 cast(typeof(e.re)*)&e == &(e.re) && 78 &(e.re) + 1 == &(e.im); 79 } 80 else 81 return false; 82 } 83 84 private final class TypedFft(TT) 85 { 86 static if(is(TT == float)) 87 import impl = pfft.impl_float; 88 else static if(is(TT == double)) 89 import impl = pfft.impl_double; 90 else static if(is(TT == real)) 91 import impl = pfft.impl_real; 92 else 93 static assert(0, "Not implemented"); 94 95 96 uint log2n; 97 impl.T* re; 98 impl.T* im; 99 alias Complex!(impl.T) C; 100 impl.Table table; 101 impl.RTable rtable; 102 103 104 this(size_t n) 105 { 106 log2n = bsf(n); 107 re = cast(impl.T*)GC.malloc(impl.T.sizeof << log2n); 108 im = cast(impl.T*)GC.malloc(impl.T.sizeof << log2n); 109 110 auto mem = GC.malloc( impl.table_size_bytes(log2n)); 111 table = impl.fft_table(log2n, mem); 112 113 mem = GC.malloc( impl.rtable_size_bytes(log2n + 1)); 114 rtable = impl.rfft_table(log2n + 1, mem); 115 } 116 117 private bool isAligned(impl.T* p) 118 { 119 return ((cast(size_t)p) & (impl.alignment(log2n) - 1)) == 0; 120 } 121 122 private bool fastInterleave(R)(R range) 123 { 124 return isComplexArray!(R, TT)() && isAligned(&(range[0].re)); 125 } 126 127 private void deinterleave_array(R)(R range) 128 { 129 static if(is(typeof(range[0].re) == impl.T)) 130 if(fastInterleave(range)) 131 return impl.deinterleave_array( 132 re, im, &(range[0].re), st!1 << log2n); 133 134 foreach(i, e; range) 135 { 136 re[i] = e.re; 137 im[i] = e.im; 138 } 139 } 140 141 private void interleave_array(R)(R range) 142 { 143 static if(is(typeof(range[0].re) == impl.T)) 144 if(fastInterleave(range)) 145 return impl.interleave_array( 146 re, im, &(range[0].re), st!1 << log2n); 147 148 foreach(i, ref e; range) 149 { 150 e.re = re[i]; 151 e.im = im[i]; 152 } 153 } 154 155 void fft(bool inverse, Ret, R)(R range, Ret buf) 156 { 157 deinterleave_array(range); 158 static if(inverse) 159 { 160 auto n = st!1 << log2n; 161 impl.fft(im, re, log2n, table); 162 163 impl.scale(re, n, (cast(TT) 1) / n); 164 impl.scale(im, n, (cast(TT) 1) / n); 165 } 166 else 167 impl.fft(re, im, log2n, table); 168 169 interleave_array(buf); 170 } 171 172 void rfft(Ret, R)(R range, Ret buf) 173 { 174 deinterleave_array(cast(Complex!(ElementType!R)[])range); 175 impl.rfft(re, im, log2n + 1, table, rtable); 176 interleave_array(buf); 177 178 auto n = st!1 << log2n; 179 buf[n] = Complex!TT(buf[0].im, 0); 180 buf[0].im = 0; 181 182 foreach(i; 1 .. n) 183 { 184 buf[2*n - i].re = buf[i].re; 185 buf[2*n - i].im = -buf[i].im; 186 } 187 } 188 189 C[] fft(bool inverse, R)(R range) 190 { 191 auto return_buf = cast(C*)GC.malloc(C.sizeof << log2n); 192 fft!inverse(range, return_buf); 193 return return_buf[0 .. (1 << log2n)]; 194 } 195 196 C[] rfft(R)(R range) 197 { 198 auto n = st!1 << log2n; 199 auto return_buf = cast(C*)GC.malloc(C.sizeof << (log2n + 1)); 200 rfft(range, return_buf[0 .. 2 * n]); 201 return return_buf[0 .. 2 * n]; 202 } 203 204 static C[] allocate(size_t n) 205 { 206 auto r = cast(C*) GC.malloc(n * C.sizeof); 207 assert(((impl.alignment(bsr(n)) - 1) & cast(size_t) r) == 0); 208 return r[0 .. n]; 209 } 210 } 211 212 213 /** 214 A class for calculating discrete fourier transforms using fast fourier 215 transform. This class mimics std.numeric.Fft, but works a bit differently 216 internally. The Fft in phobos does all the initialization when the 217 constructor is called. Because pfft uses different tables for each 218 combination of type and size, it can't do all the initialization up front. 219 Instead, it calculates a table for a combination of type $(D T) and size $(I n) 220 the first time the $(D fft) method is called with a template parameter $(D T) 221 and a parameter of size $(I n) and then stores this table for later use. 222 223 Example: 224 --- 225 import std.stdio, std.conv, std.exception, std.complex; 226 import pfft.stdapi; 227 228 void main(string[] args) 229 { 230 auto n = to!int(args[1]); 231 232 enforce((n & (n-1)) == 0, "N must be a power of two."); 233 234 auto f = new Fft(n); 235 auto data = Fft.allocate!(double)(n); 236 237 foreach(ref e; data) 238 readf("%s %s\n", &e.re, &e.im); 239 240 auto ft = f.fft!double(data); 241 242 foreach(e; ft) 243 writefln("%s %s", e.re, e.im); 244 } 245 --- 246 */ 247 final class Fft 248 { 249 import std.variant; 250 251 private enum nSizes = 8 * size_t.sizeof; 252 private void*[3 * nSizes] cache; 253 254 private auto impl(T)(size_t n) 255 { 256 static if(is(T == float)) 257 auto i = bsf(n); 258 else static if(is(T == double)) 259 auto i = nSizes + bsf(n); 260 else 261 auto i = 2 * nSizes + bsf(n); 262 263 if(cache[i] ) 264 return cast(TypedFft!T) cache[i]; 265 else 266 { 267 auto t = new TypedFft!T(n); 268 cache[i] = cast(void*)t; 269 return t; 270 } 271 } 272 273 auto numberOfElements(R)(R r) 274 { 275 static if(hasLength!R) 276 return r.length; 277 else static if(isForwardRange!R) 278 return walkLength(r.save()); 279 else 280 static assert(false, 281 "Can't get the length of the range without consumming it."); 282 } 283 284 /** 285 Fft constructor. nmax is there just for compatibility with std.numeric.Fft. 286 */ 287 this(size_t nmax = size_t.init) { } 288 289 private Complex!(T)[] fftTemplate(bool inverse, T, R)(R r) 290 { 291 auto n = numberOfElements(r); 292 assert((n & (n - 1)) == 0); 293 return impl!T(n).fft!inverse(r); 294 } 295 296 /** 297 Computes the discrete fourier transform of data in r and returns it. Data in 298 r isn't changed. $(D R) must be a forward range or a range with a length property. 299 It must have complex or floating point elements. Here a complex type is a type 300 with assignable floating point properties $(D re) and $(D im). The number of elements 301 in r must be a power of two. $(D T) must be a floating point type. The length 302 of the returned array is the same as the number of elements in r. 303 */ 304 Complex!(T)[] fft(T, R)(R r) if(!isNumeric!(ElementType!R)) 305 { 306 return fftTemplate!false(r); 307 } 308 309 Complex!(T)[] fft(T, R)(R r) if(isNumeric!(ElementType!R)) 310 { 311 auto n = numberOfElements(r); 312 assert((n & (n - 1)) == 0); 313 return impl!T(n / 2).rfft(r); 314 } 315 316 private void fftTemplate(bool inverse, R, Ret)(R r, Ret ret) 317 { 318 r = r.save(); 319 320 auto n = numberOfElements(r); 321 assert((n & (n - 1)) == 0); 322 impl!(typeof(ret[0].re))(n).fft!inverse(r, ret); 323 } 324 325 /** 326 Computes the discrete fourier transform of data in r and stores the result in 327 the user provided buffer ret. Data in r isn't changed. $(D R) must be a forward 328 range or a range with a length property. It must have complex or floating point 329 elements. Here a complex type is a type with assignable floating point properties 330 $(D re) and $(D im). Ret must be an input range with complex elements. r and ret must 331 have the same number of elements and that number must be a power of two. 332 */ 333 void fft(R, Ret)(R r, Ret ret) if(!isNumeric!(ElementType!R)) 334 { 335 fftTemplate!false(r, ret); 336 } 337 338 auto fft(R, Ret)(R r, Ret ret) if(isNumeric!(ElementType!R)) 339 { 340 r = r.save(); 341 auto n = numberOfElements(r); 342 assert((n & (n - 1)) == 0); 343 impl!(typeof(ret[0].re))(r.length / 2).rfft(r, ret); 344 } 345 346 347 /** 348 Computes the inverse discrete fourier transform of data in r and returns it. 349 Data in r isn't changed. $(D R) must be a forward range or a range with a 350 length property. It must have complex or floating point elements. Here a complex 351 type is a type with assignable floating point properties $(D re) and $(D im). The number 352 of elements in r must be a power of two. T must be a floating point 353 type. The length of the returned array is the same as the number of elements in 354 r. 355 */ 356 Complex!(T)[] inverseFft(T, R)(R r) 357 { 358 return fftTemplate!true(r); 359 } 360 361 /** 362 Computes the inverse discrete fourier transform of data in r and stores the 363 result in the user provided buffer ret. Data in r isn't changed. R must be a 364 forward range with complex or floating point elements.Here a complex type is 365 a type with assignable properties $(D re) and $(D im). Ret must be an input range with 366 complex elements. r and ret must have the same number of elements and that 367 number must be a power of two. 368 */ 369 void inverseFft(R, Ret)(R r, Ret ret) 370 { 371 fftTemplate!true(r, ret); 372 } 373 374 /** 375 Allocates an array of size n aligned appropriately for use as parameters to 376 $(D fft) methods. Both fft methods will still work correctly even if the 377 parameters are not propperly aligned (or even if they aren't arrays at all), 378 they will just be a bit slower. 379 */ 380 static T[] allocate(T)(size_t n) 381 { 382 assert((n & (n - 1)) == 0); 383 auto r = cast(T*) GC.malloc(n * T.sizeof); 384 return r[0 .. n]; 385 } 386 } 387