Periodogram.cxx
1 #include "FFTtools.h"
2 #include <fftw3.h>
3 #include "TMath.h"
4 #include "TH2.h"
5 
6 
7 #ifdef __APPLE__
8 #define SINCOS __sincos
9 #else
10 #define SINCOS sincos
11 #endif
12 
13 #ifdef ENABLE_VECTORIZE
14 #include "vectormath_trig.h"
15 #ifdef __AVX__
16 #define VEC Vec4d
17 #define VEC_T double
18 #define VEC_N 4
19 #define GATHER_REAL gather4d<0,2,4,6>
20 #define GATHER_IM gather4d<1,3,5,7>
21 static VEC INDEX (0,1,2,3);
22 #else
23 #define VEC Vec2d
24 #define VEC_T double
25 #define VEC_N 2
26 #define GATHER_REAL gather2d<0,2>
27 #define GATHER_IM gather2d<1,3>
28 static VEC INDEX (0,1);
29 #endif
30 
31 #endif
32 
33 /* Each point gets moved to 4 nearby evenly-spaced points */
34 static unsigned factorial_table[] =
35  { 0,
36  1,
37  1,
38  2,
39  6,
40  24,
41  120,
42  720,
43  5040,
44  40320,
45  362880,
46  3628800,
47  39916800,
48  479001600
49  };
50 
51 /* lagrange interpolation.. I think. based on NR code*/
52 void extirpolate(double y, int n, double *yys, double x, int extirpolation_factor)
53 {
54  // double * ys = (double*) __builtin_assume_aligned(yys, 32);
55  //
56 #if ( __clang__major__ >3 || (__clang_major__==3 && __clang_minor__ >=6) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) || (__GNUC__ > 4))
57  double * ys = (double*) __builtin_assume_aligned(yys, 32);
58 #else
59  double * ys = (double*) yys;
60 #endif
61 
62  int ix = int(x);
63 
64  extirpolation_factor = std::max(0,extirpolation_factor);
65  extirpolation_factor = std::min(sizeof(factorial_table)/sizeof(unsigned) - 1, size_t(extirpolation_factor));
66 
67  // if it exactly equals a value, just set it
68  if (__builtin_expect(x == ix,0)) ys[ix-1] += y;
69  else
70  {
71  int ilo = std::min( std::max( int(x - 0.5 * extirpolation_factor),0),int(n-extirpolation_factor)); //make sure we dont' exceed bounds
72  int ihi = ilo +extirpolation_factor ;
73 
74  double fac = x - ilo -1;
75 
76  //lagrange interpolation is ugly
77  for (int j = ilo+1; j < ihi; j++)
78  {
79  fac *= (x-j-1);
80  }
81 
82  int nden = factorial_table[extirpolation_factor];
83  ys[ihi-1] += y * fac / (nden * (x- ihi));
84 
85  for (int j = ihi - 1; j > ilo; j--)
86  {
87  nden = (nden / (j - ilo)) * (j -ihi);
88  ys[j-1] += y * fac / (nden * (x-j));
89  }
90  }
91 }
92 
93 
94 /* delegate so that can use optimization attributes */
95 
96 static TGraph * _lombScarglePeriodogram(int n, double dt, const double * __restrict x, const double * __restrict y, double oversample_factor = 4 ,
97  double high_factor = 2, TGraph * replaceme = 0, int extirpolation_factor = 4)
98 #ifdef __clang__
99  /* For OS X */
100  // As best I can tell this would be the correct syntax for the fast math optimization with llvm
101  // but it seems to not be supported :(
102  // so for now we will just disable the fast math optimization for periodogram
103  // [[gnu::optimize("fast-math")]];
104  ;
105 #else
106  __attribute((__optimize__("fast-math","tree-vectorize"))); /* enable associativity and other things that help autovectorize */
107 #endif
108 
109 TGraph * _lombScarglePeriodogram(int n, double dt, const double * __restrict x, const double * __restrict y , double oversample_factor, double high_factor, TGraph * out, int extirpolation_factor)
110 {
111 
112 
113  int nout = n * oversample_factor * high_factor/2;
114 
115 
116  if (out)
117  {
118  if (out->GetN() != nout) out->Set(nout);
119  }
120  else
121  {
122  out = new TGraph(nout);
123  }
124 
125  int nfreq = n *oversample_factor * high_factor * extirpolation_factor;
126  int nwork = 2 * nfreq;
127 
128 
129  /* compute mean */
130  double mean = 0;
131  for (int i = 0; i < n; i++)
132  {
133  mean += y[i];
134  }
135 
136 
137  mean /= n;
138 
139 
140 
141  //create and zero workspace
142  double wk1[nwork] __attribute__((aligned(32)));
143  double wk2[nwork] __attribute__((aligned(32)));
144  memset(wk1,0, sizeof(wk1));
145  memset(wk2,0, sizeof(wk2));
146 
147  dt = dt ? dt : (x[n-1] - x[0]) / n;
148  double range = n * dt;
149 
150 
151  double scale_factor = 2 * nfreq / oversample_factor / range;
152  for (int i = 0; i < n; i++)
153  {
154  double xx1 = fmod( (x[i] - x[0]) * scale_factor, 2 * nfreq);
155  double xx2 = fmod( 2 * (xx1++) , 2 * nfreq)+1;
156 
157  extirpolate(y[i] - mean, nwork,wk1, xx1, extirpolation_factor);
158  extirpolate(1., nwork,wk2, xx2, extirpolation_factor);
159  }
160 
161  FFTWComplex * fft1 = (FFTWComplex* ) fftw_malloc(sizeof(FFTWComplex) * (nwork/2+1));
162  FFTWComplex * fft2 = (FFTWComplex* ) fftw_malloc(sizeof(FFTWComplex) * (nwork/2+1));
163 
164  FFTtools::doFFT(nwork, wk1,fft1);
165  FFTtools::doFFT(nwork, wk2,fft2);
166 
167  double df = 1./(range * oversample_factor);
168  double norm_factor = 4. / n;
169 
170 #ifdef ENABLE_VECTORIZE
171  int leftover = nout % VEC_N;
172  int nit = nout / VEC_N + (leftover? 1: 0);
173 
174  for (int i = 0; i < nit; i++)
175  {
176  VEC fft2_re= GATHER_REAL((double*) &fft2[i*VEC_N+1]);
177  VEC fft2_im = GATHER_IM((double*) &fft2[i*VEC_N+1]);
178  VEC fft1_re= GATHER_REAL((double*) &fft1[i*VEC_N+1]);
179  VEC fft1_im = GATHER_IM((double*) &fft1[i*VEC_N+1]);
180 
181  VEC invhypo = 1./sqrt((fft2_re* fft2_re+ fft2_im * fft2_im));
182 
183  VEC half_cos2wt = 0.5 * fft2_re * invhypo;
184  VEC half_sin2wt = 0.5 * fft2_im * invhypo;
185 
186  VEC coswt = sqrt(0.5 + half_cos2wt);
187  VEC sinwt = sign_combine(sqrt(0.5 - half_cos2wt), half_sin2wt);
188 
189  VEC density = 0.5 * n + half_cos2wt * fft2_re + half_sin2wt * fft2_im;
190  VEC costerm = square( coswt * fft1_re + sinwt * fft1_im) / density;
191  VEC sinterm = square( coswt * fft1_re - sinwt * fft1_im) / (n -density);
192 
193 
194  VEC index(i * VEC_N);
195  index += INDEX;
196 
197  VEC ans_x = (index + 1) * df;
198  VEC ans_y = (costerm + sinterm)*norm_factor;
199  if (i < nit-1 || leftover == 0)
200  {
201  ans_x.store(out->GetX() +i * VEC_N);
202  ans_y.store(out->GetY()+ i * VEC_N);
203  }
204  else
205  {
206  ans_x.store_partial(leftover,out->GetX() +i * VEC_N);
207  ans_y.store_partial(leftover,out->GetY()+ i * VEC_N);
208 
209  }
210  }
211 
212 #else
213  for (int j = 0; j < nout; j++)
214  {
215  double hypo = fft2[j+1].getAbs();
216  double half_cos2wt = 0.5 * fft2[j+1].re / hypo;
217  double half_sin2wt = 0.5 * fft2[j+1].im / hypo;
218  double coswt = sqrt(0.5 + half_cos2wt);
219  double sinwt = TMath::Sign(sqrt(0.5 - half_cos2wt), half_sin2wt);
220 
221  double density = 0.5 * n + half_cos2wt * fft2[j+1].re + half_sin2wt * fft2[j+1].im;
222 
223  double costerm = coswt * fft1[j+1].re + sinwt * fft1[j+1].im;
224  costerm *= costerm;
225  costerm /= density;
226 
227  double sinterm = coswt * fft1[j+1].re - sinwt * fft1[j+1].im;
228  sinterm *= sinterm;
229  sinterm /= (n-density);
230 
231  out->GetX()[j] = (j+1) * df;
232  out->GetY()[j] = (costerm + sinterm) * norm_factor;
233  }
234 #endif
235  free(fft1);
236  free(fft2);
237 
238  return out;
239 
240 }
241 TGraph * FFTtools::lombScarglePeriodogram(int n, double dt, const double * __restrict x, const double * __restrict y, double oversample_factor,
242  double high_factor, TGraph * out, int extirpolation_factor)
243 {
244 
245  return _lombScarglePeriodogram(n,dt, x,y, oversample_factor, high_factor,out, extirpolation_factor);
246 }
247 
248 
249 
250 TGraph * FFTtools::lombScarglePeriodogram(const TGraph * g, double dt, double oversample_factor,
251  double high_factor, TGraph * out, int extirpolation_factor)
252 {
253 
254  return _lombScarglePeriodogram(g->GetN(),dt, g->GetX(), g->GetY(), oversample_factor, high_factor,out, extirpolation_factor);
255 }
256 
257 
258 TGraph * FFTtools::welchPeriodogram(const TGraph * gin, int segment_size, double overlap_fraction, const FFTWindowType * window, bool truncate_extra, TGraph * gout)
259 {
260 
261  double window_vals[segment_size];
262  window->fill(segment_size, window_vals);
263 
264  double window_weight = 0;
265  for (int i = 0; i < segment_size; i++) window_weight += window_vals[i] * window_vals[i] / segment_size;
266 
267  double y[segment_size] __attribute__((aligned(32)));
268 
269 
270 
271  TGraph * power = gout ? gout : new TGraph(segment_size/2+1);
272 
273  if (gout)
274  {
275  power->Set(segment_size/2+1);
276  memset(power->GetY(),0, power->GetN() *sizeof(double));
277  }
278 
279  double df = 1./(segment_size * (gin->GetX()[1] - gin->GetX()[0]));
280  for (int i = 0; i < power->GetN(); i++)
281  {
282  power->GetX()[i] = df*i;
283  }
284 
285 
286  int index = 0;
287  double nsegs = 0;
288  while (truncate_extra ? index + segment_size < gin->GetN() : index < gin->GetN())
289  {
290 
291  for (int i = 0; i < segment_size; i++)
292  {
293  y[i] = i + index < gin->GetN() ? window_vals[i] * gin->GetY()[i + index] : 0;
294 // printf("%d %d %g %g\n", index, i, window_vals[i], y[i]);
295  }
296 
297  FFTWComplex * fft = doFFT(segment_size, y);
298 
299  power->GetY()[0] += fft[0].getAbsSq()/segment_size;
300  for (int j = 1; j < segment_size/2; j++)
301  {
302  power->GetY()[j] += fft[j].getAbsSq() *2 / segment_size;
303  }
304  power->GetY()[segment_size/2] += fft[segment_size/2].getAbsSq() / segment_size;
305 
306  delete [] fft;
307 
308  index += (1.-overlap_fraction) * segment_size;
309  if (index + segment_size >= gin->GetN())
310  {
311  nsegs += double(gin->GetN() -index) / segment_size;
312  }
313  else
314  {
315  nsegs +=1;
316  }
317  }
318 
319  for (int i = 0; i <= segment_size/2; i++) power->GetY()[i] /= nsegs * window_weight;
320 
321  return power;
322 }
323 
324 
325 
326 
327 double * FFTtools::lombScarglePeriodogramSlow(int N, const double *x, const double *y, int nfreqs, const double * freqs, double * answer)
328 {
329  if (!answer) answer = new double[nfreqs];
330 
331  for (int ifreq = 0; ifreq < nfreqs; ifreq++)
332  {
333 
334  double sin2wt = 0;
335  double cos2wt = 0;
336  double two_w = freqs[ifreq] * 2 * TMath::Pi();
337  double mean = 0;
338  for (int i =0; i < N; i++)
339  {
340  double c,s;
341  SINCOS(x[i] * two_w, &s,&c);
342  sin2wt += s;
343  cos2wt += c;
344  mean += y[i];
345  }
346 
347  mean /= N;
348 
349  double tau = atan2(sin2wt, cos2wt) / two_w;
350 
351 
352 
353  double ys = 0;
354  double yc = 0;
355  double s2 = 0;
356  double c2 = 0;
357 
358 
359  for (int i = 0; i < N; i++)
360  {
361  double yy = y[i] - mean;
362 
363  double s,c;
364 
365  SINCOS(two_w * (x[i] - tau), &s,&c);
366 
367  ys += yy * s;
368  yc += yy * c;
369 
370  s2 += s*s;
371  c2 += c*c;
372  }
373 
374  answer[ifreq] = ys*ys / s2 + yc*yc/c2;
375  }
376 
377  return answer;
378 }
379 
380 
381 
382 //TH2 *FFTtools::getPowerVsTimeUsingLombScargle(const TGraph * g, int nbins, double sampling_dt, double oversample, double
383 
384 
struct __attribute__((packed))
Debugging use only TURF scaler data.
double im
The imaginary part.
Definition: FFTWComplex.h:18
FFTWComplex * doFFT(int length, const double *theInput)
Computes an FFT of an array of real numbers.
Definition: FFTtools.cxx:436
double * lombScarglePeriodogramSlow(int N, const double *x, const double *y, int nfreqs, const double *freqs, double *answer=0)
This is a wrapper class for a complex number.
Definition: FFTWComplex.h:12
double re
Destructor.
Definition: FFTWComplex.h:15
TGraph * welchPeriodogram(const TGraph *gin, int segment_size, double overlap_fraction=0.5, const FFTWindowType *window=&GAUSSIAN_WINDOW, bool truncate_extra=true, TGraph *gout=0)
TGraph * lombScarglePeriodogram(const TGraph *g, double dt=0, double oversample_factor=4, double high_factor=2, TGraph *replaceme=0, int extirpolation_factor=4)