AcclaimClustering.h
1 /* -*- C++ -*-.*********************************************************************************************
2  Author: Ben Strutt
3  Email: strutt@physics.ucla.edu
4 
5  Description:
6  A class to cluster. What were you expecting?
7 ***********************************************************************************************************/
8 
9 #ifndef ACCLAIM_CLUSTERING_H
10 #define ACCLAIM_CLUSTERING_H
11 
12 #include "UsefulAdu5Pat.h"
13 #include "AnitaConventions.h"
14 #include "BaseList.h"
15 #include "AnitaEventSummary.h"
16 #include "TKDTree.h"
17 #include "TCut.h"
18 #include "TEntryList.h"
19 #include "Math/Minimizer.h"
20 #include "Math/Functor.h"
21 #include "AcclaimOpenMP.h"
22 #include "TRandom3.h"
23 #include "TChain.h"
24 
25 class TTree;
26 class TGraphAntarctica;
27 class TFile;
28 class TH2DAntarctica;
29 class TCanvas;
30 class TArrowAntarctica;
31 
32 
33 namespace Acclaim{
34 
35  namespace Clustering {
36 
37  const Double_t default_sigma_theta = 0.25;
38  const Double_t default_sigma_phi = 0.5;
39  const Double_t default_range_easting_northing = 750e3;
40  const Double_t default_horizon_distance = 750e3;
41 
42  void getAngularResolution(const AnitaEventSummary* sum, AnitaPol::AnitaPol_t pol, Int_t peakInd, double& sigma_theta, double& sigma_phi);
43  void getAngularResolution(double snr, double& sigma_theta, double& sigma_phi);
44  TCanvas* drawAngularResolutionModel(double maxSnr);
45 
46  template <class T>
47  Bool_t isVaguelyNearMcMurdo(const T& t){
48  return t.longitude >= 150 && t.longitude < 180 && t.latitude >= -85 && t.latitude < -75;
49  }
50  template <class T>
51  Bool_t inSandbox(const T& t){
52  return t.longitude >= -90 && t.longitude < 90;
53  }
54 
59  class Event{
60  public:
61 
62  //--------------------------------------------------------------------------------
63  // determined by reconstruction
64  //--------------------------------------------------------------------------------
65  UInt_t eventNumber;
66  Int_t run;
68  Int_t peakIndex;
69 
70  Double_t centre[3];
71  Double_t latitude;
72  Double_t longitude;
73  Double_t altitude;
74  Double_t easting;
75  Double_t northing;
77 
78  Double_t theta;
79  Double_t phi;
81 
82  Double_t sigmaTheta;
83  Double_t sigmaPhi;
84 
85  //--------------------------------------------------------------------------------
86  // determined by clustering
87  //--------------------------------------------------------------------------------
88  Int_t nThresholds;
89  Int_t* cluster;//[nThresholds] /// which cluster am I associated with?
90  Double_t* dThetaCluster;//[nThresholds] /// theta distance to cluster
91  Double_t* dPhiCluster;//[nThresholds] /// phi distance to cluster
92 
93  Bool_t eventEventClustering;
98  Double_t selfLogLikelihood;
103 
106  mutable Bool_t fDebug;
107 
108  Event(Int_t nT=0);
109  Event(const AnitaEventSummary* sum, AnitaPol::AnitaPol_t pol, Int_t peakInd, Int_t nT=0);
110  Event(const Event& event);
111  Event& operator=(const Event& event);
112  Event(int pol, int peakInd, double peak_phi, double peak_theta, int nT, UInt_t eventNumber, Int_t run,
113  double anita_longitude, double anita_latitude, double anita_altitude, double anita_heading, double coherent_filtered_snr);
114 
115 
117  void setupUsefulPat(bool calculateNow = true);
118  void resetClusteringNumbers();
119  void deleteArrays();
120  void setNThresholds(int n);
121 
122  double logLikelihoodFromPoint(double sourceLon, double sourceLat, double sourceAlt, bool addOverHorizonPenalty=false) const;
123  template<class T>
124  double logLikelihoodFromPoint(const T& point, bool addOverHorizonPenalty=false) const {
125  return logLikelihoodFromPoint(point.longitude, point.latitude, point.altitude, addOverHorizonPenalty);
126  }
127  template<class T>
128  inline double cartesianSeparation(const T& event2){
129  double d0 = centre[0] - event2.centre[0];
130  double d1 = centre[1] - event2.centre[1];
131  double d2 = centre[2] - event2.centre[2];
132  return TMath::Sqrt(d0*d0 + d1*d1 + d2*d2);
133  }
134 
141  inline bool operator < (const Event& other) const {
142  return (eventNumber < other.eventNumber);
143  }
144 
145  virtual ~Event();
146  ClassDef(Event, 14)
147  };
148 
149 
150 
155  class McEvent : public Event{
156  public:
157  Double_t weight;
158  Double_t energy;
159 
160  McEvent(Int_t nT = 0);
161  McEvent(const AnitaEventSummary* sum, AnitaPol::AnitaPol_t pol, Int_t peakInd, Int_t nT=0);
162  McEvent(double weight, double energy, int pol, int peakInd, double peak_phi, double peak_theta, int nT, UInt_t eventNumber, Int_t run,
163  double anita_longitude, double anita_latitude, double anita_altitude, double anita_heading, double coherent_filtered_snr);
164 
165  virtual ~McEvent(){;}
166 
167  ClassDef(McEvent, 1)
168  };
169 
170 
171 
172 
173 
174  //--------------------------------------------------------------------------------------------------------
179  class Cluster{
180  public:
181  Cluster(Int_t i=-1);
182  Cluster(const Event& seedEvent, Int_t i=-1);
183  Cluster(const BaseList::base& base, Int_t i=-1);
184 
185  virtual ~Cluster(){ ;}
186 
187  Double_t centre[3];
188 
189  Double_t latitude;
190  Double_t longitude;
191  Double_t altitude;
192 
194  Double_t sumMcWeights;
195  Int_t knownBase;
196  Int_t index;
198  Double_t llEventCut;
199 
201  Int_t seedEvent;
202 
203  void resetClusteringNumbers();
204 
205  ClassDef(Cluster, 5)
206  };
207 
208 
215  public:
216 
217  static const Int_t SmallClusterSizeThreshold = 100;
218 
220  virtual ~LogLikelihoodMethod();
221 
222  void doClustering(const char* dataGlob, const char* mcGlob, const char* outFileName, bool useAcclaimFiles=true);
223  void testSmallClusters(const char* dataGlob, const char* outFileName, int clusterSizeMin=5, int clusterSizeMax=16, int nAttempts=100);
224 
225  bool getUseBaseList(){return fUseBaseList;}
226  void setUseBaseList(bool useBaseList){ // *TOGGLE *GETTER=GetUseBaseList
227  fUseBaseList = useBaseList;
228  }
229 
230  void setCut(TCut cut){ fCut = cut; }
231  void setCutHical(bool hc){ fCutHical = hc; } // *TOGGLE *GETTER=GetCutHical
232  void setSelfLLMax(double llmax){ fSelfLLMax = llmax; }
233  void setSurfaceDistThresholdKm(Double_t dist){ surfaceDistThresholdKm = dist; }
234  void setPercentOfMC(Int_t percent){ fPercentOfMC = percent; }
235 
236  bool getDebug(){return fDebug;}
237  void setDebug(bool db){fDebug = db;} // *TOGGLE *GETTER=GetDebug
238 
244  size_t addCut(const TCut c){
245  fThermalChainCuts.push_back(c);
246  return fThermalChainCuts.size();
247  }
248 
249  bool fStoreUnclusteredHistograms;
250 
251  private:
252  Double_t getDistSqEventCluster(const Event& event, const Cluster& cluster);
253  Double_t getAngDistSqEventCluster(const Event& event, const Cluster& cluster);
254  void getDeltaThetaDegDeltaPhiDegEventCluster(const Event& event, const Cluster& cluster, Double_t& deltaThetaDeg, Double_t& deltaPhiDeg);
255 
256 
257  Long64_t readInSummaries(const char* summaryGlob);
258  Long64_t readInTMVATreeSummaries(const char* summaryGlob, bool isMC);
259  void readInSummariesForTesting(const char* summaryGlob);
260  void pickEventsFromList(int n_in_cluster);
261  size_t addEvent(const AnitaEventSummary* sum, AnitaPol::AnitaPol_t pol, Int_t peakInd);
262  size_t addMcEvent(const AnitaEventSummary* sum, AnitaPol::AnitaPol_t pol, Int_t peakInd);
263  void assignSingleEventToCloserCluster(Int_t eventInd, Int_t isMC, Cluster& cluster, Int_t z, double llEventCut = -1);
264  void readInBaseList();
265 
266  TRandom3* tr3;
267 
268  void assignMcEventsToClusters();
269  void setInitialBaseClusters();
270  void makeSummaryTrees();
271  void addToHistograms(TH2D* h, TH2D* h2);
272  void resetClusters();
273  Double_t getSumOfMcWeights();
274  void initKDTree();
275  Acclaim::Clustering::Event* nextEvent();
276 
277  Double_t dMin(const Event* event1, const Event* event2);
278  Double_t dSum(const Event* event1, const Event* event2);
279  Double_t dAsym(const Event* event1, const Event* event2);
280  Double_t dFit(const Event* event1, const Event* event2);
281 
282  void testTriangleInequality();
283  void testSmallClustersFromPointSource();
284  Int_t removeLargeBasesNearMcMurdo();
285 
286  void doBaseEventClustering();
287  void doEventEventClustering();
288 
289  void doMcEventClustering();
290  void doMcBaseClustering();
291  bool considerBin(const Event& event, Int_t bx, Int_t by, double& easting, double& northing);
292  void nearbyEvents2(UInt_t eventInd, std::vector<UInt_t>& nearbyEvents);
293 
294  void makeAndWriteNSquaredEventEventHistograms();
295  Double_t evalPairLogLikelihoodAtLonLat(const Double_t* params);
296 
297 
298 
299 
300  std::vector<const Acclaim::Clustering::Event*> fFitEvent1s;
301  std::vector<const Acclaim::Clustering::Event*> fFitEvent2s;
302  Int_t fMaxFitterAttempts;
303  TCut fCut;
304  bool fCutHical;
305  bool fSelfLLMax;
306  TEntryList* fEntryList;
307 
308  UInt_t fTestEvent1;
309  UInt_t fTestEvent2;
310  std::vector<Double_t> fFitEastings;
311  std::vector<Double_t> fFitNorthings;
312  const Int_t numMcDivisions;
313  Int_t mcDivision; // Which of the MC divisions should I read in? (runs from 0 to numMcDivisions-1)
314 
315  std::vector<Double_t> llEventCuts;
316  Double_t surfaceDistThresholdKm;
317  Bool_t fEventsAlreadyClustered;
318 
319  std::vector<std::vector<Acclaim::Clustering::Cluster> >clusters;
320  // std::vector<Acclaim::Clustering::Cluster> clusters; /// Vector of clusters,
321  std::vector<Acclaim::Clustering::Event> events;
322  std::vector<Acclaim::Clustering::McEvent> mcEvents;
323  std::vector<std::vector<std::vector<UInt_t> > > fLookupEN;
324  AntarcticaBackground fMyBackground;
325 
326 
327  TKDTreeID* fKDTree;
328  std::vector<Double_t> fEventEastings;
329  std::vector<Double_t> fEventNorthings;
330 
331  std::vector<TH2DAntarctica*> hUnclusteredEvents;
332 
333  bool fDebug;
334  bool fUseBaseList;
335  TChain* fChain;
336  Int_t fPercentOfMC;
337  TGraph* grTestMinimizerWalk;
338  TGraph* grTestMinimizerValue;
339  TGraph* grTestMinimumPosition;
340  std::vector<ROOT::Math::Minimizer*> fMinimizers;
341  std::vector<ROOT::Math::Functor> fFunctors;
342  Int_t fROOTgErrorIgnoreLevel;
343  bool fDrawNewNearbyEventsHistograms;
344  bool fReadInBaseList;
345 
346  std::vector<TCut> fThermalChainCuts;
347  };
348  }
349 }
350 
351 #endif
Double_t longitude
cluster center latitude
Double_t sigmaTheta
the adjustment from traceBackToContinent
Int_t llEventCutInd
Where am I in the cluster array?
Double_t nearestKnownBaseLogLikelihood
Remove huge clusters near MCM before doing event-to-event clustering.
Int_t nThresholds
resolution associated with this snr?
Double_t thetaAdjustmentRequired
reconstructed phi
Int_t nearestKnownBaseCluster
How far to the nearest known base?
Double_t latitude
Does not persist /// Center in cartesian.
void resetClusteringNumbers()
/// Which event seeded the cluster?
TArrowAntarctica * makeArrowFromAnitaToEvent()
Double_t centre[3]
Which peak in the map does this represent?
Int_t antarcticaHistBin
What&#39;s the event number of the nearest surface neighbour by LL?
Double_t nearestEventSurfaceDistanceKm
If the event is above the continent surface, this may be non-zero.
Double_t llEventCut
which entry in the llEventCut array does this correspond to?
Double_t sigmaPhi
resolution associated with this snr?
Double_t nearestEventSurfaceLogLikelihood
What&#39;s the event number of the nearest surface neighbour?
Int_t knownBase
How many MC events does this cluster contain?
Int_t nearestKnownBaseClusterSurface
How far to the nearest known base?
UsefulAdu5Pat usefulPat
Which global bin in the TH2DAntarctica?
Double_t nearestKnownBaseSurfaceSeparationKm
How far to the nearest known base?
void setupUsefulPat(bool calculateNow=true)
Namespace which wraps everything in the library.
Bool_t fDebug
/// Only construct this once
McEvent(Int_t nT=0)
MC information.
Int_t numDataEvents
cluster center altitude
Double_t theta
Anita&#39;s position.
UInt_t nearestEventSurfaceEventNumber
How far away to the nearest event, in kilometers?
Double_t latitude
/// Cartesian coordinates, does not persist in ROOT
Int_t antarcticaHistBin
and what&#39;s the value of that cut?
UInt_t nearestEventSurfaceLLEventNumber
What&#39;s the fitted log likelihood to the nearest surface neighbour?
Double_t selfLogLikelihood
How far to the nearest known base?
Double_t altitude
longitude
As UsefulAnitaEvent is to RawAnitaEvent, UsefulAdu5Pat is to Adu5Pat. Well not quite as useful but yo...
Definition: UsefulAdu5Pat.h:39
Same as event, but with an energy and a weight.
Workhorse class: groups events into clusters around bases and other events using the log likelihood m...
Int_t seedEvent
/// Which global bin in the TH2DAntarctica?
Double_t altitude
cluster center longitude
AnitaEventSummary::PayloadLocation anita
northing
A position on the continent, with which a bunch of events are associated.
Common analysis format between UCorrelator and Acclaim.
AnitaPol::AnitaPol_t pol
Run.
bool operator<(const Event &other) const
Minimum required information about an ANITA event to be clustered.
Double_t energy
MC information.
Int_t index
Known base == 0, Pseudo-base == 1.
Draw an arrow between two lon/lat points.
enum AnitaPol::EAnitaPol AnitaPol_t
Polarisation enumeration.
Double_t sumMcWeights
How many data events does this cluster contain?
Double_t phi
reconstructed theta
Double_t easting
longitude
Int_t peakIndex
Polarization.