DUNE-DAQ
DUNE Trigger and Data Acquisition software
Loading...
Searching...
No Matches
tc_dump.py
Go to the documentation of this file.
1#!/usr/bin/env python
2"""
3Display diagnostic information for TCs for a given
4tpstream file.
5"""
6import trgtools
7from trgtools.plot import PDFPlotter
8
9import trgdataformats
10
11import numpy as np
12import matplotlib.pyplot as plt
13from matplotlib.backends.backend_pdf import PdfPages
14from scipy import stats
15
16import os
17import argparse
18
19
# Human-readable labels and their integer enum values for each TC algorithm,
# pulled from the trgdataformats Python bindings; used as histogram x-ticks.
ALGORITHM_LABELS = list(trgdataformats.TriggerCandidateData.Algorithm.__members__.keys())
ALGORITHM_TICKS = [tp_alg.value for tp_alg in trgdataformats.TriggerCandidateData.Algorithm.__members__.values()]
# Same pairing for the TC type enum.
TYPE_LABELS = list(trgdataformats.TriggerCandidateData.Type.__members__.keys())
TYPE_TICKS = [tp_type.value for tp_type in trgdataformats.TriggerCandidateData.Type.__members__.values()]

TICK_TO_SEC_SCALE = 16e-9  # s per tick
26
27
def find_save_name(run_id: int, file_index: int, overwrite: bool) -> str:
    """
    Choose an output basename, avoiding collisions unless overwriting.

    Parameters:
        run_id (int): The run number for the read file.
        file_index (int): The file index for the run number of the read file.
        overwrite (bool): Overwrite the 0th plot directory of the same naming.

    Returns:
        (str): Save name to write as.

    The returned name carries no file extension; the save/write command is
    responsible for appending one.
    """
    def basename(iteration: int) -> str:
        # All outputs share this naming scheme; only the trailing counter varies.
        return f"tc_{run_id}-{file_index:04}_figures_{iteration:04}"

    iteration = 0
    save_name = basename(iteration)

    # Every run produces a PDF, so its existence marks a used name.
    if not overwrite:
        while os.path.exists(save_name + ".pdf"):
            iteration += 1
            save_name = basename(iteration)

    print(f"Saving outputs to ./{save_name}.*")

    return save_name
54
55
57 x_data: np.ndarray,
58 y_data: np.ndarray,
59 plot_details_dict: dict,
60 pdf: PdfPages) -> None:
61 """
62 Plot a scatter plot for the given x and y data to a PdfPages object.
63
64 Parameters:
65 x_data (np.ndarray): Array to use as x values.
66 y_data (np.ndarray): Array to use as y values.
67 plot_details_dict (dict): Dictionary with keys such as 'title', 'xlabel', etc.
68 pdf (PdfPages): The PdfPages object that this plot will be appended to.
69
70 Returns:
71 Nothing. Mutates :pdf: with the new plot.
72 """
73 # May or may not have a preferred style on the scatter, e.g., marker, color, size.
74 scatter_style = plot_details_dict.get('scatter_style', {})
75
76 plt.figure(figsize=(6, 4))
77 plt.scatter(x_data, y_data, **scatter_style)
78
79 # Asserts that the following need to be in the plotting details.
80 # Who wants unlabeled plots?
81 plt.title(plot_details_dict['title'])
82 plt.xlabel(plot_details_dict['xlabel'])
83 plt.ylabel(plot_details_dict['ylabel'])
84
85 plt.tight_layout()
86 pdf.savefig()
87 plt.close()
88
89 return None
90
91
def plot_pdf_time_delta_histograms(
        tc_data: np.ndarray,
        ta_data: list[np.ndarray],
        pdf: PdfPages,
        time_label: str,
        logarithm: bool) -> None:
    """
    Plot the different time delta histograms to a PdfPages.

    Parameters:
        tc_data (np.ndarray): Array of TC data members.
        ta_data (list[np.ndarray]): List of TAs per TC. ta_data[i] holds TA data for the i-th TC.
        pdf (PdfPages): PdfPages object to append plot to.
        time_label (str): Time label to plot with (ticks vs seconds).
        logarithm (bool): Use logarithmic scaling if true.

    Returns:
        Nothing. Mutates :pdf: with the new plot.
    """
    # Direct TC span plus three per-TC deltas against the contained TAs.
    direct_diff = tc_data['time_end'] - tc_data['time_start']
    last_ta_start_diff = []
    last_ta_end_diff = []
    pure_ta_diff = []
    for idx, ta in enumerate(ta_data):
        last_ta_start_diff.append(np.max(ta['time_start']) - tc_data[idx]['time_start'])
        last_ta_end_diff.append(np.max(ta['time_end']) - tc_data[idx]['time_start'])
        pure_ta_diff.append(np.max(ta['time_end']) - np.min(ta['time_start']))

    last_ta_start_diff = np.array(last_ta_start_diff)
    last_ta_end_diff = np.array(last_ta_end_diff)
    pure_ta_diff = np.array(pure_ta_diff)

    # Seconds case: convert all deltas from clock ticks.
    if "Ticks" not in time_label:
        direct_diff = direct_diff * TICK_TO_SEC_SCALE
        last_ta_start_diff = last_ta_start_diff * TICK_TO_SEC_SCALE
        last_ta_end_diff = last_ta_end_diff * TICK_TO_SEC_SCALE
        pure_ta_diff = pure_ta_diff * TICK_TO_SEC_SCALE

    bins = 40

    plt.figure(figsize=(6, 4))

    plt.hist(
        (direct_diff, last_ta_start_diff, last_ta_end_diff, pure_ta_diff),
        bins=bins,
        label=(
            "TC(End) - TC(Start)",
            "Last TA(Start) - TC(Start)",
            "Last TA(End) - TC(Start)",
            "Last TA(End) - First TA(Start)"
        ),
        color=(
            "#CA0020",
            "#F4A582",
            "#92C5DE",
            "#0571B0"
        ),
        alpha=0.6
    )

    if logarithm:
        plt.yscale('log')

    plt.title("Time Difference Histograms")
    plt.xlabel(time_label)
    plt.legend(framealpha=0.4)

    plt.tight_layout()
    pdf.savefig()
    plt.close()
    return None
164
165
def write_summary_stats(data: np.ndarray, filename: str, title: str) -> None:
    """
    Writes the given summary statistics to 'filename'.

    Parameters:
        data (np.ndarray): Array of a TC data member.
        filename (str): File to append outputs to.
        title (str): Title of the TC data member.

    Appends statistics to the given file. Skips writing (with a printed
    notice) when :data: is empty or when every entry is identical, since
    the statistics are then uninformative.
    """
    # Guard against empty input: data[0] below would raise IndexError and
    # stats.describe rejects zero-length arrays.
    if data.size == 0:
        print(f"{title} data member is empty. Skipping summary statistics.")
        return None

    # Algorithm, Det ID, etc. are not expected to vary.
    # Check first that they don't vary, and move on if so.
    if np.all(data == data[0]):
        print(f"{title} data member is the same for all TCs. Skipping summary statistics.")
        return None

    summary = stats.describe(data)
    std = np.sqrt(summary.variance)
    with open(filename, 'a') as out:
        out.write(f"{title}\n")
        out.write(f"Reference Statistics:\n"
                  f"\tTotal # TCs = {summary.nobs},\n"
                  f"\tMean = {summary.mean:.2f},\n"
                  f"\tStd = {std:.2f},\n"
                  f"\tMin = {summary.minmax[0]},\n"
                  f"\tMax = {summary.minmax[1]}.\n")
        # Count entries falling outside 3 and 2 standard deviations of the mean.
        std3_count = np.sum(data > summary.mean + 3*std) + np.sum(data < summary.mean - 3*std)
        std2_count = np.sum(data > summary.mean + 2*std) + np.sum(data < summary.mean - 2*std)
        out.write(f"Anomalies:\n"
                  f"\t# of >3 Sigma TCs = {std3_count},\n"
                  f"\t# of >2 Sigma TCs = {std2_count}.\n")
        out.write("\n\n")

    return None
201
202
def parse():
    """
    Parses CLI input arguments.

    Returns:
        argparse.Namespace: The parsed command line options.
    """
    arg_parser = argparse.ArgumentParser(
        description="Display diagnostic information for TCs for a given HDF5 file."
    )

    arg_parser.add_argument(
        "filename",
        help="Absolute path to tpstream file to display."
    )
    arg_parser.add_argument(
        "--verbose", "-v",
        action="count",
        default=0,
        help="Increment the verbose level (errors, warnings, all)."
             "Save names and skipped writes are always printed. Default: 0."
    )
    arg_parser.add_argument(
        "--start-frag",
        type=int,
        default=-10,
        help="Starting fragment index to process from. Takes negative indexing. Default: -10."
    )
    arg_parser.add_argument(
        "--end-frag",
        type=int,
        default=0,
        help="Fragment index to stop processing (i.e. not inclusive). Takes negative indexing. Default: N."
    )

    # The remaining options are all plain boolean switches; register them
    # from a table to keep the flag/help pairs easy to scan.
    switches = (
        (("--no-anomaly",),
         "Pass to not write 'ta_anomaly_summary.txt'. Default: False."),
        (("--seconds",),
         "Pass to use seconds instead of time ticks. Default: False."),
        (("--linear",),
         "Pass to use linear histogram scaling. Default: plots both linear and log."),
        (("--log",),
         "Pass to use logarithmic histogram scaling. Default: plots both linear and log."),
        (("--overwrite",),
         "Overwrite old outputs. Default: False."),
        (("--batch_mode", "-b"),
         "Do you want to run in batch mode (without loading bars/tqdm)?"),
    )
    for flags, help_text in switches:
        arg_parser.add_argument(*flags, action="store_true", help=help_text)

    return arg_parser.parse_args()
265
266
def main():
    """
    Drives the processing and plotting.

    Reads TC fragments with trgtools.TCReader, optionally writes summary
    statistics to '<save_name>.txt' (unless --no-anomaly), and appends
    diagnostic histograms, scatter, and errorbar plots to a PDF through
    PDFPlotter. Returns 1 early if the file has no TC fragments.
    """
    # Process Arguments & Data
    args = parse()
    filename = args.filename
    verbosity = args.verbose
    start_frag = args.start_frag
    end_frag = args.end_frag
    no_anomaly = args.no_anomaly
    seconds = args.seconds
    overwrite = args.overwrite
    batch_mode = args.batch_mode

    linear = args.linear
    log = args.log

    # User didn't pass either flag, so default to both being true.
    if (not linear) and (not log):
        linear = True
        log = True

    data = trgtools.TCReader(filename, verbosity, batch_mode)

    # Check that there are TC fragments.
    if len(data.get_fragment_paths()) == 0:
        print("File doesn't contain any TriggerCandidate fragments.")
        return 1

    # Load all case.
    if start_frag == 0 and end_frag == -1:
        data.read_all_fragments()  # Has extra debug/warning info
    else:  # Only load some.
        if end_frag != 0:  # Python doesn't like [n:0]
            frag_paths = data.get_fragment_paths()[start_frag:end_frag]
        elif end_frag == 0:
            frag_paths = data.get_fragment_paths()[start_frag:]

        for path in frag_paths:
            data.read_fragment(path)

    # Find a new save name or overwrite an old one.
    save_name = find_save_name(data.run_id, data.file_index, overwrite)

    print(f"Number of TCs: {data.tc_data.shape[0]}")  # Enforcing output for useful metric

    # Plotting

    if not no_anomaly:
        anomaly_filename = f"{save_name}.txt"
        if verbosity >= 2:
            print(f"Writing descriptive statistics to {anomaly_filename}.")
        if os.path.isfile(anomaly_filename):
            # Prepare a new ta_anomaly_summary.txt; write_summary_stats appends.
            os.remove(anomaly_filename)

    time_label = "Time (s)" if seconds else "Time (Ticks)"

    # Dictionary containing unique title, xlabel, and xticks (only some).
    # Keys mirror data.tc_data.dtype.names so the generic loop below can
    # look up per-member plot settings directly.
    plot_hist_dict = {
        'algorithm': {
            # Narrow bin pair centered on each enum value so each algorithm
            # gets its own visually separated bar.
            'bins': np.sort(np.array([(tick-0.45, tick+0.45) for tick in ALGORITHM_TICKS]).flatten()),
            'title': "Algorithm",
            'xlabel': 'Algorithm Type',
            'ylabel': "Count",
            'linear': True,  # TODO: Hard set for now.
            'linear_style': dict(color='k'),
            'log': False,
            'xticks': {
                'labels': ALGORITHM_LABELS,
                'ticks': ALGORITHM_TICKS,
                'fontsize': 6,
                'rotation': 60,
                'ha': 'right'  # Horizontal alignment
            }
        },
        'detid': {
            'title': "Detector ID",
            'xlabel': "Detector IDs",
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log'),
            'use_integer_xticks': True
        },
        'num_tas': {
            'title': "Number of TAs per TC",
            'xlabel': "Number of TAs",
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log'),
            'use_integer_xticks': True
        },
        'time_candidate': {
            'title': "Relative Time Candidate",
            'xlabel': time_label,
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log')
        },
        'time_end': {
            'title': "Relative Time End",
            'xlabel': time_label,
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log')
        },
        'time_peak': {
            'title': "Relative Time Peak",
            'xlabel': time_label,
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log')
        },
        'time_start': {
            'title': "Relative Time Start",
            'xlabel': time_label,
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log')
        },
        'type': {
            # Same bin-pair trick as 'algorithm' above.
            'bins': np.sort(np.array([(tick-0.45, tick+0.45) for tick in TYPE_TICKS]).flatten()),
            'title': "Type",
            'xlabel': "Type",
            'ylabel': "Count",
            'linear': True,  # TODO: Hard set for now.
            'linear_style': dict(color='k'),
            'log': False,
            'xticks': {
                'labels': TYPE_LABELS,
                'ticks': TYPE_TICKS,
                'fontsize': 6,
                'rotation': 60,
                'ha': 'right'  # Horizontal alignment
            }
        },
        'version': {
            'title': "Version",
            'xlabel': "Versions",
            'ylabel': "Count",
            'linear': linear,
            'linear_style': dict(color='#63ACBE', alpha=0.6, label='Linear'),
            'log': log,
            'log_style': dict(color='#EE442F', alpha=0.6, label='Log'),
            'use_integer_xticks': True
        }
    }

    pdf_plotter = PDFPlotter(save_name)

    # Generic plots: one histogram (and optional stats entry) per TC member.
    for tc_key in data.tc_data.dtype.names:
        if 'time' in tc_key:  # Special case: plot relative to the minimum.
            time = data.tc_data[tc_key]
            if seconds:
                time = time * TICK_TO_SEC_SCALE
            min_time = np.min(time)  # Prefer making the relative time change.
            pdf_plotter.plot_histogram(time - min_time, plot_hist_dict[tc_key])
            if not no_anomaly:
                write_summary_stats(time - min_time, anomaly_filename, tc_key)
            continue

        if tc_key == 'algorithm' or tc_key == 'type':  # Special case: enums -> int values.
            plot_data = np.array([datum.value for datum in data.tc_data[tc_key]], dtype=int)
            pdf_plotter.plot_histogram(plot_data, plot_hist_dict[tc_key])
            if not no_anomaly:
                write_summary_stats(plot_data, anomaly_filename, tc_key)
            del plot_data
            continue

        pdf_plotter.plot_histogram(data.tc_data[tc_key], plot_hist_dict[tc_key])
        if not no_anomaly:
            write_summary_stats(data.tc_data[tc_key], anomaly_filename, tc_key)

    pdf = pdf_plotter.get_pdf()
    # Analysis plots
    # ==== Time Delta Comparisons =====
    # Only meaningful when at least one TC actually contains TAs.
    if np.sum(data.tc_data['num_tas']) > 0:
        if linear:
            plot_pdf_time_delta_histograms(data.tc_data, data.ta_data, pdf, time_label, False)
        if log:
            plot_pdf_time_delta_histograms(data.tc_data, data.ta_data, pdf, time_label, True)
    # =================================

    # ==== TC ADC Integrals ====
    if np.sum(data.tc_data['num_tas']) > 0:
        tc_adc_integrals = np.array([np.sum(tas['adc_integral']) for tas in data.ta_data])
        adc_integrals_dict = {
            'title': "TC ADC Integrals",
            'xlabel': "ADC Integral",
            'ylabel': "Count"
        }
        pdf_plotter.plot_histogram(tc_adc_integrals, adc_integrals_dict)
    # ==========================

    # ==== ADC Integral vs Number of TAs ====
    # Shares tc_adc_integrals with the block above; both are guarded by the
    # identical num_tas condition, so the name is always bound here.
    if np.sum(data.tc_data['num_tas']) > 0:
        integral_vs_num_tas_dict = {
            'title': "TC ADC Integral vs Number of TAs",
            'xlabel': "Number of TAs",
            'ylabel': "TC ADC Integral",
            'scatter_style': {
                'alpha': 0.6,
                'c': 'k',
                's': 2
            }
        }
        plot_pdf_scatter(data.tc_data['num_tas'], tc_adc_integrals, integral_vs_num_tas_dict, pdf)
    # =======================================

    # ==== Time Spans Per TC ====
    time_candidate = data.tc_data['time_candidate']
    time_end = data.tc_data['time_end']
    time_start = data.tc_data['time_start']
    tc_min_time = np.min((time_candidate, time_end, time_start))

    # NOTE(review): structured-array field access returns views, so these
    # in-place subtractions also shift the arrays held by data.tc_data —
    # confirm that no later consumer needs the absolute times.
    time_candidate -= tc_min_time
    time_end -= tc_min_time
    time_start -= tc_min_time

    if seconds:
        tc_min_time = tc_min_time * TICK_TO_SEC_SCALE
        time_candidate = time_candidate * TICK_TO_SEC_SCALE
        time_end = time_end * TICK_TO_SEC_SCALE
        time_start = time_start * TICK_TO_SEC_SCALE

    # Asymmetric error bars: candidate-to-start below, end-to-candidate above.
    # NOTE(review): the int64 cast truncates fractional values; with --seconds
    # sub-second spans collapse to 0 — confirm this is intended.
    yerr = np.array([time_candidate - time_start, time_end - time_candidate]).astype(np.int64)
    time_unit = "Seconds" if seconds else "Ticks"
    time_spans_dict = {
        'title': "TC Relative Time Spans",
        'xlabel': "TC",
        'ylabel': time_label,
        'errorbar_style': {
            'yerr': yerr,
            'capsize': 4,
            'color': 'k',
            'ecolor': "#EE442F",
            'label': f"Avg {time_unit} / TC: "
                     f"{(time_candidate[-1] - time_candidate[0]) / len(time_candidate):.2f}",
            'mec': "#EE442F",
            'mfc': "#EE442F",
            'marker': 'h',
            'markersize': 4.00
        }
    }
    tc_count = np.arange(len(time_candidate))
    pdf_plotter.plot_errorbar(tc_count, time_candidate, time_spans_dict)
    # ===========================
    pdf_plotter.close()

    return None
531
532
# Script entry point: only run the driver when executed directly.
if __name__ == "__main__":
    main()
None plot_pdf_time_delta_histograms(np.ndarray tc_data, list[np.ndarray] ta_data, PdfPages pdf, str time_label, bool logarithm)
Definition tc_dump.py:97
parse()
Definition tc_dump.py:203
None write_summary_stats(np.ndarray data, str filename, str title)
Definition tc_dump.py:166
None plot_pdf_scatter(np.ndarray x_data, np.ndarray y_data, dict plot_details_dict, PdfPages pdf)
Definition tc_dump.py:60
str find_save_name(int run_id, int file_index, bool overwrite)
Definition tc_dump.py:28