/**
 * @file HDF5FileUtils.hpp
 *
 * HDF5FileUtils is a collection of functions to assist with interacting with
 * HDF5 files.
 *
 * This is part of the DUNE DAQ Software Suite, copyright 2020.
 * Licensing/copyright details are in the COPYING file that you should have
 * received with this code.
 */

#ifndef DFMODULES_PLUGINS_HDF5FILEUTILS_HPP_
#define DFMODULES_PLUGINS_HDF5FILEUTILS_HPP_

#include "dfmodules/CommonIssues.hpp"

#include "highfive/H5File.hpp"

#include <filesystem>
#include <memory>
#include <regex>
#include <string>
#include <vector>

//#include "dfmodules/StorageKey.hpp"
//#include <boost/algorithm/string.hpp>
//#include <iomanip>
//#include <sstream>

namespace dunedaq {
namespace dfmodules {
namespace HDF5FileUtils {
/**
 * @brief Retrieves the top-level HDF5 Group named by the first element of group_dataset.
 */
HighFive::Group
get_top_group(const HighFive::File* file_ptr, const std::vector<std::string>& group_dataset)
{
  std::string top_level_group_name = group_dataset[0];
  HighFive::Group top_group = file_ptr->getGroup(top_level_group_name);
  if (!top_group.isValid()) {
    // throw InvalidHDF5Group(ERS_HERE, get_name(), top_level_group_name);
    throw InvalidHDF5Group(ERS_HERE, top_level_group_name, top_level_group_name);
  }

  return top_group;
}
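
// Usage sketch (illustrative only, not part of the original header): calling
// get_top_group on an existing file. The file name and the group/dataset path
// elements below are assumptions chosen for illustration.
//
//   HighFive::File example_file("example.hdf5", HighFive::File::ReadOnly);
//   std::vector<std::string> group_dataset = { "TriggerRecord00001", "TPC", "Fragment00" };
//   HighFive::Group top_group = HDF5FileUtils::get_top_group(&example_file, group_dataset);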

/**
 * @brief Creates (if requested) and fetches the chain of HDF5 sub-groups
 * described by group_dataset; the last element names the DataSet and is
 * therefore not created as a group.
 */
HighFive::Group
get_subgroup(HighFive::File* file_ptr, const std::vector<std::string>& group_dataset, bool create_if_needed)
{
  std::string top_level_group_name = group_dataset[0];
  if (create_if_needed && !file_ptr->exist(top_level_group_name)) {
    file_ptr->createGroup(top_level_group_name);
  }
  HighFive::Group working_group = file_ptr->getGroup(top_level_group_name);
  if (!working_group.isValid()) {
    throw InvalidHDF5Group(ERS_HERE, top_level_group_name, top_level_group_name);
  }
  // Create the remaining subgroups
  for (size_t idx = 1; idx < group_dataset.size() - 1; ++idx) {
    // group_dataset.size()-1 because the last element is the dataset
    std::string child_group_name = group_dataset[idx];
    if (child_group_name.empty()) {
      throw InvalidHDF5Group(ERS_HERE, child_group_name, child_group_name);
    }
    if (create_if_needed && !working_group.exist(child_group_name)) {
      working_group.createGroup(child_group_name);
    }
    HighFive::Group child_group = working_group.getGroup(child_group_name);
    if (!child_group.isValid()) {
      throw InvalidHDF5Group(ERS_HERE, child_group_name, child_group_name);
    }
    working_group = child_group;
  }

  return working_group;
}
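
// Usage sketch (illustrative only, not part of the original header): creating
// the group hierarchy for a new record before writing a DataSet. The file name
// and path elements are assumptions; only the elements before the last one are
// created as groups, since the last element names the DataSet.
//
//   HighFive::File example_file("example.hdf5", HighFive::File::ReadWrite);
//   std::vector<std::string> group_dataset = { "TriggerRecord00001", "TPC", "Fragment00" };
//   HighFive::Group parent_group = HDF5FileUtils::get_subgroup(&example_file, group_dataset, true);
//   // a DataSet named group_dataset.back() could then be created inside parent_group
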
/**
 * @brief Recursively adds the paths of all DataSets contained within the
 * specified Group to the specified path list. This helper is used by the
 * get_all_dataset_paths() function.
 */
void
add_datasets_to_path(HighFive::Group parent_group, const std::string& parent_path, std::vector<std::string>& path_list)
{
  std::vector<std::string> child_names = parent_group.listObjectNames();
  for (auto& child_name : child_names) {
    std::string full_path = parent_path + "/" + child_name;
    HighFive::ObjectType child_type = parent_group.getObjectType(child_name);
    if (child_type == HighFive::ObjectType::Dataset) {
      path_list.push_back(full_path);
    } else if (child_type == HighFive::ObjectType::Group) {
      HighFive::Group child_group = parent_group.getGroup(child_name);
      add_datasets_to_path(child_group, full_path, path_list);
    }
  }
}

/**
 * @brief Fetches the list of all DataSet paths in the specified file.
 */
std::vector<std::string>
get_all_dataset_paths(const HighFive::File* hdf_file_ptr)
{
  std::vector<std::string> path_list;

  std::vector<std::string> top_level_names = hdf_file_ptr->listObjectNames();
  for (auto& top_level_name : top_level_names) {
    HighFive::ObjectType top_level_type = hdf_file_ptr->getObjectType(top_level_name);
    if (top_level_type == HighFive::ObjectType::Dataset) {
      path_list.push_back(top_level_name);
    } else if (top_level_type == HighFive::ObjectType::Group) {
      HighFive::Group top_level_group = hdf_file_ptr->getGroup(top_level_name);
      add_datasets_to_path(top_level_group, top_level_name, path_list);
    }
  }

  return path_list;
}
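
// Usage sketch (illustrative only, not part of the original header): listing
// every DataSet in a file and opening each one. The file name is an assumption.
//
//   HighFive::File example_file("example.hdf5", HighFive::File::ReadOnly);
//   for (const auto& dataset_path : HDF5FileUtils::get_all_dataset_paths(&example_file)) {
//     HighFive::DataSet dataset = example_file.getDataSet(dataset_path);
//     // ... read or inspect the DataSet ...
//   }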

/**
 * @brief Fetches the list of files in the specified directory whose filenames
 * match the specified search pattern. The search pattern uses regex syntax
 * (e.g. ".*" to match zero or more instances of any character).
 * @return the list of filenames
 */
std::vector<std::string>
get_files_matching_pattern(const std::string& directory_path, const std::string& filename_pattern)
{
  std::regex regex_search_pattern(filename_pattern);
  std::vector<std::string> file_list;
  for (const auto& entry : std::filesystem::directory_iterator(directory_path)) {
    if (std::regex_match(entry.path().filename().string(), regex_search_pattern)) {
      file_list.push_back(entry.path());
    }
  }
  return file_list;
}
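
// Usage sketch (illustrative only, not part of the original header): finding
// all ".hdf5" files in a directory. The directory path is an assumption; note
// that the pattern is a regex, so the dot before the extension is escaped.
//
//   std::vector<std::string> matching_files =
//     HDF5FileUtils::get_files_matching_pattern("/data/output", ".*\\.hdf5");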

} // namespace HDF5FileUtils

} // namespace dfmodules
} // namespace dunedaq

#endif // DFMODULES_PLUGINS_HDF5FILEUTILS_HPP_