DUNE-DAQ
DUNE Trigger and Data Acquisition software
Loading...
Searching...
No Matches
dunedaq::opmonlib::MonitorableObject Class Reference

#include <MonitorableObject.hpp>

Inheritance diagram for dunedaq::opmonlib::MonitorableObject:
[legend]
Collaboration diagram for dunedaq::opmonlib::MonitorableObject:
[legend]

Public Types

using NodePtr = std::weak_ptr<MonitorableObject>
 
using NewNodePtr = std::shared_ptr<MonitorableObject>
 
using ElementId = std::string
 

Public Member Functions

 MonitorableObject (const MonitorableObject &)=delete
 
MonitorableObject & operator= (const MonitorableObject &)=delete
 
 MonitorableObject (MonitorableObject &&)=delete
 
MonitorableObject & operator= (MonitorableObject &&)=delete
 
virtual ~MonitorableObject ()=default
 
auto get_opmon_id () const noexcept
 
auto get_opmon_level () const noexcept
 

Static Public Member Functions

static bool publishable_metric (OpMonLevel entry, OpMonLevel system) noexcept
 

Protected Member Functions

 MonitorableObject ()=default
 
void register_node (ElementId name, NewNodePtr)
 
void publish (google::protobuf::Message &&, CustomOrigin &&co={}, OpMonLevel l=to_level(EntryOpMonLevel::kDefault)) const noexcept
 
virtual void generate_opmon_data ()
 

Private Types

using facility_ptr_t = std::shared_ptr<opmonlib::OpMonFacility>
 
using const_metric_counter_t
 
using metric_counter_t = std::remove_const<const_metric_counter_t>::type
 
using const_time_counter_t
 
using time_counter_t = std::remove_const<const_metric_counter_t>::type
 

Private Member Functions

opmon::MonitoringTreeInfo collect () noexcept
 
void set_opmon_level (OpMonLevel) noexcept
 
void inherit_parent_properties (const MonitorableObject &parent)
 
 MonitorableObject (ElementId name, ElementId parent_id="")
 

Private Attributes

std::map< ElementId, NodePtr > m_nodes
 
std::mutex m_node_mutex
 
std::atomic< facility_ptr_t > m_facility {s_default_facility}
 
dunedaq::opmon::OpMonId m_parent_id
 
std::atomic< OpMonLevel > m_opmon_level = to_level(SystemOpMonLevel::kAll)
 
ElementId m_opmon_name
 
std::atomic< metric_counter_t > m_published_counter {0}
 
std::atomic< metric_counter_t > m_ignored_counter {0}
 
std::atomic< metric_counter_t > m_error_counter {0}
 
std::atomic< time_counter_t > m_cpu_us_counter {0}
 

Static Private Attributes

static facility_ptr_t s_default_facility = std::make_shared<NullOpMonFacility>()
 

Friends

class OpMonManager
 

Detailed Description

Definition at line 73 of file MonitorableObject.hpp.

Member Typedef Documentation

◆ const_metric_counter_t

◆ const_time_counter_t

◆ ElementId

Definition at line 79 of file MonitorableObject.hpp.

◆ facility_ptr_t

Definition at line 170 of file MonitorableObject.hpp.

◆ metric_counter_t

◆ NewNodePtr

◆ NodePtr

◆ time_counter_t

Constructor & Destructor Documentation

◆ MonitorableObject() [1/4]

dunedaq::opmonlib::MonitorableObject::MonitorableObject ( const MonitorableObject & )
delete

Copy and move constructors are deleted, as they would violate the linking chain.

◆ MonitorableObject() [2/4]

dunedaq::opmonlib::MonitorableObject::MonitorableObject ( MonitorableObject && )
delete

◆ ~MonitorableObject()

virtual dunedaq::opmonlib::MonitorableObject::~MonitorableObject ( )
virtual default

◆ MonitorableObject() [3/4]

dunedaq::opmonlib::MonitorableObject::MonitorableObject ( )
protected default

Default constructors are fine, as they set the links correctly, i.e. the service points to the null facility and the names are not set.

◆ MonitorableObject() [4/4]

dunedaq::opmonlib::MonitorableObject::MonitorableObject ( ElementId name,
ElementId parent_id = "" )
inline private

Constructor to set initial strings

Definition at line 161 of file MonitorableObject.hpp.

162 : m_parent_id()
163 , m_opmon_name(name) {
164 m_parent_id.set_session(parent_id);
165 }
void set_session(Arg_ &&arg, Args_... args)

Member Function Documentation

◆ collect()

opmon::MonitoringTreeInfo MonitorableObject::collect ( )
private noexcept

Instructs the object to publish regular-interval metrics. It also instructs the children to execute their collect methods.

Returns
It returns a protobuf schema object to monitor the tree

Definition at line 101 of file MonitorableObject.cpp.

101 {
102
103 auto start_time = std::chrono::high_resolution_clock::now();
104
105 TLOG_DEBUG(TLVL_MONITORING_STEPS) << "Collecting data from " << to_string(get_opmon_id());
106
108
109 info.set_n_invalid_links(0);
110
111 try {
113 } catch ( const ers::Issue & i ) {
115 auto cause_ptr = i.cause();
116 while ( cause_ptr ) {
118 cause_ptr = cause_ptr->cause();
119 }
120 ers::error( ErrorWhileCollecting(ERS_HERE, to_string(get_opmon_id()), i) );
121 } catch ( const std::exception & e ) {
123 ers::error( ErrorWhileCollecting(ERS_HERE, to_string(get_opmon_id()), e) );
124 } catch (...) {
126 ers::error( ErrorWhileCollecting(ERS_HERE, to_string(get_opmon_id())) );
127 }
128
129 info.set_n_published_measurements( m_published_counter.exchange(0) );
130 info.set_n_ignored_measurements( m_ignored_counter.exchange(0) );
131 info.set_n_errors( m_error_counter.exchange(0) );
132 if (info.n_published_measurements() > 0) {
133 info.set_n_publishing_nodes(1);
134 }
135 info.set_cpu_elapsed_time_us( m_cpu_us_counter.exchange(0) );
136
137
138 std::lock_guard<std::mutex> lock(m_node_mutex);
139
140 info.set_n_registered_nodes( m_nodes.size() );
141
142 unsigned int n_invalid_links = 0;
143
144 for ( auto it = m_nodes.begin(); it != m_nodes.end(); ) {
145
146 auto ptr = it->second.lock();
147
148 if( ptr ) {
149 auto child_info = ptr->collect(); // MR: can we make this an async? There is no point to wait all done here
150 info.set_n_registered_nodes( info.n_registered_nodes() + child_info.n_registered_nodes() );
151 info.set_n_publishing_nodes( info.n_publishing_nodes() + child_info.n_publishing_nodes() );
152 info.set_n_invalid_links( info.n_invalid_links() + child_info.n_invalid_links() );
153 info.set_n_published_measurements( info.n_published_measurements() + child_info.n_published_measurements() );
154 info.set_n_ignored_measurements( info.n_ignored_measurements() + child_info.n_ignored_measurements() );
155 info.set_n_errors( info.n_errors() + child_info.n_errors() );
156 info.set_cpu_elapsed_time_us( info.cpu_elapsed_time_us() + child_info.cpu_elapsed_time_us() );
157 }
158
159 // prune the dead links
160 if ( it->second.expired() ) {
161 it = m_nodes.erase(it);
162 ++n_invalid_links;
163 } else {
164 ++it;
165 }
166 }
167
168 info.set_n_invalid_links( info.n_invalid_links() + n_invalid_links );
169
170
171 auto stop_time = std::chrono::high_resolution_clock::now();
172
173 auto duration = std::chrono::duration_cast<std::chrono::microseconds>( stop_time - start_time );
174 info.set_clockwall_elapsed_time_us( duration.count() );
175
176 return info;
177}
#define ERS_HERE
@ TLVL_MONITORING_STEPS
std::atomic< metric_counter_t > m_published_counter
std::atomic< metric_counter_t > m_ignored_counter
std::atomic< metric_counter_t > m_error_counter
std::atomic< time_counter_t > m_cpu_us_counter
std::map< ElementId, NodePtr > m_nodes
Base class for any user define issue.
Definition Issue.hpp:69
const Issue * cause() const
return the cause Issue of this Issue
Definition Issue.hpp:97
#define TLOG_DEBUG(lvl,...)
Definition Logging.hpp:112
std::string to_string(const dunedaq::opmon::OpMonId &)
Definition Utils.cpp:167
Cannot add TPSet with start_time
void error(const Issue &issue)
Definition ers.hpp:81

◆ generate_opmon_data()

virtual void dunedaq::opmonlib::MonitorableObject::generate_opmon_data ( )
inline protected virtual

Hook for customisable publication. The function can throw; exceptions will be caught by the monitoring thread.

Reimplemented in dunedaq::asiolibs::SourceModel< TargetPayloadType >, dunedaq::crtmodules::SourceModel< TargetPayloadType >, dunedaq::datahandlinglibs::DataHandlingModel< ReadoutType, RequestHandlerType, LatencyBufferType, RawDataProcessorType, InputDataType >, dunedaq::datahandlinglibs::DataSubscriberModel< PayloadType >, dunedaq::datahandlinglibs::DefaultRequestHandlerModel< ReadoutType, LatencyBufferType >, dunedaq::datahandlinglibs::DefaultRequestHandlerModel< T, datahandlinglibs::SkipListLatencyBufferModel< T > >, dunedaq::datahandlinglibs::DefaultRequestHandlerModel< TriggerPrimitiveTypeAdapter, datahandlinglibs::SkipListLatencyBufferModel< TriggerPrimitiveTypeAdapter > >, dunedaq::datahandlinglibs::DefaultRequestHandlerModel< types::DAPHNESuperChunkTypeAdapter, datahandlinglibs::SkipListLatencyBufferModel< types::DAPHNESuperChunkTypeAdapter > >, dunedaq::datahandlinglibs::IterableQueueModel< T >, dunedaq::datahandlinglibs::SkipListLatencyBufferModel< T >, dunedaq::datahandlinglibs::SourceEmulatorModel< ReadoutType >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< ReadoutType >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< hsilibs::HSI_FRAME_STRUCT >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< TAWrapper >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< TCWrapper >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< TriggerPrimitiveTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::CRTBernTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::CRTGrenobleTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::DAPHNEStreamSuperChunkTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::DUNEWIBEthTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::TDEEthTypeAdapter >, dunedaq::datahandlinglibs::TaskRawDataProcessorModel< types::TDEFrameTypeAdapter >, dunedaq::dpdklibs::SourceModel< TargetPayloadType >, 
dunedaq::fdreadoutlibs::TDEEthFrameProcessor, dunedaq::fdreadoutlibs::WIBEthFrameProcessor, dunedaq::flxlibs::CardControllerWrapper, dunedaq::flxlibs::ElinkModel< TargetPayloadType >, dunedaq::iomanager::QueueBase, dunedaq::ipm::Receiver, dunedaq::ipm::Sender, dunedaq::trigger::HSISourceModel, dunedaq::trigger::TAProcessor, dunedaq::trigger::TCProcessor, and dunedaq::trigger::TPProcessor.

Definition at line 135 of file MonitorableObject.hpp.

135{return;}

◆ get_opmon_id()

auto dunedaq::opmonlib::MonitorableObject::get_opmon_id ( ) const
inline noexcept

Definition at line 93 of file MonitorableObject.hpp.

93{ return m_parent_id + m_opmon_name; }

◆ get_opmon_level()

auto dunedaq::opmonlib::MonitorableObject::get_opmon_level ( ) const
inline noexcept

Definition at line 95 of file MonitorableObject.hpp.

95{ return m_opmon_level.load(); }

◆ inherit_parent_properties()

void MonitorableObject::inherit_parent_properties ( const MonitorableObject & parent)
private

Utilities for linking with parent and top levels.

Definition at line 193 of file MonitorableObject.cpp.

193 {
194
195 m_facility.store(parent.m_facility);
196 m_parent_id = parent.get_opmon_id();
198
199 std::lock_guard<std::mutex> lock(m_node_mutex);
200
201 for ( const auto & [key,wp] : m_nodes ) {
202
203 auto p = wp.lock();
204 if ( p ) {
205 p->inherit_parent_properties(*this);
206 }
207
208 }
209
210}
std::atomic< facility_ptr_t > m_facility

◆ operator=() [1/2]

MonitorableObject & dunedaq::opmonlib::MonitorableObject::operator= ( const MonitorableObject & )
delete

◆ operator=() [2/2]

MonitorableObject & dunedaq::opmonlib::MonitorableObject::operator= ( MonitorableObject && )
delete

◆ publish()

void MonitorableObject::publish ( google::protobuf::Message && m,
CustomOrigin && co = {},
OpMonLevel l = to_level(EntryOpMonLevel::kDefault) ) const
protected noexcept

Converts the message into an OpMonEntry and then uses the pointer to the Facility to publish the entry. This also timestamps the message with the time of the invocation. It is possible to associate an element name to the published message. The element name is checked against the children to protect uniqueness. It is also possible to associate a custom origin in the form of a map<string,string>. This is designed to add information which is independent from the software structure, e.g. channels or other hardware information. Messages will have an associated OpMonLevel that is used to suppress the publication of metrics. The level of the message is set by the OpMonManager.

Definition at line 58 of file MonitorableObject.cpp.

60 {
61
62 auto timestamp = google::protobuf::util::TimeUtil::GetCurrentTime();
63
64 auto start_time = std::chrono::high_resolution_clock::now();
65
67 TLOG_DEBUG(TLVL_LEVEL_SUPPRESSION) << "Metric " << m.GetTypeName() << " ignored because of the level";
69 return;
70 }
71
72 auto e = to_entry( m, co );
73
74 if ( e.data().empty() ) {
75 ers::warning( EntryWithNoData(ERS_HERE, e.measurement() ) );
76 return ;
77 }
78
79 *e.mutable_origin() = get_opmon_id() ;
80
81 *e.mutable_time() = timestamp;
82
83 // this pointer is always garanteed to be filled, even if with a null Facility.
84 // But the facility can fail
85 try {
86 m_facility.load()->publish(std::move(e));
88 } catch ( const OpMonPublishFailure & e ) {
89 ers::error(e);
91 }
92
93 auto stop_time = std::chrono::high_resolution_clock::now();
94
95 auto duration = std::chrono::duration_cast<std::chrono::microseconds>( stop_time - start_time );
96 m_cpu_us_counter += duration.count();
97
98}
@ TLVL_LEVEL_SUPPRESSION
static bool publishable_metric(OpMonLevel entry, OpMonLevel system) noexcept
dunedaq::opmon::OpMonEntry to_entry(const google::protobuf::Message &m, const CustomOrigin &co)
Definition Utils.cpp:20
void warning(const Issue &issue)
Definition ers.hpp:115

◆ publishable_metric()

static bool dunedaq::opmonlib::MonitorableObject::publishable_metric ( OpMonLevel entry,
OpMonLevel system )
inline static noexcept

Definition at line 97 of file MonitorableObject.hpp.

97 {
98 return (entry < system);
99 }

◆ register_node()

void MonitorableObject::register_node ( ElementId name,
NewNodePtr p )
protected

Append a registered object to the chain. The children will be modified using information from this parent.

Definition at line 32 of file MonitorableObject.cpp.

32 {
33
34 std::lock_guard<std::mutex> lock(m_node_mutex);
35
36 // check if the name is already present to ensure uniqueness
37 auto it = m_nodes.find(name) ;
38 if ( it != m_nodes.end() ) {
39 // This not desired because names are suppposed to be unique
40 // But if the pointer is expired, there is no harm in override it
41 if ( it -> second.expired() ) {
42 ers::warning(NonUniqueNodeName(ERS_HERE, name, to_string(get_opmon_id())));
43 }
44 else {
45 throw NonUniqueNodeName(ERS_HERE, name, to_string(get_opmon_id()));
46 }
47 }
48
49 m_nodes[name] = p;
50
51 p -> m_opmon_name = name;
52 p -> inherit_parent_properties( *this );
53
54 TLOG() << "Node " << name << " registered to " << to_string(get_opmon_id()) ;
55}
void inherit_parent_properties(const MonitorableObject &parent)
#define TLOG(...)
Definition macro.hpp:22

◆ set_opmon_level()

void MonitorableObject::set_opmon_level ( OpMonLevel l)
private noexcept

Hook to propagate the OpMonLevel at lower levels of the monitoring tree

Definition at line 180 of file MonitorableObject.cpp.

180 {
181
183
184 std::lock_guard<std::mutex> lock(m_node_mutex);
185 for ( const auto & [key,wp] : m_nodes ) {
186 auto p = wp.lock();
187 if (p) {
188 p->set_opmon_level(l);
189 }
190 }
191}

Friends And Related Symbol Documentation

◆ OpMonManager

friend class OpMonManager
friend

Definition at line 81 of file MonitorableObject.hpp.

Member Data Documentation

◆ m_cpu_us_counter

std::atomic<time_counter_t> dunedaq::opmonlib::MonitorableObject::m_cpu_us_counter {0}
mutable private

Definition at line 189 of file MonitorableObject.hpp.

189{0};

◆ m_error_counter

std::atomic<metric_counter_t> dunedaq::opmonlib::MonitorableObject::m_error_counter {0}
mutable private

Definition at line 184 of file MonitorableObject.hpp.

184{0};

◆ m_facility

std::atomic<facility_ptr_t> dunedaq::opmonlib::MonitorableObject::m_facility {s_default_facility}
private

Definition at line 171 of file MonitorableObject.hpp.

◆ m_ignored_counter

std::atomic<metric_counter_t> dunedaq::opmonlib::MonitorableObject::m_ignored_counter {0}
mutable private

Definition at line 183 of file MonitorableObject.hpp.

183{0};

◆ m_node_mutex

std::mutex dunedaq::opmonlib::MonitorableObject::m_node_mutex
private

Definition at line 168 of file MonitorableObject.hpp.

◆ m_nodes

std::map<ElementId, NodePtr> dunedaq::opmonlib::MonitorableObject::m_nodes
private

Definition at line 167 of file MonitorableObject.hpp.

◆ m_opmon_level

std::atomic<OpMonLevel> dunedaq::opmonlib::MonitorableObject::m_opmon_level = to_level(SystemOpMonLevel::kAll)
private

Definition at line 173 of file MonitorableObject.hpp.

◆ m_opmon_name

ElementId dunedaq::opmonlib::MonitorableObject::m_opmon_name
private

Definition at line 174 of file MonitorableObject.hpp.

◆ m_parent_id

dunedaq::opmon::OpMonId dunedaq::opmonlib::MonitorableObject::m_parent_id
private

Definition at line 172 of file MonitorableObject.hpp.

◆ m_published_counter

std::atomic<metric_counter_t> dunedaq::opmonlib::MonitorableObject::m_published_counter {0}
mutableprivate

Definition at line 182 of file MonitorableObject.hpp.

182{0};

◆ s_default_facility

std::shared_ptr< OpMonFacility > MonitorableObject::s_default_facility = std::make_shared<NullOpMonFacility>()
static private

Definition at line 176 of file MonitorableObject.hpp.


The documentation for this class was generated from the following files: