Line data Source code
1 : /**
2 : * @file AVXRunSumProcessor.cpp
3 : *
4 : * @copyright This is part of the DUNE DAQ Software Suite, copyright 2020.
5 : * Licensing/copyright details are in the COPYING file that you should have
6 : * received with this code.
7 : */
8 :
#include "tpglibs/AVXRunSumProcessor.hpp"

#include <stdexcept>
10 :
11 : namespace tpglibs {
12 :
13 17 : REGISTER_AVXPROCESSOR_CREATOR("AVXRunSumProcessor", AVXRunSumProcessor)
14 :
15 18 : void AVXRunSumProcessor::configure(const nlohmann::json& config, const int16_t* plane_numbers) {
16 : // Configure common metric collection parameters
17 : // Register pointers to the ACTUAL member variables, not copies
18 : // Use shared_ptr with no-op deleter to avoid double-free
19 18 : m_internal_state_name_registry.register_internal_state("r",
20 36 : std::shared_ptr<__m256i>(&m_memory_factor, [](auto*){}));
21 18 : m_internal_state_name_registry.register_internal_state("s",
22 36 : std::shared_ptr<__m256i>(&m_scale_factor, [](auto*){}));
23 18 : m_internal_state_name_registry.register_internal_state("rs",
24 36 : std::shared_ptr<__m256i>(&m_running_sum, [](auto*){}));
25 :
26 18 : configure_internal_state_collection(config);
27 :
28 18 : int16_t memory_factors[16];
29 18 : int16_t plane_memory_factors[3] = {config["memory_factor_plane0"],
30 18 : config["memory_factor_plane1"],
31 18 : config["memory_factor_plane2"]};
32 18 : int16_t memory_divisors[16];
33 18 : int16_t plane_memory_divisors[3] = {config["memory_divisor_plane0"],
34 18 : config["memory_divisor_plane1"],
35 18 : config["memory_divisor_plane2"]};
36 18 : int16_t scale_factors[16];
37 18 : int16_t plane_scale_factors[3] = {config["scale_factor_plane0"],
38 18 : config["scale_factor_plane1"],
39 18 : config["scale_factor_plane2"]};
40 18 : int16_t scale_divisors[16];
41 18 : int16_t plane_scale_divisors[3] = {config["scale_divisor_plane0"],
42 18 : config["scale_divisor_plane1"],
43 18 : config["scale_divisor_plane2"]};
44 :
45 306 : for (int i = 0; i < 16; i++) {
46 288 : memory_factors[i] = plane_memory_factors[plane_numbers[i]];
47 288 : memory_divisors[i] = 0x7FFF / plane_memory_divisors[plane_numbers[i]]; // Need to adjust for AVX2 usage.
48 288 : scale_factors[i] = plane_scale_factors[plane_numbers[i]];
49 288 : scale_divisors[i] = 0x7FFF / plane_scale_divisors[plane_numbers[i]]; // Need to adjust for AVX2 usage.
50 : }
51 :
52 18 : m_memory_factor = _mm256_lddqu_si256(reinterpret_cast<__m256i*>(memory_factors));
53 18 : m_memory_divisor = _mm256_lddqu_si256(reinterpret_cast<__m256i*>(memory_divisors));
54 18 : m_scale_factor = _mm256_lddqu_si256(reinterpret_cast<__m256i*>(scale_factors));
55 18 : m_scale_divisor = _mm256_lddqu_si256(reinterpret_cast<__m256i*>(scale_divisors));
56 18 : }
57 :
58 30 : __m256i AVXRunSumProcessor::process(const __m256i& signal) {
59 : // Update sample counter and write internal states to buffer for harvesting
60 30 : m_samples++;
61 30 : if (m_collect_internal_state_flag && (m_samples % m_sample_period == 0)) {
62 6 : m_internal_state_buffer_manager.write_to_active_buffer();
63 : }
64 :
65 30 : __m256i scaled_rs = _mm256_mulhrs_epi16(m_running_sum, m_memory_divisor);
66 30 : scaled_rs = _mm256_mullo_epi16(scaled_rs, m_memory_factor);
67 :
68 30 : __m256i scaled_signal = _mm256_mulhrs_epi16(signal, m_scale_divisor);
69 30 : scaled_signal = _mm256_mullo_epi16(scaled_signal, m_scale_factor);
70 :
71 30 : m_running_sum = _mm256_adds_epi16(scaled_rs, scaled_signal);
72 30 : return AVXProcessor::process(m_running_sum);
73 : }
74 :
75 : } // namespace tpglibs
|