Line data Source code
1 : /**
2 : * @file EALSetup.hpp EAL setup functions for DPDK
3 : *
4 : * This is part of the DUNE DAQ , copyright 2020.
5 : * Licensing/copyright details are in the COPYING file that you should have
6 : * received with this code.
7 : */
8 : // #ifndef DPDKLIBS_INCLUDE_DPDKLIBS_EALSETUP_HPP_
9 : // #define DPDKLIBS_INCLUDE_DPDKLIBS_EALSETUP_HPP_
10 :
11 : #include "logging/Logging.hpp"
12 :
13 : #include <boost/program_options/parsers.hpp>
14 :
15 : #include "dpdklibs/EALSetup.hpp"
16 : #include "dpdklibs/Issues.hpp"
17 :
18 : #include <rte_eal.h>
19 : #include <rte_ethdev.h>
20 :
21 : namespace dunedaq {
22 : namespace dpdklibs {
23 : namespace ealutils {
24 :
// Mbuf pool sizing constants. They are not referenced by the code visible in
// this file; presumably defaults for callers creating mempools — TODO confirm.
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250

// Jumbo frame length: 9600 bytes plus the Ethernet header and CRC lengths.
#define PG_JUMBO_FRAME_LEN (RTE_ETHER_HDR_LEN + 9600 + RTE_ETHER_CRC_LEN)
#if !defined(RTE_JUMBO_ETHER_MTU)
/** Ethernet MTU: the jumbo frame length without the Ethernet header and CRC. */
#define RTE_JUMBO_ETHER_MTU (PG_JUMBO_FRAME_LEN - RTE_ETHER_CRC_LEN - RTE_ETHER_HDR_LEN)
#endif
32 :
33 : // static volatile uint8_t dpdk_quit_signal;
34 :
35 : static const struct rte_eth_conf iface_conf_default = {
36 : .rxmode = {
37 : .mtu = 9000,
38 : .max_lro_pkt_size = 9000,
39 : //.split_hdr_size = 0, // deprecated in dpdk@22.10
40 : .offloads = (RTE_ETH_RX_OFFLOAD_TIMESTAMP
41 : | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM
42 : | RTE_ETH_RX_OFFLOAD_UDP_CKSUM),
43 : },
44 :
45 : .txmode = {
46 : .offloads = (RTE_ETH_TX_OFFLOAD_MULTI_SEGS),
47 : },
48 : };
49 :
50 :
51 0 : std::string get_mac_addr_str(const rte_ether_addr& addr) {
52 0 : std::stringstream macstr;
53 0 : macstr << std::hex << static_cast<int>(addr.addr_bytes[0]) << ":" << static_cast<int>(addr.addr_bytes[1]) << ":" << static_cast<int>(addr.addr_bytes[2]) << ":" << static_cast<int>(addr.addr_bytes[3]) << ":" << static_cast<int>(addr.addr_bytes[4]) << ":" << static_cast<int>(addr.addr_bytes[5]) << std::dec;
54 0 : return macstr.str();
55 0 : }
56 :
57 :
58 : // Modifies Ethernet device configuration to multi-queue RSS with offload
59 : void
60 0 : iface_conf_rss_mode(struct rte_eth_conf& iface_conf, bool mode, bool offload)
61 : {
62 0 : if (mode) {
63 0 : iface_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
64 0 : if (offload) {
65 0 : iface_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
66 : }
67 : } else {
68 0 : iface_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
69 : }
70 0 : }
71 :
72 : // Enables RX in promiscuous mode for the Ethernet device.
73 : int
74 0 : iface_promiscuous_mode(std::uint16_t iface, bool mode)
75 : {
76 0 : int retval = -1;
77 0 : retval = rte_eth_promiscuous_get(iface);
78 0 : TLOG() << "Before modification attempt, promiscuous mode is: " << retval;
79 0 : if (mode) {
80 0 : retval = rte_eth_promiscuous_enable(iface);
81 : } else {
82 0 : retval = rte_eth_promiscuous_disable(iface);
83 : }
84 0 : if (retval != 0) {
85 0 : TLOG() << "Couldn't modify promiscuous mode of iface[" << iface << "]! Error code: " << retval;
86 : }
87 0 : retval = rte_eth_promiscuous_get(iface);
88 0 : TLOG() << "New promiscuous mode of iface[" << iface << "] is: " << retval;
89 0 : return retval;
90 : }
91 :
92 :
93 :
94 : int
95 0 : iface_init(uint16_t iface, uint16_t rx_rings, uint16_t tx_rings,
96 : uint16_t rx_ring_size, uint16_t tx_ring_size,
97 : std::map<int, std::unique_ptr<rte_mempool>>& mbuf_pool,
98 : bool with_reset, bool with_mq_rss, bool check_link_status)
99 : {
100 0 : struct rte_eth_conf iface_conf = iface_conf_default;
101 0 : uint16_t nb_rxd = rx_ring_size;
102 0 : uint16_t nb_txd = tx_ring_size;
103 0 : int retval = -1;
104 0 : uint16_t q;
105 0 : struct rte_eth_dev_info dev_info;
106 0 : struct rte_eth_txconf txconf;
107 0 : struct rte_eth_link link;
108 :
109 : // Get interface validity
110 0 : if (!rte_eth_dev_is_valid_port(iface)) {
111 0 : TLOG() << "Specified interface " << iface << " is not valid in EAL!";
112 0 : throw InvalidEALPort(ERS_HERE, iface);
113 : }
114 :
115 : // Get interface info
116 0 : if ((retval = rte_eth_dev_info_get(iface, &dev_info)) != 0) {
117 0 : TLOG() << "Error during getting device (iface " << iface << ") retval: " << retval;
118 0 : throw FailedToRetrieveInterfaceInfo(ERS_HERE, iface, retval);
119 : }
120 :
121 0 : TLOG() << "Iface " << iface << " RX Ring info :"
122 0 : << " min " << dev_info.rx_desc_lim.nb_min
123 0 : << " max " << dev_info.rx_desc_lim.nb_max
124 0 : << " align " << dev_info.rx_desc_lim.nb_align
125 0 : ;
126 :
127 : // Carry out a reset of the interface
128 0 : if (with_reset) {
129 0 : if ((retval = rte_eth_dev_reset(iface)) != 0) {
130 0 : throw FailedToResetInterface(ERS_HERE, iface, retval);
131 : }
132 : }
133 :
134 : // Should we configure MQ RSS and offload?
135 0 : if (with_mq_rss) {
136 0 : iface_conf_rss_mode(iface_conf, true, true); // with_rss, with_offload
137 : // RSS
138 0 : if ((iface_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) != 0) {
139 0 : TLOG() << "Ethdev port config prepared with RX RSS mq_mode!";
140 0 : if ((iface_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH) != 0) {
141 0 : TLOG() << "Ethdev port config prepared with RX RSS mq_mode with offloading is requested!";
142 : }
143 : }
144 : }
145 :
146 0 : TLOG() << "Configuring Iface " << iface << " rx rings: " << rx_rings <<", tx rings " << tx_rings;
147 :
148 : // Configure the Ethernet interface
149 0 : if ((retval = rte_eth_dev_configure(iface, rx_rings, tx_rings, &iface_conf)) != 0) {
150 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "Device Configuration", retval);
151 : }
152 :
153 : // Set MTU of interface
154 0 : rte_eth_dev_set_mtu(iface, RTE_JUMBO_ETHER_MTU);
155 0 : {
156 0 : uint16_t mtu;
157 0 : rte_eth_dev_get_mtu(iface, &mtu);
158 0 : TLOG() << "Interface: " << iface << " MTU: " << mtu;
159 : }
160 :
161 : // Set PTYPE parsing. RS FIXME: This function needs to be factorized, with overall better offloading control.
162 : // On most Intel and Mellanox drivers, packet_type will automatically be set if:
163 : // - the hardware supports RTE_ETH_RX_OFFLOAD_*PTYPE
164 : // - and ptype RX parsing is enabled in the PMD
165 : // Some PMDs require calling:
166 0 : rte_eth_dev_set_ptypes(iface, RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK, NULL, 0);
167 :
168 : // // Adjust RX/TX ring sizes
169 : // retval = rte_eth_dev_adjust_nb_rx_tx_desc(iface, &nb_rxd, &nb_txd);
170 : // if (retval != 0)
171 : // return retval;
172 :
173 0 : if ((retval = rte_eth_dev_adjust_nb_rx_tx_desc(iface, &nb_rxd, &nb_txd)) != 0) {
174 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "Adjust tx/rx descriptors", retval);
175 : }
176 :
177 : // Allocate and set up RX queues for interface.
178 0 : for (q = 0; q < rx_rings; q++) {
179 : // retval = rte_eth_rx_queue_setup(iface, q, nb_rxd, rte_eth_dev_socket_id(iface), NULL, mbuf_pool[q].get());
180 0 : if ((retval = rte_eth_rx_queue_setup(iface, q, nb_rxd, rte_eth_dev_socket_id(iface), NULL, mbuf_pool[q].get())) < 0) {
181 : // return retval;
182 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "Rx queues setup", retval);
183 : }
184 : }
185 :
186 0 : txconf = dev_info.default_txconf;
187 0 : txconf.offloads = iface_conf.txmode.offloads;
188 :
189 : // These values influenced by Sec. 8.4.4 of https://doc.dpdk.org/guides-1.8/prog_guide/poll_mode_drv.html
190 0 : txconf.tx_rs_thresh = 32;
191 0 : txconf.tx_free_thresh = 32;
192 0 : txconf.tx_thresh.wthresh = 0;
193 :
194 : // Allocate and set up TX queues for interface.
195 0 : for (q = 0; q < tx_rings; q++) {
196 0 : if ((retval = rte_eth_tx_queue_setup(iface, q, nb_txd, rte_eth_dev_socket_id(iface), &txconf)) < 0) {
197 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "Tx queues setup", retval);
198 : }
199 : }
200 :
201 : // Start the Ethernet interface.
202 0 : if ((retval = rte_eth_dev_start(iface)) < 0) {
203 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "MAC address retrival", retval);
204 : }
205 :
206 0 : if ((retval = rte_eth_link_get(iface, &link)) != 0) {
207 0 : throw FailedToRetrieveLinkStatus(ERS_HERE, iface, retval);
208 : }
209 :
210 0 : TLOG() << "Link: speed=" << link.link_speed << " duplex=" << link.link_duplex << " autoneg=" << link.link_autoneg << " status=" << link.link_status;
211 :
212 0 : if ( check_link_status && link.link_status == 0 ) {
213 0 : throw LinkOffline(ERS_HERE, iface);
214 : }
215 :
216 : // Display the interface MAC address.
217 0 : struct rte_ether_addr addr;
218 0 : if ((retval = rte_eth_macaddr_get(iface, &addr)) == 0) {
219 0 : TLOG() << "MAC address: " << get_mac_addr_str(addr);
220 : } else {
221 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "MAC address retrival", retval);
222 : }
223 :
224 : // Get interface info
225 0 : if ((retval = rte_eth_dev_info_get(iface, &dev_info)) != 0) {
226 0 : TLOG() << "Error during getting device (iface " << iface << ") retval: " << retval;
227 0 : throw FailedToConfigureInterface(ERS_HERE, iface, "Device information retrival", retval);
228 : }
229 :
230 0 : TLOG() << "Iface[" << iface << "] Rx Ring info:"
231 0 : << " min=" << dev_info.rx_desc_lim.nb_min
232 0 : << " max=" << dev_info.rx_desc_lim.nb_max
233 0 : << " align=" << dev_info.rx_desc_lim.nb_align;
234 0 : TLOG() << "Iface[" << iface << "] Tx Ring info:"
235 0 : << " min=" << dev_info.rx_desc_lim.nb_min
236 0 : << " max=" << dev_info.rx_desc_lim.nb_max
237 0 : << " align=" << dev_info.rx_desc_lim.nb_align;
238 :
239 0 : for (size_t j = 0; j < dev_info.nb_rx_queues; j++) {
240 :
241 0 : struct rte_eth_rxq_info queue_info;
242 0 : int count;
243 :
244 0 : retval = rte_eth_rx_queue_info_get(iface, j, &queue_info);
245 0 : if (retval != 0)
246 0 : continue;
247 :
248 0 : count = rte_eth_rx_queue_count(iface, j);
249 0 : TLOG() << "rx[" << j << "] descriptors=" << count << "/" << queue_info.nb_desc
250 0 : << " scattered=" << (queue_info.scattered_rx ? "yes" : "no")
251 0 : << " conf.drop_en=" << (queue_info.conf.rx_drop_en ? "yes" : "no")
252 0 : << " conf.rx_deferred_start=" << (queue_info.conf.rx_deferred_start ? "yes" : "no")
253 0 : << " rx_buf_size=" << queue_info.rx_buf_size;
254 : }
255 :
256 0 : return 0;
257 : }
258 :
259 : std::unique_ptr<rte_mempool>
260 0 : get_mempool(const std::string& pool_name,
261 : int num_mbufs, int mbuf_cache_size,
262 : int data_room_size, int socket_id) {
263 0 : TLOG() << "get_mempool with: NUM_MBUFS = " << num_mbufs
264 0 : << " | MBUF_CACHE_SIZE = " << mbuf_cache_size
265 0 : << " | data_room_size = " << data_room_size
266 0 : << " | SOCKET_ID = " << socket_id;
267 :
268 0 : struct rte_mempool *mbuf_pool;
269 0 : mbuf_pool = rte_pktmbuf_pool_create(pool_name.c_str(), num_mbufs,
270 : mbuf_cache_size, 0, data_room_size,
271 : socket_id);
272 :
273 0 : if (mbuf_pool == NULL) {
274 : // ers fatal
275 0 : rte_exit(EXIT_FAILURE, "ERROR: Cannot create rte_mempool!\n");
276 : }
277 0 : return std::unique_ptr<rte_mempool>(mbuf_pool);
278 : }
279 :
/**
 * @brief Builds a C-style argument vector that points into the given strings.
 *
 * @param std_argv Arguments; must outlive the returned vector, because the
 *                 returned pointers refer to these strings' own buffers.
 * @return One null-terminated `const char*` per input string, in input order.
 *         No trailing null pointer is appended.
 */
std::vector<const char*>
construct_eal_argv(const std::vector<std::string> &std_argv){
  std::vector<const char*> vec_argv;
  vec_argv.reserve(std_argv.size());
  for (const std::string& arg : std_argv) {
    vec_argv.push_back(arg.c_str());
  }
  return vec_argv;
}
288 :
289 :
290 :
291 : void
292 0 : init_eal(int argc, const char* argv[]) {
293 :
294 0 : std::stringstream ss;
295 0 : for( size_t i(0); i<argc; ++i) {
296 0 : ss << argv[i] << " ";
297 : }
298 0 : TLOG() << "EAL init arguments: " << ss.str();
299 :
300 : // Init EAL
301 0 : int ret = rte_eal_init(argc, (char**)argv);
302 0 : if (ret < 0) {
303 0 : rte_exit(EXIT_FAILURE, "ERROR: EAL initialization failed.\n");
304 : }
305 0 : TLOG() << "EAL initialized with provided parameters.";
306 0 : }
307 :
308 : void
309 0 : init_eal( const std::vector<std::string>& args ) {
310 :
311 0 : std::vector<const char*> eal_argv = ealutils::construct_eal_argv(args);
312 0 : const char** constructed_eal_argv = eal_argv.data();
313 0 : int constructed_eal_argc = args.size();
314 0 : ealutils::init_eal(constructed_eal_argc, constructed_eal_argv);
315 0 : }
316 :
317 : int
318 0 : get_available_ifaces() {
319 : // Check that there is an even number of interfaces to send/receive on
320 0 : unsigned nb_ifaces;
321 0 : nb_ifaces = rte_eth_dev_count_avail();
322 0 : TLOG() << "Available interfaces: " << nb_ifaces;
323 0 : return nb_ifaces;
324 : }
325 :
326 : int
327 0 : wait_for_lcores() {
328 0 : int lcore_id;
329 0 : int ret = 0;
330 0 : RTE_LCORE_FOREACH_WORKER(lcore_id) {
331 : //TLOG() << "Waiting for lcore[" << lcore_id << "] to finish packet processing.";
332 0 : ret = rte_eal_wait_lcore(lcore_id);
333 : }
334 0 : return ret;
335 : }
336 :
337 0 : void finish_eal() {
338 0 : rte_eal_cleanup();
339 0 : }
340 :
341 : } // namespace ealutils
342 : } // namespace dpdklibs
343 : } // namespace dunedaq
344 : // #endif // DPDKLIBS_INCLUDE_DPDKLIBS_EALSETUP_HPP_
345 :
|