41 #ifndef _ROLE_WARPTRANSFORM_NETWORK_LIBRARY_HPP_
42 #define _ROLE_WARPTRANSFORM_NETWORK_LIBRARY_HPP_
46 #include <hls_stream.h>
49 #include "../../../../../HOST/vision/warp_transform/languages/cplusplus/include/config.h"
50 #include "memory_utils.hpp"
// Numeric state identifier (value 0) for the "write new data" state; the
// state name is echoed in the port_fsm debug trace further down.
56 #define FSM_WRITE_NEW_DATA 0
// Storage type for the port FSM state, provided as a macro alias of uint8_t.
// NOTE(review): presumably paired with further FSM_* state ids that are not
// visible in this excerpt -- confirm against the full header.
58 #define PortFsmType uint8_t
61 #if TRANSFERS_PER_CHUNK_DIVEND == 0
62 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK
64 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK_DIVEND
// Bit-field layout of three fields packed downwards from the most-significant
// bit of a NETWORK_WORD_BIT_WIDTH-wide word, in this order:
//
//   [ rows : 16 bits ][ cols : 16 bits ][ channel : 3 bits ] ...
//
// Each field is described by its width plus an inclusive HIGH/LOW bit index,
// and every field starts directly below the LOW bit of the previous one.
// NOTE(review): presumably these carry the image rows/cols/channels sent ahead
// of the pixel data -- confirm against the receiving FSM.
//
// All derived expansions are fully parenthesized so they evaluate correctly
// when used inside larger expressions (e.g. `2 * WARPTRANSFORM_ROWS_HIGH_BIT`
// or `x - WARPTRANSFORM_COLS_LOW_BIT`); without the parentheses operator
// precedence silently changes the result.
//
// The "CHNNEL" spelling is retained on purpose: it is part of the macro names
// that existing code refers to.
#define WARPTRANSFORM_CHNNEL_BITWIDTH 3
#define WARPTRANSFORM_COLS_BITWIDTH 16
#define WARPTRANSFORM_ROWS_BITWIDTH 16

// Rows: the topmost WARPTRANSFORM_ROWS_BITWIDTH bits of the word.
#define WARPTRANSFORM_ROWS_HIGH_BIT ((NETWORK_WORD_BIT_WIDTH) - 1)
#define WARPTRANSFORM_ROWS_LOW_BIT ((NETWORK_WORD_BIT_WIDTH) - (WARPTRANSFORM_ROWS_BITWIDTH))

// Cols: the WARPTRANSFORM_COLS_BITWIDTH bits immediately below the rows field.
#define WARPTRANSFORM_COLS_HIGH_BIT ((WARPTRANSFORM_ROWS_LOW_BIT) - 1)
#define WARPTRANSFORM_COLS_LOW_BIT ((WARPTRANSFORM_ROWS_LOW_BIT) - (WARPTRANSFORM_COLS_BITWIDTH))

// Channel: the WARPTRANSFORM_CHNNEL_BITWIDTH bits immediately below the cols field.
#define WARPTRANSFORM_CHNNEL_HIGH_BIT ((WARPTRANSFORM_COLS_LOW_BIT) - 1)
#define WARPTRANSFORM_CHNNEL_LOW_BIT ((WARPTRANSFORM_COLS_LOW_BIT) - (WARPTRANSFORM_CHNNEL_BITWIDTH))
100 hls::stream<NodeId> &sDstNode_sig,
101 ap_uint<32> *po_rx_ports
105 #pragma HLS INLINE off
108 #pragma HLS reset variable=port_fsm
114 printf(
"DEBUG in pPortAndDestionation: port_fsm - FSM_WRITE_NEW_DATA\n");
116 if(!sDstNode_sig.full())
118 NodeId dst_rank = (*pi_rank + 1) % *pi_size;
119 #if DEBUG_LEVEL == TRACE_ALL
120 printf(
"rank: %d; size: %d; \n", (
int) *pi_rank, (
int) *pi_size);
122 sDstNode_sig.write(dst_rank);
127 printf(
"DEBUG in pPortAndDestionation: port_fsm - FSM_DONE\n");
150 hls::stream<NetworkWord> &siSHL_This_Data,
151 hls::stream<NetworkMetaStream> &siNrc_meta,
152 hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
153 hls::stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
155 unsigned int *processed_word_rx,
156 unsigned int *processed_bytes_rx,
157 hls::stream<bool> &sImageLoaded
161 #pragma HLS INLINE off
162 #pragma HLS pipeline II=1
167 #pragma HLS reset variable=enqueueFSM
174 printf(
"DEBUG in pRXPath: enqueueFSM - WAIT_FOR_META, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
175 *processed_word_rx, *processed_bytes_rx);
176 if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
178 meta_tmp = siNrc_meta.read();
180 sRxtoTx_Meta.write(meta_tmp);
186 printf(
"DEBUG in pRXPath: enqueueFSM - PROCESSING_PACKET, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
187 *processed_word_rx, *processed_bytes_rx);
188 if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
191 netWord = siSHL_This_Data.read();
192 storeWordToAxiStream<stream<ap_uint<INPUT_PTR_WIDTH>>,
196 (netWord, img_in_axi_stream, processed_word_rx, processed_bytes_rx,
198 if(netWord.tlast == 1)
220 template<
typename TMemWrd, const
unsigned int loop_cnt, const
unsigned int cTransfers_Per_Chunk, const
unsigned int max_img_size, const
unsigned int cBytesPer10GbitEthAXIPckt>
222 hls::stream<NetworkWord> &siSHL_This_Data,
223 hls::stream<NetworkMetaStream> &siNrc_meta,
224 hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
225 hls::stream<TMemWrd> &img_in_axi_stream,
226 hls::stream<bool> &sMemBurstRx,
231 hls::stream<float> &sTxMatrix
235 #pragma HLS INLINE off
241 static TMemWrd v = 0;
242 static unsigned int cnt_wr_stream = 0, cnt_wr_burst = 0;
243 static unsigned int processed_net_bytes_rx = 0;
244 static unsigned int tx_mat_idx = 0;
245 #pragma HLS reset variable=meta_tmp
246 #pragma HLS reset variable=tx_mat_idx
247 #pragma HLS reset variable=cnt_wr_stream
248 #pragma HLS reset variable=cnt_wr_burst
249 #pragma HLS reset variable=processed_net_bytes_rx
251 #pragma HLS reset variable=enqueueRxToStrFSM
252 unsigned int expected_input_meta = TOT_TRANSFERS_TX;
253 unsigned int expected_output_meta = TOT_TRANSFERS_RX;
254 unsigned int received_and_fwded_meta = 0;
255 #pragma HLS reset variable=expected_input_meta
256 #pragma HLS reset variable=expected_output_meta
257 #pragma HLS reset variable=received_and_fwded_meta
263 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META\n");
265 if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
267 meta_tmp = siNrc_meta.read();
271 expected_output_meta = TOT_TRANSFERS_RX;
272 received_and_fwded_meta = 0;
277 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
278 if ( !siSHL_This_Data.empty())
281 netWord = siSHL_This_Data.read();
293 std::cout <<
"DEBUG pRXPathNetToStream - img rows =" <<
rows <<
" cols=" <<
cols <<
" chan=" << chan << std::endl;
308 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_IMGMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
309 if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
312 netWord = siSHL_This_Data.read();
313 printf(
"DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
314 netWord.
tdata.to_long(), netWord.
tkeep.to_int(), netWord.
tlast.to_int());
315 if ((netWord.
tkeep >> cnt_wr_stream) == 0) {
316 printf(
"WARNING: value with tkeep=0 at cnt_wr_stream=%u\n", cnt_wr_stream);
318 v(cnt_wr_stream*64, (cnt_wr_stream+1)*64-1) = netWord.
tdata(0,63);
319 if ((cnt_wr_stream++ == loop_cnt-1) || (netWord.
tlast == 1)) {
321 std::cout <<
"DEBUG in pRXPathNetToStream: Pushing to img_in_axi_stream :" << std::hex << v << std::endl;
322 img_in_axi_stream.write(v);
323 if ((cnt_wr_burst++ == cTransfers_Per_Chunk-1) ||
324 ((processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) &&
325 (netWord.
tlast == 1))) {
326 if (!sMemBurstRx.full()) {
327 sMemBurstRx.write(
true);
331 if (netWord.
tlast == 1) {
333 if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt)
335 if( received_and_fwded_meta < expected_output_meta){
336 sRxtoTx_Meta.write(meta_tmp);
337 received_and_fwded_meta++;
349 if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) {
350 processed_net_bytes_rx = 0;
353 processed_net_bytes_rx += cBytesPer10GbitEthAXIPckt;
358 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META_IMGMAT\n");
360 if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
362 meta_tmp = siNrc_meta.read();
364 sRxtoTx_Meta.write(meta_tmp);
365 received_and_fwded_meta++;
370 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_TXMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
371 if ( !siSHL_This_Data.empty())
374 netWord = siSHL_This_Data.read();
375 printf(
"DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
376 netWord.
tdata.to_long(), netWord.
tkeep.to_int(), netWord.
tlast.to_int());
382 tmp2.
i = netWord.
tdata.range(32-1,0);
385 sTxMatrix.write(tmp1.
f);
389 std::cout <<
"DEBUG in pRXPathNetToStream: tx matrix =" << tmp1.
f << std::endl;
391 std::cout <<
"DEBUG in pRXPathNetToStream: tx matrix id=" << tx_mat_idx << std::endl;
394 std::cout <<
"DEBUG in pRXPathNetToStream: end of matrix rx communication" << std::endl;
396 if (netWord.
tlast == 1) {
406 sTxMatrix.write(tmp2.
f);
413 printf(
"DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PUSH_REMAINING__META\n");
415 if ( !sRxtoTx_Meta.full() )
417 if( received_and_fwded_meta < expected_output_meta){
418 sRxtoTx_Meta.write(meta_tmp);
419 received_and_fwded_meta++;
442 template <
typename TStreamMemWrd,
typename TMemWrd,const
unsigned int loop_cnt,const
unsigned int bytes_per_loop>
444 hls::stream<TMemWrd> &img_in_axi_stream,
445 hls::stream<bool> &sMemBurstRx,
447 hls::stream<DmCmd> &soMemWrCmdP0,
448 hls::stream<DmSts> &siMemWrStsP0,
449 hls::stream<TStreamMemWrd> &soMemWriteP0,
451 hls::stream<bool> &sImageLoaded,
458 #pragma HLS INLINE off
459 #pragma HLS pipeline II=1
462 static TMemWrd v = 0;
463 static unsigned int cur_transfers_per_chunk;
464 static unsigned int cnt_wr_stream, cnt_wr_img_loaded;
465 static unsigned int ddr_addr_in;
467 #pragma HLS reset variable=enqueueStrToDdrFSM
472 static TStreamMemWrd memP0;
473 static DmSts memWrStsP0;
474 static unsigned int processed_bytes_rx;
476 #pragma HLS reset variable=cur_transfers_per_chunk
477 #pragma HLS reset variable=cnt_wr_stream
478 #pragma HLS reset variable=cnt_wr_img_loaded
479 #pragma HLS reset variable=ddr_addr_in
480 #pragma HLS reset variable=patternWriteNum
481 #pragma HLS reset variable=timeoutCnt
482 #pragma HLS reset variable=memP0
483 #pragma HLS reset variable=memWrStsP0
488 #pragma HLS reset variable=lcl_img_rows
489 #pragma HLS reset variable=lcl_img_cols
490 #pragma HLS reset variable=lcl_img_chan
495 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - WAIT_FOR_META, processed_bytes_rx=%u\n",
498 if ( !img_in_axi_stream.empty() )
500 if ((processed_bytes_rx) == 0) {
506 cur_transfers_per_chunk = 0;
515 lcl_img_rows = *img_rows;
516 lcl_img_cols = *img_cols;
517 lcl_img_chan = *img_chan;
524 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_CHK_PROC_BYTES, processed_bytes_rx=%u\n", processed_bytes_rx);
525 if (processed_bytes_rx <
IMGSIZE-bytes_per_loop) {
526 (processed_bytes_rx) += bytes_per_loop;
529 printf(
"DEBUG in pRXPathStreamToDDR: WARNING - you have reached the max depth of img. Will put processed_bytes_rx = 0.\n");
530 processed_bytes_rx = 0;
536 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_CMD\n");
537 if ( !soMemWrCmdP0.full() ) {
539 if (processed_bytes_rx == 0){
554 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_LOAD\n");
557 if (!sMemBurstRx.empty()) {
558 if (sMemBurstRx.read() ==
true) {
565 if((processed_bytes_rx) == 0) {
575 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_DATA\n");
576 if (!soMemWriteP0.full()) {
578 if (!img_in_axi_stream.empty()) {
579 memP0.tdata = img_in_axi_stream.read();
580 ap_uint<8> keepVal = 0xFF;
581 memP0.tkeep = (ap_uint<64>) (keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal);
583 printf(
"DEBUG: (patternWriteNum == cur_transfers_per_chunk -1) \n");
585 cnt_wr_img_loaded = 0;
593 std::cout <<
"DEBUG in pRXPathStreamToDDR: Pushing to soMemWriteP0 :" << std::hex << memP0.tdata << std::endl;
594 soMemWriteP0.write(memP0);
600 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_A\n");
601 if (!siMemWrStsP0.empty()) {
604 siMemWrStsP0.read(memWrStsP0);
620 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_B\n");
621 if ((memWrStsP0.
tag == 0x0) && (memWrStsP0.
okay == 1)) {
622 if ((processed_bytes_rx) == 0) {
623 if (!sImageLoaded.full()) {
624 if (cnt_wr_img_loaded++ >= 1) {
625 sImageLoaded.write(
false);
629 sImageLoaded.write(
true);
643 printf(
"DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_C\n");
644 if((processed_bytes_rx) == 0) {
669 hls::stream<NetworkWord> &soTHIS_Shl_Data,
670 hls::stream<NetworkMetaStream> &soNrc_meta,
671 hls::stream<NetworkWord> &sProcpToTxp_Data,
672 hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
673 hls::stream<NodeId> &sDstNode_sig,
674 unsigned int *processed_word_tx,
675 ap_uint<32> *pi_rank,
683 #pragma HLS INLINE off
688 #pragma HLS reset variable=dequeueFSM
695 #pragma HLS reset variable=dst_rank
696 #pragma HLS reset variable=netWordTx
701 #pragma HLS reset variable=lcl_img_rows
702 #pragma HLS reset variable=lcl_img_cols
703 #pragma HLS reset variable=lcl_img_chan
704 static bool tx_ongoing =
false;
710 if(!sDstNode_sig.empty())
712 dst_rank = sDstNode_sig.read();
719 printf(
"DEBUG in pTXPath: dequeueFSM=%d - WAIT_FOR_STREAM_PAIR, *processed_word_tx=%u\n",
725 *processed_word_tx = 0;
729 if (( !sProcpToTxp_Data.empty() && !sRxtoTx_Meta.empty()
730 && !soTHIS_Shl_Data.full() && !soNrc_meta.full() ))
732 netWordTx = sProcpToTxp_Data.read();
739 soTHIS_Shl_Data.write(netWordTx);
741 meta_in = sRxtoTx_Meta.read().tdata;
742 meta_out_stream.
tlast = 1;
743 meta_out_stream.
tkeep = 0xFF;
753 soNrc_meta.write(meta_out_stream);
755 (*processed_word_tx)++;
756 printf(
"DEBUG: Checking netWordTx.tlast...\n");
757 if(netWordTx.
tlast != 1)
766 printf(
"DEBUG in pTXPath: dequeueFSM=%d - PROCESSING_PACKET, *processed_word_tx=%u\n",
778 lcl_img_rows = *img_rows;
779 lcl_img_cols = *img_cols;
780 lcl_img_chan = *img_chan;
784 if( !sProcpToTxp_Data.empty() && !soTHIS_Shl_Data.full())
786 netWordTx = sProcpToTxp_Data.read();
790 (*processed_word_tx)++;
794 if ((netWordTx.
tlast == 1) || (((*processed_word_tx)*8) %
PACK_SIZE == 0))
797 printf(
"DEBUG: A netWordTx.tlast=1 ... sProcpToTxp_Data.empty()==%u \n", sProcpToTxp_Data.empty());
801 soTHIS_Shl_Data.write(netWordTx);
ap_uint< 32 > patternWriteNum
#define TRANSFERS_PER_CHUNK
#define FSM_CHK_PROC_BYTES
#define BITS_PER_10GBITETHRNET_AXI_PACKET
#define BYTES_PER_10GBITETHRNET_AXI_PACKET
#define CYCLES_UNTIL_TIMEOUT
#define PROCESSING_PACKET
#define WAIT_FOR_STREAM_PAIR
#define NETWORK_WORD_BIT_WIDTH