cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
warp_transform_network_library.hpp
Go to the documentation of this file.
1 
17 
41 #ifndef _ROLE_WARPTRANSFORM_NETWORK_LIBRARY_HPP_
42 #define _ROLE_WARPTRANSFORM_NETWORK_LIBRARY_HPP_
43 
44 #include <stdio.h>
45 #include <iostream>
46 #include <hls_stream.h>
47 #include "ap_int.h"
48 #include <stdint.h>
49 #include "../../../../../HOST/vision/warp_transform/languages/cplusplus/include/config.h"//debug level define
50 #include "memory_utils.hpp" //for stream based communication with ddr
51 #include "network.hpp"
53 
54 using namespace hls;
55 
// State encodings (FSM_WRITE_NEW_DATA, FSM_DONE) and storage type (PortFsmType) of the
// port_fsm state machine that pPortAndDestionation keeps in a static variable.
56 #define FSM_WRITE_NEW_DATA 0
57 #define FSM_DONE 1
58 #define PortFsmType uint8_t
59 
// Only when ENABLE_DDR is defined: choose the value of TRANSFERS_PER_CHUNK_LAST_BURST.
// If TRANSFERS_PER_CHUNK_DIVEND evaluates to 0 the full TRANSFERS_PER_CHUNK is used,
// otherwise TRANSFERS_PER_CHUNK_DIVEND itself is used.
// NOTE(review): without ENABLE_DDR the macro is not defined by this file.
60 #ifdef ENABLE_DDR
61 #if TRANSFERS_PER_CHUNK_DIVEND == 0
62 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK
63 #else
64 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK_DIVEND
65 #endif
66 #endif
67 
// Layout of the image-metadata fields inside one network word. The fields are packed from the
// most significant bit downwards: rows (16 bits), then columns (16 bits), then channels (3 bits),
// i.e. 35 bits in total.
// The bit positions quoted in the comments below assume NETWORK_WORD_BIT_WIDTH == 64.
// NOTE(review): the command field is described by WARPTRANSFORM_COMMANDS_* macros that are not
// visible here; confirm that it does not overlap these fields. Adding further fields requires
// reworking this packing.
// Every expression is parenthesized so that the macros expand correctly inside larger
// expressions (e.g. 2 * WARPTRANSFORM_COLS_HIGH_BIT).
#define WARPTRANSFORM_CHNNEL_BITWIDTH 3
#define WARPTRANSFORM_COLS_BITWIDTH 16
#define WARPTRANSFORM_ROWS_BITWIDTH 16

#define WARPTRANSFORM_ROWS_HIGH_BIT (NETWORK_WORD_BIT_WIDTH-1) // 63
#define WARPTRANSFORM_ROWS_LOW_BIT (NETWORK_WORD_BIT_WIDTH-WARPTRANSFORM_ROWS_BITWIDTH) // 64-16 = 48

#define WARPTRANSFORM_COLS_HIGH_BIT (WARPTRANSFORM_ROWS_LOW_BIT-1) // 47
#define WARPTRANSFORM_COLS_LOW_BIT (WARPTRANSFORM_ROWS_LOW_BIT-WARPTRANSFORM_COLS_BITWIDTH) // 48-16 = 32

#define WARPTRANSFORM_CHNNEL_HIGH_BIT (WARPTRANSFORM_COLS_LOW_BIT-1) // 31
#define WARPTRANSFORM_CHNNEL_LOW_BIT (WARPTRANSFORM_COLS_LOW_BIT-WARPTRANSFORM_CHNNEL_BITWIDTH) // 32-3 = 29
82 
86 
87 
// Parameter list and body of pPortAndDestionation. The line carrying the return type and the
// function name is not part of this listing.
//
// Two-state machine kept in the static port_fsm:
//  - FSM_WRITE_NEW_DATA: once sDstNode_sig has room, writes (*pi_rank + 1) % *pi_size to it
//    and moves to FSM_DONE.
//  - FSM_DONE: writes PORTS_OPENED to *po_rx_ports on every call.
//
// @param pi_rank       rank of this node, read to derive the destination rank
// @param pi_size       modulo divisor for the destination rank; presumably the number of
//                      nodes -- TODO confirm
// @param sDstNode_sig  stream that receives the computed destination rank
// @param po_rx_ports   output that is set to PORTS_OPENED while in FSM_DONE
//
// NOTE(review): *pi_size is used as a divisor without a check for zero.
98  ap_uint<32> *pi_rank,
99  ap_uint<32> *pi_size,
100  hls::stream<NodeId> &sDstNode_sig,
101  ap_uint<32> *po_rx_ports
102  )
103 {
104  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
105 #pragma HLS INLINE off
106  //-- STATIC VARIABLES (with RESET) ------------------------------------------
107  static PortFsmType port_fsm = FSM_WRITE_NEW_DATA;
108 #pragma HLS reset variable=port_fsm
109 
110  switch(port_fsm)
111  {
 // An unexpected state value is handled like FSM_WRITE_NEW_DATA (default falls through).
112  default:
113  case FSM_WRITE_NEW_DATA:
 // NOTE(review): this printf is not guarded by DEBUG_LEVEL, unlike the one further down.
114  printf("DEBUG in pPortAndDestionation: port_fsm - FSM_WRITE_NEW_DATA\n");
115  //WarpTransform app needs to be reset to process new rank
116  if(!sDstNode_sig.full())
117  {
 // Destination is the next rank, wrapping around at *pi_size.
118  NodeId dst_rank = (*pi_rank + 1) % *pi_size;
119  #if DEBUG_LEVEL == TRACE_ALL
120  printf("rank: %d; size: %d; \n", (int) *pi_rank, (int) *pi_size);
121  #endif
122  sDstNode_sig.write(dst_rank);
123  port_fsm = FSM_DONE;
124  }
125  break;
126  case FSM_DONE:
127  printf("DEBUG in pPortAndDestionation: port_fsm - FSM_DONE\n");
 // No transition leaves FSM_DONE in the visible code; the port mask is rewritten each call.
128  *po_rx_ports = PORTS_OPENED;
129  break;
130  }
131 
132 }
133 
134 
135 
136 
// pRXPath - receive path driven by the state variable enqueueFSM.
//
//  - WAIT_FOR_META: when a meta word is available and sRxtoTx_Meta has room, reads it,
//    forces its tlast to 1 and forwards it to sRxtoTx_Meta.
//  - PROCESSING_PACKET: when a data word is available and img_in_axi_stream has room, reads
//    the word and hands it to storeWordToAxiStream together with the two counters and
//    sImageLoaded.
//
// @param siSHL_This_Data     incoming network data words
// @param siNrc_meta          incoming network meta words
// @param sRxtoTx_Meta        stream to which the received meta word is forwarded
// @param img_in_axi_stream   output stream passed on to storeWordToAxiStream
// @param meta_tmp            scratch meta word; passed by value, so assignments made here do
//                            not reach the caller
// @param processed_word_rx   counter passed on to storeWordToAxiStream (also printed)
// @param processed_bytes_rx  counter passed on to storeWordToAxiStream (also printed)
// @param sImageLoaded        stream passed on to storeWordToAxiStream
//
// NOTE(review): this listing skips numbered lines (166, 181, 195, 200). The declaration of
// enqueueFSM, one template argument of storeWordToAxiStream and any state transitions are
// therefore not visible here.
149 void pRXPath(
150  hls::stream<NetworkWord> &siSHL_This_Data,
151  hls::stream<NetworkMetaStream> &siNrc_meta,
152  hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
153  hls::stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
154  NetworkMetaStream meta_tmp,
155  unsigned int *processed_word_rx,
156  unsigned int *processed_bytes_rx,
157  hls::stream<bool> &sImageLoaded
158  )
159 {
160  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
161  #pragma HLS INLINE off
162  #pragma HLS pipeline II=1
163 
164  //-- LOCAL VARIABLES ------------------------------------------------------
165  static NetworkWord netWord;
167  #pragma HLS reset variable=enqueueFSM
 // Ratio of the network packet width to the output word width, and bytes per such part.
168  const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
169  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
170 
171  switch(enqueueFSM)
172  {
173  case WAIT_FOR_META:
174  printf("DEBUG in pRXPath: enqueueFSM - WAIT_FOR_META, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
175  *processed_word_rx, *processed_bytes_rx);
176  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
177  {
178  meta_tmp = siNrc_meta.read();
179  meta_tmp.tlast = 1; //just to be sure...
180  sRxtoTx_Meta.write(meta_tmp);
182  }
183  break;
184 
185  case PROCESSING_PACKET:
186  printf("DEBUG in pRXPath: enqueueFSM - PROCESSING_PACKET, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
187  *processed_word_rx, *processed_bytes_rx);
188  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
189  {
190  //-- Read incoming data chunk
191  netWord = siSHL_This_Data.read();
192  storeWordToAxiStream<stream<ap_uint<INPUT_PTR_WIDTH>>,
193  loop_cnt,
194  bytes_per_loop,
196  (netWord, img_in_axi_stream, processed_word_rx, processed_bytes_rx,
197  sImageLoaded);
 // End of packet; the statement inside this branch is not part of the listing.
198  if(netWord.tlast == 1)
199  {
201  }
202  }
203  break;
204  }
205 }
206 
207 
208 
// Template header, parameter list and body of pRXPathNetToStream. The line carrying the
// return type and the function name (221) is not part of this listing.
//
// Receive path that reads network words and, depending on the command found in the first
// word of a packet, either forwards transform-matrix floats to sTxMatrix or accumulates
// image data into TMemWrd words that are pushed to img_in_axi_stream.
//
// @tparam TMemWrd                    type of the accumulated output word
// @tparam loop_cnt                   number of 64-bit network words packed into one TMemWrd
// @tparam cTransfers_Per_Chunk       number of TMemWrd writes after which a burst is signalled
// @tparam max_img_size               byte count at which processed_net_bytes_rx wraps to 0
// @tparam cBytesPer10GbitEthAXIPckt  bytes added to processed_net_bytes_rx per network word
//
// @param siSHL_This_Data    incoming network data words
// @param siNrc_meta         incoming network meta words
// @param sRxtoTx_Meta       stream to which meta words are forwarded
// @param img_in_axi_stream  stream receiving the accumulated TMemWrd words
// @param sMemBurstRx        receives 'true' each time a burst boundary is reached
// @param img_rows           output, written from the image command word
// @param img_cols           output, written from the image command word
// @param img_chan           output, written from the image command word
// @param sTxMatrix          stream receiving the transform-matrix values as floats
//
// NOTE(review): this listing skips numbered lines. The declaration of enqueueRxToStrFSM, the
// 'case' labels of four sections (identified below only by their printf text), the extraction
// of rows/cols/chan and every state transition are not visible here.
220 template<typename TMemWrd, const unsigned int loop_cnt, const unsigned int cTransfers_Per_Chunk, const unsigned int max_img_size, const unsigned int cBytesPer10GbitEthAXIPckt>
222  hls::stream<NetworkWord> &siSHL_This_Data,
223  hls::stream<NetworkMetaStream> &siNrc_meta,
224  hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
225  hls::stream<TMemWrd> &img_in_axi_stream,
226  hls::stream<bool> &sMemBurstRx,
227  img_meta_t * img_rows,
228  img_meta_t * img_cols,
229  img_meta_t * img_chan,
230  // float tx_matrix[TRANSFORM_MATRIX_DIM]
231  hls::stream<float> &sTxMatrix
232  )
233 {
234  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
235  #pragma HLS INLINE off
236 
237  //-- LOCAL VARIABLES ------------------------------------------------------
238  static NetworkWord netWord;
239  // const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
240  static NetworkMetaStream meta_tmp;
241  static TMemWrd v = 0;
242  static unsigned int cnt_wr_stream = 0, cnt_wr_burst = 0;
243  static unsigned int processed_net_bytes_rx = 0;
244  static unsigned int tx_mat_idx = 0;
245  #pragma HLS reset variable=meta_tmp
246  #pragma HLS reset variable=tx_mat_idx
247  #pragma HLS reset variable=cnt_wr_stream
248  #pragma HLS reset variable=cnt_wr_burst
249  #pragma HLS reset variable=processed_net_bytes_rx
251  #pragma HLS reset variable=enqueueRxToStrFSM
 // NOTE(review): the three counters below are plain (non-static) locals, so they are
 // re-initialized on every call although they are compared and incremented in several
 // states and carry reset pragmas -- confirm whether 'static' was intended.
252  unsigned int expected_input_meta = TOT_TRANSFERS_TX;
253  unsigned int expected_output_meta = TOT_TRANSFERS_RX;
254  unsigned int received_and_fwded_meta = 0;
255  #pragma HLS reset variable=expected_input_meta
256  #pragma HLS reset variable=expected_output_meta
257  #pragma HLS reset variable=received_and_fwded_meta
258 
259 
260  switch(enqueueRxToStrFSM)
261  {
262  case WAIT_FOR_META:
263  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META\n");
264 
 // Latch the meta word of a new packet; it is not forwarded in this state.
265  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
266  {
267  meta_tmp = siNrc_meta.read();
268  meta_tmp.tlast = 1; //just to be sure...
269  //sRxtoTx_Meta.write(meta_tmp);
271  expected_output_meta = TOT_TRANSFERS_RX;
272  received_and_fwded_meta = 0;
273  }
274  break;
275 
276 case PROCESSING_PACKET:
277  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
278  if ( !siSHL_This_Data.empty())
279  {
280  //-- Read incoming data chunk
281  netWord = siSHL_This_Data.read();
 // Dispatch on the command field of the first word of the packet.
282  switch(netWord.tdata.range(WARPTRANSFORM_COMMANDS_HIGH_BIT,WARPTRANSFORM_COMMANDS_LOW_BIT))//the command is in the first 8 bits
283  {
284  case(WRPTX_TXMAT_CMD):
286  tx_mat_idx = 0;
287  break;
288  case(WRPTX_IMG_CMD):
 // rows, cols and chan are defined on lines that are not part of this listing.
292  expected_output_meta = rows * cols;
293  std::cout << "DEBUG pRXPathNetToStream - img rows =" << rows << " cols=" << cols << " chan=" << chan << std::endl;
294  *img_rows = rows;
295  *img_cols = cols;
296  *img_chan = chan;
298  break;
299  //TODO: fix the default case
300  // default: // invalid cmd
301  // break;
302  // //might be consume data? dk
303  }
304  }
305  break;
306 
 // Section named PROCESSING_PACKET_IMGMAT by its debug output (case label not in listing):
 // packs incoming 64-bit words into v and pushes v to img_in_axi_stream.
308  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_IMGMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
309  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
310  {
311  //-- Read incoming data chunk
312  netWord = siSHL_This_Data.read();
313  printf("DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
314  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
315  if ((netWord.tkeep >> cnt_wr_stream) == 0) {
316  printf("WARNING: value with tkeep=0 at cnt_wr_stream=%u\n", cnt_wr_stream);
317  }
 // Store the 64-bit payload in the cnt_wr_stream-th 64-bit slot of v.
318  v(cnt_wr_stream*64, (cnt_wr_stream+1)*64-1) = netWord.tdata(0,63);
 // Flush v once loop_cnt words were collected or the packet ends.
319  if ((cnt_wr_stream++ == loop_cnt-1) || (netWord.tlast == 1)) {
320  // std::cout << std::hex << v << std::endl; // print hexadecimal value
321  std::cout << "DEBUG in pRXPathNetToStream: Pushing to img_in_axi_stream :" << std::hex << v << std::endl;
322  img_in_axi_stream.write(v);
 // Signal a burst after cTransfers_Per_Chunk writes, or at the last word of the image.
323  if ((cnt_wr_burst++ == cTransfers_Per_Chunk-1) ||
324  ((processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) &&
325  (netWord.tlast == 1))) {
 // NOTE(review): if sMemBurstRx is full the signal is dropped, yet cnt_wr_burst is still reset.
326  if (!sMemBurstRx.full()) {
327  sMemBurstRx.write(true);
328  }
329  cnt_wr_burst = 0;
330  }
331  if (netWord.tlast == 1) {
332  //Next state logic
333  if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt)
334  {
335  if( received_and_fwded_meta < expected_output_meta){
336  sRxtoTx_Meta.write(meta_tmp);
337  received_and_fwded_meta++;
339  }else{
341  }
342  }else{
344 
345  }
346  }
347  cnt_wr_stream = 0;
348  }
 // Byte counter wraps to 0 after the last word of an image.
349  if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) {
350  processed_net_bytes_rx = 0;
351  }
352  else {
353  processed_net_bytes_rx += cBytesPer10GbitEthAXIPckt;
354  }
355  }
356  break;
 // Section named WAIT_FOR_META_IMGMAT by its debug output (case label not in listing):
 // reads the next meta word and forwards it immediately.
358  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META_IMGMAT\n");
359 
360  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
361  {
362  meta_tmp = siNrc_meta.read();
363  meta_tmp.tlast = 1; //just to be sure...
364  sRxtoTx_Meta.write(meta_tmp);
365  received_and_fwded_meta++;
367  }
368  break;
 // Section named PROCESSING_PACKET_TXMAT by its debug output (case label not in listing):
 // each 64-bit word carries two 32-bit float bit patterns, upper half first.
370  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_TXMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
371  if ( !siSHL_This_Data.empty())
372  {
373  //-- Read incoming data chunk
374  netWord = siSHL_This_Data.read();
375  printf("DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
376  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
377  float_bits_u tmp1;
378  float_bits_u tmp2;
379  tmp1.i = netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32);
380  // unsigned int tmp1 = netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32);
381  // unsigned int tmp2 = netWord.tdata.range(32-1,0);
382  tmp2.i = netWord.tdata.range(32-1,0);
383  //always write one float
384  // tx_matrix[tx_mat_idx]=tmp1.f;
385  sTxMatrix.write(tmp1.f);
386  // std::cout << "DEBUG in pRXPathNetToStream: tmp1=" << tmp1 << " tmp2=" << tmp2 << std::endl;
387  // std::cout << "DEBUG in pRXPathNetToStream: tmp1=" << std::hex << netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32) << " tmp2=" << netWord.tdata.range(32-1,0) << std::dec << std::endl;
388 
389  std::cout << "DEBUG in pRXPathNetToStream: tx matrix =" << tmp1.f << std::endl;
390  tx_mat_idx++;
391  std::cout << "DEBUG in pRXPathNetToStream: tx matrix id=" << tx_mat_idx << std::endl;
392 
 // Matrix complete or packet ended: the lower half of this word is discarded.
393  if ((tx_mat_idx == TRANSFORM_MATRIX_DIM) || (netWord.tlast == 1)) {
394  std::cout << "DEBUG in pRXPathNetToStream: end of matrix rx communication" << std::endl;
395  //end of rx --> wait for something; else there is the image after the tx matrix
396  if (netWord.tlast == 1) {
398  }else{
400  }
401  tx_mat_idx = 0;
402 
403  } else { //neither at the end of the matrix nor at tlast: a second float is written
404 
405  // tx_matrix[tx_mat_idx]=tmp2.f;
406  sTxMatrix.write(tmp2.f);
407 
408  tx_mat_idx++;
409  }
410  }
411  break;
 // Section named PUSH_REMAINING__META by its debug output (case label not in listing):
 // keeps re-sending the latched meta word while fewer than expected_output_meta were sent.
413  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PUSH_REMAINING__META\n");
414 
415  if ( !sRxtoTx_Meta.full() )
416  {
417  if( received_and_fwded_meta < expected_output_meta){
418  sRxtoTx_Meta.write(meta_tmp);
419  received_and_fwded_meta++;
421  }else{
423  }
424  }
425  break;
426  }
427 }
428 
429 
430 
// Template header, parameter list and body of pRXPathStreamToDDR. The line carrying the
// return type and the function name (443) is not part of this listing.
//
// State machine that takes TMemWrd words from img_in_axi_stream and writes them to memory
// through a command / data / status stream triple, in bursts of cur_transfers_per_chunk words.
//
// @tparam TStreamMemWrd   type of the word written to soMemWriteP0 (has tdata/tkeep/tlast)
// @tparam TMemWrd         type of the word read from img_in_axi_stream
// @tparam loop_cnt        not used in the visible lines
// @tparam bytes_per_loop  bytes added to processed_bytes_rx per FSM_CHK_PROC_BYTES visit
//
// @param img_in_axi_stream  input words to be written to memory
// @param sMemBurstRx        burst-complete signals, consumed in FSM_WR_PAT_LOAD
// @param soMemWrCmdP0       memory write commands (DmCmd)
// @param siMemWrStsP0       memory write status (DmSts)
// @param soMemWriteP0       memory write data
// @param sImageLoaded       receives a bool in FSM_WR_PAT_STS_B when processed_bytes_rx is 0
// @param img_rows           read into a local copy when a new image starts
// @param img_cols           read into a local copy when a new image starts
// @param img_chan           read into a local copy when a new image starts
//
// NOTE(review): this listing skips numbered lines. The declaration of enqueueStrToDdrFSM and
// every state transition are not visible here, which leaves several branches empty.
// NOTE(review): TRANSFERS_PER_CHUNK_LAST_BURST is only defined by this file under ENABLE_DDR.
442 template <typename TStreamMemWrd, typename TMemWrd,const unsigned int loop_cnt,const unsigned int bytes_per_loop>
444  hls::stream<TMemWrd> &img_in_axi_stream,
445  hls::stream<bool> &sMemBurstRx,
446  //---- P0 Write Path (S2MM) -----------
447  hls::stream<DmCmd> &soMemWrCmdP0,
448  hls::stream<DmSts> &siMemWrStsP0,
449  hls::stream<TStreamMemWrd> &soMemWriteP0,
450  //---- P1 Memory mapped ---------------
451  hls::stream<bool> &sImageLoaded,
452  img_meta_t * img_rows,
453  img_meta_t * img_cols,
454  img_meta_t * img_chan
455  )
456 {
457  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
458  #pragma HLS INLINE off
459  #pragma HLS pipeline II=1
460 
461  //-- LOCAL VARIABLES ------------------------------------------------------
462  static TMemWrd v = 0;
463  static unsigned int cur_transfers_per_chunk;
464  static unsigned int cnt_wr_stream, cnt_wr_img_loaded;
465  static unsigned int ddr_addr_in;
467  #pragma HLS reset variable=enqueueStrToDdrFSM
468 
469  static ap_uint<32> patternWriteNum;
470  static ap_uint<32> timeoutCnt;
471 
472  static TStreamMemWrd memP0;
473  static DmSts memWrStsP0;
 // NOTE(review): processed_bytes_rx (and v) have no reset pragma in the list below.
474  static unsigned int processed_bytes_rx;
475 
476  #pragma HLS reset variable=cur_transfers_per_chunk
477  #pragma HLS reset variable=cnt_wr_stream
478  #pragma HLS reset variable=cnt_wr_img_loaded
479  #pragma HLS reset variable=ddr_addr_in
480  #pragma HLS reset variable=patternWriteNum
481  #pragma HLS reset variable=timeoutCnt
482  #pragma HLS reset variable=memP0
483  #pragma HLS reset variable=memWrStsP0
484 
 // Local copies of the image metadata, taken when a new image starts (see WAIT_FOR_META).
485  static img_meta_t lcl_img_rows=0;
486  static img_meta_t lcl_img_cols=0;
487  static img_meta_t lcl_img_chan=0;
488  #pragma HLS reset variable=lcl_img_rows
489  #pragma HLS reset variable=lcl_img_cols
490  #pragma HLS reset variable=lcl_img_chan
491 
492  switch(enqueueStrToDdrFSM)
493  {
494  case WAIT_FOR_META:
495  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - WAIT_FOR_META, processed_bytes_rx=%u\n",
496  processed_bytes_rx);
497 
498  if ( !img_in_axi_stream.empty() )
499  {
 // processed_bytes_rx == 0 marks the start of a new image: clear all working state.
500  if ((processed_bytes_rx) == 0) {
501  memP0.tdata = 0;
502  memP0.tlast = 0;
503  memP0.tkeep = 0;
504  patternWriteNum = 0;
505  timeoutCnt = 0;
506  cur_transfers_per_chunk = 0;
507  ddr_addr_in = 0;
508  cnt_wr_stream = 0;
509  v = 0;
510  memWrStsP0.tag = 0;
511  memWrStsP0.interr = 0;
512  memWrStsP0.decerr = 0;
513  memWrStsP0.slverr = 0;
514  memWrStsP0.okay = 0;
515  lcl_img_rows = *img_rows;
516  lcl_img_cols = *img_cols;
517  lcl_img_chan = *img_chan;
518  }
520  }
521  break;
522 
523  case FSM_CHK_PROC_BYTES:
524  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_CHK_PROC_BYTES, processed_bytes_rx=%u\n", processed_bytes_rx);
 // Advance the byte counter, wrapping to 0 once the last bytes_per_loop of IMGSIZE is reached.
525  if (processed_bytes_rx < IMGSIZE-bytes_per_loop) {
526  (processed_bytes_rx) += bytes_per_loop;
527  }
528  else {
529  printf("DEBUG in pRXPathStreamToDDR: WARNING - you have reached the max depth of img. Will put processed_bytes_rx = 0.\n");
530  processed_bytes_rx = 0;
531  }
533  break;
534 
535 case FSM_WR_PAT_CMD:
536  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_CMD\n");
537  if ( !soMemWrCmdP0.full() ) {
538  //-- Post a memory write command to SHELL/Mem/Mp0
 // The counter has wrapped to 0 for the last word of an image: use the last-burst length.
539  if (processed_bytes_rx == 0){
540  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK_LAST_BURST;
541  }
542  else {
543  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK;
544  }
545  if (patternWriteNum == 0) { // Write cmd only the first time of every burst
546  soMemWrCmdP0.write(DmCmd(ddr_addr_in * BPERMDW_512, cur_transfers_per_chunk*BPERMDW_512)); // Byte-addressable
547  }
 // The word address advances on every visit, not only when a command was issued.
548  ddr_addr_in++;
550  }
551  break;
552 
553 case FSM_WR_PAT_LOAD:
554  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_LOAD\n");
555  // -- Assemble a 512-bit memory word with input values from stream
 // At the end of a burst, wait for the burst-complete signal from the network side.
556  if (patternWriteNum++ >= cur_transfers_per_chunk - 1) {
557  if (!sMemBurstRx.empty()) {
558  if (sMemBurstRx.read() == true) {
559  patternWriteNum = 0;
561  }
562  }
563  }
564  else {
565  if((processed_bytes_rx) == 0) {
567  }
568  else {
570  }
571  }
572  break;
573 
574 case FSM_WR_PAT_DATA:
575  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_DATA\n");
576  if (!soMemWriteP0.full()) {
577  //-- Write a memory word to DRAM
578  if (!img_in_axi_stream.empty()) {
579  memP0.tdata = img_in_axi_stream.read();
 // All 64 tkeep bits set: every byte of the word is valid.
580  ap_uint<8> keepVal = 0xFF;
581  memP0.tkeep = (ap_uint<64>) (keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal);
 // The last word of the burst carries tlast and resets the per-burst counters.
582  if (patternWriteNum++ == cur_transfers_per_chunk - 1) {
583  printf("DEBUG: (patternWriteNum == cur_transfers_per_chunk -1) \n");
584  memP0.tlast = 1;
585  cnt_wr_img_loaded = 0;
586  timeoutCnt = 0;
587  patternWriteNum = 0;
589  }
590  else {
591  memP0.tlast = 0;
592  }
593  std::cout << "DEBUG in pRXPathStreamToDDR: Pushing to soMemWriteP0 :" << std::hex << memP0.tdata << std::endl;
594  soMemWriteP0.write(memP0);
595  }
596  }
597  break;
598 
599 case FSM_WR_PAT_STS_A:
600  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_A\n");
601  if (!siMemWrStsP0.empty()) {
602  printf(" 1 \n");
603  //-- Get the memory write status for Mem/Mp0
604  siMemWrStsP0.read(memWrStsP0);
606  }
607  else {
 // No status yet: after CYCLES_UNTIL_TIMEOUT polls, clear the status fields (okay = 0).
608  if (timeoutCnt++ >= CYCLES_UNTIL_TIMEOUT) {
609  memWrStsP0.tag = 0;
610  memWrStsP0.interr = 0;
611  memWrStsP0.decerr = 0;
612  memWrStsP0.slverr = 0;
613  memWrStsP0.okay = 0;
615  }
616  }
617  break;
618 
619 case FSM_WR_PAT_STS_B:
620  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_B\n");
621  if ((memWrStsP0.tag == 0x0) && (memWrStsP0.okay == 1)) {
622  if ((processed_bytes_rx) == 0) {
623  if (!sImageLoaded.full()) {
 // The first visit after a burst writes 'true'; a later visit writes 'false'.
624  if (cnt_wr_img_loaded++ >= 1) {
625  sImageLoaded.write(false);
627  }
628  else {
629  sImageLoaded.write(true);
630  }
631  }
632  }
633  else {
635  }
636  }
637  else {
638  ; // TODO: handle errors on memWrStsP0
639  }
640  break;
641 
642 case FSM_WR_PAT_STS_C:
643  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_C\n");
 // Both branches are empty in this listing; their statements are on skipped lines.
644  if((processed_bytes_rx) == 0) {
646  }
647  else {
649  }
650  break;
651 }
652 
653 }
654 
655 
656 
// pTXPath - transmit path driven by the state variable dequeueFSM.
//
//  - WAIT_FOR_META (also the default): reads the destination rank from sDstNode_sig.
//  - Section named WAIT_FOR_STREAM_PAIR by its debug output (case label not in listing):
//    forwards the first data word of a packet and emits the matching outgoing meta word.
//  - PROCESSING_PACKET: forwards the remaining data words and terminates the packet.
//
// @param soTHIS_Shl_Data    outgoing network data words
// @param soNrc_meta         outgoing network meta words
// @param sProcpToTxp_Data   data words to transmit
// @param sRxtoTx_Meta       meta words forwarded by the receive path; their source and
//                           destination ports are swapped for the reply
// @param sDstNode_sig       stream delivering the destination rank
// @param processed_word_tx  count of transmitted words; reset to 0 when it equals MIN_TX_LOOPS
// @param pi_rank            rank of this node, used as the source rank of outgoing meta
// @param img_rows           copied to a local static when a transmission starts
// @param img_cols           copied to a local static when a transmission starts
// @param img_chan           copied to a local static when a transmission starts
//
// NOTE(review): this listing skips numbered lines (687, 713, 717, 759, 798). The declaration
// of dequeueFSM, one case label and every state transition are not visible here.
668 void pTXPath(
669  hls::stream<NetworkWord> &soTHIS_Shl_Data,
670  hls::stream<NetworkMetaStream> &soNrc_meta,
671  hls::stream<NetworkWord> &sProcpToTxp_Data,
672  hls::stream<NetworkMetaStream> &sRxtoTx_Meta,
673  hls::stream<NodeId> &sDstNode_sig,
674  unsigned int *processed_word_tx,
675  ap_uint<32> *pi_rank,
676  img_meta_t * img_rows,
677  img_meta_t * img_cols,
678  img_meta_t * img_chan
679 )
680 {
681  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
682  //#pragma HLS DATAFLOW interval=1
683  #pragma HLS INLINE off
684 
685  //-- STATIC DATAFLOW VARIABLES ------------------------------------------
686  static NodeId dst_rank;
688  #pragma HLS reset variable=dequeueFSM
689 
690  //-- LOCAL VARIABLES ------------------------------------------------------
691  NetworkWord netWordTx;
692  NetworkMeta meta_in = NetworkMeta();
693  NetworkMetaStream meta_out_stream = NetworkMetaStream();
694 
 // NOTE(review): netWordTx is a non-static local, yet it carries a reset pragma.
695  #pragma HLS reset variable=dst_rank
696  #pragma HLS reset variable=netWordTx
697 
 // NOTE(review): the lcl_img_* copies are written below but never read in the visible lines.
698  static img_meta_t lcl_img_rows=0;
699  static img_meta_t lcl_img_cols=0;
700  static img_meta_t lcl_img_chan=0;
701  #pragma HLS reset variable=lcl_img_rows
702  #pragma HLS reset variable=lcl_img_cols
703  #pragma HLS reset variable=lcl_img_chan
704  static bool tx_ongoing = false;
705 
706  switch(dequeueFSM)
707  {
708  default:
709  case WAIT_FOR_META:
710  if(!sDstNode_sig.empty())
711  {
712  dst_rank = sDstNode_sig.read();
714  //WarpTransform app needs to be reset to process new rank
715  }
716  break;
718  //#if DEBUG_LEVEL == TRACE_ALL
719  printf("DEBUG in pTXPath: dequeueFSM=%d - WAIT_FOR_STREAM_PAIR, *processed_word_tx=%u\n",
720  dequeueFSM, *processed_word_tx);
721  // #endif
722  //-- Forward incoming chunk to SHELL
723  //WarpTransform-related
 // A full transmission of MIN_TX_LOOPS words is finished: restart the counter.
724  if (*processed_word_tx == MIN_TX_LOOPS) {
725  *processed_word_tx = 0;
726  tx_ongoing = false;
727  }
728 
 // Proceed only when both inputs have data and both outputs have room.
729  if (( !sProcpToTxp_Data.empty() && !sRxtoTx_Meta.empty()
730  && !soTHIS_Shl_Data.full() && !soNrc_meta.full() ))
731  {
732  netWordTx = sProcpToTxp_Data.read();
733 
734  // in case MTU=8 ensure tlast is set in WAIT_FOR_STREAM_PAIR and don't visit PROCESSING_PACKET
735  if (PACK_SIZE == 8)
736  {
737  netWordTx.tlast = 1;
738  }
739  soTHIS_Shl_Data.write(netWordTx);
740 
741  meta_in = sRxtoTx_Meta.read().tdata;
742  meta_out_stream.tlast = 1;
743  meta_out_stream.tkeep = 0xFF; //just to be sure
744 
745  //WarpTransform-related Forcing the SHELL to wait for tlast
746  meta_out_stream.tdata.len = 0;
747 
 // Reply addressing: ports of the received meta word are swapped.
748  meta_out_stream.tdata.dst_rank = dst_rank;
749  meta_out_stream.tdata.src_rank = (NodeId) *pi_rank;
750  meta_out_stream.tdata.dst_port = meta_in.src_port;
751  meta_out_stream.tdata.src_port = meta_in.dst_port;
752 
753  soNrc_meta.write(meta_out_stream);
754 
755  (*processed_word_tx)++;
756  printf("DEBUG: Checking netWordTx.tlast...\n");
757  if(netWordTx.tlast != 1)
758  {
760  }
761  }
762  break;
763 
764  case PROCESSING_PACKET:
765  //#if DEBUG_LEVEL == TRACE_ALL
766  printf("DEBUG in pTXPath: dequeueFSM=%d - PROCESSING_PACKET, *processed_word_tx=%u\n",
767  dequeueFSM, *processed_word_tx);
768  //#endif
769  // if (!tx_ongoing && !sInImgRows.empty() && !sInImgCols.empty() && !sInImgChan.empty())
770  // {
771  // img_rows = sInImgRows.read();
772  // img_cols = sInImgCols.read();
773  // img_chan = sInImgChan.read();
774  // tx_ongoing = true;
775  // }
 // Snapshot the image metadata once per transmission.
776  if (!tx_ongoing)
777  {
778  lcl_img_rows = *img_rows;
779  lcl_img_cols = *img_cols;
780  lcl_img_chan = *img_chan;
781  tx_ongoing = true;
782  }
783 
784  if( !sProcpToTxp_Data.empty() && !soTHIS_Shl_Data.full())
785  {
786  netWordTx = sProcpToTxp_Data.read();
787  // The packet is also terminated by us, based on the custom MTU set in PACK_SIZE.
788  // TODO: We can map PACK_SIZE to a dynamically assigned value either through MMIO or header
789  // in order to have a functional bitstream for any MTU size
790  (*processed_word_tx)++;
791 
792  // Either the incoming axi stream signals its normal termination (tlast), or the
793  // number of bytes sent so far (8 per word) is a multiple of PACK_SIZE.
794  if ((netWordTx.tlast == 1) || (((*processed_word_tx)*8) % PACK_SIZE == 0))
795  {
796  netWordTx.tlast = 1; // force tlast when only the PACK_SIZE condition holds
797  printf("DEBUG: A netWordTx.tlast=1 ... sProcpToTxp_Data.empty()==%u \n", sProcpToTxp_Data.empty());
799  }
800 
801  soTHIS_Shl_Data.write(netWordTx);
802  }
803  break;
804  }
805 }
806 
810 
811 #endif //_ROLE_WARPTRANSFORM_NETWORK_LIBRARY_HPP_
ap_uint< 32 > timeoutCnt
ap_uint< 32 > patternWriteNum
ap_uint< 1 > okay
ap_uint< 1 > decerr
ap_uint< 1 > slverr
ap_uint< 4 > tag
ap_uint< 1 > interr
#define IMGSIZE
#define FSM_WR_PAT_STS_B
Definition: harris.hpp:87
#define FSM_WR_PAT_LOAD
Definition: harris.hpp:84
#define FSM_WR_PAT_CMD
Definition: harris.hpp:83
#define TRANSFERS_PER_CHUNK
Definition: harris.hpp:133
#define BPERMDW_512
Definition: harris.hpp:120
#define FSM_WR_PAT_DATA
Definition: harris.hpp:85
#define PORTS_OPENED
Definition: harris.hpp:102
#define FSM_CHK_PROC_BYTES
Definition: harris.hpp:81
#define FSM_WR_PAT_STS_A
Definition: harris.hpp:86
#define FSM_WR_PAT_STS_C
Definition: harris.hpp:88
#define MIN_TX_LOOPS
#define BITS_PER_10GBITETHRNET_AXI_PACKET
#define INPUT_PTR_WIDTH
#define BYTES_PER_10GBITETHRNET_AXI_PACKET
#define TRANSFERS_PER_CHUNK_LAST_BURST
Definition: median_blur.cpp:57
uint8_t enqueueStrToDdrFSM
Definition: median_blur.cpp:50
uint8_t enqueueRxToStrFSM
Definition: median_blur.cpp:49
#define CYCLES_UNTIL_TIMEOUT
Definition: memtest.hpp:96
#define PacketFsmType
Definition: memtest.hpp:76
#define PROCESSING_PACKET
Definition: memtest.hpp:73
#define WAIT_FOR_STREAM_PAIR
Definition: memtest.hpp:72
#define WAIT_FOR_META
Definition: memtest.hpp:71
#define PACK_SIZE
Definition: config.h:51
uint8_t enqueueFSM
Definition: uppercase.cpp:54
uint8_t dequeueFSM
Definition: uppercase.cpp:55
#define FSM_WRITE_NEW_DATA
#define TRANSFORM_MATRIX_DIM
#define WARPTRANSFORM_COLS_HIGH_BIT
unsigned int img_meta_t
#define PROCESSING_PACKET_IMGMAT
#define PROCESSING_PACKET_TXMAT
#define WARPTRANSFORM_CHNNEL_LOW_BIT
#define WARPTRANSFORM_COMMANDS_HIGH_BIT
void pRXPathStreamToDDR(hls::stream< TMemWrd > &img_in_axi_stream, hls::stream< bool > &sMemBurstRx, hls::stream< DmCmd > &soMemWrCmdP0, hls::stream< DmSts > &siMemWrStsP0, hls::stream< TStreamMemWrd > &soMemWriteP0, hls::stream< bool > &sImageLoaded, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan)
Receive Path - Takes word-aligned data from the RX path stream and stores it to the DDR.
#define PUSH_REMAINING_META
#define WAIT_FOR_META_IMGMAT
void pRXPath(hls::stream< NetworkWord > &siSHL_This_Data, hls::stream< NetworkMetaStream > &siNrc_meta, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< ap_uint< INPUT_PTR_WIDTH >> &img_in_axi_stream, NetworkMetaStream meta_tmp, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, hls::stream< bool > &sImageLoaded)
Receive Path - From SHELL to THIS. FIXME: never checked, just substitute this one from DID.
#define WARPTRANSFORM_COLS_LOW_BIT
#define WARPTRANSFORM_CHNNEL_HIGH_BIT
void pTXPath(hls::stream< NetworkWord > &soTHIS_Shl_Data, hls::stream< NetworkMetaStream > &soNrc_meta, hls::stream< NetworkWord > &sProcpToTxp_Data, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< NodeId > &sDstNode_sig, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan)
Transmit Path - From THIS to SHELL.
void pPortAndDestionation(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, hls::stream< NodeId > &sDstNode_sig, ap_uint< 32 > *po_rx_ports)
pPortAndDestionation - Set up the port and the destination rank.
#define WARPTRANSFORM_ROWS_HIGH_BIT
void pRXPathNetToStream(hls::stream< NetworkWord > &siSHL_This_Data, hls::stream< NetworkMetaStream > &siNrc_meta, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< TMemWrd > &img_in_axi_stream, hls::stream< bool > &sMemBurstRx, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan, hls::stream< float > &sTxMatrix)
Receive Path - From SHELL to THIS. Function for accumulating a memory word and write it Not ready for...
#define WARPTRANSFORM_COMMANDS_LOW_BIT
#define WARPTRANSFORM_ROWS_LOW_BIT
@ WRPTX_TXMAT_CMD
@ WRPTX_IMG_CMD
ap_uint< 8 > NodeId
Definition: network.hpp:82
#define NETWORK_WORD_BIT_WIDTH
Definition: network.hpp:46
ap_uint< 1 > tlast
Definition: network.hpp:111
ap_uint< 8 > tkeep
Definition: network.hpp:110
NetworkMeta tdata
Definition: network.hpp:109
NetworkDataLength len
Definition: network.hpp:99
NodeId dst_rank
Definition: network.hpp:95
NodeId src_rank
Definition: network.hpp:97
NrcPort src_port
Definition: network.hpp:98
NrcPort dst_port
Definition: network.hpp:96
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51
A library for some common functionalities: memory interaction and performance counters.