cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
median_blur.cpp
Go to the documentation of this file.
1 
17 
36 #include "../include/median_blur.hpp"
37 #include "../include/xf_median_blur_config.h"
38 
39 #ifdef USE_HLSLIB_DATAFLOW
40 #include "../../../../../hlslib/include/hlslib/xilinx/Stream.h"
41 #include "../../../../../hlslib/include/hlslib/xilinx/Simulation.h"
42 #endif
43 
44 #ifdef USE_HLSLIB_STREAM
45 using hlslib::Stream;
46 #endif
47 using hls::stream;
48 
54 
// Size (in transfers) of the final burst, only defined for DDR builds.
// When TRANSFERS_PER_CHUNK_DIVEND is 0 the final burst is a full
// TRANSFERS_PER_CHUNK; otherwise it is TRANSFERS_PER_CHUNK_DIVEND transfers.
// NOTE(review): TRANSFERS_PER_CHUNK_DIVEND is presumably the remainder of the
// total transfer count divided by TRANSFERS_PER_CHUNK - confirm in the config header.
55 #ifdef ENABLE_DDR
56 #if TRANSFERS_PER_CHUNK_DIVEND == 0
57 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK
58 #else
59 #define TRANSFERS_PER_CHUNK_LAST_BURST TRANSFERS_PER_CHUNK_DIVEND
60 #endif
61 #endif
62 
64  ap_uint<32> *pi_rank,
65  ap_uint<32> *pi_size,
66  ap_uint<32> *po_rx_ports
67  )
68 {
69  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
70 #pragma HLS inline off
71 #pragma HLS pipeline II=1 //TODO: check not necessary
72  *po_rx_ports = PORTS_OPENED;
73 }
74 
75 
76 
77 
81 void storeWordToArray(uint64_t input, ap_uint<INPUT_PTR_WIDTH> img[IMG_PACKETS],
82  unsigned int *processed_word, unsigned int *image_loaded)
83 {
84  #pragma HLS INLINE
85 
86  img[*processed_word] = (ap_uint<INPUT_PTR_WIDTH>) input;
87  printf("DEBUG in storeWordToArray: input = %u = 0x%16.16llX \n", input, input);
88  printf("DEBUG in storeWordToArray: img[%u]= %u = 0x%16.16llX \n", *processed_word,
89  (uint64_t)img[*processed_word], (uint64_t)img[*processed_word]);
90  if (*processed_word < IMG_PACKETS-1) {
91  *processed_word++;
92  }
93  else {
94  printf("DEBUG in storeWordToArray: WARNING - you've reached the max depth of img[%u]. Will put *processed_word = 0.\n", *processed_word);
95  *processed_word = 0;
96  *image_loaded = 1;
97  }
98 }
99 
100 
101 
106  NetworkWord word,
107  #ifdef USE_HLSLIB_STREAM
108  Stream<Data_t_in, MIN_RX_LOOPS> &img_in_axi_stream,
109  #else
110  //stream<Data_t_in> &img_in_axi_stream,
111  stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
112  #endif
113  unsigned int *processed_word_rx,
114  unsigned int *processed_bytes_rx,
115  stream<bool> &sImageLoaded
116 )
117 {
118  #pragma HLS INLINE
119  Data_t_in v;
120  const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
121  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
122  unsigned int bytes_with_keep = 0;
123  //v = word.tdata;
124  for (unsigned int i=0; i<loop_cnt; i++) {
125  //#pragma HLS PIPELINE
126  //#pragma HLS UNROLL factor=loop_cnt
127  //printf("DEBUG: Checking: word.tkeep=%u >> %u = %u\n", word.tkeep.to_int(), i, (word.tkeep.to_int() >> i));
128  if ((word.tkeep >> i) == 0) {
129  printf("WARNING: value with tkeep=0 at i=%u\n", i);
130  continue;
131  }
132  v.data = (ap_uint<INPUT_PTR_WIDTH>)(word.tdata >> i*8);
133  v.keep = word.tkeep;
134  v.last = word.tlast;
135  //printf("DEBUG in storeWordToAxiStream: word = %u = 0x%16.16llX \n", v.data, v.data);
136  img_in_axi_stream.write(v.data);
137  bytes_with_keep += bytes_per_loop;
138  }
139  /*
140  if (*processed_word_rx < IMG_PACKETS-1) {
141  (*processed_word_rx)++;
142  }
143  else {
144  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_word_rx = 0.\n");
145  *processed_word_rx = 0;
146  }*/
147  if (*processed_bytes_rx < IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) {
148  (*processed_bytes_rx) += bytes_with_keep;
149  if (!sImageLoaded.full()) {
150  sImageLoaded.write(false);
151  }
152  }
153  else {
154  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_bytes_rx = 0.\n");
155  *processed_bytes_rx = 0;
156  if (!sImageLoaded.full()) {
157  sImageLoaded.write(true);
158  }
159  }
160 }
161 
162 
163 #ifdef ENABLE_DDR
164 
165 
179  stream<NetworkWord> &siSHL_This_Data,
180  stream<NetworkMetaStream> &siNrc_meta,
181  stream<NetworkMetaStream> &sRxtoTx_Meta,
182  //---- P0 Write Path (S2MM) -----------
183  stream<DmCmd> &soMemWrCmdP0,
184  stream<DmSts> &siMemWrStsP0,
185  stream<Axis<MEMDW_512> > &soMemWriteP0,
186  //---- P1 Memory mapped ---------------
187  NetworkMetaStream meta_tmp,
188  unsigned int *processed_bytes_rx,
189  stream<bool> &sImageLoaded
190  )
191 {
192  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
193  #pragma HLS INLINE off
194  #pragma HLS pipeline II=1
195  #pragma HLS interface ap_ctrl_none port=return
196 
197  //-- LOCAL VARIABLES ------------------------------------------------------
198  static NetworkWord netWord;
199 
200  static ap_uint<MEMDW_512> v = 0;
201  const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
202  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET*loop_cnt);
203  static unsigned int cur_transfers_per_chunk;
204  static unsigned int cnt_wr_stream, cnt_wr_img_loaded;
205  static stream<ap_uint<MEMDW_512>> img_in_axi_stream ("img_in_axi_stream");
206  const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
207  #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
208  static unsigned int ddr_addr_in;
209 
210  // FIXME: Initialize to zero
211  static ap_uint<32> patternWriteNum;
212  static ap_uint<32> timeoutCnt;
213 
214  static Axis<MEMDW_512> memP0;
215  static DmSts memWrStsP0;
216 
217  #pragma HLS reset variable=cur_transfers_per_chunk
218  #pragma HLS reset variable=cnt_wr_stream
219  #pragma HLS reset variable=cnt_wr_img_loaded
220  #pragma HLS reset variable=ddr_addr_in
221  #pragma HLS reset variable=patternWriteNum
222  #pragma HLS reset variable=timeoutCnt
223  #pragma HLS reset variable=memP0
224  #pragma HLS reset variable=memWrStsP0
225 
226  switch(enqueueFSM)
227  {
228  case WAIT_FOR_META:
229  printf("DEBUG in pRXPathDDR: enqueueFSM - WAIT_FOR_META, *processed_bytes_rx=%u\n",
230  *processed_bytes_rx);
231 
232  printf("TOTMEMDW_512=%u\n", TOTMEMDW_512);
233  printf("TRANSFERS_PER_CHUNK=%u\n", TRANSFERS_PER_CHUNK);
234  printf("TRANSFERS_PER_CHUNK_DIVEND=%u\n", TRANSFERS_PER_CHUNK_DIVEND);
235  printf("TRANSFERS_PER_CHUNK_LAST_BURST=%u\n", TRANSFERS_PER_CHUNK_LAST_BURST);
236  //exit(-1);
237 
238  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
239  {
240  meta_tmp = siNrc_meta.read();
241  meta_tmp.tlast = 1; //just to be sure...
242  sRxtoTx_Meta.write(meta_tmp);
243  if ((*processed_bytes_rx) == 0) {
244  memP0.tdata = 0;
245  memP0.tlast = 0;
246  memP0.tkeep = 0;
247  patternWriteNum = 0;
248  timeoutCnt = 0;
249  cur_transfers_per_chunk = 0;
250  netWord.tlast = 0;
251  netWord.tkeep = 0x0;
252  netWord.tdata = 0x0;
253  ddr_addr_in = 0;
254  cnt_wr_stream = 0;
255  v = 0;
256  memWrStsP0.tag = 0;
257  memWrStsP0.interr = 0;
258  memWrStsP0.decerr = 0;
259  memWrStsP0.slverr = 0;
260  memWrStsP0.okay = 0;
261  }
263  }
264  break;
265 
266  case PROCESSING_PACKET:
267  printf("DEBUG in pRXPathDDR: enqueueFSM - PROCESSING_PACKET, *processed_bytes_rx=%u\n",
268  *processed_bytes_rx);
269  if ( !siSHL_This_Data.empty() )
270  {
271  //-- Read incoming data chunk
272  netWord = siSHL_This_Data.read();
273  printf("DEBUG in pRXPathDDR: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
274  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
275  //enqueueFSM = LOAD_IN_STREAM;
276  if ((netWord.tkeep >> cnt_wr_stream) == 0) {
277  printf("WARNING: value with tkeep=0 at cnt_wr_stream=%u\n", cnt_wr_stream);
278  //continue;
279  }
280  v(cnt_wr_stream*64, (cnt_wr_stream+1)*64-1) = netWord.tdata(0,63);
281  if ((cnt_wr_stream++ == loop_cnt-1) || (netWord.tlast == 1)) {
282  if ( !img_in_axi_stream.full() ) {
283  // std::cout << std::hex << v << std::endl; // print hexadecimal value
284  img_in_axi_stream.write(v);
285  }
287  cnt_wr_stream = 0;
288  }
289  }
290  break;
291 
292  case FSM_CHK_PROC_BYTES:
293  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_CHK_PROC_BYTES, processed_bytes_rx=%u\n", *processed_bytes_rx);
294  if (*processed_bytes_rx < IMGSIZE-bytes_per_loop) {
295  (*processed_bytes_rx) += bytes_per_loop;
296  }
297  else {
298  printf("DEBUG in pRXPathDDR: WARNING - you've reached the max depth of img. Will put *processed_bytes_rx = 0.\n");
299  *processed_bytes_rx = 0;
300  }
302  break;
303 
304 case FSM_WR_PAT_CMD:
305  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_CMD\n");
306  if ( !soMemWrCmdP0.full() ) {
307  //-- Post a memory write command to SHELL/Mem/Mp0
308  if (*processed_bytes_rx == 0){
309  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK_LAST_BURST;
310  }
311  else {
312  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK;
313  }
314  if (patternWriteNum == 0) { // Write cmd only the fitst time of every burst
315  soMemWrCmdP0.write(DmCmd(ddr_addr_in * BPERMDW_512, cur_transfers_per_chunk*BPERMDW_512)); // Byte-addresable
316  }
317  ddr_addr_in++;
319  }
320  break;
321 
322 case FSM_WR_PAT_LOAD:
323  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_LOAD\n");
324  // -- Assemble a 512-bit memory word with input values from stream
325  if (patternWriteNum++ == cur_transfers_per_chunk - 1) {
326  patternWriteNum = 0;
328  }
329  else {
330  if(netWord.tlast == 1) {
332  }
333  else {
335  }
336  }
337  break;
338 
339 case FSM_WR_PAT_DATA:
340  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_DATA\n");
341  if (!soMemWriteP0.full()) {
342  //-- Write a memory word to DRAM
343  if (!img_in_axi_stream.empty()) {
344  memP0.tdata = img_in_axi_stream.read();
345  }
346  ap_uint<8> keepVal = 0xFF;
347  memP0.tkeep = (ap_uint<64>) (keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal);
348  if (patternWriteNum++ == cur_transfers_per_chunk - 1) {
349  printf("DEBUG: (patternWriteNum == cur_transfers_per_chunk -1) \n");
350  memP0.tlast = 1;
351  cnt_wr_img_loaded = 0;
352  timeoutCnt = 0;
353  patternWriteNum = 0;
355  }
356  else {
357  memP0.tlast = 0;
358  }
359  soMemWriteP0.write(memP0);
360  }
361  break;
362 
363 case FSM_WR_PAT_STS_A:
364  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_STS_A\n");
365  if (!siMemWrStsP0.empty()) {
366  printf(" 1 \n");
367  //-- Get the memory write status for Mem/Mp0
368  siMemWrStsP0.read(memWrStsP0);
370  }
371  else {
372  if (timeoutCnt++ >= CYCLES_UNTIL_TIMEOUT) {
373  memWrStsP0.tag = 0;
374  memWrStsP0.interr = 0;
375  memWrStsP0.decerr = 0;
376  memWrStsP0.slverr = 0;
377  memWrStsP0.okay = 0;
379  }
380  }
381  break;
382 
383 case FSM_WR_PAT_STS_B:
384  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_STS_B\n");
385  if ((memWrStsP0.tag == 0x0) && (memWrStsP0.okay == 1)) {
386  if ((*processed_bytes_rx) == 0) {
387  if (!sImageLoaded.full()) {
388  if (cnt_wr_img_loaded++ >= 1) {
389  sImageLoaded.write(false);
391  }
392  else {
393  sImageLoaded.write(true);
394  }
395  }
396  }
397  else {
399  }
400  }
401  else {
402  ; // TODO: handle errors on memWrStsP0
403  }
404  break;
405 
406 case FSM_WR_PAT_STS_C:
407  printf("DEBUG in pRXPathDDR: enqueueFSM - FSM_WR_PAT_STS_C\n");
408  if(netWord.tlast == 1) {
410  }
411  else {
413  }
414  break;
415 }
416 
417 }
418 
419 
420 
421 
422 
433  stream<NetworkWord> &siSHL_This_Data,
434  stream<NetworkMetaStream> &siNrc_meta,
435  stream<NetworkMetaStream> &sRxtoTx_Meta,
436  stream<ap_uint<MEMDW_512>> &img_in_axi_stream,
437  stream<bool> &sMemBurstRx
438  )
439 {
440  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
441  #pragma HLS INLINE off
442  #pragma HLS pipeline II=1
443  //#pragma HLS interface ap_ctrl_none port=return
444 
445  //-- LOCAL VARIABLES ------------------------------------------------------
446  static NetworkWord netWord;
447  const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
448  NetworkMetaStream meta_tmp;
449  static ap_uint<MEMDW_512> v = 0;
450  static unsigned int cnt_wr_stream = 0, cnt_wr_burst = 0;
451  static unsigned int processed_net_bytes_rx = 0;
452 // static stream<ap_uint<MEMDW_512>> img_in_axi_stream ("img_in_axi_stream");
453 // const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
454 // #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
455 
456  #pragma HLS reset variable=cnt_wr_stream
457  #pragma HLS reset variable=cnt_wr_burst
458 
459  switch(enqueueRxToStrFSM)
460  {
461  case WAIT_FOR_META:
462  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META\n");
463 
464  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
465  {
466  meta_tmp = siNrc_meta.read();
467  meta_tmp.tlast = 1; //just to be sure...
468  sRxtoTx_Meta.write(meta_tmp);
470  }
471  break;
472 
473  case PROCESSING_PACKET:
474  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
475  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
476  {
477  //-- Read incoming data chunk
478  netWord = siSHL_This_Data.read();
479  printf("DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
480  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
481  //enqueueRxToStrFSM = LOAD_IN_STREAM;
482  if ((netWord.tkeep >> cnt_wr_stream) == 0) {
483  printf("WARNING: value with tkeep=0 at cnt_wr_stream=%u\n", cnt_wr_stream);
484  //continue;
485  }
486  v(cnt_wr_stream*64, (cnt_wr_stream+1)*64-1) = netWord.tdata(0,63);
487  if ((cnt_wr_stream++ == loop_cnt-1) || (netWord.tlast == 1)) {
488  // std::cout << std::hex << v << std::endl; // print hexadecimal value
489  std::cout << "DEBUG in pRXPathNetToStream: Pushing to img_in_axi_stream :" << std::hex << v << std::endl;
490  img_in_axi_stream.write(v);
491  if ((cnt_wr_burst++ == TRANSFERS_PER_CHUNK-1) ||
492  ((processed_net_bytes_rx == IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) &&
493  (netWord.tlast == 1))) {
494  if (!sMemBurstRx.full()) {
495  sMemBurstRx.write(true);
496  }
497  cnt_wr_burst = 0;
498  }
499  if (netWord.tlast == 1) {
501  }
502  cnt_wr_stream = 0;
503  }
504  if (processed_net_bytes_rx == IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) {
505  processed_net_bytes_rx = 0;
506  }
507  else {
508  processed_net_bytes_rx += BYTES_PER_10GBITETHRNET_AXI_PACKET;
509  }
510  }
511  break;
512  }
513 }
514 
515 
516 
517 
518 
531  stream<ap_uint<MEMDW_512>> &img_in_axi_stream,
532  stream<bool> &sMemBurstRx,
533  //---- P0 Write Path (S2MM) -----------
534  stream<DmCmd> &soMemWrCmdP0,
535  stream<DmSts> &siMemWrStsP0,
536  stream<Axis<MEMDW_512> > &soMemWriteP0,
537  //---- P1 Memory mapped ---------------
538  stream<bool> &sImageLoaded
539  )
540 {
541  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
542  #pragma HLS INLINE off
543  #pragma HLS pipeline II=1
544  //#pragma HLS interface ap_ctrl_none port=return
545 
546  //-- LOCAL VARIABLES ------------------------------------------------------
547  static ap_uint<MEMDW_512> v = 0;
548  const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
549  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET*loop_cnt);
550  static unsigned int cur_transfers_per_chunk;
551  static unsigned int cnt_wr_stream, cnt_wr_img_loaded;
552 // static stream<ap_uint<MEMDW_512>> img_in_axi_stream ("img_in_axi_stream");
553 // const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
554 // #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
555  static unsigned int ddr_addr_in;
556 
557  // FIXME: Initialize to zero
558  static ap_uint<32> patternWriteNum;
559  static ap_uint<32> timeoutCnt;
560 
561  static Axis<MEMDW_512> memP0;
562  static DmSts memWrStsP0;
563  static unsigned int processed_bytes_rx;
564 
565  #pragma HLS reset variable=cur_transfers_per_chunk
566  #pragma HLS reset variable=cnt_wr_stream
567  #pragma HLS reset variable=cnt_wr_img_loaded
568  #pragma HLS reset variable=ddr_addr_in
569  #pragma HLS reset variable=patternWriteNum
570  #pragma HLS reset variable=timeoutCnt
571  #pragma HLS reset variable=memP0
572  #pragma HLS reset variable=memWrStsP0
573 
574  switch(enqueueStrToDdrFSM)
575  {
576  case WAIT_FOR_META:
577  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - WAIT_FOR_META, processed_bytes_rx=%u\n",
578  processed_bytes_rx);
579 
580  if ( !img_in_axi_stream.empty() )
581  {
582  if ((processed_bytes_rx) == 0) {
583  memP0.tdata = 0;
584  memP0.tlast = 0;
585  memP0.tkeep = 0;
586  patternWriteNum = 0;
587  timeoutCnt = 0;
588  cur_transfers_per_chunk = 0;
589  ddr_addr_in = 0;
590  cnt_wr_stream = 0;
591  v = 0;
592  memWrStsP0.tag = 0;
593  memWrStsP0.interr = 0;
594  memWrStsP0.decerr = 0;
595  memWrStsP0.slverr = 0;
596  memWrStsP0.okay = 0;
597  }
599  }
600  break;
601 
602  case FSM_CHK_PROC_BYTES:
603  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_CHK_PROC_BYTES, processed_bytes_rx=%u\n", processed_bytes_rx);
604  if (processed_bytes_rx < IMGSIZE-bytes_per_loop) {
605  (processed_bytes_rx) += bytes_per_loop;
606  }
607  else {
608  printf("DEBUG in pRXPathStreamToDDR: WARNING - you've reached the max depth of img. Will put processed_bytes_rx = 0.\n");
609  processed_bytes_rx = 0;
610  }
612  break;
613 
614 case FSM_WR_PAT_CMD:
615  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_CMD\n");
616  if ( !soMemWrCmdP0.full() ) {
617  //-- Post a memory write command to SHELL/Mem/Mp0
618  if (processed_bytes_rx == 0){
619  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK_LAST_BURST;
620  }
621  else {
622  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK;
623  }
624  if (patternWriteNum == 0) { // Write cmd only the fitst time of every burst
625  soMemWrCmdP0.write(DmCmd(ddr_addr_in * BPERMDW_512, cur_transfers_per_chunk*BPERMDW_512)); // Byte-addresable
626  }
627  ddr_addr_in++;
629  }
630  break;
631 
632 case FSM_WR_PAT_LOAD:
633  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_LOAD\n");
634  // -- Assemble a 512-bit memory word with input values from stream
635  if (patternWriteNum++ >= cur_transfers_per_chunk - 1) {
636  if (!sMemBurstRx.empty()) {
637  if (sMemBurstRx.read() == true) {
638  patternWriteNum = 0;
640  }
641  }
642  }
643  else {
644  if((processed_bytes_rx) == 0) {
646  }
647  else {
649  }
650  }
651  break;
652 
653 case FSM_WR_PAT_DATA:
654  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_DATA\n");
655  if (!soMemWriteP0.full()) {
656  //-- Write a memory word to DRAM
657  if (!img_in_axi_stream.empty()) {
658  memP0.tdata = img_in_axi_stream.read();
659  ap_uint<8> keepVal = 0xFF;
660  memP0.tkeep = (ap_uint<64>) (keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal);
661  if (patternWriteNum++ == cur_transfers_per_chunk - 1) {
662  printf("DEBUG: (patternWriteNum == cur_transfers_per_chunk -1) \n");
663  memP0.tlast = 1;
664  cnt_wr_img_loaded = 0;
665  timeoutCnt = 0;
666  patternWriteNum = 0;
668  }
669  else {
670  memP0.tlast = 0;
671  }
672  std::cout << "DEBUG in pRXPathStreamToDDR: Pushing to soMemWriteP0 :" << std::hex << memP0.tdata << std::endl;
673  soMemWriteP0.write(memP0);
674  }
675  }
676  break;
677 
678 case FSM_WR_PAT_STS_A:
679  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_A\n");
680  if (!siMemWrStsP0.empty()) {
681  printf(" 1 \n");
682  //-- Get the memory write status for Mem/Mp0
683  siMemWrStsP0.read(memWrStsP0);
685  }
686  else {
687  if (timeoutCnt++ >= CYCLES_UNTIL_TIMEOUT) {
688  memWrStsP0.tag = 0;
689  memWrStsP0.interr = 0;
690  memWrStsP0.decerr = 0;
691  memWrStsP0.slverr = 0;
692  memWrStsP0.okay = 0;
694  }
695  }
696  break;
697 
698 case FSM_WR_PAT_STS_B:
699  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_B\n");
700  if ((memWrStsP0.tag == 0x0) && (memWrStsP0.okay == 1)) {
701  if ((processed_bytes_rx) == 0) {
702  if (!sImageLoaded.full()) {
703  if (cnt_wr_img_loaded++ >= 1) {
704  sImageLoaded.write(false);
706  }
707  else {
708  sImageLoaded.write(true);
709  }
710  }
711  }
712  else {
714  }
715  }
716  else {
717  ; // TODO: handle errors on memWrStsP0
718  }
719  break;
720 
721 case FSM_WR_PAT_STS_C:
722  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_C\n");
723  if((processed_bytes_rx) == 0) {
725  }
726  else {
728  }
729  break;
730 }
731 
732 }
733 
734 
735 #endif // ENABLE_DDR
736 
737 
738 
// Receive-path process: one invocation performs one step of the FSM selected by
// enqueueFSM (declared outside this block).
//
//  - WAIT_FOR_META:     when a meta word is available on siNrc_meta and
//                       sRxtoTx_Meta is not full, the meta word is read, its
//                       tlast is forced to 1 and it is forwarded to sRxtoTx_Meta.
//  - PROCESSING_PACKET: when a data word is available on siSHL_This_Data and
//                       img_in_axi_stream is not full, the word is read and
//                       handed to storeWordToAxiStream() together with the
//                       counters and the sImageLoaded stream.
//
// Parameters:
//  siSHL_This_Data     input stream of network data words
//  siNrc_meta          input stream of network meta words
//  sRxtoTx_Meta        output stream receiving the forwarded meta word
//  img_in_axi_stream   output stream passed on to storeWordToAxiStream()
//  meta_tmp            by-value scratch variable used to hold the meta word
//  processed_word_rx   counter, printed here and passed to storeWordToAxiStream()
//  processed_bytes_rx  counter, printed here and passed to storeWordToAxiStream()
//  sImageLoaded        stream passed on to storeWordToAxiStream()
//
// NOTE(review): this listing contains no assignment to enqueueFSM. The embedded
// line numbers skip 784 and 799, which is presumably where the state
// transitions live - confirm against the original source file.
751 void pRXPath(
752  stream<NetworkWord> &siSHL_This_Data,
753  stream<NetworkMetaStream> &siNrc_meta,
754  stream<NetworkMetaStream> &sRxtoTx_Meta,
755  #ifdef USE_HLSLIB_STREAM
756  Stream<Data_t_in, MIN_RX_LOOPS> &img_in_axi_stream,
757  #else // !USE_HLSLIB_STREAM
758  //stream<Data_t_in> &img_in_axi_stream,
759  stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
760  #endif // USE_HLSLIB_STREAM
761  NetworkMetaStream meta_tmp,
762  unsigned int *processed_word_rx,
763  unsigned int *processed_bytes_rx,
764  stream<bool> &sImageLoaded
765  )
766 {
767  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
768  #pragma HLS INLINE off
769  #pragma HLS pipeline II=1
770 
771  //-- LOCAL VARIABLES ------------------------------------------------------
// Static, so the last word read is kept between invocations.
772  static NetworkWord netWord;
773 
774  switch(enqueueFSM)
775  {
776  case WAIT_FOR_META:
777  printf("DEBUG in pRXPath: enqueueFSM - WAIT_FOR_META, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
778  *processed_word_rx, *processed_bytes_rx);
// Only act when the read cannot stall and the write has room.
779  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
780  {
781  meta_tmp = siNrc_meta.read();
782  meta_tmp.tlast = 1; //just to be sure...
783  sRxtoTx_Meta.write(meta_tmp);
785  }
786  break;
787 
788  case PROCESSING_PACKET:
789  printf("DEBUG in pRXPath: enqueueFSM - PROCESSING_PACKET, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
790  *processed_word_rx, *processed_bytes_rx);
791  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
792  {
793  //-- Read incoming data chunk
794  netWord = siSHL_This_Data.read();
// The helper pushes the word into img_in_axi_stream and updates the counters.
795  storeWordToAxiStream(netWord, img_in_axi_stream, processed_word_rx, processed_bytes_rx,
796  sImageLoaded);
// End-of-packet check; the body of this branch is not visible in this listing.
797  if(netWord.tlast == 1)
798  {
800  }
801  }
802  break;
803  }
804 }
805 
806 
807 
819  stream<NetworkWord> &sRxpToTxp_Data,
820  #ifdef ENABLE_DDR
821  //---- P1 Memory mapped ---------------
822  membus_t *lcl_mem0,
824  #else // !ENABLE_DDR
825  #ifdef USE_HLSLIB_STREAM
826  Stream<Data_t_in, MIN_RX_LOOPS> &img_in_axi_stream,
827  Stream<Data_t_out, MIN_TX_LOOPS> &img_out_axi_stream,
828  #else // !USE_HLSLIB_STREAM
829  stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
830  stream<ap_uint<OUTPUT_PTR_WIDTH>> &img_out_axi_stream,
831  #endif // USE_HLSLIB_STREAM
832  #endif // ENABLE_DDR
833 
834 
835  stream<bool> &sImageLoaded
836  )
837 {
838  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
839  #pragma HLS INLINE off
840  #pragma HLS pipeline II=1
841 
842  //-- LOCAL VARIABLES ------------------------------------------------------
843  NetworkWord newWord;
844  uint16_t Thresh = 442;
845  float K = 0.04;
846  uint16_t k = K * (1 << 16); // Convert to Q0.16 format
847  static bool accel_called;
848  static unsigned int processed_word_proc;
849  static unsigned int timeoutCntAbs;
850  static unsigned int cnt_i;
851  static membus_t tmp;
852  ap_uint<OUTPUT_PTR_WIDTH> raw64;
853  Data_t_out temp;
854  #ifdef ENABLE_DDR
855  //static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
856  //#pragma HLS stream variable=img_out_axi_stream depth=9
857  static unsigned int ddr_addr_out;
858  #pragma HLS reset variable=ddr_addr_out
859  #endif
860 
861  #pragma HLS reset variable=accel_called
862  #pragma HLS reset variable=processed_word_proc
863  #pragma HLS reset variable=timeoutCntAbs
864  #pragma HLS reset variable=cnt_i
865  #pragma HLS reset variable=tmp
866  #pragma HLS reset variable=raw64
867  #pragma HLS reset variable=temp
868 
869  switch(MedianBlurFSM)
870  {
871  case WAIT_FOR_META:
872  printf("DEBUG in pProcPath: WAIT_FOR_META\n");
873  if (!sImageLoaded.empty())
874  {
875  if (sImageLoaded.read() == true) {
877  accel_called = false;
878  processed_word_proc = 0;
879  #ifdef ENABLE_DDR
880  ddr_addr_out = 0;
881  timeoutCntAbs = 0;
882  cnt_i = 0;
883  #endif
884  }
885  }
886  break;
887 
888  case PROCESSING_PACKET:
889  printf("DEBUG in pProcPath: PROCESSING_PACKET\n");
890  #ifndef ENABLE_DDR
891  if ( !img_in_axi_stream.empty() && !img_out_axi_stream.full() )
892  {
893  #endif
894  if (accel_called == false) {
895  #ifdef ENABLE_DDR
897  #else // ! ENABLE_DDR
898  #ifdef FAKE_MedianBlur
899  fakeMedianBlurAccelStream(img_in_axi_stream, img_out_axi_stream, MIN_RX_LOOPS, MIN_TX_LOOPS);
900  #else // !FAKE_MedianBlur
901  medianBlurAccelStream(img_in_axi_stream, img_out_axi_stream, WIDTH, HEIGHT);
902  #endif // FAKE_MedianBlur
903  #endif // ENABLE_DDR
904  accel_called = true;
906  }
907  #ifndef ENABLE_DDR
908  }
909  #endif
910  break;
911 
912  #ifdef ENABLE_DDR
914  printf("DEBUG in pProcPath: MEDIANBLUR_RETURN_RESULTS, ddr_addr_out=%u\n", ddr_addr_out);
915  if (accel_called == true) {
916 
917  printf("DEBUG in pProcPath: Accumulated %u net words (%u B) to complete a single DDR word\n",
919  tmp = lcl_mem1[ddr_addr_out];
920  ddr_addr_out++;
922  timeoutCntAbs = 0;
923  }
924  break;
925 
927  printf("DEBUG in pProcPath: MEDIANBLUR_RETURN_RESULTS_ABSORB_DDR_LAT [%u out of %u]\n", timeoutCntAbs, DDR_LATENCY);
928  if (timeoutCntAbs++ == DDR_LATENCY) {
929  MedianBlurFSM = MEDIANBLUR_RETURN_RESULTS_FWD; //MEDIANBLUR_RETURN_RESULTS_UNPACK;
930  cnt_i = 0;
931  }
932  break;
933  /*
934  case MEDIANBLUR_RETURN_RESULTS_UNPACK:
935  printf("DEBUG in pProcPath: MEDIANBLUR_RETURN_RESULTS_UNPACK, cnt_i=%u\n", cnt_i);
936  //for (unsigned int cnt_i=0; cnt_i<(MEMDW_512/OUTPUT_PTR_WIDTH); cnt_i++) {
937  #if OUTPUT_PTR_WIDTH == 64
938  raw64(0 ,63) = tmp(cnt_i*OUTPUT_PTR_WIDTH , cnt_i*OUTPUT_PTR_WIDTH+63);
939  #endif
940  if ( !img_out_axi_stream.full() ) {
941  img_out_axi_stream.write(raw64);
942  }
943  if (cnt_i == (MEMDW_512/OUTPUT_PTR_WIDTH) - 1) {
944  MedianBlurFSM = MEDIANBLUR_RETURN_RESULTS_FWD;
945  }
946  cnt_i++;
947  //}
948  break;
949  */
951  printf("DEBUG in pProcPath: MEDIANBLUR_RETURN_RESULTS_FWD\n");
952  //if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() ) {
953  if ( (cnt_i <= (MEMDW_512/OUTPUT_PTR_WIDTH) - 1) && !sRxpToTxp_Data.full() ) {
954 
955  //temp.data = img_out_axi_stream.read();
956  temp.data(0 ,63) = tmp(cnt_i*OUTPUT_PTR_WIDTH , cnt_i*OUTPUT_PTR_WIDTH+63);
957  if (processed_word_proc++ == MIN_TX_LOOPS-1) {
958  temp.last = 1;
960  }
961  else {
962  temp.last = 0;
963  }
964  //TODO: find why Vitis kernel does not set keep and last by itself
965  temp.keep = 255;
966  newWord = NetworkWord(temp.data, temp.keep, temp.last);
967  sRxpToTxp_Data.write(newWord);
968  cnt_i++;
969  }
970  else {
972  }
973 
974  break;
975 
976  #else // ! ENABLE_DDR
978  printf("DEBUG in pProcPath: MEDIANBLUR_RETURN_RESULTS\n");
979  if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() )
980  {
981 
982  temp.data = img_out_axi_stream.read();
983  if ( img_out_axi_stream.empty() )
984  //if (processed_word_proc++ == MIN_TX_LOOPS-1)
985  {
986  temp.last = 1;
988  accel_called = false;
989  }
990  else
991  {
992  temp.last = 0;
993  }
994  //TODO: find why Vitis kernel does not set keep and last by itself
995  temp.keep = 255;
996  newWord = NetworkWord(temp.data, temp.keep, temp.last);
997  sRxpToTxp_Data.write(newWord);
998  }
999  break;
1000  #endif // ENABLE_DDR
1001  } // end switch
1002 
1003 }
1004 
1005 
// Debug counter of words read from sRxpToTxp_Data in the PROCESSING_PACKET state
// of pTXPath. It is post-incremented inside the argument list of a debug printf
// there and has no other visible use in this file section.
1006 unsigned int sRxpToTxp_DataCounter = 0;
1007 
1008 
// Transmit-path process: one invocation performs one step of the FSM selected by
// dequeueFSM (declared outside this block).
//
//  - WAIT_FOR_META (also the default case): first word of a packet. Resets
//    *processed_word_tx when it has reached MIN_TX_LOOPS. When a data word and a
//    meta word are both available and both outputs have room, it forwards the
//    data word to soTHIS_Shl_Data and emits a reply meta word on soNrc_meta
//    whose destination is the source of the received meta word.
//  - PROCESSING_PACKET: remaining words of a packet. Forwards one data word and
//    sets its tlast when the word already carries tlast or when the number of
//    bytes sent so far (words * 8) is a multiple of PACK_SIZE.
//
// Parameters:
//  soTHIS_Shl_Data    output stream of network data words
//  soNrc_meta         output stream of network meta words
//  sRxpToTxp_Data     input stream of data words to transmit
//  sRxtoTx_Meta       input stream of meta words forwarded by the receive path
//  processed_word_tx  running count of words sent, updated in place
//  pi_rank            rank of this node, used as src_rank of the outgoing meta
//  pi_size            only referenced in commented-out code in this listing
//
// NOTE(review): this listing contains no assignment to dequeueFSM. The embedded
// line numbers skip 1100 and 1120, which is presumably where the state
// transitions live - confirm against the original source file.
1020 void pTXPath(
1021  stream<NetworkWord> &soTHIS_Shl_Data,
1022  stream<NetworkMetaStream> &soNrc_meta,
1023  stream<NetworkWord> &sRxpToTxp_Data,
1024  stream<NetworkMetaStream> &sRxtoTx_Meta,
1025  unsigned int *processed_word_tx,
1026  ap_uint<32> *pi_rank,
1027  ap_uint<32> *pi_size
1028  )
1029 {
1030  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
1031  #pragma HLS INLINE off
1032  #pragma HLS pipeline II=1
1033 
1034  //-- STATIC DATAFLOW VARIABLES ------------------------------------------
// dst_rank is declared and reset but only appears in a comment below.
1035  static NodeId dst_rank;
1036 
1037  //-- LOCAL VARIABLES ------------------------------------------------------
1038  NetworkWord netWordTx;
1039  NetworkMeta meta_in = NetworkMeta();
1040  NetworkMetaStream meta_out_stream = NetworkMetaStream();
1041 
1042  #pragma HLS reset variable=dst_rank
1043  #pragma HLS reset variable=netWordTx
1044 
1045  switch(dequeueFSM)
1046  {
// Any unknown state value is handled like WAIT_FOR_META.
1047  default:
1048  case WAIT_FOR_META:
1049  printf("DEBUG in pTXPath: dequeueFSM=%d - WAIT_FOR_META, *processed_word_tx=%u\n",
1050  dequeueFSM, *processed_word_tx);
1051  //-- Forward incoming chunk to SHELL
// Restart the word count once MIN_TX_LOOPS words have been sent.
1052  if (*processed_word_tx == MIN_TX_LOOPS) {
1053  *processed_word_tx = 0;
1054  }
1055  /*
1056  printf("!sRxpToTxp_Data.empty()=%d\n", !sRxpToTxp_Data.empty());
1057  printf("!sRxtoTx_Meta.empty()=%d\n", !sRxtoTx_Meta.empty());
1058  printf("!soTHIS_Shl_Data.full()=%d\n", !soTHIS_Shl_Data.full());
1059  printf("!soNrc_meta.full()=%d\n", !soNrc_meta.full());
1060  */
1061 
// Proceed only when both inputs have data and both outputs have room.
1062  if (( !sRxpToTxp_Data.empty() && !sRxtoTx_Meta.empty()
1063  && !soTHIS_Shl_Data.full() && !soNrc_meta.full() ))
1064  {
1065  netWordTx = sRxpToTxp_Data.read();
1066 
1067  // in case MTU=8 ensure tlast is set in WAIT_FOR_META and don't visit PROCESSING_PACKET
1068  if (PACK_SIZE == 8)
1069  {
1070  netWordTx.tlast = 1;
1071  }
1072  soTHIS_Shl_Data.write(netWordTx);
1073 
// Build the reply meta from the meta word received on the RX side.
1074  meta_in = sRxtoTx_Meta.read().tdata;
1075  //NetworkMetaStream meta_out_stream = NetworkMetaStream();
1076  meta_out_stream.tlast = 1;
1077  meta_out_stream.tkeep = 0xFF; //just to be sure
1078 
// Reply to the sender: the outgoing destination rank is the incoming source rank.
1079  meta_out_stream.tdata.dst_rank = meta_in.src_rank; //dst_rank; //(*pi_rank + 1) % *pi_size;
1080  //meta_out_stream.tdata.dst_port = DEFAULT_TX_PORT;
1081  meta_out_stream.tdata.src_rank = (NodeId) *pi_rank;
1082 
1083  // Forcing the SHELL to wait for tlast
1084  meta_out_stream.tdata.len = 0;
1085 
1086  //meta_out_stream.tdata.src_port = DEFAULT_RX_PORT;
1087  //printf("rank: %d; size: %d; \n", (int) *pi_rank, (int) *pi_size);
1088  //printf("meat_out.dst_rank: %d\n", (int) meta_out_stream.tdata.dst_rank);
// Ports are swapped relative to the incoming meta word.
1089  meta_out_stream.tdata.dst_port = meta_in.src_port;
1090  meta_out_stream.tdata.src_port = meta_in.dst_port;
1091 
1092 
1093  //meta_out_stream.tdata.len = meta_in.len;
1094  soNrc_meta.write(meta_out_stream);
1095 
1096  (*processed_word_tx)++;
1097  printf("DEBUGGGG: Checking netWordTx.tlast...\n");
// More words follow for this packet; the body of this branch is not visible in this listing.
1098  if(netWordTx.tlast != 1)
1099  {
1101  }
1102  }
1103  break;
1104 
1105  case PROCESSING_PACKET:
1106  printf("DEBUG in pTXPath: dequeueFSM=%d - PROCESSING_PACKET, *processed_word_tx=%u\n",
1107  dequeueFSM, *processed_word_tx);
1108  if( !sRxpToTxp_Data.empty() && !soTHIS_Shl_Data.full())
1109  {
// The debug counter is advanced as a side effect of evaluating this printf's arguments.
1110  printf("DEBUGGGG: Reading sRxpToTxp_Data %u\n", sRxpToTxp_DataCounter++);
1111  netWordTx = sRxpToTxp_Data.read();
1112 
1113  (*processed_word_tx)++;
1114 
1115  // This is a normal termination of the axi stream from vitis functions
// Each word counts as 8 bytes, so tlast is also forced at every PACK_SIZE-byte boundary.
1116  if ((netWordTx.tlast == 1) || (((*processed_word_tx)*8) % PACK_SIZE == 0))
1117  {
1118  netWordTx.tlast = 1; // in case it is the 2nd or
1119  printf("DEBUGGGG: A netWordTx.tlast=1 ... sRxpToTxp_Data.empty()==%u \n", sRxpToTxp_Data.empty());
1121  }
1122 
1123  // This is our own termination based on the custom MTU we have set in PACK_SIZE.
1124  // TODO: We can map PACK_SIZE to a dynamically assigned value either through MMIO or header
1125  // in order to have a functional bitstream for any MTU size
1126  //if (((*processed_word_tx)*8) % PACK_SIZE == 0)
1127  //{
1128  // printf("DEBUGGGG: B (*processed_word_tx)*8) % PACK_SIZE == 0 ...\n");
1129  // netWordTx.tlast = 1;
1130  // dequeueFSM = WAIT_FOR_META;
1131  //}
1132 
1133  soTHIS_Shl_Data.write(netWordTx);
1134  }
1135  break;
1136  }
1137 }
1138 
1139 
1140 
1147 
1148  ap_uint<32> *pi_rank,
1149  ap_uint<32> *pi_size,
1150  //------------------------------------------------------
1151  //-- SHELL / This / UDP/TCP Interfaces
1152  //------------------------------------------------------
1153  stream<NetworkWord> &siSHL_This_Data,
1154  stream<NetworkWord> &soTHIS_Shl_Data,
1155  stream<NetworkMetaStream> &siNrc_meta,
1156  stream<NetworkMetaStream> &soNrc_meta,
1157  ap_uint<32> *po_rx_ports
1158 
1159  #ifdef ENABLE_DDR
1160  ,
1161  //------------------------------------------------------
1162  //-- SHELL / Role / Mem / Mp0 Interface
1163  //------------------------------------------------------
1164  //---- Read Path (MM2S) ------------
1165  // stream<DmCmd> &soMemRdCmdP0,
1166  // stream<DmSts> &siMemRdStsP0,
1167  // stream<Axis<MEMDW_512 > > &siMemReadP0,
1168  //---- Write Path (S2MM) -----------
1169  stream<DmCmd> &soMemWrCmdP0,
1170  stream<DmSts> &siMemWrStsP0,
1171  stream<Axis<MEMDW_512> > &soMemWriteP0,
1172  //------------------------------------------------------
1173  //-- SHELL / Role / Mem / Mp1 Interface
1174  //------------------------------------------------------
1175  membus_t *lcl_mem0,
1177  #endif
1178  )
1179 {
1180 
1181 
1182 //-- DIRECTIVES FOR THE BLOCK ---------------------------------------------
1183 //#pragma HLS INTERFACE ap_ctrl_none port=return
1184 
1185 //#pragma HLS INTERFACE ap_stable port=piSHL_This_MmioEchoCtrl
1186 
1187 #pragma HLS INTERFACE axis register both port=siSHL_This_Data
1188 #pragma HLS INTERFACE axis register both port=soTHIS_Shl_Data
1189 
1190 #pragma HLS INTERFACE axis register both port=siNrc_meta
1191 #pragma HLS INTERFACE axis register both port=soNrc_meta
1192 
1193 #pragma HLS INTERFACE ap_ovld register port=po_rx_ports name=poROL_NRC_Rx_ports
1194 
1195 #if HLS_VERSION < 20211
1196 #pragma HLS INTERFACE ap_stable register port=pi_rank name=piFMC_ROL_rank
1197 #pragma HLS INTERFACE ap_stable register port=pi_size name=piFMC_ROL_size
1198 #elif HLS_VERSION >= 20211
1199  #pragma HLS stable variable=pi_rank
1200  #pragma HLS stable variable=pi_size
1201 #else
1202  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
1203  exit(-1);
1204 #endif
1205 
1206 #ifdef ENABLE_DDR
1207 
1208 // Bundling: SHELL / Role / Mem / Mp0 / Read Interface
1209 // #pragma HLS INTERFACE axis register both port=soMemRdCmdP0
1210 // #pragma HLS INTERFACE axis register both port=siMemRdStsP0
1211 // #pragma HLS INTERFACE axis register both port=siMemReadP0
1212 
1213 // #pragma HLS DATA_PACK variable=soMemRdCmdP0 instance=soMemRdCmdP0
1214 // #pragma HLS DATA_PACK variable=siMemRdStsP0 instance=siMemRdStsP0
1215 
1216 // Bundling: SHELL / Role / Mem / Mp0 / Write Interface
1217 #pragma HLS INTERFACE axis register both port=soMemWrCmdP0
1218 #pragma HLS INTERFACE axis register both port=siMemWrStsP0
1219 #pragma HLS INTERFACE axis register both port=soMemWriteP0
1220 
1221 #if HLS_VERSION <= 20201
1222 #pragma HLS DATA_PACK variable=soMemWrCmdP0 instance=soMemWrCmdP0
1223 #pragma HLS DATA_PACK variable=siMemWrStsP0 instance=siMemWrStsP0
1224 #elif HLS_VERSION >= 20211
1225 #pragma HLS aggregate variable=soMemWrCmdP0 compact=bit
1226 #pragma HLS aggregate variable=siMemWrStsP0 compact=bit
1227 #else
1228  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
1229  exit(-1);
1230 #endif
1231 
1232 const unsigned int ddr_mem_depth = TOTMEMDW_512;
1233 const unsigned int ddr_latency = DDR_LATENCY;
1234 
1235 
1236 // When max burst size is 1KB, with 512bit bus we get 16 burst transactions
1237 // When max burst size is 4KB, with 512bit bus we get 64 burst transactions
1238 const unsigned int max_axi_rw_burst_length = 64;
1239 
1240 // Mapping LCL_MEM0 interface to moMEM_Mp1 channel
1241 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem0 bundle=moMEM_Mp1\
1242  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
1243  num_read_outstanding=16 num_write_outstanding=16 latency=ddr_latency
1244 
1245 // Mapping LCL_MEM1 interface to moMEM_Mp1 channel
1246 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem1 bundle=moMEM_Mp1 \
1247  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
1248  num_read_outstanding=16 num_write_outstanding=16 latency=ddr_latency
1249 
1250 #endif
1251 
1252  #pragma HLS DATAFLOW
1253 
1254  //-- LOCAL VARIABLES ------------------------------------------------------
1255  NetworkMetaStream meta_tmp = NetworkMetaStream();
1256  static stream<NetworkWord> sRxpToTxp_Data("sRxpToTxP_Data"); // FIXME: works even with no static
1257  static stream<NetworkMetaStream> sRxtoTx_Meta("sRxtoTx_Meta");
1258  static unsigned int processed_word_rx;
1259  static unsigned int processed_bytes_rx;
1260  static unsigned int processed_word_tx = 0;
1261  static stream<bool> sImageLoaded("sImageLoaded");
1262  static bool skip_read;
1263  static bool write_chunk_to_ddr_pending;
1264  static bool ready_to_accept_new_data;
1265  static bool signal_init;
1266  const int tot_transfers = TOT_TRANSFERS;
1267 #ifdef ENABLE_DDR
1268  static stream<ap_uint<MEMDW_512>> img_in_axi_stream ("img_in_axi_stream");
1269  const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
1270  static stream<bool> sMemBurstRx("sMemBurstRx");
1271 
1272 #else
1273  const int img_in_axi_stream_depth = MIN_RX_LOOPS;
1274  const int img_out_axi_stream_depth = MIN_TX_LOOPS;
1275 #ifdef USE_HLSLIB_DATAFLOW
1276  static hlslib::Stream<Data_t_in, MIN_RX_LOOPS> img_in_axi_stream ("img_in_axi_stream");
1277  static hlslib::Stream<Data_t_out, MIN_TX_LOOPS> img_out_axi_stream ("img_out_axi_stream");
1278 #else
1279  static stream<ap_uint<INPUT_PTR_WIDTH>> img_in_axi_stream ("img_in_axi_stream");
1280  static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
1281 #endif
1282 #endif
1283 
1284 
1285 //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
1286 #pragma HLS stream variable=sRxtoTx_Meta depth=tot_transfers
1287 #pragma HLS reset variable=enqueueFSM
1288 #pragma HLS reset variable=dequeueFSM
1289 #pragma HLS reset variable=MedianBlurFSM
1290 #pragma HLS reset variable=processed_word_rx
1291 #pragma HLS reset variable=processed_word_tx
1292 #pragma HLS reset variable=processed_bytes_rx
1293 //#pragma HLS reset variable=image_loaded
1294 #pragma HLS stream variable=sImageLoaded depth=1
1295 #pragma HLS reset variable=skip_read
1296 #pragma HLS reset variable=write_chunk_to_ddr_pending
1297 //#pragma HLS stream variable=sWriteChunkToDdrPending depth=2
1298 #pragma HLS reset variable=ready_to_accept_new_data
1299 #pragma HLS reset variable=signal_init
1300 
1301 #ifdef ENABLE_DDR
1302 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
1303 #pragma HLS stream variable=sProcessed_bytes_rx depth=img_in_axi_stream_depth
1304 #else
1305 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
1306 #pragma HLS stream variable=img_out_axi_stream depth=img_out_axi_stream_depth
1307 #endif
1308 
1309 
1310 #ifdef USE_HLSLIB_DATAFLOW
1326  // Dataflow functions running in parallel
1327  HLSLIB_DATAFLOW_INIT();
1328 
1329  HLSLIB_DATAFLOW_FUNCTION(pRXPath,
1330  siSHL_This_Data,
1331  siNrc_meta,
1332  sRxtoTx_Meta,
1333  img_in_axi_stream,
1334  meta_tmp,
1335  &processed_word_rx,
1336  &processed_bytes_rx,
1337  //&image_loaded
1338  sImageLoaded
1339  );
1340 
1341  HLSLIB_DATAFLOW_FUNCTION(pProcPath,
1342  sRxpToTxp_Data,
1343 #ifdef ENABLE_DDR
1344  lcl_mem0,
1345  lcl_mem1,
1346 #else
1347  img_in_axi_stream,
1348  img_out_axi_stream,
1349 #endif
1350  &image_loaded
1351  );
1352 
1353  HLSLIB_DATAFLOW_FUNCTION(pTXPath,
1354  soTHIS_Shl_Data,
1355  soNrc_meta,
1356  sRxpToTxp_Data,
1357  sRxtoTx_Meta,
1358  &processed_word_tx,
1359  pi_rank,
1360  pi_size);
1361 
1362  HLSLIB_DATAFLOW_FINALIZE();
1363 
1364 #else // !USE_HLSLIB_DATAFLOW
1365 
1367  pi_rank,
1368  pi_size,
1369  po_rx_ports
1370  );
1371 
1372 #ifdef ENABLE_DDR
1373  /*
1374  pRXPathDDR(
1375  siSHL_This_Data,
1376  siNrc_meta,
1377  sRxtoTx_Meta,
1378  //---- P0 Write Path (S2MM) -----------
1379  soMemWrCmdP0,
1380  siMemWrStsP0,
1381  soMemWriteP0,
1382  // ---- P1 Memory mapped --------------
1383  meta_tmp,
1384  &processed_bytes_rx,
1385  sImageLoaded
1386  );
1387  */
1389  siSHL_This_Data,
1390  siNrc_meta,
1391  sRxtoTx_Meta,
1392  img_in_axi_stream,
1393  sMemBurstRx
1394  );
1395 
1397  img_in_axi_stream,
1398  sMemBurstRx,
1399  //---- P0 Write Path (S2MM) -----------
1400  soMemWrCmdP0,
1401  siMemWrStsP0,
1402  soMemWriteP0,
1403  //---- P1 Memory mapped ---------------
1404  //&processed_bytes_rx,
1405  sImageLoaded
1406  );
1407 
1408 
1409 
1410  #else // !ENABLE_DDR
1411 
1412  pRXPath(
1413  siSHL_This_Data,
1414  siNrc_meta,
1415  sRxtoTx_Meta,
1416  img_in_axi_stream,
1417  meta_tmp,
1418  &processed_word_rx,
1419  &processed_bytes_rx,
1420  sImageLoaded
1421  );
1422 
1423 #endif // ENABLE_DDR
1424 
1425  pProcPath(
1426  sRxpToTxp_Data,
1427 #ifdef ENABLE_DDR
1428  lcl_mem0,
1429  lcl_mem1,
1430 #else
1431  img_in_axi_stream,
1432  img_out_axi_stream,
1433 #endif
1434  sImageLoaded
1435  );
1436 
1437  pTXPath(
1438  soTHIS_Shl_Data,
1439  soNrc_meta,
1440  sRxpToTxp_Data,
1441  sRxtoTx_Meta,
1442  &processed_word_tx,
1443  pi_rank,
1444  pi_size
1445  );
1446 
1447 #endif // USE_HLSLIB_DATAFLOW
1448 }
1449 
1450 
ap_uint< 32 > timeoutCnt
ap_uint< 32 > patternWriteNum
ap_uint< 1 > okay
ap_uint< 1 > decerr
ap_uint< 1 > slverr
ap_uint< 4 > tag
ap_uint< 1 > interr
#define WIDTH
#define IMGSIZE
#define IMG_PACKETS
#define HEIGHT
#define TRANSFERS_PER_CHUNK_DIVEND
Definition: harris.hpp:134
#define FSM_WR_PAT_STS_B
Definition: harris.hpp:87
#define FSM_WR_PAT_LOAD
Definition: harris.hpp:84
#define FSM_WR_PAT_CMD
Definition: harris.hpp:83
#define TRANSFERS_PER_CHUNK
Definition: harris.hpp:133
#define BPERMDW_512
Definition: harris.hpp:120
#define KWPERMDW_512
Definition: harris.hpp:121
#define FSM_WR_PAT_DATA
Definition: harris.hpp:85
#define PORTS_OPENED
Definition: harris.hpp:102
#define FSM_CHK_PROC_BYTES
Definition: harris.hpp:81
#define FSM_WR_PAT_STS_A
Definition: harris.hpp:86
#define FSM_WR_PAT_STS_C
Definition: harris.hpp:88
#define MIN_TX_LOOPS
#define BITS_PER_10GBITETHRNET_AXI_PACKET
#define MIN_RX_LOOPS
#define INPUT_PTR_WIDTH
#define OUTPUT_PTR_WIDTH
#define BYTES_PER_10GBITETHRNET_AXI_PACKET
#define TOT_TRANSFERS
Definition: config.h:70
void pPortAndDestionation(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, ap_uint< 32 > *po_rx_ports)
Definition: median_blur.cpp:63
uint8_t enqueueFSM
Definition: median_blur.cpp:51
#define MEDIANBLUR_RETURN_RESULTS_ABSORB_DDR_LAT
Definition: median_blur.hpp:72
#define TRANSFERS_PER_CHUNK_LAST_BURST
Definition: median_blur.cpp:57
void pTXPath(stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &soNrc_meta, stream< NetworkWord > &sRxpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size)
Transmit Path - From THIS to SHELL.
uint8_t MedianBlurFSM
Definition: median_blur.cpp:53
void storeWordToAxiStream(NetworkWord word, stream< ap_uint< 8 >> &img_in_axi_stream, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
Store a net word to a local AXI stream.
uint8_t enqueueStrToDdrFSM
Definition: median_blur.cpp:50
#define MEDIANBLUR_RETURN_RESULTS_FWD
Definition: median_blur.hpp:74
unsigned int sRxpToTxp_DataCounter
uint8_t dequeueFSM
Definition: median_blur.cpp:52
uint8_t enqueueRxToStrFSM
Definition: median_blur.cpp:49
void pRXPathDDROLD(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 > > &soMemWriteP0, NetworkMetaStream meta_tmp, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
Receive Path - From SHELL to THIS.
void pRXPath(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< ap_uint< 8 >> &img_in_axi_stream, NetworkMetaStream meta_tmp, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
Receive Path - From SHELL to THIS.
void storeWordToArray(uint64_t input, ap_uint< 8 > img[1024 *1024/((64/8))], unsigned int *processed_word, unsigned int *image_loaded)
Store a net word to local memory.
Definition: median_blur.cpp:81
void pRXPathNetToStream(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< ap_uint< 512 >> &img_in_axi_stream, stream< bool > &sMemBurstRx)
Receive Path - From SHELL to THIS.
void median_blur(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 > > &soMemWriteP0, membus_t *lcl_mem0, membus_t *lcl_mem1)
Main process of the MedianBlur Application directives.
void pProcPath(stream< NetworkWord > &sRxpToTxp_Data, membus_t *lcl_mem0, membus_t *lcl_mem1, stream< bool > &sImageLoaded)
Processing Path - Main processing FSM for Vitis kernels.
#define MEDIANBLUR_RETURN_RESULTS
Definition: median_blur.hpp:71
void pRXPathStreamToDDR(stream< ap_uint< 512 >> &img_in_axi_stream, stream< bool > &sMemBurstRx, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 > > &soMemWriteP0, stream< bool > &sImageLoaded)
Receive Path - From SHELL to THIS.
void fakeMedianBlurAccelStream(hls::stream< ap_axiu< 8, 0, 0, 0 > > &img_in_axi_stream, hls::stream< ap_axiu< 64, 0, 0, 0 > > &img_out_axi_stream, unsigned int min_rx_loops, unsigned int min_tx_loops)
void medianBlurAccelMem(membus_t *img_inp, membus_t *img_out, int rows, int cols)
Top-level accelerated function of the MedianBlur Application with array I/F.
void medianBlurAccelStream(hls::stream< ap_uint< 8 >> &img_in_axi_stream, hls::stream< ap_uint< 64 >> &img_out_axi_stream, int rows, int cols)
Top-level accelerated function of the MedianBlur Application with array I/F.
#define MEMDW_512
Definition: memtest.hpp:90
#define CYCLES_UNTIL_TIMEOUT
Definition: memtest.hpp:96
#define PacketFsmType
Definition: memtest.hpp:76
#define Data_t_out
Definition: memtest.cpp:30
#define DDR_LATENCY
Definition: memtest.hpp:98
#define Data_t_in
Definition: memtest.cpp:29
#define ENABLE_DDR
Definition: memtest.hpp:42
membus_512_t membus_t
Definition: memtest.hpp:92
#define PROCESSING_PACKET
Definition: memtest.hpp:73
#define TOTMEMDW_512
Definition: memtest.hpp:93
#define WAIT_FOR_META
Definition: memtest.hpp:71
membus_t lcl_mem0[16384]
membus_t lcl_mem1[16384]
#define PACK_SIZE
Definition: config.h:51
string input
Definition: test.py:9
ap_uint< 8 > NodeId
Definition: network.hpp:82
ap_uint<(D+7)/8 > tkeep
Definition: axi_utils.hpp:49
ap_uint< 1 > tlast
Definition: axi_utils.hpp:50
ap_uint< D > tdata
Definition: axi_utils.hpp:48
ap_uint< 1 > tlast
Definition: network.hpp:111
ap_uint< 8 > tkeep
Definition: network.hpp:110
NetworkMeta tdata
Definition: network.hpp:109
NetworkDataLength len
Definition: network.hpp:99
NodeId dst_rank
Definition: network.hpp:95
NodeId src_rank
Definition: network.hpp:97
NrcPort src_port
Definition: network.hpp:98
NrcPort dst_port
Definition: network.hpp:96
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51