cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
gammacorrection.cpp
Go to the documentation of this file.
1 
20 #include "../include/gammacorrection.hpp"
21 #include "../include/xf_gammacorrection_config.h"
22 
23 #ifdef USE_HLSLIB_DATAFLOW
24 #include "../../../../../hlslib/include/hlslib/xilinx/Stream.h"
25 #include "../../../../../hlslib/include/hlslib/xilinx/Simulation.h"
26 #endif
27 
28 #ifdef USE_HLSLIB_STREAM
29 using hlslib::Stream;
30 #endif
31 using hls::stream;
32 
33 //#define Data_t ap_uint<INPUT_PTR_WIDTH>
34 //#define Data_t NetworkWord
35 #define Data_t ap_axiu<INPUT_PTR_WIDTH, 0, 0, 0>
36 
40 
41 
42 
43 
47 void storeWordToArray(uint64_t input, ap_uint<INPUT_PTR_WIDTH> img[IMG_PACKETS],
48  unsigned int *processed_word, unsigned int *image_loaded)
49 {
50  #pragma HLS INLINE
51 
52  img[*processed_word] = (ap_uint<INPUT_PTR_WIDTH>) input;
53  printf("DEBUG in storeWordToArray: input = %u = 0x%16.16llX \n", input, input);
54  printf("DEBUG in storeWordToArray: img[%u]= %u = 0x%16.16llX \n", *processed_word,
55  (uint64_t)img[*processed_word], (uint64_t)img[*processed_word]);
56  if (*processed_word < IMG_PACKETS-1) {
57  *processed_word++;
58  }
59  else {
60  printf("DEBUG in storeWordToArray: WARNING - you've reached the max depth of img[%u]. Will put *processed_word = 0.\n", *processed_word);
61  *processed_word = 0;
62  *image_loaded = 1;
63  }
64 }
65 
66 
67 
72  NetworkWord word,
73  //Stream<Data_t, IMG_PACKETS> &img_in_axi_stream,
74  stream<Data_t> &img_in_axi_stream,
75  unsigned int *processed_word_rx,
76  unsigned int *image_loaded)
77 {
78  #pragma HLS INLINE
79 
80  Data_t v;
81  v.data = word.tdata;
82  v.keep = word.tkeep;
83  v.last = word.tlast;
84 
85  //Data_t v;
86  //v = word.tdata;
87 
88  img_in_axi_stream.write(v);
89 
90  if (*processed_word_rx < IMG_PACKETS-1) {
91  (*processed_word_rx)++;
92  }
93  else {
94  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_word_rx = 0.\n");
95  *processed_word_rx = 0;
96  *image_loaded = 1;
97  }
98 }
99 
100 
101 
102 
115 void pRXPath(
116  stream<NetworkWord> &siSHL_This_Data,
117  stream<NetworkMetaStream> &siNrc_meta,
118  stream<NetworkMetaStream> &sRxtoTx_Meta,
119  //Stream<Data_t, IMG_PACKETS> &img_in_axi_stream,
120  stream<Data_t> &img_in_axi_stream,
121  NetworkMetaStream meta_tmp,
122  unsigned int *processed_word_rx,
123  unsigned int *image_loaded
124  )
125 {
126  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
127  //#pragma HLS DATAFLOW interval=1
128  #pragma HLS INLINE
129  //-- LOCAL VARIABLES ------------------------------------------------------
130  UdpWord udpWord;
131 
132  switch(enqueueFSM)
133  {
134  case WAIT_FOR_META:
135  printf("DEBUG in pRXPath: enqueueFSM - WAIT_FOR_META, *processed_word_rx=%u\n",
136  *processed_word_rx);
137  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
138  {
139  meta_tmp = siNrc_meta.read();
140  meta_tmp.tlast = 1; //just to be sure...
141  sRxtoTx_Meta.write(meta_tmp);
143  }
144  //*processed_word_rx = 0;
145  *image_loaded = 0;
146  break;
147 
148  case PROCESSING_PACKET:
149  printf("DEBUG in pRXPath: enqueueFSM - PROCESSING_PACKET, *processed_word_rx=%u\n",
150  *processed_word_rx);
151  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full() )
152  {
153  //-- Read incoming data chunk
154  udpWord = siSHL_This_Data.read();
155  storeWordToAxiStream(udpWord, img_in_axi_stream, processed_word_rx, image_loaded);
156  if(udpWord.tlast == 1)
157  {
159  }
160  }
161  break;
162  }
163 
164 
165 }
166 
167 
168 
180  stream<NetworkWord> &sRxpToTxp_Data,
181  //Stream<Data_t, IMG_PACKETS> &img_in_axi_stream,
182  //Stream<Data_t, IMG_PACKETS> &img_out_axi_stream,
183  stream<Data_t> &img_in_axi_stream,
184  stream<Data_t> &img_out_axi_stream,
185  unsigned int *processed_word_rx,
186  unsigned int *image_loaded
187  )
188 {
189  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
190  //#pragma HLS DATAFLOW interval=1
191  #pragma HLS INLINE
192  //-- LOCAL VARIABLES ------------------------------------------------------
193  NetworkWord newWord;
194  uint16_t Thresh = 442;
195  float K = 0.04;
196  uint16_t k = K * (1 << 16); // Convert to Q0.16 format
197 
198 
199  switch(GammacorrectionFSM)
200  {
201  case WAIT_FOR_META:
202  printf("DEBUG in pProcPath: WAIT_FOR_META\n");
203  if ( (*image_loaded) == 1 )
204  {
206  *processed_word_rx = 0;
207  }
208  break;
209 
210  case PROCESSING_PACKET:
211  printf("DEBUG in pProcPath: PROCESSING_PACKET\n");
212  if ( !img_in_axi_stream.empty() && !img_out_axi_stream.full() )
213  {
214  GammacorrectionAccelStream(img_in_axi_stream, img_out_axi_stream, WIDTH, HEIGHT, Thresh, k);
215  if ( !img_out_axi_stream.empty() )
216  {
218  }
219  }
220  break;
221 
223  printf("DEBUG in pProcPath: GAMMACORRECTION_RETURN_RESULTS\n");
224  if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() )
225  {
226 
227  Data_t temp = img_out_axi_stream.read();
228  if ( img_out_axi_stream.empty() )
229  {
230  temp.last = 1;
232  }
233  else
234  {
235  temp.last = 0;
236  }
237  //TODO: find why Vitis kernel does not set keep and last by itself
238  temp.keep = 255;
239  newWord = NetworkWord(temp.data, temp.keep, temp.last);
240  sRxpToTxp_Data.write(newWord);
241  }
242  break;
243 
244  } // end switch
245 
246 }
247 
248 
249 
250 
262 void pTXPath(
263  stream<NetworkWord> &soTHIS_Shl_Data,
264  stream<NetworkMetaStream> &soNrc_meta,
265  stream<NetworkWord> &sRxpToTxp_Data,
266  stream<NetworkMetaStream> &sRxtoTx_Meta,
267  unsigned int *processed_word_tx,
268  ap_uint<32> *pi_rank,
269  ap_uint<32> *pi_size
270  )
271 {
272  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
273  //#pragma HLS DATAFLOW interval=1
274  #pragma HLS INLINE
275  //-- LOCAL VARIABLES ------------------------------------------------------
276  UdpWord udpWordTx;
277  NetworkMeta meta_in = NetworkMeta();
278 
279  switch(dequeueFSM)
280  {
282  printf("DEBUG in pTXPath: dequeueFSM=%d - WAIT_FOR_STREAM_PAIR, *processed_word_tx=%u\n",
283  dequeueFSM, *processed_word_tx);
284  //-- Forward incoming chunk to SHELL
285  *processed_word_tx = 0;
286 
287  /*
288  printf("!sRxpToTxp_Data.empty()=%d\n", !sRxpToTxp_Data.empty());
289  printf("!sRxtoTx_Meta.empty()=%d\n", !sRxtoTx_Meta.empty());
290  printf("!soTHIS_Shl_Data.full()=%d\n", !soTHIS_Shl_Data.full());
291  printf("!soNrc_meta.full()=%d\n", !soNrc_meta.full());
292  */
293 
294  if (( !sRxpToTxp_Data.empty() && !sRxtoTx_Meta.empty()
295  && !soTHIS_Shl_Data.full() && !soNrc_meta.full() ))
296  {
297  udpWordTx = sRxpToTxp_Data.read();
298 
299  // in case MTU=8 ensure tlast is set in WAIT_FOR_STREAM_PAIR and don't visit PROCESSING_PACKET
300  if (PACK_SIZE == 8)
301  {
302  udpWordTx.tlast = 1;
303  }
304  soTHIS_Shl_Data.write(udpWordTx);
305 
306  meta_in = sRxtoTx_Meta.read().tdata;
307  NetworkMetaStream meta_out_stream = NetworkMetaStream();
308  meta_out_stream.tlast = 1;
309  meta_out_stream.tkeep = 0xFF; //just to be sure
310 
311  //printf("rank: %d; size: %d; \n", (int) *pi_rank, (int) *pi_size);
312  meta_out_stream.tdata.dst_rank = (*pi_rank + 1) % *pi_size;
313  //printf("meat_out.dst_rank: %d\n", (int) meta_out_stream.tdata.dst_rank);
314 
315  meta_out_stream.tdata.dst_port = DEFAULT_TX_PORT;
316  meta_out_stream.tdata.src_rank = (NodeId) *pi_rank;
317  meta_out_stream.tdata.src_port = DEFAULT_RX_PORT;
318  //meta_out_stream.tdata.len = meta_in.len;
319  soNrc_meta.write(meta_out_stream);
320 
321  (*processed_word_tx)++;
322 
323  if(udpWordTx.tlast != 1)
324  {
326  }
327  }
328  break;
329 
330  case PROCESSING_PACKET:
331  printf("DEBUG in pTXPath: dequeueFSM=%d - PROCESSING_PACKET, *processed_word_tx=%u\n",
332  dequeueFSM, *processed_word_tx);
333  if( !sRxpToTxp_Data.empty() && !soTHIS_Shl_Data.full())
334  {
335  udpWordTx = sRxpToTxp_Data.read();
336 
337  // This is a normal termination of the axi stream from vitis functions
338  if(udpWordTx.tlast == 1)
339  {
341  }
342 
343  // This is our own termination based on the custom MTU we have set in PACK_SIZE.
344  // TODO: We can map PACK_SIZE to a dynamically assigned value either through MMIO or header
345  // in order to have a functional bitstream for any MTU size
346  (*processed_word_tx)++;
347  if (((*processed_word_tx)*8) % PACK_SIZE == 0)
348  {
349  udpWordTx.tlast = 1;
351  }
352 
353  soTHIS_Shl_Data.write(udpWordTx);
354  }
355  break;
356  }
357 }
358 
359 
360 
367 
368  ap_uint<32> *pi_rank,
369  ap_uint<32> *pi_size,
370  //------------------------------------------------------
371  //-- SHELL / This / Udp/TCP Interfaces
372  //------------------------------------------------------
373  stream<NetworkWord> &siSHL_This_Data,
374  stream<NetworkWord> &soTHIS_Shl_Data,
375  stream<NetworkMetaStream> &siNrc_meta,
376  stream<NetworkMetaStream> &soNrc_meta,
377  ap_uint<32> *po_rx_ports
378  )
379 {
380 
381  //-- DIRECTIVES FOR THE BLOCK ---------------------------------------------
382  //#pragma HLS INTERFACE ap_ctrl_none port=return
383 
384  //#pragma HLS INTERFACE ap_stable port=piSHL_This_MmioEchoCtrl
385 
386 #pragma HLS INTERFACE axis register both port=siSHL_This_Data
387 #pragma HLS INTERFACE axis register both port=soTHIS_Shl_Data
388 
389 #pragma HLS INTERFACE axis register both port=siNrc_meta
390 #pragma HLS INTERFACE axis register both port=soNrc_meta
391 
392 #pragma HLS INTERFACE ap_ovld register port=po_rx_ports name=poROL_NRC_Rx_ports
393 #pragma HLS INTERFACE ap_stable register port=pi_rank name=piFMC_ROL_rank
394 #pragma HLS INTERFACE ap_stable register port=pi_size name=piFMC_ROL_size
395 
396 
397  //-- LOCAL VARIABLES ------------------------------------------------------
399  static stream<NetworkWord> sRxpToTxp_Data("sRxpToTxP_Data"); // FIXME: works even with no static
400  static stream<NetworkMetaStream> sRxtoTx_Meta("sRxtoTx_Meta");
401  static unsigned int processed_word_rx;
402  static unsigned int processed_word_tx;
403  static unsigned int image_loaded;
404  const int img_packets = IMG_PACKETS;
405  const int tot_transfers = TOT_TRANSFERS;
406  static stream<Data_t> img_in_axi_stream ("img_in_axi_stream" );
407  static stream<Data_t> img_out_axi_stream("img_out_axi_stream");
408  //static Stream<Data_t, IMG_PACKETS> img_in_axi_stream ("img_in_axi_stream");
409  //static Stream<Data_t, IMG_PACKETS> img_out_axi_stream ("img_out_axi_stream");
410  *po_rx_ports = 0x1; //currently work only with default ports...
411 
412 
413  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
414 #pragma HLS DATAFLOW
415 //#pragma HLS STREAM variable=sRxpToTxp_Data depth=TOT_TRANSFERS
416 #pragma HLS stream variable=sRxtoTx_Meta depth=tot_transfers
417 #pragma HLS reset variable=enqueueFSM
418 #pragma HLS reset variable=dequeueFSM
419 #pragma HLS reset variable=GammacorrectionFSM
420 #pragma HLS reset variable=processed_word_rx
421 #pragma HLS reset variable=processed_word_tx
422 #pragma HLS reset variable=image_loaded
423 #pragma HLS stream variable=img_in_axi_stream depth=img_packets
424 #pragma HLS stream variable=img_out_axi_stream depth=img_packets
425 
426 
427 #ifdef USE_HLSLIB_DATAFLOW
428  /*
429  * Use this snippet to early check for C++ errors related to dataflow and bounded streams (empty
430  * and full) during simulation. It can also be both synthesized and used in co-simulation.
431  * Practically we use hlslib when we want to run simulation as close as possible to the HW, by
432  * executing all functions of dataflow in thread-safe parallel executions, i.e the function
433  * HLSLIB_DATAFLOW_FINALIZE() acts as a barrier for the threads spawned to serve every function
434  * called in HLSLIB_DATAFLOW_FUNCTION(func, args...).
435  */
436  // Dataflow functions running in parallel
437  HLSLIB_DATAFLOW_INIT();
438 
439  HLSLIB_DATAFLOW_FUNCTION(pRXPath,
440  siSHL_This_Data,
441  siNrc_meta,
442  sRxtoTx_Meta,
443  img_in_axi_stream,
444  meta_tmp,
445  &processed_word_rx,
446  &image_loaded);
447 
448  HLSLIB_DATAFLOW_FUNCTION(pProcPath,
449  sRxpToTxp_Data,
450  img_in_axi_stream,
451  img_out_axi_stream,
452  &processed_word_rx,
453  &image_loaded);
454 
455  HLSLIB_DATAFLOW_FUNCTION(pTXPath,
456  soTHIS_Shl_Data,
457  soNrc_meta,
458  sRxpToTxp_Data,
459  sRxtoTx_Meta,
460  &processed_word_tx,
461  pi_rank,
462  pi_size);
463 
464  HLSLIB_DATAFLOW_FINALIZE();
465 
466 #else // !USE_HLSLIB_DATAFLOW
467  pRXPath(
468  siSHL_This_Data,
469  siNrc_meta,
470  sRxtoTx_Meta,
471  img_in_axi_stream,
472  meta_tmp,
473  &processed_word_rx,
474  &image_loaded);
475 
476 
477  pProcPath(sRxpToTxp_Data,
478  img_in_axi_stream,
479  img_out_axi_stream,
480  &processed_word_rx,
481  &image_loaded);
482 
483  pTXPath(
484  soTHIS_Shl_Data,
485  soNrc_meta,
486  sRxpToTxp_Data,
487  sRxtoTx_Meta,
488  &processed_word_tx,
489  pi_rank,
490  pi_size);
491 #endif // USE_HLSLIB_DATAFLOW
492 }
493 
494 
uint8_t enqueueFSM
void pTXPath(stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &soNrc_meta, stream< NetworkWord > &sRxpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size)
Transmit Path - From THIS to SHELL.
uint8_t GammacorrectionFSM
#define GAMMACORRECTION_RETURN_RESULTS
void storeWordToAxiStream(NetworkWord word, stream< ap_axiu< 64, 0, 0, 0 > > &img_in_axi_stream, unsigned int *processed_word_rx, unsigned int *image_loaded)
Store a word from ethernet to a local AXI stream.
void gammacorrection(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports)
Main process of the Gammacorrection Application.
uint8_t dequeueFSM
void pProcPath(stream< NetworkWord > &sRxpToTxp_Data, stream< ap_axiu< 64, 0, 0, 0 > > &img_in_axi_stream, stream< ap_axiu< 64, 0, 0, 0 > > &img_out_axi_stream, unsigned int *processed_word_rx, unsigned int *image_loaded)
Processing Path - Main processing FSM for Vitis kernels.
void storeWordToArray(uint64_t input, ap_uint< 64 > img[256 *256/(64/8)], unsigned int *processed_word, unsigned int *image_loaded)
Store a word from ethernet to local memory.
void pRXPath(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< ap_axiu< 64, 0, 0, 0 > > &img_in_axi_stream, NetworkMetaStream meta_tmp, unsigned int *processed_word_rx, unsigned int *image_loaded)
Receive Path - From SHELL to THIS.
#define Data_t
#define WIDTH
#define IMG_PACKETS
#define HEIGHT
void GammacorrectionAccelStream(hls::stream< ap_axiu< 64, 0, 0, 0 > > &img_in_axi_stream, hls::stream< ap_axiu< 64, 0, 0, 0 > > &img_out_axi_stream, int rows, int cols, int threshold, int k)
Top-level accelerated function of the Gammacorrection Application with stream I/F.
#define TOT_TRANSFERS
Definition: config.h:70
#define PacketFsmType
Definition: memtest.hpp:76
#define PROCESSING_PACKET
Definition: memtest.hpp:73
#define WAIT_FOR_STREAM_PAIR
Definition: memtest.hpp:72
#define WAIT_FOR_META
Definition: memtest.hpp:71
#define PACK_SIZE
Definition: config.h:51
#define DEFAULT_RX_PORT
Definition: nal.hpp:139
#define DEFAULT_TX_PORT
Definition: nal.hpp:138
string input
Definition: test.py:9
ap_uint< 8 > NodeId
Definition: network.hpp:82
ap_uint< 1 > tlast
Definition: network.hpp:111
ap_uint< 8 > tkeep
Definition: network.hpp:110
NetworkMeta tdata
Definition: network.hpp:109
NodeId dst_rank
Definition: network.hpp:95
NodeId src_rank
Definition: network.hpp:97
NrcPort src_port
Definition: network.hpp:98
NrcPort dst_port
Definition: network.hpp:96
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51
ap_uint< 1 > tlast