cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
warp_transform_hw_common.hpp
Go to the documentation of this file.
1 
17 
53 #ifndef _ROLE_WARPTRANSFORM_HW_COMMON_HPP_
54 #define _ROLE_WARPTRANSFORM_HW_COMMON_HPP_
55 
56 #include <stdio.h>
57 #include <iostream>
58 #include <hls_stream.h>
59 #include "ap_int.h"
60 #include <stdint.h>
61 #include "../../../../../HOST/vision/warp_transform/languages/cplusplus/include/config.h"//debug level define
62 #include "memory_utils.hpp" //for stream based communication with ddr
63 #include "network.hpp"
64 
65 using namespace hls;
66 
/**
 * @brief Overlay of an unsigned integer and a float in the same storage.
 *
 * Both members alias the same bytes, so a value stored through one member
 * occupies the storage of the other.
 */
union float_bits_u {
    unsigned int i;  ///< integer member (first member, used by aggregate init)
    float        f;  ///< floating-point member
};
74 
75 
/**
 * @brief Copy a fixed, compile-time number of elements to another array.
 *
 * @tparam Tin        element type of the source array
 * @tparam Tout       element type of the destination array
 * @tparam arraysize  number of elements to copy
 *
 * @param[in]  in   source array; must hold at least arraysize elements
 * @param[out] out  destination array; must hold at least arraysize elements
 */
template<typename Tin, typename Tout, unsigned int arraysize>
void pMyMemtestMemCpy(Tin* in, Tout* out){
#pragma HLS INLINE
    for (unsigned int i = 0; i < arraysize; i++)
    {
#pragma HLS PIPELINE II=1
        // Index both sides: the former `*out = *in` re-copied element 0 on
        // every iteration and never touched the rest of the array.
        out[i] = in[i];
    }
}
96 
97 
/**
 * @brief Copy a run-time number of elements out of a buffer that is read
 *        circularly, starting at a given offset.
 *
 * Reading starts at buff[offset_buff]; when the read position reaches
 * arraysize it wraps around to index 0.
 *
 * @tparam Tin        element type of the circular source buffer
 * @tparam Tout       element type of the destination
 * @tparam arraysize  index at which the read position wraps to 0
 *
 * @param[in]  buff         source buffer
 * @param[out] out_mem      destination, written at indices 0 .. elems-1
 * @param[in]  elems        number of elements to copy
 * @param[in]  offset_buff  first index to read from buff
 */
template<typename Tin, typename Tout, const unsigned int arraysize>
void pMemCpyCircularBuff(Tin* buff, Tout* out_mem, unsigned int elems,unsigned int offset_buff){
#pragma HLS INLINE
    // Single running read position instead of a separate base and offset.
    unsigned int read_pos = offset_buff;
    circ_buff_loop: for (unsigned int dst = 0; dst < elems; dst++)
    {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min = 1 max = arraysize
        if (read_pos == arraysize)
        {
            read_pos = 0;  // wrap around to the start of the buffer
        }
        out_mem[dst] = buff[read_pos];
        read_pos++;
    }
}
131 
132 
133 
145 template<typename Tin, typename Tout, const unsigned int burstsize>
146 void pReadAxiMemMapped2HlsStream(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems){
147 #pragma HLS INLINE
148  mmloop: for (unsigned int i = 0; i < elems; i++)
149  {
150 #pragma HLS PIPELINE II=1
151 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
152  Tout tmp = main_mem[i];
153  sOut.write(tmp);
154  }
155 
156 }
157 
158 
174 template<typename Tin, typename Tout, const unsigned int burstsize, typename Tcntr>
175 void pReadAxiMemMapped2HlsStreamCountFirst(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems, hls::stream<Tcntr>& cmd){
176 #pragma HLS INLINE
177 cmd.write(0);
178  mmloop: for (unsigned int i = 0; i < elems; i++)
179  {
180 #pragma HLS PIPELINE II=1
181 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
182  Tout tmp = main_mem[i];
183  sOut.write(tmp);
184  }
185  cmd.write(1);
186 
187 }
188 
189 
204 template<typename Tin, typename Tout, const unsigned int burstsize, typename Tcntr>
205 void pReadAxiMemMapped2HlsStreamCountActivated(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems, hls::stream<Tcntr>& cmd){
206 #pragma HLS INLINE
207  cmd.write(1);
208  mmloop: for (unsigned int i = 0; i < elems; i++)
209  {
210 #pragma HLS PIPELINE II=1
211 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
212  Tout tmp = main_mem[i];
213  sOut.write(tmp);
214  }
215  cmd.write(1);
216 }
217 
218 
219 
220 
224 template<typename Tin,const unsigned int loop_cnt,
225 const unsigned int bytes_per_loop, const unsigned int max_data_transfer>
227  NetworkWord word,
228  // stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
229  Tin &img_in_axi_stream,
230  unsigned int *processed_word_rx,
231  unsigned int *processed_bytes_rx,
232  stream<bool> &sImageLoaded
233 )
234 {
235  #pragma HLS INLINE
236  Data_t_in v;
237  // const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
238  // const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
239  unsigned int bytes_with_keep = 0;
240  for (unsigned int i=0; i<loop_cnt; i++) {
241  if ((word.tkeep >> i) == 0) {
242  printf("WARNING: value with tkeep=0 at i=%u\n", i);
243  continue;
244  }
245  v.data = (ap_uint<INPUT_PTR_WIDTH>)(word.tdata >> i*8);
246  v.keep = word.tkeep;
247  v.last = word.tlast;
248  img_in_axi_stream.write(v.data);
249  bytes_with_keep += bytes_per_loop;
250  }
251  if (*processed_bytes_rx < max_data_transfer){
252  // IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) {
253  (*processed_bytes_rx) += bytes_with_keep;
254  if (!sImageLoaded.full()) {
255  sImageLoaded.write(false);
256  }
257  }
258  else {
259  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_bytes_rx = 0.\n");
260  *processed_bytes_rx = 0;
261  if (!sImageLoaded.full()) {
262  sImageLoaded.write(true);
263  }
264  }
265 }
266 
267 
/**
 * @brief Store a net word to local memory.
 *
 * The word is written to img at the index held in *processed_word. The index
 * is then advanced by one; once the last slot (img_pckts-1) has been written
 * the index is reset to 0 and *image_loaded is set to 1.
 *
 * @tparam TInImg     element type of the image array
 * @tparam img_pckts  number of elements in the image array
 *
 * @param[in]     input           word to store
 * @param[out]    img             destination array
 * @param[in,out] processed_word  write index into img; updated by this call
 * @param[out]    image_loaded    set to 1 when the index wraps back to 0
 */
template<typename TInImg, const unsigned int img_pckts>
// void storeWordToArray(uint64_t input, ap_uint<INPUT_PTR_WIDTH> img[IMG_PACKETS],
void storeWordToArray(uint64_t input, TInImg img[img_pckts],
                      unsigned int *processed_word, unsigned int *image_loaded)
{
  #pragma HLS INLINE

  const unsigned int slot = *processed_word;
  img[slot] = (TInImg) input;
  const uint64_t stored = (uint64_t) img[slot];
  // 64-bit values are printed through unsigned long long so that the
  // conversion specifiers match the argument types on every platform.
  printf("DEBUG in storeWordToArray: input = %llu = 0x%16.16llX \n",
         (unsigned long long) input, (unsigned long long) input);
  printf("DEBUG in storeWordToArray: img[%u]= %llu = 0x%16.16llX \n", slot,
         (unsigned long long) stored, (unsigned long long) stored);
  if (slot < img_pckts-1) {
    // Parenthesised on purpose: `*processed_word++` would advance the
    // pointer and leave the counter itself untouched.
    (*processed_word)++;
  }
  else {
    printf("DEBUG in storeWordToArray: WARNING - you've reached the max depth of img[%u]. Will put *processed_word = 0.\n", slot);
    *processed_word = 0;
    *image_loaded = 1;
  }
}
291 
295 
299 
/// Upper bound used in the LOOP_TRIPCOUNT pragmas of the counter loops below.
const unsigned long int max_counter_cc = 4000000UL;
301 
302 //Original function from Xilinx Vitis Accel examples, template from DCO
303 // @DEPRECATED
304 //https://github.com/Xilinx/Vitis_Accel_Examples/blob/master/cpp_kernels/axi_burst_performance/src/test_kernel_common.hpp
305 template<typename Tin, typename Tout, unsigned int counter_precision=64>
306 void perfCounterProc(hls::stream<Tin>& cmd, hls::stream<Tout>& out, int direction, int burst_length, int nmbr_outstanding)
307 {
308 #pragma HLS INLINE off
309 
310  Tin input_cmd;
311  // wait to receive a value to start counting
312  ap_uint<counter_precision> cnt = cmd.read();
313 // keep counting until a value is available
314 count:
315  while (cmd.read_nb(input_cmd) == false) {
316  cnt++;
317  #if DEBUG_LEVEL == TRACE_ALL
318 #ifndef __SYNTHESIS__
319  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
320 #endif //__SYNTHESIS__
321 #endif
322  }
323 
324  // // write out kernel statistics to global memory
325  Tout tmp[1];//was 4
326  tmp[0] = cnt;
327  // tmp[1] = input_cmd;
328  //tmp[1] = burst_length;
329  // tmp[3] = nmbr_outstanding;
330  //memcpy(out, tmp, 4 * sizeof(Tout));
331  out.write(tmp[0]);
332  //out.write(tmp[1]);
333  //out.write(nmbr_outstanding); this
334  //out.write(input_cmd); Xilinx use this to count the errors but we are already counting so...
335 }
336 
337 //Original function from Xilinx Vitis Accel examples, template from DCO
338 // @DEPRECATED
339 //https://github.com/Xilinx/Vitis_Accel_Examples/blob/master/cpp_kernels/axi_burst_performance/src/test_kernel_common.hpp
340 template<typename Tin, typename Tout, unsigned int counter_precision=64>
341 void perfCounterProc2Mem(hls::stream<Tin>& cmd, Tout * out, int direction, int burst_length, int nmbr_outstanding) {
342 
343  Tin input_cmd;
344  // wait to receive a value to start counting
345  ap_uint<counter_precision> cnt = cmd.read();
346 // keep counting until a value is available
347 count:
348  while (cmd.read_nb(input_cmd) == false) {
349 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
350  cnt++;
351 
352 #if DEBUG_LEVEL == TRACE_ALL
353 #ifndef __SYNTHESIS__
354  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
355 #endif //__SYNTHESIS__
356 #endif
357  }
358  *out =cnt;
359 }
360 
361 
362 
374 template<typename Tin, typename Tout, unsigned int counter_precision=64>
375 void perfCounterProc2MemCountOnly(hls::stream<Tin>& cmd, Tout * out) {
376 
377  Tin input_cmd;
378  // wait to receive a value to start counting
379  ap_uint<counter_precision> cnt = cmd.read();
380 // keep counting until a value is available
381 count:
382  while (cmd.read_nb(input_cmd) == false) {
383 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
384  cnt++;
385 
386 #if DEBUG_LEVEL == TRACE_ALL
387 #ifndef __SYNTHESIS__
388  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
389 #endif //__SYNTHESIS__
390 #endif
391  }
392  *out =cnt;
393 }
394 
395 
396 
410 template<typename Tin, typename Tout, unsigned int counter_precision=64>
411 void perfCounterProc2MemCountIncremental(hls::stream<Tin>& cmd, Tout * out) {
412 
413  Tin input_cmd;
414  // wait to receive a value to start counting
415  ap_uint<counter_precision> cnt = cmd.read();
416 // keep counting until a value is available
417 count:
418  while (cmd.read_nb(input_cmd) == false) {
419 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
420  cnt++;
421 #if DEBUG_LEVEL == TRACE_ALL
422 #ifndef __SYNTHESIS__
423  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
424 #endif //__SYNTHESIS__
425 #endif
426  }
427  *out +=cnt;
428 }
429 
430 
442 template<typename Tin, typename Tout, unsigned int counter_precision=64>
443 void perfCounterMultipleCounts(hls::stream<Tin>& cmd, Tout * out) {
444  #pragma HLS interface ap_ctrl_none port=return
445  Tin input_cmd=1;
446 
447  // wait to receive a value to start counting
448  ap_uint<counter_precision> cnt = cmd.read();
449  reset:
450  while (input_cmd != 0)//a zero will stop the counter
451  {
452 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
453 #if DEBUG_LEVEL == TRACE_ALL
454  #ifndef __SYNTHESIS__
455  //printf("DEBUG begin to count :D input_cmd value = %s\n", input_cmd.to_string().c_str());
456 #endif //__SYNTHESIS__
457 #endif
458 // keep counting until a value is available
459 count:
460  while (cmd.read_nb(input_cmd) == false) {
461 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
462 #pragma HLS PIPELINE II=1
463  cnt++;
464 #if DEBUG_LEVEL == TRACE_ALL
465  #ifndef __SYNTHESIS__
466  // printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
467 #endif //__SYNTHESIS__
468 #endif
469  }
470  input_cmd=cmd.read();
471  }
472  *out +=cnt;
473 }
474 
475 
489 template<typename Tevent=bool, const unsigned int counter_width=32, const unsigned int maximum_counter_value_before_reset=4000000>
491 hls::stream<Tevent> &sOfEnableCCIncrement,
492 hls::stream<Tevent> &sOfResetCounter,
493 hls::stream<Tevent> &sOfGetTheCounter,
494 hls::stream<ap_uint<counter_width> > &oSClockCounter)
495 {
496 
497  static ap_uint<counter_width> internal_counter = 0;
498  static bool pop_the_counter = false;
499 #pragma HLS reset variable=internal_counter
500 #pragma HLS reset variable=pop_the_counter
501 //giving priority to the pop
502  if(!sOfGetTheCounter.empty()){
503  pop_the_counter = sOfGetTheCounter.read();
504  }
505  if (pop_the_counter && !oSClockCounter.full())
506  {
507  oSClockCounter.write(internal_counter);
508  pop_the_counter=false;
509  }
510  if(!sOfResetCounter.empty()){
511  bool reset_or_not = sOfResetCounter.read();
512  if (reset_or_not)
513  {
514  internal_counter = 0;
515  }
516  }
517  if(!sOfEnableCCIncrement.empty()){
518  bool increment = sOfEnableCCIncrement.read();
519  if (increment)
520  {
521  if(internal_counter==maximum_counter_value_before_reset){
522  internal_counter=1;
523  }else{
524  internal_counter++;
525  }
526 #if DEBUG_LEVEL == TRACE_ALL
527 #ifndef __SYNTHESIS__
528  printf("DEBUG pCountClockCycles counter value = %s\n", internal_counter.to_string().c_str());
529 #endif //__SYNTHESIS__
530 #endif
531  }
532  }
533 }
537 
538 #endif //_ROLE_WARPTRANSFORM_HW_COMMON_HPP_
#define Data_t_in
Definition: memtest.cpp:29
const unsigned long int max_counter_cc
void pCountClockCycles(hls::stream< Tevent > &sOfEnableCCIncrement, hls::stream< Tevent > &sOfResetCounter, hls::stream< Tevent > &sOfGetTheCounter, hls::stream< ap_uint< counter_width > > &oSClockCounter)
Count Clock Cycles between two events first sketch TODO: make it working without counting with the st...
void perfCounterProc2MemCountIncremental(hls::stream< Tin > &cmd, Tout *out)
Count Clock Cycles between two events, the first event init the counter the second stop the count and...
void perfCounterMultipleCounts(hls::stream< Tin > &cmd, Tout *out)
Count Clock Cycles between two events, the first event init the counter the second stop the count,...
void storeWordToAxiStream(NetworkWord word, Tin &img_in_axi_stream, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
Store a net word to a local AXI stream.
void perfCounterProc2Mem(hls::stream< Tin > &cmd, Tout *out, int direction, int burst_length, int nmbr_outstanding)
void storeWordToArray(uint64_t input, TInImg img[img_pckts], unsigned int *processed_word, unsigned int *image_loaded)
Store a net word to local memory.
void pReadAxiMemMapped2HlsStreamCountFirst(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
Copy a run-time variable amount of data to an hls stream with a given max it assumes also the initial...
void perfCounterProc2MemCountOnly(hls::stream< Tin > &cmd, Tout *out)
Count clock cycles between two events: the first event initializes the counter and the second stops the count.
void pReadAxiMemMapped2HlsStream(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems)
Copy a run-time variable amount of data to an hls stream with a given max.
void perfCounterProc(hls::stream< Tin > &cmd, hls::stream< Tout > &out, int direction, int burst_length, int nmbr_outstanding)
void pMyMemtestMemCpy(Tin *in, Tout *out)
Copy a fixed compile time amount of data to another array.
void pReadAxiMemMapped2HlsStreamCountActivated(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
Copy a run-time variable amount of data to an hls stream with a given max it assumes "perfCounterMult...
void pMemCpyCircularBuff(Tin *buff, Tout *out_mem, unsigned int elems, unsigned int offset_buff)
Copy a run-time variable amount of data to another array employing the src as circular buffer i....
string input
Definition: test.py:9
out
Definition: test.py:12
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51