cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
sobel_hw_common.hpp
Go to the documentation of this file.
1 
17 
53 #ifndef _ROLE_SOBEL_HW_COMMON_HPP_
54 #define _ROLE_SOBEL_HW_COMMON_HPP_
55 
56 #include <stdio.h>
57 #include <iostream>
58 #include <hls_stream.h>
59 #include "ap_int.h"
60 #include <stdint.h>
61 #include "../../../../../HOST/vision/sobel/languages/cplusplus/include/config.h"//debug level define
62 #include "memory_utils.hpp" //for stream based communication with ddr
63 #include "network.hpp"
64 
65 using namespace hls;
66 
70 
71 
/**
 * Copy a compile-time fixed number of elements from one array to another.
 *
 * @tparam Tin       element type of the source array
 * @tparam Tout      element type of the destination array
 * @tparam arraysize number of elements to copy
 * @param  in        source array; must hold at least arraysize elements
 * @param  out       destination array; must hold at least arraysize elements
 */
template<typename Tin, typename Tout, unsigned int arraysize>
void pMyMemtestMemCpy(Tin* in, Tout* out){
#pragma HLS INLINE
    for (unsigned int i = 0; i < arraysize; i++)
    {
#pragma HLS PIPELINE II=1
        // Index both arrays: dereferencing the bare pointers would only
        // rewrite element 0 on every iteration and never copy the rest.
        out[i] = in[i];
    }
}
92 
93 
/**
 * Copy a run-time number of elements out of a source array that is read as a
 * circular buffer of arraysize entries.
 *
 * Reading starts at buff[offset_buff]; whenever the read position reaches
 * arraysize it restarts from buff[0].
 *
 * @tparam Tin         element type of the circular source buffer
 * @tparam Tout        element type of the destination
 * @tparam arraysize   position at which the read index wraps; also used as the
 *                     maximum of the loop trip-count hint
 * @param  buff        circular source buffer
 * @param  out_mem     destination, written linearly from index 0
 * @param  elems       number of elements to copy
 * @param  offset_buff position in buff of the first element to copy
 */
template<typename Tin, typename Tout, const unsigned int arraysize>
void pMemCpyCircularBuff(Tin* buff, Tout* out_mem, unsigned int elems,unsigned int offset_buff){
#pragma HLS INLINE
    // Single running read position instead of a base offset plus a step counter.
    unsigned int read_idx = offset_buff;
    circ_buff_loop: for (unsigned int n = 0; n < elems; ++n)
    {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min = 1 max = arraysize
        if (read_idx == arraysize)
        {
            read_idx = 0; // wrap around to the start of the buffer
        }
        out_mem[n] = buff[read_idx];
        ++read_idx;
    }
}
127 
128 
129 
141 template<typename Tin, typename Tout, const unsigned int burstsize>
142 void pReadAxiMemMapped2HlsStream(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems){
143 #pragma HLS INLINE
144  mmloop: for (unsigned int i = 0; i < elems; i++)
145  {
146 #pragma HLS PIPELINE II=1
147 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
148  Tout tmp = main_mem[i];
149  sOut.write(tmp);
150  }
151 
152 }
153 
154 
170 template<typename Tin, typename Tout, const unsigned int burstsize, typename Tcntr>
171 void pReadAxiMemMapped2HlsStreamCountFirst(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems, hls::stream<Tcntr>& cmd){
172 #pragma HLS INLINE
173 cmd.write(0);
174  mmloop: for (unsigned int i = 0; i < elems; i++)
175  {
176 #pragma HLS PIPELINE II=1
177 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
178  Tout tmp = main_mem[i];
179  sOut.write(tmp);
180  }
181  cmd.write(1);
182 
183 }
184 
185 
200 template<typename Tin, typename Tout, const unsigned int burstsize, typename Tcntr>
201 void pReadAxiMemMapped2HlsStreamCountActivated(Tin* main_mem, hls::stream<Tout> &sOut, unsigned int elems, hls::stream<Tcntr>& cmd){
202 #pragma HLS INLINE
203  cmd.write(1);
204  mmloop: for (unsigned int i = 0; i < elems; i++)
205  {
206 #pragma HLS PIPELINE II=1
207 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
208  Tout tmp = main_mem[i];
209  sOut.write(tmp);
210  }
211  cmd.write(1);
212 }
213 
214 
215 
216 
220 template<typename Tin,const unsigned int loop_cnt,
221 const unsigned int bytes_per_loop, const unsigned int max_data_transfer>
223  NetworkWord word,
224  // stream<ap_uint<INPUT_PTR_WIDTH>> &img_in_axi_stream,
225  Tin &img_in_axi_stream,
226  unsigned int *processed_word_rx,
227  unsigned int *processed_bytes_rx,
228  stream<bool> &sImageLoaded
229 )
230 {
231  #pragma HLS INLINE
232  Data_t_in v;
233  // const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
234  // const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
235  unsigned int bytes_with_keep = 0;
236  for (unsigned int i=0; i<loop_cnt; i++) {
237  if ((word.tkeep >> i) == 0) {
238  printf("WARNING: value with tkeep=0 at i=%u\n", i);
239  continue;
240  }
241  v.data = (ap_uint<INPUT_PTR_WIDTH>)(word.tdata >> i*8);
242  v.keep = word.tkeep;
243  v.last = word.tlast;
244  img_in_axi_stream.write(v.data);
245  bytes_with_keep += bytes_per_loop;
246  }
247  if (*processed_bytes_rx < max_data_transfer){
248  // IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) {
249  (*processed_bytes_rx) += bytes_with_keep;
250  if (!sImageLoaded.full()) {
251  sImageLoaded.write(false);
252  }
253  }
254  else {
255  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_bytes_rx = 0.\n");
256  *processed_bytes_rx = 0;
257  if (!sImageLoaded.full()) {
258  sImageLoaded.write(true);
259  }
260  }
261 }
262 
263 
/**
 * Store a network word to local memory.
 *
 * The word is written at img[*processed_word]. While the index is below
 * img_pckts-1 it is advanced by one; once the last slot has been written the
 * index is reset to 0 and *image_loaded is set to 1.
 *
 * @tparam TInImg         element type of the local image array
 * @tparam img_pckts      number of slots in the image array
 * @param  input          64-bit word to store
 * @param  img            destination array
 * @param  processed_word index of the slot to write, updated in place
 * @param  image_loaded   set to 1 when the last slot has been written
 */
template<typename TInImg, const unsigned int img_pckts>
void storeWordToArray(uint64_t input, TInImg img[img_pckts],
        unsigned int *processed_word, unsigned int *image_loaded)
{
    #pragma HLS INLINE

    img[*processed_word] = (TInImg) input;

    // printf needs an exact type match: pass 64-bit values as unsigned long long
    // for both the %llu and the %llX conversions.
    const unsigned long long input_ull = static_cast<unsigned long long>(input);
    const unsigned long long stored_ull =
        static_cast<unsigned long long>((uint64_t)img[*processed_word]);
    printf("DEBUG in storeWordToArray: input = %llu = 0x%16.16llX \n", input_ull, input_ull);
    printf("DEBUG in storeWordToArray: img[%u]= %llu = 0x%16.16llX \n", *processed_word,
           stored_ull, stored_ull);

    if (*processed_word < img_pckts-1) {
        // Parentheses are required: `*processed_word++` would advance the
        // local pointer and leave the caller's index untouched.
        (*processed_word)++;
    }
    else {
        printf("DEBUG in storeWordToArray: WARNING - you've reached the max depth of img[%u]. Will put *processed_word = 0.\n", *processed_word);
        *processed_word = 0;
        *image_loaded = 1;
    }
}
287 
291 
295 
// Upper bound used as the `max` of the `#pragma HLS LOOP_TRIPCOUNT` hints on the
// counting loops of the performance counters in this file.
// NOTE(review): the "_cc" suffix presumably stands for clock cycles - confirm.
296 const unsigned long int max_counter_cc = 4000000;
297 
298 //Original function from Xilinx Vitis Accel examples, template from DCO
299 // @DEPRECATED
300 //https://github.com/Xilinx/Vitis_Accel_Examples/blob/master/cpp_kernels/axi_burst_performance/src/test_kernel_common.hpp
301 template<typename Tin, typename Tout, unsigned int counter_precision=64>
302 void perfCounterProc(hls::stream<Tin>& cmd, hls::stream<Tout>& out, int direction, int burst_length, int nmbr_outstanding)
303 {
304 #pragma HLS INLINE off
305 
306  Tin input_cmd;
307  // wait to receive a value to start counting
308  ap_uint<counter_precision> cnt = cmd.read();
309 // keep counting until a value is available
310 count:
311  while (cmd.read_nb(input_cmd) == false) {
312  cnt++;
313  #if DEBUG_LEVEL == TRACE_ALL
314 #ifndef __SYNTHESIS__
315  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
316 #endif //__SYNTHESIS__
317 #endif
318  }
319 
320  // // write out kernel statistics to global memory
321  Tout tmp[1];//was 4
322  tmp[0] = cnt;
323  // tmp[1] = input_cmd;
324  //tmp[1] = burst_length;
325  // tmp[3] = nmbr_outstanding;
326  //memcpy(out, tmp, 4 * sizeof(Tout));
327  out.write(tmp[0]);
328  //out.write(tmp[1]);
329  //out.write(nmbr_outstanding); this
330  //out.write(input_cmd); Xilinx use this to count the errors but we are already counting so...
331 }
332 
333 //Original function from Xilinx Vitis Accel examples, template from DCO
334 // @DEPRECATED
335 //https://github.com/Xilinx/Vitis_Accel_Examples/blob/master/cpp_kernels/axi_burst_performance/src/test_kernel_common.hpp
336 template<typename Tin, typename Tout, unsigned int counter_precision=64>
337 void perfCounterProc2Mem(hls::stream<Tin>& cmd, Tout * out, int direction, int burst_length, int nmbr_outstanding) {
338 
339  Tin input_cmd;
340  // wait to receive a value to start counting
341  ap_uint<counter_precision> cnt = cmd.read();
342 // keep counting until a value is available
343 count:
344  while (cmd.read_nb(input_cmd) == false) {
345 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
346  cnt++;
347 
348 #if DEBUG_LEVEL == TRACE_ALL
349 #ifndef __SYNTHESIS__
350  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
351 #endif //__SYNTHESIS__
352 #endif
353  }
354  *out =cnt;
355 }
356 
357 
358 
370 template<typename Tin, typename Tout, unsigned int counter_precision=64>
371 void perfCounterProc2MemCountOnly(hls::stream<Tin>& cmd, Tout * out) {
372 
373  Tin input_cmd;
374  // wait to receive a value to start counting
375  ap_uint<counter_precision> cnt = cmd.read();
376 // keep counting until a value is available
377 count:
378  while (cmd.read_nb(input_cmd) == false) {
379 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
380  cnt++;
381 
382 #if DEBUG_LEVEL == TRACE_ALL
383 #ifndef __SYNTHESIS__
384  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
385 #endif //__SYNTHESIS__
386 #endif
387  }
388  *out =cnt;
389 }
390 
391 
392 
406 template<typename Tin, typename Tout, unsigned int counter_precision=64>
407 void perfCounterProc2MemCountIncremental(hls::stream<Tin>& cmd, Tout * out) {
408 
409  Tin input_cmd;
410  // wait to receive a value to start counting
411  ap_uint<counter_precision> cnt = cmd.read();
412 // keep counting until a value is available
413 count:
414  while (cmd.read_nb(input_cmd) == false) {
415 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
416  cnt++;
417 #if DEBUG_LEVEL == TRACE_ALL
418 #ifndef __SYNTHESIS__
419  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
420 #endif //__SYNTHESIS__
421 #endif
422  }
423  *out +=cnt;
424 }
425 
426 
438 template<typename Tin, typename Tout, unsigned int counter_precision=64>
439 void perfCounterMultipleCounts(hls::stream<Tin>& cmd, Tout * out) {
440  #pragma HLS interface ap_ctrl_none port=return
441  Tin input_cmd=1;
442 
443  // wait to receive a value to start counting
444  ap_uint<counter_precision> cnt = cmd.read();
445  reset:
446  while (input_cmd != 0)//a zero will stop the counter
447  {
448 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
449 #if DEBUG_LEVEL == TRACE_ALL
450  #ifndef __SYNTHESIS__
451  //printf("DEBUG begin to count :D input_cmd value = %s\n", input_cmd.to_string().c_str());
452 #endif //__SYNTHESIS__
453 #endif
454 // keep counting until a value is available
455 count:
456  while (cmd.read_nb(input_cmd) == false) {
457 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
458 #pragma HLS PIPELINE II=1
459  cnt++;
460 #if DEBUG_LEVEL == TRACE_ALL
461  #ifndef __SYNTHESIS__
462  // printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
463 #endif //__SYNTHESIS__
464 #endif
465  }
466  input_cmd=cmd.read();
467  }
468  *out +=cnt;
469 }
470 
471 
485 template<typename Tevent=bool, const unsigned int counter_width=32, const unsigned int maximum_counter_value_before_reset=4000000>
487 hls::stream<Tevent> &sOfEnableCCIncrement,
488 hls::stream<Tevent> &sOfResetCounter,
489 hls::stream<Tevent> &sOfGetTheCounter,
490 hls::stream<ap_uint<counter_width> > &oSClockCounter)
491 {
492 
493  static ap_uint<counter_width> internal_counter = 0;
494  static bool pop_the_counter = false;
495 #pragma HLS reset variable=internal_counter
496 #pragma HLS reset variable=pop_the_counter
497 //giving priority to the pop
498  if(!sOfGetTheCounter.empty()){
499  pop_the_counter = sOfGetTheCounter.read();
500  }
501  if (pop_the_counter && !oSClockCounter.full())
502  {
503  oSClockCounter.write(internal_counter);
504  pop_the_counter=false;
505  }
506  if(!sOfResetCounter.empty()){
507  bool reset_or_not = sOfResetCounter.read();
508  if (reset_or_not)
509  {
510  internal_counter = 0;
511  }
512  }
513  if(!sOfEnableCCIncrement.empty()){
514  bool increment = sOfEnableCCIncrement.read();
515  if (increment)
516  {
517  if(internal_counter==maximum_counter_value_before_reset){
518  internal_counter=1;
519  }else{
520  internal_counter++;
521  }
522 #if DEBUG_LEVEL == TRACE_ALL
523 #ifndef __SYNTHESIS__
524  printf("DEBUG pCountClockCycles counter value = %s\n", internal_counter.to_string().c_str());
525 #endif //__SYNTHESIS__
526 #endif
527  }
528  }
529 }
533 
534 #endif //_ROLE_SOBEL_HW_COMMON_HPP_
#define Data_t_in
Definition: memtest.cpp:29
const unsigned long int max_counter_cc
void pCountClockCycles(hls::stream< Tevent > &sOfEnableCCIncrement, hls::stream< Tevent > &sOfResetCounter, hls::stream< Tevent > &sOfGetTheCounter, hls::stream< ap_uint< counter_width > > &oSClockCounter)
Count Clock Cycles between two events first sketch TODO: make it working without counting with the st...
void perfCounterProc2MemCountIncremental(hls::stream< Tin > &cmd, Tout *out)
Count Clock Cycles between two events, the first event init the counter the second stop the count and...
void perfCounterMultipleCounts(hls::stream< Tin > &cmd, Tout *out)
Count Clock Cycles between two events, the first event init the counter the second stop the count,...
void storeWordToAxiStream(NetworkWord word, Tin &img_in_axi_stream, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
Store a net word to a local AXI stream.
void perfCounterProc2Mem(hls::stream< Tin > &cmd, Tout *out, int direction, int burst_length, int nmbr_outstanding)
void storeWordToArray(uint64_t input, TInImg img[img_pckts], unsigned int *processed_word, unsigned int *image_loaded)
Store a net word to local memory.
void pReadAxiMemMapped2HlsStreamCountFirst(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
Copy a run-time variable amount of data to an hls stream with a given max it assumes also the initial...
void perfCounterProc2MemCountOnly(hls::stream< Tin > &cmd, Tout *out)
Count Clock Cycles between two events, the first event init the counter the second stop the count.
void pReadAxiMemMapped2HlsStream(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems)
Copy a run-time variable amount of data to an hls stream with a given max.
void perfCounterProc(hls::stream< Tin > &cmd, hls::stream< Tout > &out, int direction, int burst_length, int nmbr_outstanding)
void pMyMemtestMemCpy(Tin *in, Tout *out)
Copy a fixed compile time amount of data to another array.
void pReadAxiMemMapped2HlsStreamCountActivated(Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
Copy a run-time variable amount of data to an hls stream with a given max it assumes "perfCounterMult...
void pMemCpyCircularBuff(Tin *buff, Tout *out_mem, unsigned int elems, unsigned int offset_buff)
Copy a run-time variable amount of data to another array employing the src as circular buffer i....
string input
Definition: test.py:9
out
Definition: test.py:12
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51