cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
warp_transform_processing.hpp
Go to the documentation of this file.
1 
17 
34 #ifndef _ROLE_WARPTRANSFORM_PROCESSING_HPP_
35 #define _ROLE_WARPTRANSFORM_PROCESSING_HPP_
36 
37 
// Numeric identifiers (0..13) for the FSM_PROCESSING_* states, followed by the
// ProcessingFsmType macro, which expands to uint8_t.
// NOTE(review): pProcPath below does not reference any FSM_PROCESSING_* value or
// ProcessingFsmType; its state variable is a PacketFsmType that takes WAIT_FOR_META /
// PROCESSING_PACKET / WARPTRANSFORM_RETURN_RESULTS*. Presumably these macros are
// consumed by another translation unit - TODO confirm, otherwise they are dead.
38 #define FSM_PROCESSING_WAIT_FOR_META 0
39 #define FSM_PROCESSING_PCKT_PROC 1
40 #define FSM_PROCESSING_STOP 2
41 #define FSM_PROCESSING_START 3
42 #define FSM_PROCESSING_BURST_READING 4
43 #define FSM_PROCESSING_DATAFLOW_WRITE 5
44 #define FSM_PROCESSING_DATAFLOW_READ 6
// Five consecutive output states (OUTPUT, OUTPUT_2 .. OUTPUT_5).
45 #define FSM_PROCESSING_OUTPUT 7
46 #define FSM_PROCESSING_OUTPUT_2 8
47 #define FSM_PROCESSING_OUTPUT_3 9
48 #define FSM_PROCESSING_OUTPUT_4 10
49 #define FSM_PROCESSING_OUTPUT_5 11
50 #define FSM_PROCESSING_CONTINUOUS_RUN 12
51 #define FSM_PROCESSING_WAIT_FOR_DDR_CONTROLLER_EMPTYNESS 13
// Type macro rather than a typedef/using; 8 bits are enough for the 14 values above.
52 #define ProcessingFsmType uint8_t
53 
54 
55 
// pProcPath - processing-path FSM: waits for an "image loaded" token, invokes the
// WarpTransform accelerator once, then forwards the result as NetworkWords on
// sRxpToTxp_Data. One switch arm executes per call; state lives in function statics.
//
// Parameters (as used by the visible code):
//   sRxpToTxp_Data     - output stream; receives NetworkWord(temp.data, temp.keep, temp.last).
//   img_in_axi_stream  - (!ENABLE_DDR only) accelerator input stream.
//   img_out_axi_stream - (!ENABLE_DDR only) accelerator output stream, drained by this FSM.
//   sImageLoaded       - a `true` token starts processing of one image.
//   img_rows/cols/chan - pointers read when the token arrives (copied into lcl_img_*).
//   sTxMatrix          - transform-matrix stream, handed to warp_transformAccelMem in the
//                        ENABLE_DDR build.
//
// The function is a template only when ENABLE_DDR is NOT defined.
//
// NOTE(review): the embedded listing numbers skip 73-74, 173, 178, 181, 186, 193 and 220,
// so this listing is missing original lines - presumably the memory-mapped parameters
// (lcl_mem0/lcl_mem1 are used below but not declared here), several `case` labels, the
// continuation of one printf and one state assignment. Confirm against the real header
// before editing any logic.
66 #ifndef ENABLE_DDR
67 template <typename TimgIn=ap_uint<INPUT_PTR_WIDTH>, typename TimgOut=ap_uint<OUTPUT_PTR_WIDTH>>
68 #endif // ENABLE_DDR
69 void pProcPath(
70  stream<NetworkWord> &sRxpToTxp_Data,
71  #ifdef ENABLE_DDR
72  //---- P1 Memory mapped ---------------
75  #else // !ENABLE_DDR
76  stream<TimgIn> &img_in_axi_stream,
77  stream<TimgOut> &img_out_axi_stream,
78  #endif // ENABLE_DDR
79  stream<bool> &sImageLoaded,
80  img_meta_t * img_rows,
81  img_meta_t * img_cols,
82  img_meta_t * img_chan,
 // NOTE(review): the array form of the matrix is commented out, yet the !ENABLE_DDR
 // accelerator calls below still pass `tx_matrix`, which is not declared in this
 // function - that branch looks uncompilable as listed; TODO confirm.
83  // float tx_matrix[TRANSFORM_MATRIX_DIM]
84  hls::stream<float> &sTxMatrix
85  )
86 {
87  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
88  #pragma HLS INLINE off
89  #pragma HLS pipeline II=1
90 
91  //-- LOCAL VARIABLES ------------------------------------------------------
92  NetworkWord newWord;
 // NOTE(review): Thresh, K and k are not read anywhere in the lines visible here.
93  uint16_t Thresh = 442;
94  float K = 0.04;
95  uint16_t k = K * (1 << 16); // Convert to Q0.16 format
 // accel_called: set once the accelerator has been invoked for the current image.
96  static bool accel_called;
 // processed_word_proc: number of words forwarded so far (ENABLE_DDR forward state).
97  static unsigned int processed_word_proc;
 // timeoutCntAbs: call counter compared against DDR_LATENCY.
98  static unsigned int timeoutCntAbs;
 // cnt_i: index of the next OUTPUT_PTR_WIDTH-wide slice of `tmp` to forward.
99  static unsigned int cnt_i;
 // tmp: last word read from lcl_mem1.
100  static membus_t tmp;
101  Data_t_out temp;
102  #ifdef ENABLE_DDR
103  //static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
104  //#pragma HLS stream variable=img_out_axi_stream depth=9
 // ddr_addr_out: index of the next lcl_mem1 word to read back.
105  static unsigned int ddr_addr_out;
106  #pragma HLS reset variable=ddr_addr_out
107  #endif
108  static PacketFsmType WarpTransformFSM = WAIT_FOR_META;
109  #pragma HLS reset variable=WarpTransformFSM
110 
111  #pragma HLS reset variable=accel_called
112  #pragma HLS reset variable=processed_word_proc
113  #pragma HLS reset variable=timeoutCntAbs
114  #pragma HLS reset variable=cnt_i
115  #pragma HLS reset variable=tmp
 // NOTE(review): `temp` is a non-static local, unlike the other variables given a
 // reset pragma here - confirm this pragma has any effect.
116  #pragma HLS reset variable=temp
117 
 // Copies of the image metadata taken when sImageLoaded fires.
 // NOTE(review): the accelerator calls below use the img_rows/img_cols pointers
 // instead of these copies, and lcl_img_chan is never read in the visible lines.
118  static img_meta_t lcl_img_rows=0;
119  static img_meta_t lcl_img_cols=0;
120  static img_meta_t lcl_img_chan=0;
121  #pragma HLS reset variable=lcl_img_rows
122  #pragma HLS reset variable=lcl_img_cols
123  #pragma HLS reset variable=lcl_img_chan
124 
125 
126  switch(WarpTransformFSM)
127  {
 // Idle: consume one token from sImageLoaded; only a `true` token starts a run.
128  case WAIT_FOR_META:
129  printf("DEBUG in pProcPath: WAIT_FOR_META\n");
130  if (!sImageLoaded.empty())
131  {
132  if (sImageLoaded.read() == true) {
133  WarpTransformFSM = PROCESSING_PACKET;
134  accel_called = false;
135  processed_word_proc = 0;
 // timeoutCntAbs and cnt_i are cleared only in the ENABLE_DDR build.
136  #ifdef ENABLE_DDR
137  ddr_addr_out = 0;
138  timeoutCntAbs = 0;
139  cnt_i = 0;
140  #endif
141  lcl_img_rows = *img_rows;
142  lcl_img_cols = *img_cols;
143  lcl_img_chan = *img_chan;
144  }
145  }
146  break;
147 
 // Invoke the accelerator exactly once per image (guarded by accel_called). In the
 // !ENABLE_DDR build the call is additionally gated on input data being available
 // and the output stream not being full.
148  case PROCESSING_PACKET:
149  printf("DEBUG in pProcPath: PROCESSING_PACKET\n");
150  #ifndef ENABLE_DDR
151  if ( !img_in_axi_stream.empty() && !img_out_axi_stream.full() )
152  {
153  #endif
154  if (accel_called == false) {
155  #ifdef ENABLE_DDR
156  warp_transformAccelMem(lcl_mem0, lcl_mem1, *img_rows, *img_cols, sTxMatrix);
157  #else // ! ENABLE_DDR
158  #ifdef FAKE_WarpTransform
159  fakeWarpTransformAccelStream(img_in_axi_stream, img_out_axi_stream, MIN_RX_LOOPS, MIN_TX_LOOPS, tx_matrix);
160  #else // !FAKE_WarpTransform
 // NOTE(review): img_rows/img_cols are passed as pointers here, whereas the
 // ENABLE_DDR call above dereferences them - check against the signature of
 // warpTransformAccelStream.
161  warpTransformAccelStream(img_in_axi_stream, img_out_axi_stream, img_rows, img_cols, tx_matrix);
162  #endif // FAKE_WarpTransform
163  #endif // ENABLE_DDR
164  accel_called = true;
165  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS;
166  }
167  #ifndef ENABLE_DDR
168  }
169  #endif
170  break;
171 
 // ---- ENABLE_DDR result path ----
 // NOTE(review): the `case` label for this arm (original line 173) is missing from the
 // listing; the debug string suggests WARPTRANSFORM_RETURN_RESULTS - TODO confirm.
 // Visible behaviour: read one word from lcl_mem1 into tmp, advance ddr_addr_out and
 // clear the latency counter.
172  #ifdef ENABLE_DDR
174  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS, ddr_addr_out=%u\n", ddr_addr_out);
175  if (accel_called == true) {
176 
 // NOTE(review): the argument line of the following printf (original line 178) and
 // original line 181 are absent from this listing.
177  printf("DEBUG in pProcPath: Accumulated %u net words (%u B) to complete a single DDR word\n",
179  tmp = lcl_mem1[ddr_addr_out];
180  ddr_addr_out++;
182  timeoutCntAbs = 0;
183  }
184  break;
185 
 // NOTE(review): `case` label missing (original line 186); the debug string suggests
 // WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT. This arm stays put until the
 // post-incremented timeoutCntAbs compares equal to DDR_LATENCY, then resets the slice
 // index and moves to the forward state.
187  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT [%u out of %u]\n", timeoutCntAbs, DDR_LATENCY);
188  if (timeoutCntAbs++ == DDR_LATENCY) {
189  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS_FWD; //WARPTRANSFORM_RETURN_RESULTS_UNPACK;
190  cnt_i = 0;
191  }
192  break;
 // NOTE(review): `case` label missing (original line 193); the debug string suggests
 // WARPTRANSFORM_RETURN_RESULTS_FWD. Each call forwards one 64-bit slice of tmp, for
 // slice indices 0 .. MEMDW_512/OUTPUT_PTR_WIDTH - 1.
194  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS_FWD\n");
195  //if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() ) {
196  if ( (cnt_i <= (MEMDW_512/OUTPUT_PTR_WIDTH) - 1) && !sRxpToTxp_Data.full() ) {
197 
198  //temp.data = img_out_axi_stream.read();
 // The slice offset scales with OUTPUT_PTR_WIDTH but its width is hard-coded to 64 bits.
199  temp.data(0 ,63) = tmp(cnt_i*OUTPUT_PTR_WIDTH , cnt_i*OUTPUT_PTR_WIDTH+63);
 // Post-increment: true on the MIN_TX_LOOPS-th forwarded word (counter was zeroed in
 // WAIT_FOR_META); that word is flagged last and the FSM returns to idle.
200  if (processed_word_proc++ == MIN_TX_LOOPS-1) {
201  temp.last = 1;
202  WarpTransformFSM = WAIT_FOR_META;
203  }
204  else {
205  temp.last = 0;
206  }
207  //TODO: find why Vitis kernel does not set keep and last by itself
208  temp.keep = 255;
209  newWord = NetworkWord(temp.data, temp.keep, temp.last);
210  sRxpToTxp_Data.write(newWord);
211  cnt_i++;
212  }
 // NOTE(review): this branch is taken both when all slices were sent AND when
 // sRxpToTxp_Data is full; in the latter case the FSM leaves with unsent slices of tmp
 // still pending - verify that back-pressure cannot drop data here.
213  else {
214  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS;
215  }
216 
217  break;
218 
 // ---- !ENABLE_DDR result path ----
 // NOTE(review): `case` label missing (original line 220); the debug string suggests
 // WARPTRANSFORM_RETURN_RESULTS. Moves one word per call from img_out_axi_stream to
 // sRxpToTxp_Data.
219  #else // ! ENABLE_DDR
221  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS\n");
222  if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() )
223  {
224 
225  temp.data = img_out_axi_stream.read();
 // The word is flagged last when the accelerator output stream is empty right after
 // this read (the counter-based test is commented out).
 // NOTE(review): presumably relies on the stream already holding the complete
 // result; a momentarily empty stream would end the frame early - TODO confirm.
226  if ( img_out_axi_stream.empty() )
227  //if (processed_word_proc++ == MIN_TX_LOOPS-1)
228  {
229  temp.last = 1;
230  WarpTransformFSM = WAIT_FOR_META;
231  accel_called = false;
232  }
233  else
234  {
235  temp.last = 0;
236  }
237  //TODO: find why Vitis kernel does not set keep and last by itself
238  temp.keep = 255;
239  newWord = NetworkWord(temp.data, temp.keep, temp.last);
240  sRxpToTxp_Data.write(newWord);
241  }
242  break;
243  #endif // ENABLE_DDR
244  } // end switch
245 
246 }
247 
248 #endif //_ROLE_WARPTRANSFORM_PROCESSING_HPP_
#define BPERMDW_512
Definition: harris.hpp:120
#define KWPERMDW_512
Definition: harris.hpp:121
#define MIN_TX_LOOPS
#define MIN_RX_LOOPS
#define OUTPUT_PTR_WIDTH
#define MEMDW_512
Definition: memtest.hpp:90
#define PacketFsmType
Definition: memtest.hpp:76
#define Data_t_out
Definition: memtest.cpp:30
#define DDR_LATENCY
Definition: memtest.hpp:98
#define ENABLE_DDR
Definition: memtest.hpp:42
membus_512_t membus_t
Definition: memtest.hpp:92
#define PROCESSING_PACKET
Definition: memtest.hpp:73
#define WAIT_FOR_META
Definition: memtest.hpp:71
membus_t lcl_mem0[16384]
membus_t lcl_mem1[16384]
#define WARPTRANSFORM_RETURN_RESULTS
unsigned int img_meta_t
#define WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT
void pProcPath(stream< NetworkWord > &sRxpToTxp_Data, stream< TimgIn > &img_in_axi_stream, stream< TimgOut > &img_out_axi_stream, stream< bool > &sImageLoaded, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan, hls::stream< float > &sTxMatrix)
Processing Path - Main processing FSM for Vitis kernels.
#define WARPTRANSFORM_RETURN_RESULTS_FWD
void warp_transformAccelMem(membus_t *img_inp, membus_t *img_out, int rows, int cols, hls::stream< float > &sTxMatrix)
Top-level accelerated function of the WarpTransform Application with memory mapped interfaces.
void fakeWarpTransformAccelStream(hls::stream< ap_axiu< 8, 0, 0, 0 > > &img_in_axi_stream, hls::stream< ap_axiu< 64, 0, 0, 0 > > &img_out_axi_stream, unsigned int min_rx_loops, unsigned int min_tx_loops, float transform_matrix[9])
void warpTransformAccelStream(hls::stream< ap_uint< 8 >> &img_in_axi_stream, hls::stream< ap_uint< 64 >> &img_out_axi_stream, int rows, int cols, float transform_matrix[9])
Top-level accelerated function of the WarpTransform Application with array I/F. add WARPTRANSFORM.