cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
warp_transform.cpp
Go to the documentation of this file.
1 
17 
36 #include "../include/warp_transform.hpp"
37 #include "../include/xf_warp_transform_config.h"
38 #include "../include/warp_transform_network_library.hpp"
39 #include "../include/warp_transform_hw_common.hpp"
40 #include "../include/warp_transform_processing.hpp"
41 
42 using hls::stream;
43 
44 
45 
52 
53  ap_uint<32> *pi_rank,
54  ap_uint<32> *pi_size,
55  //------------------------------------------------------
56  //-- SHELL / This / UDP/TCP Interfaces
57  //------------------------------------------------------
58  stream<NetworkWord> &siSHL_This_Data,
59  stream<NetworkWord> &soTHIS_Shl_Data,
60  stream<NetworkMetaStream> &siNrc_meta,
61  stream<NetworkMetaStream> &soNrc_meta,
62  ap_uint<32> *po_rx_ports
63 
64  #ifdef ENABLE_DDR
65  ,
66  //------------------------------------------------------
67  //-- SHELL / Role / Mem / Mp0 Interface
68  //------------------------------------------------------
69  //---- Read Path (MM2S) ------------
70  // stream<DmCmd> &soMemRdCmdP0,
71  // stream<DmSts> &siMemRdStsP0,
72  // stream<Axis<MEMDW_512 > > &siMemReadP0,
73  //---- Write Path (S2MM) -----------
74  stream<DmCmd> &soMemWrCmdP0,
75  stream<DmSts> &siMemWrStsP0,
76  stream<Axis<MEMDW_512>> &soMemWriteP0,
77  //------------------------------------------------------
78  //-- SHELL / Role / Mem / Mp1 Interface
79  //------------------------------------------------------
82  #endif
83  )
84 {
85 
86 
87 //-- DIRECTIVES FOR THE BLOCK ---------------------------------------------
88 #pragma HLS INTERFACE axis register both port=siSHL_This_Data
89 #pragma HLS INTERFACE axis register both port=soTHIS_Shl_Data
90 
91 #pragma HLS INTERFACE axis register both port=siNrc_meta
92 #pragma HLS INTERFACE axis register both port=soNrc_meta
93 
94 #pragma HLS INTERFACE ap_ovld register port=po_rx_ports name=poROL_NRC_Rx_ports
95 
96 #if HLS_VERSION < 20211
97 #pragma HLS INTERFACE ap_stable register port=pi_rank name=piFMC_ROL_rank
98 #pragma HLS INTERFACE ap_stable register port=pi_size name=piFMC_ROL_size
99 #elif HLS_VERSION >= 20211
100  #pragma HLS stable variable=pi_rank
101  #pragma HLS stable variable=pi_size
102 #else
103  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
104  exit(-1);
105 #endif
106 
107 #ifdef ENABLE_DDR
108 
109 // Bundling: SHELL / Role / Mem / Mp0 / Write Interface
110 #pragma HLS INTERFACE axis register both port=soMemWrCmdP0
111 #pragma HLS INTERFACE axis register both port=siMemWrStsP0
112 #pragma HLS INTERFACE axis register both port=soMemWriteP0
113 
114 #if HLS_VERSION <= 20201
115 #pragma HLS DATA_PACK variable=soMemWrCmdP0 instance=soMemWrCmdP0
116 #pragma HLS DATA_PACK variable=siMemWrStsP0 instance=siMemWrStsP0
117 #elif HLS_VERSION >= 20211
118 #pragma HLS aggregate variable=soMemWrCmdP0 compact=bit
119 #pragma HLS aggregate variable=siMemWrStsP0 compact=bit
120 #else
121  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
122  exit(-1);
123 #endif
124 
125 const unsigned int ddr_mem_depth = TOTMEMDW_512;
126 const unsigned int ddr_latency = DDR_LATENCY;
127 
128 
129 // When max burst size is 1KB, with 512bit bus we get 16 burst transactions
130 // When max burst size is 4KB, with 512bit bus we get 64 burst transactions
131 const unsigned int max_axi_rw_burst_length = 64;
132 const unsigned int num_outstanding_transactions = 256;
133 
134 // Mapping LCL_MEM0 interface to moMEM_Mp1 channel
135 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem0 bundle=moMEM_Mp1\
136  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
137  num_read_outstanding=num_outstanding_transactions num_write_outstanding=num_outstanding_transactions latency=ddr_latency
138 
139 // Mapping LCL_MEM1 interface to moMEM_Mp1 channel
140 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem1 bundle=moMEM_Mp1 \
141  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
142  num_read_outstanding=num_outstanding_transactions num_write_outstanding=num_outstanding_transactions latency=ddr_latency
143 
144 #endif
145 
146  #pragma HLS DATAFLOW
147 
148  //-- LOCAL VARIABLES ------------------------------------------------------
150  static stream<NetworkWord> sRxpToTxp_Data("sRxpToTxP_Data"); // FIXME: works even with no static
151  static stream<NetworkMetaStream> sRxtoTx_Meta("sRxtoTx_Meta");
152  static unsigned int processed_word_rx;
153  static unsigned int processed_bytes_rx;
154  static unsigned int processed_word_tx = 0;
155  static stream<bool> sImageLoaded("sImageLoaded");
156  static bool skip_read;
157  static bool write_chunk_to_ddr_pending;
158  static bool ready_to_accept_new_data;
159  static bool signal_init;
160  const int tot_transfers = TOT_TRANSFERS_TX;
161  const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
162  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET*loop_cnt);
163 
164 #ifdef ENABLE_DDR
165  static stream<membus_t> img_in_axi_stream ("img_in_axi_stream");
166  const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
167  static stream<bool> sMemBurstRx("sMemBurstRx");
168 
169 #else
170  const int img_in_axi_stream_depth = MIN_RX_LOOPS;
171  const int img_out_axi_stream_depth = MIN_TX_LOOPS;
172  static stream<ap_uint<INPUT_PTR_WIDTH>> img_in_axi_stream ("img_in_axi_stream");
173  static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
174 #endif
175  static stream<NodeId> sDstNode_sig("sDstNode_sig");
176 
177 
178 //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
179 #pragma HLS stream variable=sRxtoTx_Meta depth=tot_transfers
180 #pragma HLS reset variable=processed_word_rx
181 #pragma HLS reset variable=processed_word_tx
182 #pragma HLS reset variable=processed_bytes_rx
183 //#pragma HLS reset variable=image_loaded
184 #pragma HLS stream variable=sImageLoaded depth=1
185 #pragma HLS reset variable=skip_read
186 #pragma HLS reset variable=write_chunk_to_ddr_pending
187 //#pragma HLS stream variable=sWriteChunkToDdrPending depth=2
188 #pragma HLS reset variable=ready_to_accept_new_data
189 #pragma HLS reset variable=signal_init
190 #pragma HLS STREAM variable=sDstNode_sig depth=1
191 
192 #ifdef ENABLE_DDR
193 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
194 #pragma HLS stream variable=sProcessed_bytes_rx depth=img_in_axi_stream_depth
195 #else
196 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
197 #pragma HLS stream variable=img_out_axi_stream depth=img_out_axi_stream_depth
198 #endif
199 
200 static stream<float> sTxMatrix("sTxMatrix");
201 #pragma HLS stream variable=sTxMatrix depth=const_tx_matrix_dim
202 
203 // static float tx_matrix[TRANSFORM_MATRIX_DIM] = {1.5,0,0,0,1.8,0,0,0,0}; //scaling (reduction) left corner!!!
204 // #pragma HLS reset variable=tx_matrix
205 img_meta_t img_rows = FRAME_HEIGHT;
206 img_meta_t img_cols = FRAME_WIDTH;
207 img_meta_t img_chan = NPC1;
208 #pragma HLS reset variable=img_rows
209 #pragma HLS reset variable=img_cols
210 #pragma HLS reset variable=img_chan
211 
213  pi_rank,
214  pi_size,
215  sDstNode_sig,
216  po_rx_ports
217 );
218 
219 #ifdef ENABLE_DDR
220 
222  loop_cnt,
224  IMGSIZE,
226  siSHL_This_Data,
227  siNrc_meta,
228  sRxtoTx_Meta,
229  img_in_axi_stream,
230  sMemBurstRx,
231  &img_rows,
232  &img_cols,
233  &img_chan,
234  // tx_matrix
235  sTxMatrix
236  );
237 
238  pRXPathStreamToDDR< Axis<MEMDW_512>,
239  membus_t,
240  loop_cnt,
241  bytes_per_loop>(
242  img_in_axi_stream,
243  sMemBurstRx,
244  //---- P0 Write Path (S2MM) -----------
245  soMemWrCmdP0,
246  siMemWrStsP0,
247  soMemWriteP0,
248  //---- P1 Memory mapped ---------------
249  //&processed_bytes_rx,
250  sImageLoaded,
251  &img_rows,
252  &img_cols,
253  &img_chan
254  );
255 
256 
257 
258  #else // !ENABLE_DDR
259 
260  pRXPath(
261  siSHL_This_Data,
262  siNrc_meta,
263  sRxtoTx_Meta,
264  img_in_axi_stream,
265  meta_tmp,
266  &processed_word_rx,
267  &processed_bytes_rx,
268  sImageLoaded
269  );
270 
271 #endif // ENABLE_DDR
272 
273  pProcPath(
274  sRxpToTxp_Data,
275 #ifdef ENABLE_DDR
276  lcl_mem0,
277  lcl_mem1,
278 #else
279  img_in_axi_stream,
280  img_out_axi_stream,
281 #endif
282  sImageLoaded,
283  &img_rows,
284  &img_cols,
285  &img_chan,
286  // tx_matrix
287  sTxMatrix
288  );
289 
290  pTXPath(
291  soTHIS_Shl_Data,
292  soNrc_meta,
293  sRxpToTxp_Data,
294  sRxtoTx_Meta,
295  sDstNode_sig,
296  &processed_word_tx,
297  pi_rank,
298  &img_rows,
299  &img_cols,
300  &img_chan
301  );
302 }
303 
304 
#define NPC1
#define IMGSIZE
#define FRAME_HEIGHT
Definition: config.h:43
#define FRAME_WIDTH
Definition: config.h:46
#define TRANSFERS_PER_CHUNK
Definition: harris.hpp:133
void pProcPath(stream< NetworkWord > &sRxpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, NetworkMetaStream meta_tmp, varin *instruct, double *out, unsigned int *processed_word_rx, unsigned int *processed_word_proc, unsigned int *struct_loaded)
Processing Path - Main processing FSM for Vitis kernels.
#define MIN_TX_LOOPS
#define BITS_PER_10GBITETHRNET_AXI_PACKET
#define MIN_RX_LOOPS
#define BYTES_PER_10GBITETHRNET_AXI_PACKET
void pRXPathNetToStream(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< ap_uint< 512 >> &img_in_axi_stream, stream< bool > &sMemBurstRx)
Receive Path - From SHELL to THIS.
void pPortAndDestionation(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NodeId > &sDstNode_sig, ap_uint< 32 > *po_rx_ports)
pPortAndDestionation - Set up the port and the destination rank.
#define MEMDW_512
Definition: memtest.hpp:90
void pRXPath(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoProc_Meta, stream< NetworkWord > &sRxpToProcp_Data, NetworkMetaStream meta_tmp, bool *start_stop, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx)
Receive Path - From SHELL to THIS.
#define DDR_LATENCY
Definition: memtest.hpp:98
void pTXPath(stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &soNrc_meta, stream< NetworkWord > &sProcpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< NodeId > &sDstNode_sig, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank)
Transmit Path - From THIS to SHELL.
#define ENABLE_DDR
Definition: memtest.hpp:42
membus_512_t membus_t
Definition: memtest.hpp:92
#define TOTMEMDW_512
Definition: memtest.hpp:93
membus_t lcl_mem0[16384]
membus_t lcl_mem1[16384]
unsigned int img_meta_t
void warp_transform(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 >> &soMemWriteP0, membus_t *lcl_mem0, membus_t *lcl_mem1)
Main process of the WarpTransform application.