cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
warp_transform.hpp
Go to the documentation of this file.
1 
17 
38 #ifndef _ROLE_WARPTRANSFORM_H_
39 #define _ROLE_WARPTRANSFORM_H_
40 
41 #include <stdio.h>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <math.h>
46 #include <hls_stream.h>
47 #include "ap_int.h"
48 #include <stdint.h>
49 #include "network.hpp"
50 #include "memory_utils.hpp"
51 
52 using namespace hls;
53 
54 // Define this option to load data from network to DDR memory before calling the kernel.
55 #define ENABLE_DDR
56 
59 enum EchoCtrl { // NOTE(review): listing lines 60-61 are not shown here; the index at the end of this page also names ECHO_PATH_THRU and ECHO_STORE_FWD, presumably declared on those lines - confirm against the header.
62  ECHO_OFF = 2 // explicitly assigned the value 2
63 };
64 
65 
66 
69 enum MemTestCmd { // NOTE(review): the enum name looks carried over from memtest.hpp while the enumerators use the WRPTX_ prefix - confirm the intended name. Listing lines 70-71 are not shown here; the index at the end of this page also names WRPTX_TXMAT_CMD and WRPTX_IMG_CMD, presumably declared on those lines.
72  WRPTX_INVLD_CMD = 0 // command value 0 is the invalid command
73 };
74 
75 // Command field: 8-bit width, i.e. up to 255 valid commands (0 is invalid).
76 #define WARPTRANSFORM_COMMANDS_HIGH_BIT (WARPTRANSFORM_COMMANDS_BITWIDTH-1) // parenthesized so the subtraction survives use inside larger expressions
77 #define WARPTRANSFORM_COMMANDS_LOW_BIT 0 // index of the lowest bit of the command field
78 #define WARPTRANSFORM_COMMANDS_BITWIDTH 8 // defined after HIGH_BIT; fine because macros expand at the point of use
79 typedef unsigned int img_meta_t;
80 #define TRANSFORM_MATRIX_DIM 9
82 #define ROLE_IS_WARPTRANSFORM
83 
84 #define WAIT_FOR_META 0
85 #define WAIT_FOR_STREAM_PAIR 1
86 #define PROCESSING_PACKET 2
87 #define LOAD_IN_STREAM 3
88 #define WARPTRANSFORM_RETURN_RESULTS 4
89 #define WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT 5
90 #define WARPTRANSFORM_RETURN_RESULTS_UNPACK 6
91 #define WARPTRANSFORM_RETURN_RESULTS_FWD 7
92 #define WAIT_FOR_TX 8
93 #define FSM_IDLE 9
94 #define FSM_CHK_SKIP 10
95 #define FSM_CHK_PROC_BYTES 11
96 #define FSM_CHK_WRT_CHNK_TO_DDR_PND 12
97 #define FSM_WR_PAT_CMD 13
98 #define FSM_WR_PAT_LOAD 14
99 #define FSM_WR_PAT_DATA 15
100 #define FSM_WR_PAT_STS_A 16
101 #define FSM_WR_PAT_STS_B 17
102 #define FSM_WR_PAT_STS_C 18
103 #define PROCESSING_PACKET_TXMAT 19
104 #define PROCESSING_PACKET_IMGMAT 20
105 #define WAIT_FOR_META_IMGMAT 21
106 #define PUSH_REMAINING_META 22
107 
108 #define PacketFsmType uint8_t
109 
110 
111 #define DEFAULT_TX_PORT 2718
112 #define DEFAULT_RX_PORT 2718
113 // Starting with 2718, this number corresponds to the extra opened ports of this role. Every bit set
114 // corresponds to one port.
115 // e.g. 0x1->2718, 0x2->2719, 0x3->[2718,2719], 0x7->[2718,2719,2720], 0x1F->[2718-2722], etc.
116 #define PORTS_OPENED 0x1F
117 
118 #define Data_t_in ap_axiu<INPUT_PTR_WIDTH, 0, 0, 0>
119 #define Data_t_out ap_axiu<OUTPUT_PTR_WIDTH, 0, 0, 0>
120 
121 
122 #define MAX_NB_OF_ELMT_READ 16
123 typedef uint8_t mat_elmt_t; // change to float or double depending on your needs
124 
125 #define MAX_NB_OF_WORDS_READ (MAX_NB_OF_ELMT_READ*sizeof(mat_elmt_t)/BPERDW) // =2 if double =1 if float
126 #define MAX_NB_OF_ELMT_PERDW (BPERDW/sizeof(mat_elmt_t)) // =8 if double =16 if float
127 
128 
129 //------------------------------------ Declarations for DDR ----------------------------------------
130 
131 /* General memory Data Width is set as a parameter*/
132 /* 512-bit host AXI data width */
133 #define MEMDW_512 512 // 512 Bus width in bits for cF DDR memory
134 #define BPERMDW_512 (MEMDW_512/8) // Bytes per DDR Memory Data Word, if MEMDW=512 => BPERMDW_512 = 64
135 #define KWPERMDW_512 (BPERMDW_512/sizeof(TYPE)) // Number of WarpTransform kernel words per DDR memory word
136 typedef ap_uint<MEMDW_512> membus_512_t; /* 512-bit ddr memory access */
138 #define TOTMEMDW_512 (1 + (IMGSIZE - 1) / BPERMDW_512)
139 
140 
145 #define CHECK_CHUNK_SIZE 0x1000
146 #define BYTE_PER_MEM_WORD BPERMDW_512 // 64
147 #define TRANSFERS_PER_CHUNK (CHECK_CHUNK_SIZE/BYTE_PER_MEM_WORD) //64
148 #define TRANSFERS_PER_CHUNK_DIVEND (TOTMEMDW_512-(TOTMEMDW_512/TRANSFERS_PER_CHUNK)*TRANSFERS_PER_CHUNK)
149 
150 
151 #define fsmStateDDRdef uint8_t
152 
153 // The maximum number of cycles allowed to acknowledge a write to DDR (i.e. read the status stream)
154 #define CYCLES_UNTIL_TIMEOUT 0x0100
155 #define TYPICAL_DDR_LATENCY 4
156 // The latency, in cycles, of the cF DDR. We measured 52 cycles; experimentally we divide that by
157 // 4.769230769 to account for II=2 and the FSM latency. NOTE: the macro below uses integer 52/4 (= 13).
158 #define DDR_LATENCY (52/4)
159 #define EXTRA_DDR_LATENCY_DUE_II (64 + 8) // 8 is the write from input stream to local stream, 64 is read from local stream to DDR
160 /*
161  * A generic unsigned AXI4-Stream interface used all over the cloudFPGA place.
162  * Template parameter D is the width of tdata in bits.
163  */
164 // Consider using the axi_utils.hpp header instead?
165 // (though its initialization differs somewhat)
166 template<int D>
167 struct Axis {
168  ap_uint<D> tdata; // D-bit unsigned data field
169  ap_uint<(D+7)/8> tkeep; // ceil(D/8) bits wide, i.e. one bit per (possibly partial) byte of tdata
170  ap_uint<1> tlast; // single-bit field
171  Axis() {} // assigns nothing explicitly; members are default-constructed
172  Axis(ap_uint<D> single_data) : tdata((ap_uint<D>)single_data), tkeep(1), tlast(1) {} // NOTE(review): tkeep is set to the value 1 (lowest bit only), not all-ones - confirm this is intended for a full-width word
173 };
173 
175 
176  ap_uint<32> *pi_rank,
177  ap_uint<32> *pi_size,
178  //------------------------------------------------------
179  //-- SHELL / This / Udp/TCP Interfaces
180  //------------------------------------------------------
181  stream<NetworkWord> &siSHL_This_Data,
182  stream<NetworkWord> &soTHIS_Shl_Data,
183  stream<NetworkMetaStream> &siNrc_meta,
184  stream<NetworkMetaStream> &soNrc_meta,
185  ap_uint<32> *po_rx_ports
186 
187  #ifdef ENABLE_DDR
188  ,
189  //------------------------------------------------------
190  //-- SHELL / Role / Mem / Mp0 Interface
191  //------------------------------------------------------
192  //---- Read Path (MM2S) ------------
193  // stream<DmCmd> &soMemRdCmdP0,
194  // stream<DmSts> &siMemRdStsP0,
195  // stream<Axis<MEMDW_512 > > &siMemReadP0,
196  //---- Write Path (S2MM) -----------
197  stream<DmCmd> &soMemWrCmdP0,
198  stream<DmSts> &siMemWrStsP0,
199  stream<Axis<MEMDW_512> > &soMemWriteP0,
200  //------------------------------------------------------
201  //-- SHELL / Role / Mem / Mp1 Interface
202  //------------------------------------------------------
205  #endif
206 );
207 
208 
209 #endif
210 
211 
membus_512_t membus_t
Definition: memtest.hpp:92
ap_uint< 512 > membus_512_t
Definition: memtest.hpp:91
EchoCtrl
Definition: memtest.hpp:49
MemTestCmd
Definition: memtest.hpp:58
membus_t lcl_mem0[16384]
membus_t lcl_mem1[16384]
#define TRANSFORM_MATRIX_DIM
unsigned int img_meta_t
#define ENABLE_DDR
membus_512_t membus_t
ap_uint< 512 > membus_512_t
uint8_t mat_elmt_t
const unsigned int const_tx_matrix_dim
void warp_transform(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 > > &soMemWriteP0, membus_t *lcl_mem0, membus_t *lcl_mem1)
@ WRPTX_TXMAT_CMD
@ WRPTX_IMG_CMD
@ WRPTX_INVLD_CMD
@ ECHO_STORE_FWD
@ ECHO_OFF
@ ECHO_PATH_THRU
Axis(ap_uint< D > single_data)