cloudFPGA (cF) API  1.0
The documentation of the source code of cloudFPGA (cF)
WarpTransform HLS

This is a subgroup of WarpTransform accelerated function with only synthesizable (HLS) functions/classes. More...

Collaboration diagram for WarpTransform HLS:

Files

file  warp_transform.hpp
 The Role for a WarpTransform Example application (UDP or TCP)
 
file  warp_transform_hw_common.hpp
 A library for some common functionalities: memory interaction and performance counters.
 
file  warp_transform_network_library.hpp
 A library for some common Network-Related functionalities.
 
file  warp_transform_processing.hpp
 The processing function for the warp_transform filter.
 
file  xf_config_params.h
 The WarpTransform IP configuration header.
 
file  warp_transform.cpp
 The Role for a WarpTransform Example application (UDP or TCP)
 
file  xf_warp_transform_accel.cpp
 The WarpTransform top-level.
 

Classes

struct  Axis< D >
 
union  float_bits_u
 

Macros

#define ENABLE_DDR
 
#define WARPTRANSFORM_COMMANDS_HIGH_BIT   WARPTRANSFORM_COMMANDS_BITWIDTH-1
 
#define WARPTRANSFORM_COMMANDS_LOW_BIT   0
 
#define WARPTRANSFORM_COMMANDS_BITWIDTH   8
 
#define TRANSFORM_MATRIX_DIM   9
 
#define ROLE_IS_WARPTRANSFORM
 
#define WAIT_FOR_META   0
 
#define WAIT_FOR_STREAM_PAIR   1
 
#define PROCESSING_PACKET   2
 
#define LOAD_IN_STREAM   3
 
#define WARPTRANSFORM_RETURN_RESULTS   4
 
#define WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT   5
 
#define WARPTRANSFORM_RETURN_RESULTS_UNPACK   6
 
#define WARPTRANSFORM_RETURN_RESULTS_FWD   7
 
#define WAIT_FOR_TX   8
 
#define FSM_IDLE   9
 
#define FSM_CHK_SKIP   10
 
#define FSM_CHK_PROC_BYTES   11
 
#define FSM_CHK_WRT_CHNK_TO_DDR_PND   12
 
#define FSM_WR_PAT_CMD   13
 
#define FSM_WR_PAT_LOAD   14
 
#define FSM_WR_PAT_DATA   15
 
#define FSM_WR_PAT_STS_A   16
 
#define FSM_WR_PAT_STS_B   17
 
#define FSM_WR_PAT_STS_C   18
 
#define PROCESSING_PACKET_TXMAT   19
 
#define PROCESSING_PACKET_IMGMAT   20
 
#define WAIT_FOR_META_IMGMAT   21
 
#define PUSH_REMAINING_META   22
 
#define PacketFsmType   uint8_t
 
#define DEFAULT_TX_PORT   2718
 
#define DEFAULT_RX_PORT   2718
 
#define PORTS_OPENED   0x1F
 
#define Data_t_in   ap_axiu<INPUT_PTR_WIDTH, 0, 0, 0>
 
#define Data_t_out   ap_axiu<OUTPUT_PTR_WIDTH, 0, 0, 0>
 
#define MAX_NB_OF_ELMT_READ   16
 
#define MAX_NB_OF_WORDS_READ   (MAX_NB_OF_ELMT_READ*sizeof(mat_elmt_t)/BPERDW)
 
#define MAX_NB_OF_ELMT_PERDW   (BPERDW/sizeof(mat_elmt_t))
 
#define MEMDW_512   512
 
#define BPERMDW_512   (MEMDW_512/8)
 
#define KWPERMDW_512   (BPERMDW_512/sizeof(TYPE))
 
#define TOTMEMDW_512   (1 + (IMGSIZE - 1) / BPERMDW_512)
 
#define CHECK_CHUNK_SIZE   0x1000
 This define configures the AXI burst size of the DDRM memory-mapped interfaces. AXI4 allows 4KiB, but the Role's AXI interconnect is configured at max 1KiB. Values: 0x40->64B, 0x400->1024B (1KiB), 0x1000->4KiB. More...
 
#define BYTE_PER_MEM_WORD   BPERMDW_512
 
#define TRANSFERS_PER_CHUNK   (CHECK_CHUNK_SIZE/BYTE_PER_MEM_WORD)
 
#define TRANSFERS_PER_CHUNK_DIVEND   (TOTMEMDW_512-(TOTMEMDW_512/TRANSFERS_PER_CHUNK)*TRANSFERS_PER_CHUNK)
 
#define fsmStateDDRdef   uint8_t
 
#define CYCLES_UNTIL_TIMEOUT   0x0100
 
#define TYPICAL_DDR_LATENCY   4
 
#define DDR_LATENCY   (52/4)
 
#define EXTRA_DDR_LATENCY_DUE_II   (64 + 8)
 
#define FSM_WRITE_NEW_DATA   0
 
#define FSM_DONE   1
 
#define PortFsmType   uint8_t
 
#define WARPTRANSFORM_CHNNEL_BITWIDTH   3
 
#define WARPTRANSFORM_COLS_BITWIDTH   16
 
#define WARPTRANSFORM_ROWS_BITWIDTH   16
 
#define WARPTRANSFORM_ROWS_HIGH_BIT   NETWORK_WORD_BIT_WIDTH-1
 
#define WARPTRANSFORM_ROWS_LOW_BIT   NETWORK_WORD_BIT_WIDTH-WARPTRANSFORM_ROWS_BITWIDTH
 
#define WARPTRANSFORM_COLS_HIGH_BIT   WARPTRANSFORM_ROWS_LOW_BIT-1
 
#define WARPTRANSFORM_COLS_LOW_BIT   WARPTRANSFORM_ROWS_LOW_BIT-WARPTRANSFORM_COLS_BITWIDTH
 
#define WARPTRANSFORM_CHNNEL_HIGH_BIT   WARPTRANSFORM_COLS_LOW_BIT-1
 
#define WARPTRANSFORM_CHNNEL_LOW_BIT   WARPTRANSFORM_COLS_LOW_BIT-WARPTRANSFORM_CHNNEL_BITWIDTH
 
#define FSM_PROCESSING_WAIT_FOR_META   0
 
#define FSM_PROCESSING_PCKT_PROC   1
 
#define FSM_PROCESSING_STOP   2
 
#define FSM_PROCESSING_START   3
 
#define FSM_PROCESSING_BURST_READING   4
 
#define FSM_PROCESSING_DATAFLOW_WRITE   5
 
#define FSM_PROCESSING_DATAFLOW_READ   6
 
#define FSM_PROCESSING_OUTPUT   7
 
#define FSM_PROCESSING_OUTPUT_2   8
 
#define FSM_PROCESSING_OUTPUT_3   9
 
#define FSM_PROCESSING_OUTPUT_4   10
 
#define FSM_PROCESSING_OUTPUT_5   11
 
#define FSM_PROCESSING_CONTINUOUS_RUN   12
 
#define FSM_PROCESSING_WAIT_FOR_DDR_CONTROLLER_EMPTYNESS   13
 
#define ProcessingFsmType   uint8_t
 
#define RO   0
 
#define NO   1
 
#define RGBA   0
 
#define GRAY   1
 

Typedefs

typedef unsigned int img_meta_t
 
typedef uint8_t mat_elmt_t
 
typedef ap_uint< 512 > membus_512_t
 
typedef membus_512_t membus_t
 

Enumerations

enum  EchoCtrl {
  ECHO_PATH_THRU = 0 , ECHO_STORE_FWD = 1 , ECHO_OFF = 2 , ECHO_PATH_THRU = 0 ,
  ECHO_STORE_FWD = 1 , ECHO_OFF = 2 , ECHO_PATH_THRU = 0 , ECHO_STORE_FWD = 1 ,
  ECHO_OFF = 2 , ECHO_PATH_THRU = 0 , ECHO_STORE_FWD = 1 , ECHO_OFF = 2 ,
  ECHO_PATH_THRU = 0 , ECHO_STORE_FWD = 1 , ECHO_OFF = 2 , ECHO_PATH_THRU = 0 ,
  ECHO_STORE_FWD = 1 , ECHO_OFF = 2 , ECHO_PATH_THRU = 0 , ECHO_STORE_FWD = 1 ,
  ECHO_OFF = 2 , ECHO_STORE_FWD = 0 , ECHO_PATH_THRU = 1 , ECHO_CTRL_DISABLED = 0 ,
  ECHO_PATH_THRU = 1 , ECHO_STORE_FWD = 2 , ECHO_OFF = 3
}
 
enum  MemTestCmd {
  TEST_BURSTSIZE_CMD = 4 , TEST_ENDOFTESTS_CMD = 3 , TEST_STOP_CMD = 2 , TEST_START_CMD = 1 ,
  TEST_INVLD_CMD = 0 , WRPTX_IMG_CMD = 2 , WRPTX_TXMAT_CMD = 1 , WRPTX_INVLD_CMD = 0
}
 

Functions

void warp_transform (ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 > > &soMemWriteP0, membus_t *lcl_mem0, membus_t *lcl_mem1)
 
template<typename Tin , typename Tout , unsigned int arraysize>
void pMyMemtestMemCpy (Tin *in, Tout *out)
 Copy a fixed compile time amount of data to another array. More...
 
template<typename Tin , typename Tout , const unsigned int arraysize>
void pMemCpyCircularBuff (Tin *buff, Tout *out_mem, unsigned int elems, unsigned int offset_buff)
 Copy a run-time variable amount of data to another array employing the src as circular buffer i.e., handling overflow. More...
 
template<typename Tin , typename Tout , const unsigned int burstsize>
void pReadAxiMemMapped2HlsStream (Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems)
 Copy a run-time variable amount of data to an hls stream with a given max. More...
 
template<typename Tin , typename Tout , const unsigned int burstsize, typename Tcntr >
void pReadAxiMemMapped2HlsStreamCountFirst (Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
 Copy a run-time variable amount of data to an hls stream with a given max; it also assumes the initialization of a perf counter of the "perfCounterMultipleCounts" function. More...
 
template<typename Tin , typename Tout , const unsigned int burstsize, typename Tcntr >
void pReadAxiMemMapped2HlsStreamCountActivated (Tin *main_mem, hls::stream< Tout > &sOut, unsigned int elems, hls::stream< Tcntr > &cmd)
 Copy a run-time variable amount of data to an hls stream with a given max; it assumes the "perfCounterMultipleCounts" function is already initialized, so it just incr. More...
 
template<typename Tin , const unsigned int loop_cnt, const unsigned int bytes_per_loop, const unsigned int max_data_transfer>
void storeWordToAxiStream (NetworkWord word, Tin &img_in_axi_stream, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, stream< bool > &sImageLoaded)
 Store a net word to a local AXI stream. More...
 
template<typename TInImg , const unsigned int img_pckts>
void storeWordToArray (uint64_t input, TInImg img[img_pckts], unsigned int *processed_word, unsigned int *image_loaded)
 Store a net word to local memory. More...
 
template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc (hls::stream< Tin > &cmd, hls::stream< Tout > &out, int direction, int burst_length, int nmbr_outstanding)
 
template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2Mem (hls::stream< Tin > &cmd, Tout *out, int direction, int burst_length, int nmbr_outstanding)
 
template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2MemCountOnly (hls::stream< Tin > &cmd, Tout *out)
 Count clock cycles between two events: the first event initializes the counter and the second stops the count. More...
 
template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2MemCountIncremental (hls::stream< Tin > &cmd, Tout *out)
 Count clock cycles between two events: the first event initializes the counter, the second stops the count and increments the out register. TODO: seems not to be working at the csim level (never tested below) when executing a single DUT step, hanging stream values. More...
 
template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterMultipleCounts (hls::stream< Tin > &cmd, Tout *out)
 Count clock cycles between two events: the first event initializes the counter and the second stops the count; a 0 after the init definitively stops the counter. More...
 
template<typename Tevent = bool, const unsigned int counter_width = 32, const unsigned int maximum_counter_value_before_reset = 4000000>
void pCountClockCycles (hls::stream< Tevent > &sOfEnableCCIncrement, hls::stream< Tevent > &sOfResetCounter, hls::stream< Tevent > &sOfGetTheCounter, hls::stream< ap_uint< counter_width > > &oSClockCounter)
 Count clock cycles between two events (first sketch). TODO: make it work without counting with the stream, or reshape it as an FSM. More...
 
void pPortAndDestionation (ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, hls::stream< NodeId > &sDstNode_sig, ap_uint< 32 > *po_rx_ports)
 pPortAndDestionation - Setup the port and the destination rank. More...
 
void pRXPath (hls::stream< NetworkWord > &siSHL_This_Data, hls::stream< NetworkMetaStream > &siNrc_meta, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< ap_uint< INPUT_PTR_WIDTH >> &img_in_axi_stream, NetworkMetaStream meta_tmp, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx, hls::stream< bool > &sImageLoaded)
 Receive Path - From SHELL to THIS. FIXME: never checked, just substitute this one from DID. More...
 
template<typename TMemWrd , const unsigned int loop_cnt, const unsigned int cTransfers_Per_Chunk, const unsigned int max_img_size, const unsigned int cBytesPer10GbitEthAXIPckt>
void pRXPathNetToStream (hls::stream< NetworkWord > &siSHL_This_Data, hls::stream< NetworkMetaStream > &siNrc_meta, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< TMemWrd > &img_in_axi_stream, hls::stream< bool > &sMemBurstRx, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan, hls::stream< float > &sTxMatrix)
 Receive Path - From SHELL to THIS. Function for accumulating a memory word and writing it. Not ready for complete parametrization. More...
 
template<typename TStreamMemWrd , typename TMemWrd , const unsigned int loop_cnt, const unsigned int bytes_per_loop>
void pRXPathStreamToDDR (hls::stream< TMemWrd > &img_in_axi_stream, hls::stream< bool > &sMemBurstRx, hls::stream< DmCmd > &soMemWrCmdP0, hls::stream< DmSts > &siMemWrStsP0, hls::stream< TStreamMemWrd > &soMemWriteP0, hls::stream< bool > &sImageLoaded, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan)
 Receive Path - From RX path stream word aligned to store towards the DDR. More...
 
void pTXPath (hls::stream< NetworkWord > &soTHIS_Shl_Data, hls::stream< NetworkMetaStream > &soNrc_meta, hls::stream< NetworkWord > &sProcpToTxp_Data, hls::stream< NetworkMetaStream > &sRxtoTx_Meta, hls::stream< NodeId > &sDstNode_sig, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan)
 Transmit Path - From THIS to SHELL. More...
 
template<typename TimgIn = ap_uint<INPUT_PTR_WIDTH>, typename TimgOut = ap_uint<OUTPUT_PTR_WIDTH>>
void pProcPath (stream< NetworkWord > &sRxpToTxp_Data, stream< TimgIn > &img_in_axi_stream, stream< TimgOut > &img_out_axi_stream, stream< bool > &sImageLoaded, img_meta_t *img_rows, img_meta_t *img_cols, img_meta_t *img_chan, hls::stream< float > &sTxMatrix)
 Processing Path - Main processing FSM for Vitis kernels. More...
 
void warp_transform (ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NetworkWord > &siSHL_This_Data, stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &soNrc_meta, ap_uint< 32 > *po_rx_ports, stream< DmCmd > &soMemWrCmdP0, stream< DmSts > &siMemWrStsP0, stream< Axis< 512 >> &soMemWriteP0, membus_t *lcl_mem0, membus_t *lcl_mem1)
 Main process of the WarpTransform Application directives. More...
 
void setUpTxMatrixFromStream (float transform_matrix[9], hls::stream< float > &sTxMatrix)
 
void warptTransformAccelArray (ap_uint< 8 > *img_in, float *transform, ap_uint< 64 > *img_out, int rows, int cols)
 Top-level accelerated function of the WarpTransform Application with array I/F, used only for simulation/TB purposes. More...
 
void warpTransformAccelStream (hls::stream< ap_uint< 8 >> &img_in_axi_stream, hls::stream< ap_uint< 64 >> &img_out_axi_stream, int rows, int cols, float transform_matrix[9])
 Top-level accelerated function of the WarpTransform Application with stream I/F. More...
 
void warp_transformAccelMem (membus_t *img_inp, membus_t *img_out, int rows, int cols, hls::stream< float > &sTxMatrix)
 Top-level accelerated function of the WarpTransform Application with memory mapped interfaces. More...
 

Variables

const unsigned int const_tx_matrix_dim = 9
 
unsigned int float_bits_u::i
 
float float_bits_u::f
 
const unsigned long int max_counter_cc = 4000000
 

Detailed Description

This is a subgroup of WarpTransform accelerated function with only synthesizable (HLS) functions/classes.

Macro Definition Documentation

◆ BPERMDW_512

#define BPERMDW_512   (MEMDW_512/8)

Definition at line 134 of file warp_transform.hpp.

◆ BYTE_PER_MEM_WORD

#define BYTE_PER_MEM_WORD   BPERMDW_512

Definition at line 146 of file warp_transform.hpp.

◆ CHECK_CHUNK_SIZE

#define CHECK_CHUNK_SIZE   0x1000

This define configures the AXI burst size of the DDRM memory-mapped interfaces. AXI4 allows 4KiB, but the Role's AXI interconnect is configured at max 1KiB. Values: 0x40->64B, 0x400->1024B (1KiB), 0x1000->4KiB.

Definition at line 145 of file warp_transform.hpp.

◆ CYCLES_UNTIL_TIMEOUT

#define CYCLES_UNTIL_TIMEOUT   0x0100

Definition at line 154 of file warp_transform.hpp.

◆ Data_t_in

#define Data_t_in   ap_axiu<INPUT_PTR_WIDTH, 0, 0, 0>

Definition at line 118 of file warp_transform.hpp.

◆ Data_t_out

#define Data_t_out   ap_axiu<OUTPUT_PTR_WIDTH, 0, 0, 0>

Definition at line 119 of file warp_transform.hpp.

◆ DDR_LATENCY

#define DDR_LATENCY   (52/4)

Definition at line 158 of file warp_transform.hpp.

◆ DEFAULT_RX_PORT

#define DEFAULT_RX_PORT   2718

Definition at line 112 of file warp_transform.hpp.

◆ DEFAULT_TX_PORT

#define DEFAULT_TX_PORT   2718

Definition at line 111 of file warp_transform.hpp.

◆ ENABLE_DDR

#define ENABLE_DDR

Definition at line 55 of file warp_transform.hpp.

◆ EXTRA_DDR_LATENCY_DUE_II

#define EXTRA_DDR_LATENCY_DUE_II   (64 + 8)

Definition at line 159 of file warp_transform.hpp.

◆ FSM_CHK_PROC_BYTES

#define FSM_CHK_PROC_BYTES   11

Definition at line 95 of file warp_transform.hpp.

◆ FSM_CHK_SKIP

#define FSM_CHK_SKIP   10

Definition at line 94 of file warp_transform.hpp.

◆ FSM_CHK_WRT_CHNK_TO_DDR_PND

#define FSM_CHK_WRT_CHNK_TO_DDR_PND   12

Definition at line 96 of file warp_transform.hpp.

◆ FSM_DONE

#define FSM_DONE   1

Definition at line 57 of file warp_transform_network_library.hpp.

◆ FSM_IDLE

#define FSM_IDLE   9

Definition at line 93 of file warp_transform.hpp.

◆ FSM_PROCESSING_BURST_READING

#define FSM_PROCESSING_BURST_READING   4

Definition at line 42 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_CONTINUOUS_RUN

#define FSM_PROCESSING_CONTINUOUS_RUN   12

Definition at line 50 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_DATAFLOW_READ

#define FSM_PROCESSING_DATAFLOW_READ   6

Definition at line 44 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_DATAFLOW_WRITE

#define FSM_PROCESSING_DATAFLOW_WRITE   5

Definition at line 43 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_OUTPUT

#define FSM_PROCESSING_OUTPUT   7

Definition at line 45 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_OUTPUT_2

#define FSM_PROCESSING_OUTPUT_2   8

Definition at line 46 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_OUTPUT_3

#define FSM_PROCESSING_OUTPUT_3   9

Definition at line 47 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_OUTPUT_4

#define FSM_PROCESSING_OUTPUT_4   10

Definition at line 48 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_OUTPUT_5

#define FSM_PROCESSING_OUTPUT_5   11

Definition at line 49 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_PCKT_PROC

#define FSM_PROCESSING_PCKT_PROC   1

Definition at line 39 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_START

#define FSM_PROCESSING_START   3

Definition at line 41 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_STOP

#define FSM_PROCESSING_STOP   2

Definition at line 40 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_WAIT_FOR_DDR_CONTROLLER_EMPTYNESS

#define FSM_PROCESSING_WAIT_FOR_DDR_CONTROLLER_EMPTYNESS   13

Definition at line 51 of file warp_transform_processing.hpp.

◆ FSM_PROCESSING_WAIT_FOR_META

#define FSM_PROCESSING_WAIT_FOR_META   0

Definition at line 38 of file warp_transform_processing.hpp.

◆ FSM_WR_PAT_CMD

#define FSM_WR_PAT_CMD   13

Definition at line 97 of file warp_transform.hpp.

◆ FSM_WR_PAT_DATA

#define FSM_WR_PAT_DATA   15

Definition at line 99 of file warp_transform.hpp.

◆ FSM_WR_PAT_LOAD

#define FSM_WR_PAT_LOAD   14

Definition at line 98 of file warp_transform.hpp.

◆ FSM_WR_PAT_STS_A

#define FSM_WR_PAT_STS_A   16

Definition at line 100 of file warp_transform.hpp.

◆ FSM_WR_PAT_STS_B

#define FSM_WR_PAT_STS_B   17

Definition at line 101 of file warp_transform.hpp.

◆ FSM_WR_PAT_STS_C

#define FSM_WR_PAT_STS_C   18

Definition at line 102 of file warp_transform.hpp.

◆ FSM_WRITE_NEW_DATA

#define FSM_WRITE_NEW_DATA   0

Definition at line 56 of file warp_transform_network_library.hpp.

◆ fsmStateDDRdef

#define fsmStateDDRdef   uint8_t

Definition at line 151 of file warp_transform.hpp.

◆ GRAY

#define GRAY   1

Definition at line 60 of file xf_config_params.h.

◆ KWPERMDW_512

#define KWPERMDW_512   (BPERMDW_512/sizeof(TYPE))

Definition at line 135 of file warp_transform.hpp.

◆ LOAD_IN_STREAM

#define LOAD_IN_STREAM   3

Definition at line 87 of file warp_transform.hpp.

◆ MAX_NB_OF_ELMT_PERDW

#define MAX_NB_OF_ELMT_PERDW   (BPERDW/sizeof(mat_elmt_t))

Definition at line 126 of file warp_transform.hpp.

◆ MAX_NB_OF_ELMT_READ

#define MAX_NB_OF_ELMT_READ   16

Definition at line 122 of file warp_transform.hpp.

◆ MAX_NB_OF_WORDS_READ

#define MAX_NB_OF_WORDS_READ   (MAX_NB_OF_ELMT_READ*sizeof(mat_elmt_t)/BPERDW)

Definition at line 125 of file warp_transform.hpp.

◆ MEMDW_512

#define MEMDW_512   512

Definition at line 133 of file warp_transform.hpp.

◆ NO

#define NO   1

Definition at line 57 of file xf_config_params.h.

◆ PacketFsmType

#define PacketFsmType   uint8_t

Definition at line 108 of file warp_transform.hpp.

◆ PortFsmType

#define PortFsmType   uint8_t

Definition at line 58 of file warp_transform_network_library.hpp.

◆ PORTS_OPENED

#define PORTS_OPENED   0x1F

Definition at line 116 of file warp_transform.hpp.

◆ PROCESSING_PACKET

#define PROCESSING_PACKET   2

Definition at line 86 of file warp_transform.hpp.

◆ PROCESSING_PACKET_IMGMAT

#define PROCESSING_PACKET_IMGMAT   20

Definition at line 104 of file warp_transform.hpp.

◆ PROCESSING_PACKET_TXMAT

#define PROCESSING_PACKET_TXMAT   19

Definition at line 103 of file warp_transform.hpp.

◆ ProcessingFsmType

#define ProcessingFsmType   uint8_t

Definition at line 52 of file warp_transform_processing.hpp.

◆ PUSH_REMAINING_META

#define PUSH_REMAINING_META   22

Definition at line 106 of file warp_transform.hpp.

◆ RGBA

#define RGBA   0

Definition at line 59 of file xf_config_params.h.

◆ RO

#define RO   0

Definition at line 56 of file xf_config_params.h.

◆ ROLE_IS_WARPTRANSFORM

#define ROLE_IS_WARPTRANSFORM

Definition at line 82 of file warp_transform.hpp.

◆ TOTMEMDW_512

#define TOTMEMDW_512   (1 + (IMGSIZE - 1) / BPERMDW_512)

Definition at line 138 of file warp_transform.hpp.

◆ TRANSFERS_PER_CHUNK

#define TRANSFERS_PER_CHUNK   (CHECK_CHUNK_SIZE/BYTE_PER_MEM_WORD)

Definition at line 147 of file warp_transform.hpp.

◆ TRANSFERS_PER_CHUNK_DIVEND

#define TRANSFERS_PER_CHUNK_DIVEND   (TOTMEMDW_512-(TOTMEMDW_512/TRANSFERS_PER_CHUNK)*TRANSFERS_PER_CHUNK)

Definition at line 148 of file warp_transform.hpp.

◆ TRANSFORM_MATRIX_DIM

#define TRANSFORM_MATRIX_DIM   9

Definition at line 80 of file warp_transform.hpp.

◆ TYPICAL_DDR_LATENCY

#define TYPICAL_DDR_LATENCY   4

Definition at line 155 of file warp_transform.hpp.

◆ WAIT_FOR_META

#define WAIT_FOR_META   0

Definition at line 84 of file warp_transform.hpp.

◆ WAIT_FOR_META_IMGMAT

#define WAIT_FOR_META_IMGMAT   21

Definition at line 105 of file warp_transform.hpp.

◆ WAIT_FOR_STREAM_PAIR

#define WAIT_FOR_STREAM_PAIR   1

Definition at line 85 of file warp_transform.hpp.

◆ WAIT_FOR_TX

#define WAIT_FOR_TX   8

Definition at line 92 of file warp_transform.hpp.

◆ WARPTRANSFORM_CHNNEL_BITWIDTH

#define WARPTRANSFORM_CHNNEL_BITWIDTH   3

Definition at line 70 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_CHNNEL_HIGH_BIT

#define WARPTRANSFORM_CHNNEL_HIGH_BIT   WARPTRANSFORM_COLS_LOW_BIT-1

Definition at line 80 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_CHNNEL_LOW_BIT

#define WARPTRANSFORM_CHNNEL_LOW_BIT   WARPTRANSFORM_COLS_LOW_BIT-WARPTRANSFORM_CHNNEL_BITWIDTH

Definition at line 81 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_COLS_BITWIDTH

#define WARPTRANSFORM_COLS_BITWIDTH   16

Definition at line 71 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_COLS_HIGH_BIT

#define WARPTRANSFORM_COLS_HIGH_BIT   WARPTRANSFORM_ROWS_LOW_BIT-1

Definition at line 77 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_COLS_LOW_BIT

#define WARPTRANSFORM_COLS_LOW_BIT   WARPTRANSFORM_ROWS_LOW_BIT-WARPTRANSFORM_COLS_BITWIDTH

Definition at line 78 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_COMMANDS_BITWIDTH

#define WARPTRANSFORM_COMMANDS_BITWIDTH   8

Definition at line 78 of file warp_transform.hpp.

◆ WARPTRANSFORM_COMMANDS_HIGH_BIT

#define WARPTRANSFORM_COMMANDS_HIGH_BIT   WARPTRANSFORM_COMMANDS_BITWIDTH-1

Definition at line 76 of file warp_transform.hpp.

◆ WARPTRANSFORM_COMMANDS_LOW_BIT

#define WARPTRANSFORM_COMMANDS_LOW_BIT   0

Definition at line 77 of file warp_transform.hpp.

◆ WARPTRANSFORM_RETURN_RESULTS

#define WARPTRANSFORM_RETURN_RESULTS   4

Definition at line 88 of file warp_transform.hpp.

◆ WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT

#define WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT   5

Definition at line 89 of file warp_transform.hpp.

◆ WARPTRANSFORM_RETURN_RESULTS_FWD

#define WARPTRANSFORM_RETURN_RESULTS_FWD   7

Definition at line 91 of file warp_transform.hpp.

◆ WARPTRANSFORM_RETURN_RESULTS_UNPACK

#define WARPTRANSFORM_RETURN_RESULTS_UNPACK   6

Definition at line 90 of file warp_transform.hpp.

◆ WARPTRANSFORM_ROWS_BITWIDTH

#define WARPTRANSFORM_ROWS_BITWIDTH   16

Definition at line 72 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_ROWS_HIGH_BIT

#define WARPTRANSFORM_ROWS_HIGH_BIT   NETWORK_WORD_BIT_WIDTH-1

Definition at line 74 of file warp_transform_network_library.hpp.

◆ WARPTRANSFORM_ROWS_LOW_BIT

#define WARPTRANSFORM_ROWS_LOW_BIT   NETWORK_WORD_BIT_WIDTH-WARPTRANSFORM_ROWS_BITWIDTH

Definition at line 75 of file warp_transform_network_library.hpp.

Typedef Documentation

◆ img_meta_t

typedef unsigned int img_meta_t

Definition at line 79 of file warp_transform.hpp.

◆ mat_elmt_t

typedef uint8_t mat_elmt_t

Definition at line 123 of file warp_transform.hpp.

◆ membus_512_t

typedef ap_uint< 512 > membus_512_t

Definition at line 136 of file warp_transform.hpp.

◆ membus_t

Definition at line 137 of file warp_transform.hpp.

Enumeration Type Documentation

◆ EchoCtrl

enum EchoCtrl

SHELL/MMIO/EchoCtrl - Config Register

Enumerator
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 
ECHO_STORE_FWD 
ECHO_PATH_THRU 
ECHO_CTRL_DISABLED 
ECHO_PATH_THRU 
ECHO_STORE_FWD 
ECHO_OFF 

Definition at line 59 of file warp_transform.hpp.

59  {
60  ECHO_PATH_THRU = 0,
61  ECHO_STORE_FWD = 1,
62  ECHO_OFF = 2
63 };
@ ECHO_STORE_FWD
@ ECHO_OFF
@ ECHO_PATH_THRU

◆ MemTestCmd

enum MemTestCmd

Internal WarpTransform accelerator command

Enumerator
TEST_BURSTSIZE_CMD 
TEST_ENDOFTESTS_CMD 
TEST_STOP_CMD 
TEST_START_CMD 
TEST_INVLD_CMD 
WRPTX_IMG_CMD 
WRPTX_TXMAT_CMD 
WRPTX_INVLD_CMD 

Definition at line 69 of file warp_transform.hpp.

69  {
70  WRPTX_IMG_CMD = 2,
71  WRPTX_TXMAT_CMD = 1,
72  WRPTX_INVLD_CMD = 0
73 };
@ WRPTX_TXMAT_CMD
@ WRPTX_IMG_CMD
@ WRPTX_INVLD_CMD

Function Documentation

◆ pCountClockCycles()

template<typename Tevent = bool, const unsigned int counter_width = 32, const unsigned int maximum_counter_value_before_reset = 4000000>
void pCountClockCycles ( hls::stream< Tevent > &  sOfEnableCCIncrement,
hls::stream< Tevent > &  sOfResetCounter,
hls::stream< Tevent > &  sOfGetTheCounter,
hls::stream< ap_uint< counter_width > > &  oSClockCounter 
)

Count clock cycles between two events (first sketch). TODO: make it work without counting with the stream, or reshape it as an FSM.

Parameters
[in] sOfEnableCCIncrement
[in] sOfResetCounter
[in] sOfGetTheCounter
[out] oSClockCounter
[in] Tevent: the event datatype
[in] counter_width: the counter precision
[in] maximum_counter_value_before_reset: the maximum amount of cc count before auto reset
Returns
Nothing.

Definition at line 490 of file warp_transform_hw_common.hpp.

495 {
496 
497  static ap_uint<counter_width> internal_counter = 0;
498  static bool pop_the_counter = false;
499 #pragma HLS reset variable=internal_counter
500 #pragma HLS reset variable=pop_the_counter
501 //giving priority to the pop
502  if(!sOfGetTheCounter.empty()){
503  pop_the_counter = sOfGetTheCounter.read();
504  }
505  if (pop_the_counter && !oSClockCounter.full())
506  {
507  oSClockCounter.write(internal_counter);
508  pop_the_counter=false;
509  }
510  if(!sOfResetCounter.empty()){
511  bool reset_or_not = sOfResetCounter.read();
512  if (reset_or_not)
513  {
514  internal_counter = 0;
515  }
516  }
517  if(!sOfEnableCCIncrement.empty()){
518  bool increment = sOfEnableCCIncrement.read();
519  if (increment)
520  {
521  if(internal_counter==maximum_counter_value_before_reset){
522  internal_counter=1;
523  }else{
524  internal_counter++;
525  }
526 #if DEBUG_LEVEL == TRACE_ALL
527 #ifndef __SYNTHESIS__
528  printf("DEBUG pCountClockCycles counter value = %s\n", internal_counter.to_string().c_str());
529 #endif //__SYNTHESIS__
530 #endif
531  }
532  }
533 }

◆ perfCounterMultipleCounts()

template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterMultipleCounts ( hls::stream< Tin > &  cmd,
Tout *  out 
)

Count clock cycles between two events: the first event initializes the counter and the second stops the count; a 0 after the init definitively stops the counter.

Parameters
[in] cmd: the performance counter cmd stream, first is init, second is stop (0) / continue (everything else)
[out] out: the output register where the incremental counter value is stored
[in] Tin: the input datatype
[in] Tout: the output datatype
[in] counter_precision: the maximum amount of data
Returns
Nothing.

Definition at line 443 of file warp_transform_hw_common.hpp.

443  {
444  #pragma HLS interface ap_ctrl_none port=return
445  Tin input_cmd=1;
446 
447  // wait to receive a value to start counting
448  ap_uint<counter_precision> cnt = cmd.read();
449  reset:
450  while (input_cmd != 0)//a zero will stop the counter
451  {
452 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
453 #if DEBUG_LEVEL == TRACE_ALL
454  #ifndef __SYNTHESIS__
455  //printf("DEBUG begin to count :D input_cmd value = %s\n", input_cmd.to_string().c_str());
456 #endif //__SYNTHESIS__
457 #endif
458 // keep counting until a value is available
459 count:
460  while (cmd.read_nb(input_cmd) == false) {
461 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
462 #pragma HLS PIPELINE II=1
463  cnt++;
464 #if DEBUG_LEVEL == TRACE_ALL
465  #ifndef __SYNTHESIS__
466  // printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
467 #endif //__SYNTHESIS__
468 #endif
469  }
470  input_cmd=cmd.read();
471  }
472  *out +=cnt;
473 }
out
Definition: test.py:12

◆ perfCounterProc()

template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc ( hls::stream< Tin > &  cmd,
hls::stream< Tout > &  out,
int  direction,
int  burst_length,
int  nmbr_outstanding 
)

Definition at line 306 of file warp_transform_hw_common.hpp.

307 {
308 #pragma HLS INLINE off
309 
310  Tin input_cmd;
311  // wait to receive a value to start counting
312  ap_uint<counter_precision> cnt = cmd.read();
313 // keep counting until a value is available
314 count:
315  while (cmd.read_nb(input_cmd) == false) {
316  cnt++;
317  #if DEBUG_LEVEL == TRACE_ALL
318 #ifndef __SYNTHESIS__
319  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
320 #endif //__SYNTHESIS__
321 #endif
322  }
323 
324  // // write out kernel statistics to global memory
325  Tout tmp[1];//was 4
326  tmp[0] = cnt;
327  // tmp[1] = input_cmd;
328  //tmp[1] = burst_length;
329  // tmp[3] = nmbr_outstanding;
330  //memcpy(out, tmp, 4 * sizeof(Tout));
331  out.write(tmp[0]);
332  //out.write(tmp[1]);
333  //out.write(nmbr_outstanding); this
334  //out.write(input_cmd); Xilinx use this to count the errors but we are already counting so...
335 }

◆ perfCounterProc2Mem()

template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2Mem ( hls::stream< Tin > &  cmd,
Tout *  out,
int  direction,
int  burst_length,
int  nmbr_outstanding 
)

Definition at line 341 of file warp_transform_hw_common.hpp.

341  {
342 
343  Tin input_cmd;
344  // wait to receive a value to start counting
345  ap_uint<counter_precision> cnt = cmd.read();
346 // keep counting until a value is available
347 count:
348  while (cmd.read_nb(input_cmd) == false) {
349 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
350  cnt++;
351 
352 #if DEBUG_LEVEL == TRACE_ALL
353 #ifndef __SYNTHESIS__
354  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
355 #endif //__SYNTHESIS__
356 #endif
357  }
358  *out =cnt;
359 }

◆ perfCounterProc2MemCountIncremental()

template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2MemCountIncremental ( hls::stream< Tin > &  cmd,
Tout *  out 
)

Count clock cycles between two events: the first event initializes the counter, the second stops the count and adds the counted value to the out register. TODO: this seems not to work at the csim level (never tested below that level) when executing a single DUT step: stream values are left hanging.

Parameters
[in] cmd the performance counter cmd stream; the first value initializes the count, the second stops it
[out] out the output register to which the counter value is added
[in] Tin the input datatype
[in] Tout the output datatype
[in] counter_precision the bit width of the cycle counter
Returns
Nothing.

Definition at line 411 of file warp_transform_hw_common.hpp.

411  {
412 
413  Tin input_cmd;
414  // wait to receive a value to start counting
415  ap_uint<counter_precision> cnt = cmd.read();
416 // keep counting until a value is available
417 count:
418  while (cmd.read_nb(input_cmd) == false) {
419 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
420  cnt++;
421 #if DEBUG_LEVEL == TRACE_ALL
422 #ifndef __SYNTHESIS__
423  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
424 #endif //__SYNTHESIS__
425 #endif
426  }
427  *out +=cnt;
428 }

◆ perfCounterProc2MemCountOnly()

template<typename Tin , typename Tout , unsigned int counter_precision = 64>
void perfCounterProc2MemCountOnly ( hls::stream< Tin > &  cmd,
Tout *  out 
)

Count clock cycles between two events: the first event initializes the counter, the second stops the count.

Parameters
[in] cmd the performance counter cmd stream; the first value initializes the count, the second stops it
[out] out the output register in which the counter value is stored
[in] Tin the input datatype
[in] Tout the output datatype
[in] counter_precision the bit width of the cycle counter
Returns
Nothing.

Definition at line 375 of file warp_transform_hw_common.hpp.

375  {
376 
377  Tin input_cmd;
378  // wait to receive a value to start counting
379  ap_uint<counter_precision> cnt = cmd.read();
380 // keep counting until a value is available
381 count:
382  while (cmd.read_nb(input_cmd) == false) {
383 #pragma HLS LOOP_TRIPCOUNT min = 1 max = max_counter_cc
384  cnt++;
385 
386 #if DEBUG_LEVEL == TRACE_ALL
387 #ifndef __SYNTHESIS__
388  printf("DEBUG perfCounterProc counter value = %s\n", cnt.to_string().c_str());
389 #endif //__SYNTHESIS__
390 #endif
391  }
392  *out =cnt;
393 }

◆ pMemCpyCircularBuff()

template<typename Tin , typename Tout , const unsigned int arraysize>
void pMemCpyCircularBuff ( Tin *  buff,
Tout *  out_mem,
unsigned int  elems,
unsigned int  offset_buff 
)

Copy a run-time variable amount of data to another array, treating the source as a circular buffer, i.e., wrapping around to index 0 when the end of the buffer is reached.

Parameters
[out] out_mem the dst ptr
[in] buff the src ptr, i.e., the circular buffer
[in] elems the current amount of data to tx
[in] offset_buff the initial offset in the circular buffer
[in] Tin the input datatype
[in] Tout the output datatype
[in] arraysize the size of the circular buffer, i.e., the index at which reads wrap around to 0 (also the maximum amount of data)
Returns
Nothing.

Definition at line 112 of file warp_transform_hw_common.hpp.

112  {
113 #pragma HLS INLINE
114  unsigned int j = 0;
115  circ_buff_loop: for (unsigned int i = 0; i < elems; i++)
116  {
117 #pragma HLS PIPELINE II=1
118 #pragma HLS LOOP_TRIPCOUNT min = 1 max = arraysize
119  if(offset_buff+j==arraysize)//
120  {
121  offset_buff=0;
122  j=1;
123  out_mem[i] = buff[0];
124  }else{
125  out_mem[i] = buff[offset_buff+j];
126  j++;
127  }
128  }
129 
130 }

◆ pMyMemtestMemCpy()

template<typename Tin , typename Tout , unsigned int arraysize>
void pMyMemtestMemCpy ( Tin *  in,
Tout *  out 
)

Copy a fixed compile time amount of data to another array.

Parameters
[out]outthe dst ptr
[in]inthe src ptr
[in]Tinthe input datatype
[in]Toutthe output datatype
[in]arraysizethe fixed amount of data
Returns
Nothing.

Definition at line 87 of file warp_transform_hw_common.hpp.

87  {
88 #pragma HLS INLINE
89  for (unsigned int i = 0; i < arraysize; i++)
90  {
91 #pragma HLS PIPELINE II=1
92  *out = *in;
93  }
94 
95 }

◆ pPortAndDestionation()

void pPortAndDestionation ( ap_uint< 32 > *  pi_rank,
ap_uint< 32 > *  pi_size,
hls::stream< NodeId > &  sDstNode_sig,
ap_uint< 32 > *  po_rx_ports 
)

pPortAndDestionation - Set up the port and the destination rank.

Parameters
[in]pi_rank
[in]pi_size
[out]sDstNode_sig
[out]po_rx_ports
Returns
Nothing.

Definition at line 97 of file warp_transform_network_library.hpp.

103 {
104  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
105 #pragma HLS INLINE off
106  //-- STATIC VARIABLES (with RESET) ------------------------------------------
107  static PortFsmType port_fsm = FSM_WRITE_NEW_DATA;
108 #pragma HLS reset variable=port_fsm
109 
110  switch(port_fsm)
111  {
112  default:
113  case FSM_WRITE_NEW_DATA:
114  printf("DEBUG in pPortAndDestionation: port_fsm - FSM_WRITE_NEW_DATA\n");
115  //WarpTransform app needs to be reset to process new rank
116  if(!sDstNode_sig.full())
117  {
118  NodeId dst_rank = (*pi_rank + 1) % *pi_size;
119  #if DEBUG_LEVEL == TRACE_ALL
120  printf("rank: %d; size: %d; \n", (int) *pi_rank, (int) *pi_size);
121  #endif
122  sDstNode_sig.write(dst_rank);
123  port_fsm = FSM_DONE;
124  }
125  break;
126  case FSM_DONE:
127  printf("DEBUG in pPortAndDestionation: port_fsm - FSM_DONE\n");
128  *po_rx_ports = PORTS_OPENED;
129  break;
130  }
131 
132 }
#define PORTS_OPENED
Definition: harris.hpp:102
#define FSM_WRITE_NEW_DATA
ap_uint< 8 > NodeId
Definition: network.hpp:82

◆ pProcPath()

template<typename TimgIn = ap_uint<INPUT_PTR_WIDTH>, typename TimgOut = ap_uint<OUTPUT_PTR_WIDTH>>
void pProcPath ( stream< NetworkWord > &  sRxpToTxp_Data,
stream< TimgIn > &  img_in_axi_stream,
stream< TimgOut > &  img_out_axi_stream,
stream< bool > &  sImageLoaded,
img_meta_t img_rows,
img_meta_t img_cols,
img_meta_t img_chan,
hls::stream< float > &  sTxMatrix 
)

Processing Path - Main processing FSM for Vitis kernels.

Parameters
[out]sRxpToTxp_Data
[in]img_in_axi_stream
[in]img_out_axi_stream
[out]processed_word_rx
[in]sImageLoaded
Returns
Nothing.

Definition at line 69 of file warp_transform_processing.hpp.

86 {
87  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
88  #pragma HLS INLINE off
89  #pragma HLS pipeline II=1
90 
91  //-- LOCAL VARIABLES ------------------------------------------------------
92  NetworkWord newWord;
93  uint16_t Thresh = 442;
94  float K = 0.04;
95  uint16_t k = K * (1 << 16); // Convert to Q0.16 format
96  static bool accel_called;
97  static unsigned int processed_word_proc;
98  static unsigned int timeoutCntAbs;
99  static unsigned int cnt_i;
100  static membus_t tmp;
101  Data_t_out temp;
102  #ifdef ENABLE_DDR
103  //static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
104  //#pragma HLS stream variable=img_out_axi_stream depth=9
105  static unsigned int ddr_addr_out;
106  #pragma HLS reset variable=ddr_addr_out
107  #endif
108  static PacketFsmType WarpTransformFSM = WAIT_FOR_META;
109  #pragma HLS reset variable=WarpTransformFSM
110 
111  #pragma HLS reset variable=accel_called
112  #pragma HLS reset variable=processed_word_proc
113  #pragma HLS reset variable=timeoutCntAbs
114  #pragma HLS reset variable=cnt_i
115  #pragma HLS reset variable=tmp
116  #pragma HLS reset variable=temp
117 
118  static img_meta_t lcl_img_rows=0;
119  static img_meta_t lcl_img_cols=0;
120  static img_meta_t lcl_img_chan=0;
121  #pragma HLS reset variable=lcl_img_rows
122  #pragma HLS reset variable=lcl_img_cols
123  #pragma HLS reset variable=lcl_img_chan
124 
125 
126  switch(WarpTransformFSM)
127  {
128  case WAIT_FOR_META:
129  printf("DEBUG in pProcPath: WAIT_FOR_META\n");
130  if (!sImageLoaded.empty())
131  {
132  if (sImageLoaded.read() == true) {
133  WarpTransformFSM = PROCESSING_PACKET;
134  accel_called = false;
135  processed_word_proc = 0;
136  #ifdef ENABLE_DDR
137  ddr_addr_out = 0;
138  timeoutCntAbs = 0;
139  cnt_i = 0;
140  #endif
141  lcl_img_rows = *img_rows;
142  lcl_img_cols = *img_cols;
143  lcl_img_chan = *img_chan;
144  }
145  }
146  break;
147 
148  case PROCESSING_PACKET:
149  printf("DEBUG in pProcPath: PROCESSING_PACKET\n");
150  #ifndef ENABLE_DDR
151  if ( !img_in_axi_stream.empty() && !img_out_axi_stream.full() )
152  {
153  #endif
154  if (accel_called == false) {
155  #ifdef ENABLE_DDR
156  warp_transformAccelMem(lcl_mem0, lcl_mem1, *img_rows, *img_cols, sTxMatrix);
157  #else // ! ENABLE_DDR
158  #ifdef FAKE_WarpTransform
159  fakeWarpTransformAccelStream(img_in_axi_stream, img_out_axi_stream, MIN_RX_LOOPS, MIN_TX_LOOPS, tx_matrix);
160  #else // !FAKE_WarpTransform
161  warpTransformAccelStream(img_in_axi_stream, img_out_axi_stream, img_rows, img_cols, tx_matrix);
162  #endif // FAKE_WarpTransform
163  #endif // ENABLE_DDR
164  accel_called = true;
165  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS;
166  }
167  #ifndef ENABLE_DDR
168  }
169  #endif
170  break;
171 
172  #ifdef ENABLE_DDR
174  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS, ddr_addr_out=%u\n", ddr_addr_out);
175  if (accel_called == true) {
176 
177  printf("DEBUG in pProcPath: Accumulated %u net words (%u B) to complete a single DDR word\n",
179  tmp = lcl_mem1[ddr_addr_out];
180  ddr_addr_out++;
182  timeoutCntAbs = 0;
183  }
184  break;
185 
187  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT [%u out of %u]\n", timeoutCntAbs, DDR_LATENCY);
188  if (timeoutCntAbs++ == DDR_LATENCY) {
189  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS_FWD; //WARPTRANSFORM_RETURN_RESULTS_UNPACK;
190  cnt_i = 0;
191  }
192  break;
194  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS_FWD\n");
195  //if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() ) {
196  if ( (cnt_i <= (MEMDW_512/OUTPUT_PTR_WIDTH) - 1) && !sRxpToTxp_Data.full() ) {
197 
198  //temp.data = img_out_axi_stream.read();
199  temp.data(0 ,63) = tmp(cnt_i*OUTPUT_PTR_WIDTH , cnt_i*OUTPUT_PTR_WIDTH+63);
200  if (processed_word_proc++ == MIN_TX_LOOPS-1) {
201  temp.last = 1;
202  WarpTransformFSM = WAIT_FOR_META;
203  }
204  else {
205  temp.last = 0;
206  }
207  //TODO: find why Vitis kernel does not set keep and last by itself
208  temp.keep = 255;
209  newWord = NetworkWord(temp.data, temp.keep, temp.last);
210  sRxpToTxp_Data.write(newWord);
211  cnt_i++;
212  }
213  else {
214  WarpTransformFSM = WARPTRANSFORM_RETURN_RESULTS;
215  }
216 
217  break;
218 
219  #else // ! ENABLE_DDR
221  printf("DEBUG in pProcPath: WARPTRANSFORM_RETURN_RESULTS\n");
222  if ( !img_out_axi_stream.empty() && !sRxpToTxp_Data.full() )
223  {
224 
225  temp.data = img_out_axi_stream.read();
226  if ( img_out_axi_stream.empty() )
227  //if (processed_word_proc++ == MIN_TX_LOOPS-1)
228  {
229  temp.last = 1;
230  WarpTransformFSM = WAIT_FOR_META;
231  accel_called = false;
232  }
233  else
234  {
235  temp.last = 0;
236  }
237  //TODO: find why Vitis kernel does not set keep and last by itself
238  temp.keep = 255;
239  newWord = NetworkWord(temp.data, temp.keep, temp.last);
240  sRxpToTxp_Data.write(newWord);
241  }
242  break;
243  #endif // ENABLE_DDR
244  } // end switch
245 
246 }
#define BPERMDW_512
Definition: harris.hpp:120
#define KWPERMDW_512
Definition: harris.hpp:121
#define MIN_TX_LOOPS
#define MIN_RX_LOOPS
#define OUTPUT_PTR_WIDTH
#define MEMDW_512
Definition: memtest.hpp:90
#define PacketFsmType
Definition: memtest.hpp:76
#define Data_t_out
Definition: memtest.cpp:30
#define DDR_LATENCY
Definition: memtest.hpp:98
membus_512_t membus_t
Definition: memtest.hpp:92
#define PROCESSING_PACKET
Definition: memtest.hpp:73
#define WAIT_FOR_META
Definition: memtest.hpp:71
membus_t lcl_mem0[16384]
membus_t lcl_mem1[16384]
#define WARPTRANSFORM_RETURN_RESULTS
unsigned int img_meta_t
#define WARPTRANSFORM_RETURN_RESULTS_ABSORB_DDR_LAT
#define WARPTRANSFORM_RETURN_RESULTS_FWD
void warp_transformAccelMem(membus_t *img_inp, membus_t *img_out, int rows, int cols, hls::stream< float > &sTxMatrix)
Top-level accelerated function of the WarpTransform Application with memory mapped interfaces.
void fakeWarpTransformAccelStream(hls::stream< ap_axiu< 8, 0, 0, 0 > > &img_in_axi_stream, hls::stream< ap_axiu< 64, 0, 0, 0 > > &img_out_axi_stream, unsigned int min_rx_loops, unsigned int min_tx_loops, float transform_matrix[9])
void warpTransformAccelStream(hls::stream< ap_uint< 8 >> &img_in_axi_stream, hls::stream< ap_uint< 64 >> &img_out_axi_stream, int rows, int cols, float transform_matrix[9])
Top-level accelerated function of the WarpTransform Application with array I/Fadd WARPTRANSFORM.
Here is the call graph for this function:

◆ pReadAxiMemMapped2HlsStream()

template<typename Tin , typename Tout , const unsigned int burstsize>
void pReadAxiMemMapped2HlsStream ( Tin *  main_mem,
hls::stream< Tout > &  sOut,
unsigned int  elems 
)

Copy a run-time variable amount of data to an hls stream with a given max.

Parameters
[in] main_mem the src ptr to read
[out] sOut the dst hls stream
[in] elems the current amount of data to tx
[in] Tin the input datatype
[in] Tout the output datatype
[in] burstsize the maximum amount of data
Returns
Nothing.

Definition at line 146 of file warp_transform_hw_common.hpp.

146  {
147 #pragma HLS INLINE
148  mmloop: for (unsigned int i = 0; i < elems; i++)
149  {
150 #pragma HLS PIPELINE II=1
151 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
152  Tout tmp = main_mem[i];
153  sOut.write(tmp);
154  }
155 
156 }

◆ pReadAxiMemMapped2HlsStreamCountActivated()

template<typename Tin , typename Tout , const unsigned int burstsize, typename Tcntr >
void pReadAxiMemMapped2HlsStreamCountActivated ( Tin *  main_mem,
hls::stream< Tout > &  sOut,
unsigned int  elems,
hls::stream< Tcntr > &  cmd 
)

Copy a run-time variable amount of data to an hls stream, up to a given maximum. It assumes the "perfCounterMultipleCounts" function is already initialized, so it only increments the count.

Parameters
[in] main_mem the src ptr to read
[out] sOut the dst hls stream
[in] elems the current amount of data to tx
[out] cmd the performance counter cmd stream
[in] Tin the input datatype
[in] Tout the output datatype
[in] burstsize the maximum amount of data
[in] Tcntr the cmd perf counter datatype
Returns
Nothing.

Definition at line 205 of file warp_transform_hw_common.hpp.

205  {
206 #pragma HLS INLINE
207  cmd.write(1);
208  mmloop: for (unsigned int i = 0; i < elems; i++)
209  {
210 #pragma HLS PIPELINE II=1
211 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
212  Tout tmp = main_mem[i];
213  sOut.write(tmp);
214  }
215  cmd.write(1);
216 }

◆ pReadAxiMemMapped2HlsStreamCountFirst()

template<typename Tin , typename Tout , const unsigned int burstsize, typename Tcntr >
void pReadAxiMemMapped2HlsStreamCountFirst ( Tin *  main_mem,
hls::stream< Tout > &  sOut,
unsigned int  elems,
hls::stream< Tcntr > &  cmd 
)

Copy a run-time variable amount of data to an hls stream, up to a given maximum. It also issues the initializing command for the performance counter of the "perfCounterMultipleCounts" function.

Parameters
[in] main_mem the src ptr to read
[out] sOut the dst hls stream
[in] elems the current amount of data to tx
[out] cmd the performance counter cmd stream
[in] Tin the input datatype
[in] Tout the output datatype
[in] burstsize the maximum amount of data
[in] Tcntr the cmd perf counter datatype
Returns
Nothing.

Definition at line 175 of file warp_transform_hw_common.hpp.

175  {
176 #pragma HLS INLINE
177 cmd.write(0);
178  mmloop: for (unsigned int i = 0; i < elems; i++)
179  {
180 #pragma HLS PIPELINE II=1
181 #pragma HLS LOOP_TRIPCOUNT min = 1 max = burstsize
182  Tout tmp = main_mem[i];
183  sOut.write(tmp);
184  }
185  cmd.write(1);
186 
187 }

◆ pRXPath()

void pRXPath ( hls::stream< NetworkWord > &  siSHL_This_Data,
hls::stream< NetworkMetaStream > &  siNrc_meta,
hls::stream< NetworkMetaStream > &  sRxtoTx_Meta,
hls::stream< ap_uint< INPUT_PTR_WIDTH >> &  img_in_axi_stream,
NetworkMetaStream  meta_tmp,
unsigned int *  processed_word_rx,
unsigned int *  processed_bytes_rx,
hls::stream< bool > &  sImageLoaded 
)

Receive Path - From SHELL to THIS. FIXME: never checked, just substitute this one from DID.

Parameters
[in]siSHL_This_Data
[in]siNrc_meta
[out]sRxtoTx_Meta
[out]img_in_axi_stream
[out]meta_tmp
[out]processed_word
[out]sImageLoaded
Returns
Nothing.

Definition at line 149 of file warp_transform_network_library.hpp.

159 {
160  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
161  #pragma HLS INLINE off
162  #pragma HLS pipeline II=1
163 
164  //-- LOCAL VARIABLES ------------------------------------------------------
165  static NetworkWord netWord;
167  #pragma HLS reset variable=enqueueFSM
168  const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
169  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
170 
171  switch(enqueueFSM)
172  {
173  case WAIT_FOR_META:
174  printf("DEBUG in pRXPath: enqueueFSM - WAIT_FOR_META, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
175  *processed_word_rx, *processed_bytes_rx);
176  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
177  {
178  meta_tmp = siNrc_meta.read();
179  meta_tmp.tlast = 1; //just to be sure...
180  sRxtoTx_Meta.write(meta_tmp);
182  }
183  break;
184 
185  case PROCESSING_PACKET:
186  printf("DEBUG in pRXPath: enqueueFSM - PROCESSING_PACKET, *processed_word_rx=%u, *processed_bytes_rx=%u\n",
187  *processed_word_rx, *processed_bytes_rx);
188  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
189  {
190  //-- Read incoming data chunk
191  netWord = siSHL_This_Data.read();
192  storeWordToAxiStream<stream<ap_uint<INPUT_PTR_WIDTH>>,
193  loop_cnt,
194  bytes_per_loop,
196  (netWord, img_in_axi_stream, processed_word_rx, processed_bytes_rx,
197  sImageLoaded);
198  if(netWord.tlast == 1)
199  {
201  }
202  }
203  break;
204  }
205 }
#define IMGSIZE
#define BITS_PER_10GBITETHRNET_AXI_PACKET
#define INPUT_PTR_WIDTH
#define BYTES_PER_10GBITETHRNET_AXI_PACKET
uint8_t enqueueFSM
Definition: uppercase.cpp:54
ap_uint< 1 > tlast
Definition: network.hpp:111

◆ pRXPathNetToStream()

template<typename TMemWrd , const unsigned int loop_cnt, const unsigned int cTransfers_Per_Chunk, const unsigned int max_img_size, const unsigned int cBytesPer10GbitEthAXIPckt>
void pRXPathNetToStream ( hls::stream< NetworkWord > &  siSHL_This_Data,
hls::stream< NetworkMetaStream > &  siNrc_meta,
hls::stream< NetworkMetaStream > &  sRxtoTx_Meta,
hls::stream< TMemWrd > &  img_in_axi_stream,
hls::stream< bool > &  sMemBurstRx,
img_meta_t img_rows,
img_meta_t img_cols,
img_meta_t img_chan,
hls::stream< float > &  sTxMatrix 
)

Receive Path - From SHELL to THIS. Function for accumulating a memory word and writing it. Not ready for complete parametrization.

Parameters
[in] siSHL_This_Data the data rx from network
[in] siNrc_meta meta from network
[out] sRxtoTx_Meta meta to the tx path
[out] img_in_axi_stream the image data packed in 512 bits
[out] sMemBurstRx signals that the burst is ready
Returns
Nothing.

Definition at line 221 of file warp_transform_network_library.hpp.

233 {
234  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
235  #pragma HLS INLINE off
236 
237  //-- LOCAL VARIABLES ------------------------------------------------------
238  static NetworkWord netWord;
239  // const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
240  static NetworkMetaStream meta_tmp;
241  static TMemWrd v = 0;
242  static unsigned int cnt_wr_stream = 0, cnt_wr_burst = 0;
243  static unsigned int processed_net_bytes_rx = 0;
244  static unsigned int tx_mat_idx = 0;
245  #pragma HLS reset variable=meta_tmp
246  #pragma HLS reset variable=tx_mat_idx
247  #pragma HLS reset variable=cnt_wr_stream
248  #pragma HLS reset variable=cnt_wr_burst
249  #pragma HLS reset variable=processed_net_bytes_rx
251  #pragma HLS reset variable=enqueueRxToStrFSM
252  unsigned int expected_input_meta = TOT_TRANSFERS_TX;
253  unsigned int expected_output_meta = TOT_TRANSFERS_RX;
254  unsigned int received_and_fwded_meta = 0;
255  #pragma HLS reset variable=expected_input_meta
256  #pragma HLS reset variable=expected_output_meta
257  #pragma HLS reset variable=received_and_fwded_meta
258 
259 
260  switch(enqueueRxToStrFSM)
261  {
262  case WAIT_FOR_META:
263  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META\n");
264 
265  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
266  {
267  meta_tmp = siNrc_meta.read();
268  meta_tmp.tlast = 1; //just to be sure...
269  //sRxtoTx_Meta.write(meta_tmp);
271  expected_output_meta = TOT_TRANSFERS_RX;
272  received_and_fwded_meta = 0;
273  }
274  break;
275 
276 case PROCESSING_PACKET:
277  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
278  if ( !siSHL_This_Data.empty())
279  {
280  //-- Read incoming data chunk
281  netWord = siSHL_This_Data.read();
282  switch(netWord.tdata.range(WARPTRANSFORM_COMMANDS_HIGH_BIT,WARPTRANSFORM_COMMANDS_LOW_BIT))//the command is in the first 8 bits
283  {
284  case(WRPTX_TXMAT_CMD):
286  tx_mat_idx = 0;
287  break;
288  case(WRPTX_IMG_CMD):
292  expected_output_meta = rows * cols;
293  std::cout << "DEBUG pRXPathNetToStream - img rows =" << rows << " cols=" << cols << " chan=" << chan << std::endl;
294  *img_rows = rows;
295  *img_cols = cols;
296  *img_chan = chan;
298  break;
299  //TODO: fix the default case
300  // default: // invalid cmd
301  // break;
302  // //might be consume data? dk
303  }
304  }
305  break;
306 
308  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_IMGMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
309  if ( !siSHL_This_Data.empty() && !img_in_axi_stream.full())
310  {
311  //-- Read incoming data chunk
312  netWord = siSHL_This_Data.read();
313  printf("DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
314  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
315  if ((netWord.tkeep >> cnt_wr_stream) == 0) {
316  printf("WARNING: value with tkeep=0 at cnt_wr_stream=%u\n", cnt_wr_stream);
317  }
318  v(cnt_wr_stream*64, (cnt_wr_stream+1)*64-1) = netWord.tdata(0,63);
319  if ((cnt_wr_stream++ == loop_cnt-1) || (netWord.tlast == 1)) {
320  // std::cout << std::hex << v << std::endl; // print hexadecimal value
321  std::cout << "DEBUG in pRXPathNetToStream: Pushing to img_in_axi_stream :" << std::hex << v << std::endl;
322  img_in_axi_stream.write(v);
323  if ((cnt_wr_burst++ == cTransfers_Per_Chunk-1) ||
324  ((processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) &&
325  (netWord.tlast == 1))) {
326  if (!sMemBurstRx.full()) {
327  sMemBurstRx.write(true);
328  }
329  cnt_wr_burst = 0;
330  }
331  if (netWord.tlast == 1) {
332  //Next state logic
333  if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt)
334  {
335  if( received_and_fwded_meta < expected_output_meta){
336  sRxtoTx_Meta.write(meta_tmp);
337  received_and_fwded_meta++;
339  }else{
341  }
342  }else{
344 
345  }
346  }
347  cnt_wr_stream = 0;
348  }
349  if (processed_net_bytes_rx == max_img_size-cBytesPer10GbitEthAXIPckt) {
350  processed_net_bytes_rx = 0;
351  }
352  else {
353  processed_net_bytes_rx += cBytesPer10GbitEthAXIPckt;
354  }
355  }
356  break;
358  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - WAIT_FOR_META_IMGMAT\n");
359 
360  if ( !siNrc_meta.empty() && !sRxtoTx_Meta.full() )
361  {
362  meta_tmp = siNrc_meta.read();
363  meta_tmp.tlast = 1; //just to be sure...
364  sRxtoTx_Meta.write(meta_tmp);
365  received_and_fwded_meta++;
367  }
368  break;
370  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PROCESSING_PACKET_TXMAT, processed_net_bytes_rx=%u\n", processed_net_bytes_rx);
371  if ( !siSHL_This_Data.empty())
372  {
373  //-- Read incoming data chunk
374  netWord = siSHL_This_Data.read();
375  printf("DEBUG in pRXPathNetToStream: Data write = {D=0x%16.16llX, K=0x%2.2X, L=%d} \n",
376  netWord.tdata.to_long(), netWord.tkeep.to_int(), netWord.tlast.to_int());
377  float_bits_u tmp1;
378  float_bits_u tmp2;
379  tmp1.i = netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32);
380  // unsigned int tmp1 = netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32);
381  // unsigned int tmp2 = netWord.tdata.range(32-1,0);
382  tmp2.i = netWord.tdata.range(32-1,0);
383  //always write one float
384  // tx_matrix[tx_mat_idx]=tmp1.f;
385  sTxMatrix.write(tmp1.f);
386  // std::cout << "DEBUG in pRXPathNetToStream: tmp1=" << tmp1 << " tmp2=" << tmp2 << std::endl;
387  // std::cout << "DEBUG in pRXPathNetToStream: tmp1=" << std::hex << netWord.tdata.range(NETWORK_WORD_BIT_WIDTH-1,32) << " tmp2=" << netWord.tdata.range(32-1,0) << std::dec << std::endl;
388 
389  std::cout << "DEBUG in pRXPathNetToStream: tx matrix =" << tmp1.f << std::endl;
390  tx_mat_idx++;
391  std::cout << "DEBUG in pRXPathNetToStream: tx matrix id=" << tx_mat_idx << std::endl;
392 
393  if ((tx_mat_idx == TRANSFORM_MATRIX_DIM) || (netWord.tlast == 1)) {
394  std::cout << "DEBUG in pRXPathNetToStream: end of matrix rx communication" << std::endl;
395  //end of rx --> w8 for something; else there is the image after the tx matrix
396  if (netWord.tlast == 1) {
398  }else{
400  }
401  tx_mat_idx = 0;
402 
403  } else { //not at the end of the matrix nor the tlast two float to write
404 
405  // tx_matrix[tx_mat_idx]=tmp2.f;
406  sTxMatrix.write(tmp2.f);
407 
408  tx_mat_idx++;
409  }
410  }
411  break;
413  printf("DEBUG in pRXPathNetToStream: enqueueRxToStrFSM - PUSH_REMAINING__META\n");
414 
415  if ( !sRxtoTx_Meta.full() )
416  {
417  if( received_and_fwded_meta < expected_output_meta){
418  sRxtoTx_Meta.write(meta_tmp);
419  received_and_fwded_meta++;
421  }else{
423  }
424  }
425  break;
426  }
427 }
uint8_t enqueueRxToStrFSM
Definition: median_blur.cpp:49
#define TRANSFORM_MATRIX_DIM
#define WARPTRANSFORM_COLS_HIGH_BIT
#define PROCESSING_PACKET_IMGMAT
#define PROCESSING_PACKET_TXMAT
#define WARPTRANSFORM_CHNNEL_LOW_BIT
#define WARPTRANSFORM_COMMANDS_HIGH_BIT
#define PUSH_REMAINING_META
#define WAIT_FOR_META_IMGMAT
#define WARPTRANSFORM_COLS_LOW_BIT
#define WARPTRANSFORM_CHNNEL_HIGH_BIT
#define WARPTRANSFORM_ROWS_HIGH_BIT
#define WARPTRANSFORM_COMMANDS_LOW_BIT
#define WARPTRANSFORM_ROWS_LOW_BIT
#define NETWORK_WORD_BIT_WIDTH
Definition: network.hpp:46
ap_uint< 64 > tdata
Definition: network.hpp:49
ap_uint< 8 > tkeep
Definition: network.hpp:50
ap_uint< 1 > tlast
Definition: network.hpp:51

◆ pRXPathStreamToDDR()

template<typename TStreamMemWrd , typename TMemWrd , const unsigned int loop_cnt, const unsigned int bytes_per_loop>
void pRXPathStreamToDDR ( hls::stream< TMemWrd > &  img_in_axi_stream,
hls::stream< bool > &  sMemBurstRx,
hls::stream< DmCmd > &  soMemWrCmdP0,
hls::stream< DmSts > &  siMemWrStsP0,
hls::stream< TStreamMemWrd > &  soMemWriteP0,
hls::stream< bool > &  sImageLoaded,
img_meta_t img_rows,
img_meta_t img_cols,
img_meta_t img_chan 
)

Receive Path - From the word-aligned RX path stream to storage in the DDR.

Parameters
[in]img_in_axi_stream
[in]sMemBurstRx
[out]soMemWrCmdP0
[out]siMemWrStsP0
[out]soMemWriteP0
[out]sImageLoaded
Returns
Nothing.

Definition at line 443 of file warp_transform_network_library.hpp.

456 {
457  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
458  #pragma HLS INLINE off
459  #pragma HLS pipeline II=1
460 
461  //-- LOCAL VARIABLES ------------------------------------------------------
462  static TMemWrd v = 0;
463  static unsigned int cur_transfers_per_chunk;
464  static unsigned int cnt_wr_stream, cnt_wr_img_loaded;
465  static unsigned int ddr_addr_in;
467  #pragma HLS reset variable=enqueueStrToDdrFSM
468 
469  static ap_uint<32> patternWriteNum;
470  static ap_uint<32> timeoutCnt;
471 
472  static TStreamMemWrd memP0;
473  static DmSts memWrStsP0;
474  static unsigned int processed_bytes_rx;
475 
476  #pragma HLS reset variable=cur_transfers_per_chunk
477  #pragma HLS reset variable=cnt_wr_stream
478  #pragma HLS reset variable=cnt_wr_img_loaded
479  #pragma HLS reset variable=ddr_addr_in
480  #pragma HLS reset variable=patternWriteNum
481  #pragma HLS reset variable=timeoutCnt
482  #pragma HLS reset variable=memP0
483  #pragma HLS reset variable=memWrStsP0
484 
485  static img_meta_t lcl_img_rows=0;
486  static img_meta_t lcl_img_cols=0;
487  static img_meta_t lcl_img_chan=0;
488  #pragma HLS reset variable=lcl_img_rows
489  #pragma HLS reset variable=lcl_img_cols
490  #pragma HLS reset variable=lcl_img_chan
491 
492  switch(enqueueStrToDdrFSM)
493  {
494  case WAIT_FOR_META:
495  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - WAIT_FOR_META, processed_bytes_rx=%u\n",
496  processed_bytes_rx);
497 
498  if ( !img_in_axi_stream.empty() )
499  {
500  if ((processed_bytes_rx) == 0) {
501  memP0.tdata = 0;
502  memP0.tlast = 0;
503  memP0.tkeep = 0;
504  patternWriteNum = 0;
505  timeoutCnt = 0;
506  cur_transfers_per_chunk = 0;
507  ddr_addr_in = 0;
508  cnt_wr_stream = 0;
509  v = 0;
510  memWrStsP0.tag = 0;
511  memWrStsP0.interr = 0;
512  memWrStsP0.decerr = 0;
513  memWrStsP0.slverr = 0;
514  memWrStsP0.okay = 0;
515  lcl_img_rows = *img_rows;
516  lcl_img_cols = *img_cols;
517  lcl_img_chan = *img_chan;
518  }
520  }
521  break;
522 
523  case FSM_CHK_PROC_BYTES:
524  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_CHK_PROC_BYTES, processed_bytes_rx=%u\n", processed_bytes_rx);
525  if (processed_bytes_rx < IMGSIZE-bytes_per_loop) {
526  (processed_bytes_rx) += bytes_per_loop;
527  }
528  else {
529  printf("DEBUG in pRXPathStreamToDDR: WARNING - you have reached the max depth of img. Will put processed_bytes_rx = 0.\n");
530  processed_bytes_rx = 0;
531  }
533  break;
534 
535 case FSM_WR_PAT_CMD:
536  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_CMD\n");
537  if ( !soMemWrCmdP0.full() ) {
538  //-- Post a memory write command to SHELL/Mem/Mp0
539  if (processed_bytes_rx == 0){
540  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK_LAST_BURST;
541  }
542  else {
543  cur_transfers_per_chunk = TRANSFERS_PER_CHUNK;
544  }
545  if (patternWriteNum == 0) { // Write cmd only the fitst time of every burst
546  soMemWrCmdP0.write(DmCmd(ddr_addr_in * BPERMDW_512, cur_transfers_per_chunk*BPERMDW_512)); // Byte-addresable
547  }
548  ddr_addr_in++;
550  }
551  break;
552 
553 case FSM_WR_PAT_LOAD:
554  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_LOAD\n");
555  // -- Assemble a 512-bit memory word with input values from stream
556  if (patternWriteNum++ >= cur_transfers_per_chunk - 1) {
557  if (!sMemBurstRx.empty()) {
558  if (sMemBurstRx.read() == true) {
559  patternWriteNum = 0;
561  }
562  }
563  }
564  else {
565  if((processed_bytes_rx) == 0) {
567  }
568  else {
570  }
571  }
572  break;
573 
574 case FSM_WR_PAT_DATA:
575  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_DATA\n");
576  if (!soMemWriteP0.full()) {
577  //-- Write a memory word to DRAM
578  if (!img_in_axi_stream.empty()) {
579  memP0.tdata = img_in_axi_stream.read();
580  ap_uint<8> keepVal = 0xFF;
581  memP0.tkeep = (ap_uint<64>) (keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal, keepVal);
582  if (patternWriteNum++ == cur_transfers_per_chunk - 1) {
583  printf("DEBUG: (patternWriteNum == cur_transfers_per_chunk -1) \n");
584  memP0.tlast = 1;
585  cnt_wr_img_loaded = 0;
586  timeoutCnt = 0;
587  patternWriteNum = 0;
589  }
590  else {
591  memP0.tlast = 0;
592  }
593  std::cout << "DEBUG in pRXPathStreamToDDR: Pushing to soMemWriteP0 :" << std::hex << memP0.tdata << std::endl;
594  soMemWriteP0.write(memP0);
595  }
596  }
597  break;
598 
599 case FSM_WR_PAT_STS_A:
600  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_A\n");
601  if (!siMemWrStsP0.empty()) {
602  printf(" 1 \n");
603  //-- Get the memory write status for Mem/Mp0
604  siMemWrStsP0.read(memWrStsP0);
606  }
607  else {
608  if (timeoutCnt++ >= CYCLES_UNTIL_TIMEOUT) {
609  memWrStsP0.tag = 0;
610  memWrStsP0.interr = 0;
611  memWrStsP0.decerr = 0;
612  memWrStsP0.slverr = 0;
613  memWrStsP0.okay = 0;
615  }
616  }
617  break;
618 
619 case FSM_WR_PAT_STS_B:
620  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_B\n");
621  if ((memWrStsP0.tag == 0x0) && (memWrStsP0.okay == 1)) {
622  if ((processed_bytes_rx) == 0) {
623  if (!sImageLoaded.full()) {
624  if (cnt_wr_img_loaded++ >= 1) {
625  sImageLoaded.write(false);
627  }
628  else {
629  sImageLoaded.write(true);
630  }
631  }
632  }
633  else {
635  }
636  }
637  else {
638  ; // TODO: handle errors on memWrStsP0
639  }
640  break;
641 
642 case FSM_WR_PAT_STS_C:
643  printf("DEBUG in pRXPathStreamToDDR: enqueueStrToDdrFSM - FSM_WR_PAT_STS_C\n");
644  if((processed_bytes_rx) == 0) {
646  }
647  else {
649  }
650  break;
651 }
652 
653 }
ap_uint< 32 > timeoutCnt
ap_uint< 32 > patternWriteNum
ap_uint< 1 > okay
ap_uint< 1 > decerr
ap_uint< 1 > slverr
ap_uint< 4 > tag
ap_uint< 1 > interr
#define FSM_WR_PAT_STS_B
Definition: harris.hpp:87
#define FSM_WR_PAT_LOAD
Definition: harris.hpp:84
#define FSM_WR_PAT_CMD
Definition: harris.hpp:83
#define TRANSFERS_PER_CHUNK
Definition: harris.hpp:133
#define FSM_WR_PAT_DATA
Definition: harris.hpp:85
#define FSM_CHK_PROC_BYTES
Definition: harris.hpp:81
#define FSM_WR_PAT_STS_A
Definition: harris.hpp:86
#define FSM_WR_PAT_STS_C
Definition: harris.hpp:88
#define TRANSFERS_PER_CHUNK_LAST_BURST
Definition: median_blur.cpp:57
uint8_t enqueueStrToDdrFSM
Definition: median_blur.cpp:50
#define CYCLES_UNTIL_TIMEOUT
Definition: memtest.hpp:96

◆ pTXPath()

void pTXPath ( hls::stream< NetworkWord > &  soTHIS_Shl_Data,
hls::stream< NetworkMetaStream > &  soNrc_meta,
hls::stream< NetworkWord > &  sProcpToTxp_Data,
hls::stream< NetworkMetaStream > &  sRxtoTx_Meta,
hls::stream< NodeId > &  sDstNode_sig,
unsigned int *  processed_word_tx,
ap_uint< 32 > *  pi_rank,
img_meta_t img_rows,
img_meta_t img_cols,
img_meta_t img_chan 
)

Transmit Path - From THIS to SHELL.

Parameters
[out]soTHIS_Shl_Data
[out]soNrc_meta
[in]sProcpToTxp_Data
[in]sRxtoTx_Meta
[in]pi_rank
[in]sDstNode_sig
Returns
Nothing.

Definition at line 668 of file warp_transform_network_library.hpp.

// Body of pTXPath(): a state machine selected by dequeueFSM that forwards the
// words read from sProcpToTxp_Data to soTHIS_Shl_Data and writes a metadata
// word to soNrc_meta together with the first data word it forwards.
// NOTE(review): this listing skips source lines 687, 713, 717, 759 and 798
// (the numbering jumps over them). Presumably they hold the dequeueFSM
// declaration, the `case WAIT_FOR_STREAM_PAIR:` label and the state
// transitions -- confirm against warp_transform_network_library.hpp.
680 {
681  //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
682  //#pragma HLS DATAFLOW interval=1
683  #pragma HLS INLINE off
684 
685  //-- STATIC DATAFLOW VARIABLES ------------------------------------------
686  static NodeId dst_rank;
688  #pragma HLS reset variable=dequeueFSM
689 
690  //-- LOCAL VARIABLES ------------------------------------------------------
691  NetworkWord netWordTx;
692  NetworkMeta meta_in = NetworkMeta();
693  NetworkMetaStream meta_out_stream = NetworkMetaStream();
694 
695  #pragma HLS reset variable=dst_rank
696  #pragma HLS reset variable=netWordTx
697 
     // Local copies of the image geometry, written in PROCESSING_PACKET.
     // NOTE(review): nothing in this listing reads them back -- confirm they are still needed.
698  static img_meta_t lcl_img_rows=0;
699  static img_meta_t lcl_img_cols=0;
700  static img_meta_t lcl_img_chan=0;
701  #pragma HLS reset variable=lcl_img_rows
702  #pragma HLS reset variable=lcl_img_cols
703  #pragma HLS reset variable=lcl_img_chan
704  static bool tx_ongoing = false;
705 
706  switch(dequeueFSM)
707  {
708  default:
709  case WAIT_FOR_META:
     // Pick up the destination rank as soon as one is available on sDstNode_sig.
710  if(!sDstNode_sig.empty())
711  {
712  dst_rank = sDstNode_sig.read();
714  //WarpTransform app needs to be reset to process new rank
715  }
716  break;
718  //#if DEBUG_LEVEL == TRACE_ALL
719  printf("DEBUG in pTXPath: dequeueFSM=%d - WAIT_FOR_STREAM_PAIR, *processed_word_tx=%u\n",
720  dequeueFSM, *processed_word_tx);
721  // #endif
722  //-- Forward incoming chunk to SHELL
723  //WarpTransform-related
     // Once MIN_TX_LOOPS words have been counted, restart the counter and clear tx_ongoing.
724  if (*processed_word_tx == MIN_TX_LOOPS) {
725  *processed_word_tx = 0;
726  tx_ongoing = false;
727  }
728 
     // Proceed only when both inputs hold data and neither output reports full.
729  if (( !sProcpToTxp_Data.empty() && !sRxtoTx_Meta.empty()
730  && !soTHIS_Shl_Data.full() && !soNrc_meta.full() ))
731  {
732  netWordTx = sProcpToTxp_Data.read();
733 
734  // in case MTU=8 ensure tlast is set in WAIT_FOR_STREAM_PAIR and don't visit PROCESSING_PACKET
735  if (PACK_SIZE == 8)
736  {
737  netWordTx.tlast = 1;
738  }
739  soTHIS_Shl_Data.write(netWordTx);
740 
     // Build the outgoing metadata from the incoming one: the ports are swapped,
     // the destination rank is the one read from sDstNode_sig and the source
     // rank is taken from *pi_rank.
741  meta_in = sRxtoTx_Meta.read().tdata;
742  meta_out_stream.tlast = 1;
743  meta_out_stream.tkeep = 0xFF; //just to be sure
744 
745  //WarpTransform-related Forcing the SHELL to wait for tlast
746  meta_out_stream.tdata.len = 0;
747 
748  meta_out_stream.tdata.dst_rank = dst_rank;
749  meta_out_stream.tdata.src_rank = (NodeId) *pi_rank;
750  meta_out_stream.tdata.dst_port = meta_in.src_port;
751  meta_out_stream.tdata.src_port = meta_in.dst_port;
752 
753  soNrc_meta.write(meta_out_stream);
754 
755  (*processed_word_tx)++;
756  printf("DEBUG: Checking netWordTx.tlast...\n");
     // NOTE(review): the body of this branch (source line 759) is missing from the listing.
757  if(netWordTx.tlast != 1)
758  {
760  }
761  }
762  break;
763 
764  case PROCESSING_PACKET:
765  //#if DEBUG_LEVEL == TRACE_ALL
766  printf("DEBUG in pTXPath: dequeueFSM=%d - PROCESSING_PACKET, *processed_word_tx=%u\n",
767  dequeueFSM, *processed_word_tx);
768  //#endif
769  // if (!tx_ongoing && !sInImgRows.empty() && !sInImgCols.empty() && !sInImgChan.empty())
770  // {
771  // img_rows = sInImgRows.read();
772  // img_cols = sInImgCols.read();
773  // img_chan = sInImgChan.read();
774  // tx_ongoing = true;
775  // }
     // Copy the image geometry once; tx_ongoing stays set until the
     // MIN_TX_LOOPS check above clears it.
776  if (!tx_ongoing)
777  {
778  lcl_img_rows = *img_rows;
779  lcl_img_cols = *img_cols;
780  lcl_img_chan = *img_chan;
781  tx_ongoing = true;
782  }
783 
784  if( !sProcpToTxp_Data.empty() && !soTHIS_Shl_Data.full())
785  {
786  netWordTx = sProcpToTxp_Data.read();
787  // This is our own termination based on the custom MTU we have set in PACK_SIZE.
788  // TODO: We can map PACK_SIZE to a dynamically assigned value either through MMIO or header
789  // in order to have a functional bitstream for any MTU size
790  (*processed_word_tx)++;
791 
792  // This is a normal termination of the axi stream from vitis functions
793  // (tlast == 1), combined with our own termination when the bytes sent (8 per word) reach a multiple of PACK_SIZE
794  if ((netWordTx.tlast == 1) || (((*processed_word_tx)*8) % PACK_SIZE == 0))
795  {
796  netWordTx.tlast = 1; // force tlast when the PACK_SIZE boundary, not the producer, ends the packet
797  printf("DEBUG: A netWordTx.tlast=1 ... sProcpToTxp_Data.empty()==%u \n", sProcpToTxp_Data.empty());
799  }
800 
801  soTHIS_Shl_Data.write(netWordTx);
802  }
803  break;
804  }
805 }
#define WAIT_FOR_STREAM_PAIR
Definition: memtest.hpp:72
#define PACK_SIZE
Definition: config.h:51
uint8_t dequeueFSM
Definition: uppercase.cpp:55
ap_uint< 8 > tkeep
Definition: network.hpp:110
NetworkMeta tdata
Definition: network.hpp:109
NetworkDataLength len
Definition: network.hpp:99
NodeId dst_rank
Definition: network.hpp:95
NodeId src_rank
Definition: network.hpp:97
NrcPort src_port
Definition: network.hpp:98
NrcPort dst_port
Definition: network.hpp:96

◆ setUpTxMatrixFromStream()

void setUpTxMatrixFromStream ( float  transform_matrix[9],
hls::stream< float > &  sTxMatrix 
)

Definition at line 61 of file xf_warp_transform_accel.cpp.

// Body of setUpTxMatrixFromStream(): copies TRANSFORM_MATRIX_DIM values from
// sTxMatrix into transform_matrix. When the stream is empty on entry nothing
// is read and transform_matrix is left as it was.
64  {
65 
    // NOTE(review): availability is checked once, before the loop; the
    // remaining reads presumably rely on the producer having queued the
    // whole matrix -- confirm against the writer of sTxMatrix.
66  if(!sTxMatrix.empty()){
67  for(int i=0; i<TRANSFORM_MATRIX_DIM; i++){
68  #pragma HLS PIPELINE
69  transform_matrix[i] = sTxMatrix.read();
70  }
71  }
72 
73 }
Here is the caller graph for this function:

◆ storeWordToArray()

template<typename TInImg , const unsigned int img_pckts>
void storeWordToArray ( uint64_t  input,
TInImg  img[img_pckts],
unsigned int *  processed_word,
unsigned int *  image_loaded 
)

Store a net word to local memory.

Returns
Nothing.

Definition at line 273 of file warp_transform_hw_common.hpp.

/**
 * @brief Store a net word to local memory.
 *
 * Writes @p input into img[*processed_word] and advances the write index.
 * When the word lands in the last slot (index img_pckts-1) the index wraps
 * back to 0 and *image_loaded is set to 1.
 *
 * @tparam TInImg     Element type of the local image buffer.
 * @tparam img_pckts  Number of elements in @p img.
 * @param[in]     input           The 64-bit word to store.
 * @param[out]    img             Local image buffer of img_pckts elements.
 * @param[in,out] processed_word  Write index into @p img; must be below img_pckts on entry.
 * @param[out]    image_loaded    Set to 1 when the last slot has been written; untouched otherwise.
 * @return Nothing.
 */
template<typename TInImg, const unsigned int img_pckts>
void storeWordToArray(uint64_t input,
                      TInImg img[img_pckts],
                      unsigned int *processed_word,
                      unsigned int *image_loaded)
{
  #pragma HLS INLINE

  img[*processed_word] = (TInImg) input;
  // 64-bit values are passed as unsigned long long so that the argument width
  // matches the %llu / %llX conversions (the former %u expected an unsigned int).
  printf("DEBUG in storeWordToArray: input = %llu = 0x%16.16llX \n",
         (unsigned long long) input, (unsigned long long) input);
  printf("DEBUG in storeWordToArray: img[%u]= %llu = 0x%16.16llX \n", *processed_word,
         (unsigned long long)(uint64_t) img[*processed_word],
         (unsigned long long)(uint64_t) img[*processed_word]);
  if (*processed_word < img_pckts-1) {
    // The parentheses matter: `*processed_word++` increments the pointer and
    // discards the dereferenced value, so the index would never advance.
    (*processed_word)++;
  }
  else {
    printf("DEBUG in storeWordToArray: WARNING - you've reached the max depth of img[%u]. Will put *processed_word = 0.\n", *processed_word);
    *processed_word = 0;
    *image_loaded = 1;
  }
}
string input
Definition: test.py:9

◆ storeWordToAxiStream()

template<typename Tin , const unsigned int loop_cnt, const unsigned int bytes_per_loop, const unsigned int max_data_transfer>
void storeWordToAxiStream ( NetworkWord  word,
Tin &  img_in_axi_stream,
unsigned int *  processed_word_rx,
unsigned int *  processed_bytes_rx,
stream< bool > &  sImageLoaded 
)

Store a net word to a local AXI stream.

Returns
Nothing.

Definition at line 226 of file warp_transform_hw_common.hpp.

// Body of storeWordToAxiStream(): pushes up to loop_cnt slices of word.tdata
// into img_in_axi_stream, adds the forwarded byte count to *processed_bytes_rx
// and reports on sImageLoaded whether the transfer limit has been reached.
234 {
235  #pragma HLS INLINE
236  Data_t_in v;
237  // const unsigned int loop_cnt = (BITS_PER_10GBITETHRNET_AXI_PACKET/INPUT_PTR_WIDTH);
238  // const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET/loop_cnt);
     // Bytes forwarded from this word (bytes_per_loop for every slice that is not skipped).
239  unsigned int bytes_with_keep = 0;
240  for (unsigned int i=0; i<loop_cnt; i++) {
     // Skip slice i when tkeep has no bit set at position i or above.
241  if ((word.tkeep >> i) == 0) {
242  printf("WARNING: value with tkeep=0 at i=%u\n", i);
243  continue;
244  }
     // NOTE(review): the slice is taken at bit offset i*8 whatever INPUT_PTR_WIDTH
     // is -- confirm this is intended when INPUT_PTR_WIDTH differs from 8.
245  v.data = (ap_uint<INPUT_PTR_WIDTH>)(word.tdata >> i*8);
     // keep and last are stored in v, but only v.data is written to the stream below.
246  v.keep = word.tkeep;
247  v.last = word.tlast;
248  img_in_axi_stream.write(v.data);
249  bytes_with_keep += bytes_per_loop;
250  }
     // Below the limit: accumulate and signal "not loaded". At or above it: reset
     // the counter and signal "loaded"; in that case the bytes of the current
     // word are not added. Either signal is dropped when sImageLoaded is full.
251  if (*processed_bytes_rx < max_data_transfer){
252  // IMGSIZE-BYTES_PER_10GBITETHRNET_AXI_PACKET) {
253  (*processed_bytes_rx) += bytes_with_keep;
254  if (!sImageLoaded.full()) {
255  sImageLoaded.write(false);
256  }
257  }
258  else {
259  printf("DEBUG in storeWordToAxiStream: WARNING - you've reached the max depth of img. Will put *processed_bytes_rx = 0.\n");
260  *processed_bytes_rx = 0;
261  if (!sImageLoaded.full()) {
262  sImageLoaded.write(true);
263  }
264  }
265 }
#define Data_t_in
Definition: memtest.cpp:29

◆ warp_transform() [1/2]

void warp_transform ( ap_uint< 32 > *  pi_rank,
ap_uint< 32 > *  pi_size,
stream< NetworkWord > &  siSHL_This_Data,
stream< NetworkWord > &  soTHIS_Shl_Data,
stream< NetworkMetaStream > &  siNrc_meta,
stream< NetworkMetaStream > &  soNrc_meta,
ap_uint< 32 > *  po_rx_ports,
stream< DmCmd > &  soMemWrCmdP0,
stream< DmSts > &  siMemWrStsP0,
stream< Axis< 512 > > &  soMemWriteP0,
membus_t lcl_mem0,
membus_t lcl_mem1 
)
Here is the caller graph for this function:

◆ warp_transform() [2/2]

void warp_transform ( ap_uint< 32 > *  pi_rank,
ap_uint< 32 > *  pi_size,
stream< NetworkWord > &  siSHL_This_Data,
stream< NetworkWord > &  soTHIS_Shl_Data,
stream< NetworkMetaStream > &  siNrc_meta,
stream< NetworkMetaStream > &  soNrc_meta,
ap_uint< 32 > *  po_rx_ports,
stream< DmCmd > &  soMemWrCmdP0,
stream< DmSts > &  siMemWrStsP0,
stream< Axis< 512 >> &  soMemWriteP0,
membus_t lcl_mem0,
membus_t lcl_mem1 
)

Main process of the WarpTransform Application.

Deprecated:
This function uses the deprecated AXI stream interface.
Returns
Nothing.

Definition at line 51 of file warp_transform.cpp.

// Body of warp_transform(): declares the HLS interfaces of the role, the
// static streams and counters shared between the processes, and then calls the
// RX, processing (pProcPath) and TX (pTXPath) processes under
// `#pragma HLS DATAFLOW`.
// NOTE(review): this listing skips source lines 149, 211-212, 221, 223 and 225
// (the numbering jumps over them), so the call that starts at line 213 and the
// first call inside the ENABLE_DDR branch appear without their callee names and
// some template arguments -- consult warp_transform.cpp for the complete text.
84 {
85 
86 
87 //-- DIRECTIVES FOR THE BLOCK ---------------------------------------------
88 #pragma HLS INTERFACE axis register both port=siSHL_This_Data
89 #pragma HLS INTERFACE axis register both port=soTHIS_Shl_Data
90 
91 #pragma HLS INTERFACE axis register both port=siNrc_meta
92 #pragma HLS INTERFACE axis register both port=soNrc_meta
93 
94 #pragma HLS INTERFACE ap_ovld register port=po_rx_ports name=poROL_NRC_Rx_ports
95 
// The way pi_rank / pi_size are declared stable depends on the HLS version.
96 #if HLS_VERSION < 20211
97 #pragma HLS INTERFACE ap_stable register port=pi_rank name=piFMC_ROL_rank
98 #pragma HLS INTERFACE ap_stable register port=pi_size name=piFMC_ROL_size
99 #elif HLS_VERSION >= 20211
100  #pragma HLS stable variable=pi_rank
101  #pragma HLS stable variable=pi_size
// NOTE(review): the two conditions above cover every value of HLS_VERSION, so
// this #else branch can never be selected.
102 #else
103  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
104  exit(-1);
105 #endif
106 
107 #ifdef ENABLE_DDR
108 
109 // Bundling: SHELL / Role / Mem / Mp0 / Write Interface
110 #pragma HLS INTERFACE axis register both port=soMemWrCmdP0
111 #pragma HLS INTERFACE axis register both port=siMemWrStsP0
112 #pragma HLS INTERFACE axis register both port=soMemWriteP0
113 
114 #if HLS_VERSION <= 20201
115 #pragma HLS DATA_PACK variable=soMemWrCmdP0 instance=soMemWrCmdP0
116 #pragma HLS DATA_PACK variable=siMemWrStsP0 instance=siMemWrStsP0
117 #elif HLS_VERSION >= 20211
118 #pragma HLS aggregate variable=soMemWrCmdP0 compact=bit
119 #pragma HLS aggregate variable=siMemWrStsP0 compact=bit
// Selected only for 20201 < HLS_VERSION < 20211.
// NOTE(review): HLS_VERSION is compared as an integer above but printed with
// %s below -- confirm the intended format.
120 #else
121  printf("ERROR: Invalid HLS_VERSION=%s\n", HLS_VERSION);
122  exit(-1);
123 #endif
124 
125 const unsigned int ddr_mem_depth = TOTMEMDW_512;
126 const unsigned int ddr_latency = DDR_LATENCY;
127 
128 
129 // When max burst size is 1KB, with 512bit bus we get 16 burst transactions
130 // When max burst size is 4KB, with 512bit bus we get 64 burst transactions
131 const unsigned int max_axi_rw_burst_length = 64;
132 const unsigned int num_outstanding_transactions = 256;
133 
134 // Mapping LCL_MEM0 interface to moMEM_Mp1 channel
135 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem0 bundle=moMEM_Mp1\
136  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
137  num_read_outstanding=num_outstanding_transactions num_write_outstanding=num_outstanding_transactions latency=ddr_latency
138 
139 // Mapping LCL_MEM1 interface to moMEM_Mp1 channel
140 #pragma HLS INTERFACE m_axi depth=ddr_mem_depth port=lcl_mem1 bundle=moMEM_Mp1 \
141  max_read_burst_length=max_axi_rw_burst_length max_write_burst_length=max_axi_rw_burst_length offset=direct \
142  num_read_outstanding=num_outstanding_transactions num_write_outstanding=num_outstanding_transactions latency=ddr_latency
143 
144 #endif
145 
146  #pragma HLS DATAFLOW
147 
148  //-- LOCAL VARIABLES ------------------------------------------------------
150  static stream<NetworkWord> sRxpToTxp_Data("sRxpToTxP_Data"); // FIXME: works even with no static
151  static stream<NetworkMetaStream> sRxtoTx_Meta("sRxtoTx_Meta");
152  static unsigned int processed_word_rx;
153  static unsigned int processed_bytes_rx;
154  static unsigned int processed_word_tx = 0;
155  static stream<bool> sImageLoaded("sImageLoaded");
156  static bool skip_read;
157  static bool write_chunk_to_ddr_pending;
158  static bool ready_to_accept_new_data;
159  static bool signal_init;
160  const int tot_transfers = TOT_TRANSFERS_TX;
161  const unsigned int loop_cnt = (MEMDW_512/BITS_PER_10GBITETHRNET_AXI_PACKET);
162  const unsigned int bytes_per_loop = (BYTES_PER_10GBITETHRNET_AXI_PACKET*loop_cnt);
163 
// The type and depth of img_in_axi_stream depend on ENABLE_DDR: membus_t words
// with a depth of TRANSFERS_PER_CHUNK when DDR is used, INPUT_PTR_WIDTH-bit
// words with a depth of MIN_RX_LOOPS otherwise.
164 #ifdef ENABLE_DDR
165  static stream<membus_t> img_in_axi_stream ("img_in_axi_stream");
166  const unsigned int img_in_axi_stream_depth = TRANSFERS_PER_CHUNK; // the AXI burst size
167  static stream<bool> sMemBurstRx("sMemBurstRx");
168 
169 #else
170  const int img_in_axi_stream_depth = MIN_RX_LOOPS;
171  const int img_out_axi_stream_depth = MIN_TX_LOOPS;
172  static stream<ap_uint<INPUT_PTR_WIDTH>> img_in_axi_stream ("img_in_axi_stream");
173  static stream<ap_uint<OUTPUT_PTR_WIDTH>> img_out_axi_stream ("img_out_axi_stream");
174 #endif
175  static stream<NodeId> sDstNode_sig("sDstNode_sig");
176 
177 
178 //-- DIRECTIVES FOR THIS PROCESS ------------------------------------------
179 #pragma HLS stream variable=sRxtoTx_Meta depth=tot_transfers
180 #pragma HLS reset variable=processed_word_rx
181 #pragma HLS reset variable=processed_word_tx
182 #pragma HLS reset variable=processed_bytes_rx
183 //#pragma HLS reset variable=image_loaded
184 #pragma HLS stream variable=sImageLoaded depth=1
185 #pragma HLS reset variable=skip_read
186 #pragma HLS reset variable=write_chunk_to_ddr_pending
187 //#pragma HLS stream variable=sWriteChunkToDdrPending depth=2
188 #pragma HLS reset variable=ready_to_accept_new_data
189 #pragma HLS reset variable=signal_init
190 #pragma HLS STREAM variable=sDstNode_sig depth=1
191 
192 #ifdef ENABLE_DDR
193 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
// NOTE(review): sProcessed_bytes_rx is not declared anywhere in this listing --
// confirm the variable still exists.
194 #pragma HLS stream variable=sProcessed_bytes_rx depth=img_in_axi_stream_depth
195 #else
196 #pragma HLS stream variable=img_in_axi_stream depth=img_in_axi_stream_depth
197 #pragma HLS stream variable=img_out_axi_stream depth=img_out_axi_stream_depth
198 #endif
199 
200 static stream<float> sTxMatrix("sTxMatrix");
201 #pragma HLS stream variable=sTxMatrix depth=const_tx_matrix_dim
202 
203 // static float tx_matrix[TRANSFORM_MATRIX_DIM] = {1.5,0,0,0,1.8,0,0,0,0}; //scaling (reduction) left corner!!!
204 // #pragma HLS reset variable=tx_matrix
// Image geometry, initialised from the compile-time configuration and handed
// by address to the processes below.
205 img_meta_t img_rows = FRAME_HEIGHT;
206 img_meta_t img_cols = FRAME_WIDTH;
207 img_meta_t img_chan = NPC1;
208 #pragma HLS reset variable=img_rows
209 #pragma HLS reset variable=img_cols
210 #pragma HLS reset variable=img_chan
// NOTE(review): the callee name of the next call (source lines 211-212) is
// missing from this listing; presumably the port/destination setup process.
213  pi_rank,
214  pi_size,
215  sDstNode_sig,
216  po_rx_ports
217 );
218 
219 #ifdef ENABLE_DDR
220 
// NOTE(review): source lines 221, 223 and 225 (callee name and part of the
// template arguments of the network-to-stream RX process) are missing here.
222  loop_cnt,
224  IMGSIZE,
226  siSHL_This_Data,
227  siNrc_meta,
228  sRxtoTx_Meta,
229  img_in_axi_stream,
230  sMemBurstRx,
231  &img_rows,
232  &img_cols,
233  &img_chan,
234  // tx_matrix
235  sTxMatrix
236  );
237 
238  pRXPathStreamToDDR< Axis<MEMDW_512>,
239  membus_t,
240  loop_cnt,
241  bytes_per_loop>(
242  img_in_axi_stream,
243  sMemBurstRx,
244  //---- P0 Write Path (S2MM) -----------
245  soMemWrCmdP0,
246  siMemWrStsP0,
247  soMemWriteP0,
248  //---- P1 Memory mapped ---------------
249  //&processed_bytes_rx,
250  sImageLoaded,
251  &img_rows,
252  &img_cols,
253  &img_chan
254  );
255 
256 
257 
258  #else // !ENABLE_DDR
259 
// NOTE(review): meta_tmp is not declared anywhere in this listing (possibly on
// the skipped source line 149) -- confirm.
260  pRXPath(
261  siSHL_This_Data,
262  siNrc_meta,
263  sRxtoTx_Meta,
264  img_in_axi_stream,
265  meta_tmp,
266  &processed_word_rx,
267  &processed_bytes_rx,
268  sImageLoaded
269  );
270 
271 #endif // ENABLE_DDR
272 
// The processing path takes the two memory ports when DDR is enabled and the
// two local AXI streams otherwise.
273  pProcPath(
274  sRxpToTxp_Data,
275 #ifdef ENABLE_DDR
276  lcl_mem0,
277  lcl_mem1,
278 #else
279  img_in_axi_stream,
280  img_out_axi_stream,
281 #endif
282  sImageLoaded,
283  &img_rows,
284  &img_cols,
285  &img_chan,
286  // tx_matrix
287  sTxMatrix
288  );
289 
290  pTXPath(
291  soTHIS_Shl_Data,
292  soNrc_meta,
293  sRxpToTxp_Data,
294  sRxtoTx_Meta,
295  sDstNode_sig,
296  &processed_word_tx,
297  pi_rank,
298  &img_rows,
299  &img_cols,
300  &img_chan
301  );
302 }
#define NPC1
#define FRAME_HEIGHT
Definition: config.h:43
#define FRAME_WIDTH
Definition: config.h:46
void pProcPath(stream< NetworkWord > &sRxpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, NetworkMetaStream meta_tmp, varin *instruct, double *out, unsigned int *processed_word_rx, unsigned int *processed_word_proc, unsigned int *struct_loaded)
Processing Path - Main processing FSM for Vitis kernels.
void pRXPathNetToStream(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< ap_uint< 512 >> &img_in_axi_stream, stream< bool > &sMemBurstRx)
Receive Path - From SHELL to THIS.
void pPortAndDestionation(ap_uint< 32 > *pi_rank, ap_uint< 32 > *pi_size, stream< NodeId > &sDstNode_sig, ap_uint< 32 > *po_rx_ports)
pPortAndDestionation - Setup the port and the destination rank.
void pRXPath(stream< NetworkWord > &siSHL_This_Data, stream< NetworkMetaStream > &siNrc_meta, stream< NetworkMetaStream > &sRxtoProc_Meta, stream< NetworkWord > &sRxpToProcp_Data, NetworkMetaStream meta_tmp, bool *start_stop, unsigned int *processed_word_rx, unsigned int *processed_bytes_rx)
Receive Path - From SHELL to THIS.
void pTXPath(stream< NetworkWord > &soTHIS_Shl_Data, stream< NetworkMetaStream > &soNrc_meta, stream< NetworkWord > &sProcpToTxp_Data, stream< NetworkMetaStream > &sRxtoTx_Meta, stream< NodeId > &sDstNode_sig, unsigned int *processed_word_tx, ap_uint< 32 > *pi_rank)
Transmit Path - From THIS to SHELL.
#define ENABLE_DDR
Definition: memtest.hpp:42
#define TOTMEMDW_512
Definition: memtest.hpp:93
Here is the call graph for this function:

◆ warp_transformAccelMem()

void warp_transformAccelMem ( membus_t img_inp,
membus_t img_out,
int  rows,
int  cols,
hls::stream< float > &  sTxMatrix 
)

Top-level accelerated function of the WarpTransform Application with memory mapped interfaces.

Returns
Nothing.

Definition at line 222 of file xf_warp_transform_accel.cpp.

// Body of warp_transformAccelMem(): fills the transform matrix from sTxMatrix,
// converts img_inp into an xf::cv::Mat and writes the result to img_out. When
// FAKE_WarpTransform is defined the input matrix is written back unchanged;
// otherwise the output of the kernel call is written.
// NOTE(review): source line 263 (the head of the kernel call) is missing from
// this listing -- consult xf_warp_transform_accel.cpp for the complete text.
228  {
229  // clang-format on
230  #pragma HLS INLINE off
231 
232  xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> imgInput(rows, cols);
233  // clang-format off
234  #pragma HLS stream variable=imgInput.data depth=4
235  // clang-format on
236 
     // The output matrix only exists when the real kernel is compiled in.
237  #ifndef FAKE_WarpTransform
238  xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> imgOutput(rows, cols);
239  // clang-format off
240  #pragma HLS stream variable=imgOutput.data depth=4
241  // clang-format on
242  #endif
243 
244  // clang-format off
245  #pragma HLS DATAFLOW
246  // clang-format on
247 
248  // Local copy of the transform matrix; static, and filled from sTxMatrix by the call below
249  static float transform_matrix[TRANSFORM_MATRIX_DIM];
250 
251  //prepare the tx matrix
252  setUpTxMatrixFromStream(transform_matrix,sTxMatrix );
253 
254  // Feed a cv matrix from ddr memory
     // NOTE(review): the Mat objects are declared with TYPE while the converters
     // use XF_8UC1 -- presumably TYPE == XF_8UC1; confirm.
255  xf::cv::Array2xfMat<MEMDW_512, XF_8UC1, HEIGHT, WIDTH, NPIX>(img_inp, imgInput);
256 
257  #ifdef FAKE_WarpTransform
258  // Feed ddr memory from a cv matrix
259  xf::cv::xfMat2Array<MEMDW_512, XF_8UC1, HEIGHT, WIDTH, NPIX>(imgInput, img_out);
260  #else
261 
262  // Run xfOpenCV kernel:
264  XF_USE_URAM>(imgInput, imgOutput, transform_matrix);
265 
266  // Feed ddr memory from a cv matrix
267  xf::cv::xfMat2Array<MEMDW_512, XF_8UC1, HEIGHT, WIDTH, NPIX>(imgOutput, img_out);
268  #endif
269 
270 
271 }
#define XF_USE_URAM
#define WIDTH
#define HEIGHT
#define TYPE
void setUpTxMatrixFromStream(float transform_matrix[9], hls::stream< float > &sTxMatrix)
#define TRANSFORM_TYPE
#define START_PROC
#define INTERPOLATION
#define NUM_STORE_ROWS
Here is the call graph for this function:
Here is the caller graph for this function:

◆ warpTransformAccelStream()

void warpTransformAccelStream ( hls::stream< ap_uint< 8 >> &  img_in_axi_stream,
hls::stream< ap_uint< 64 >> &  img_out_axi_stream,
int  rows,
int  cols,
float  transform_matrix[9] 
)

Top-level accelerated function of the WarpTransform Application with stream I/F.

Returns
Nothing.

Definition at line 129 of file xf_warp_transform_accel.cpp.

// Body of warpTransformAccelStream(): converts img_in_axi_stream into an
// xf::cv::Mat, runs the kernel call with the caller-supplied transform_matrix
// and converts the output matrix into img_out_axi_stream.
// NOTE(review): source line 161 (the head of the kernel call) is missing from
// this listing -- consult xf_warp_transform_accel.cpp for the complete text.
133  {
134  // clang-format on
135  #pragma HLS INLINE off
136 
137  xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> imgInput(rows, cols);
138  // clang-format off
139  #pragma HLS stream variable=imgInput.data depth=2
140  // clang-format on
141 
142  xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> imgOutput(rows, cols);
143  // clang-format off
144  #pragma HLS stream variable=imgOutput.data depth=2
145  // clang-format on
146 
147  // clang-format off
148  #pragma HLS DATAFLOW
149  // clang-format on
150  //FIXME: not static matrix
151  //float transform_matrix[9]={1.5,0,0,0,1.8,0,0,0,0};
152  // float transform_matrix[9]={0.87,-0.5,0,0.5,0.87,0,0,0,1};
153 
154  accel_utils accel_utils_obj;
155 
     // Presumably rounds the column count up to a multiple of NPIX (this holds
     // if XF_BITSHIFT(NPIX) is log2(NPIX)) -- TODO confirm.
156  int dstMat_cols_align_npc = ((imgInput.cols + (NPIX - 1)) >> XF_BITSHIFT(NPIX)) << XF_BITSHIFT(NPIX);
157 
158  accel_utils_obj.hlsStrm2xfMat<INPUT_PTR_WIDTH, TYPE, HEIGHT, WIDTH, NPIX, (HEIGHT * WIDTH) / NPIX>(img_in_axi_stream, imgInput, dstMat_cols_align_npc);
159 
160  // Run xfOpenCV kernel:
162  XF_USE_URAM>(imgInput, imgOutput, transform_matrix);
163 
     // Same computation as above, applied to the output matrix.
164  int srcMat_cols_align_npc = ((imgOutput.cols + (NPIX - 1)) >> XF_BITSHIFT(NPIX)) << XF_BITSHIFT(NPIX);
165 
166  accel_utils_obj.xfMat2hlsStrm<OUTPUT_PTR_WIDTH, TYPE, HEIGHT, WIDTH, NPIX, HEIGHT*((WIDTH + NPIX - 1) / NPIX)>(imgOutput, img_out_axi_stream,
167  srcMat_cols_align_npc);
168 }
#define NPIX
Here is the caller graph for this function:

◆ warptTransformAccelArray()

void warptTransformAccelArray ( ap_uint< 8 > *  img_in,
float *  transform,
ap_uint< 64 > *  img_out,
int  rows,
int  cols 
)

Top-level accelerated function of the WarpTransform Application with array I/F, used only for simulation/TB purposes.

Returns
Nothing.

Definition at line 83 of file xf_warp_transform_accel.cpp.

// Body of warptTransformAccelArray(): copies the nine values of `transform`
// into a local matrix, converts img_in into an xf::cv::Mat, runs the kernel
// call and converts the output matrix into img_out.
// NOTE(review): source line 113 (the head of the kernel call) is missing from
// this listing -- consult xf_warp_transform_accel.cpp for the complete text.
84  {
    // NOTE(review): pROWS, pCOLS and pNPC1 are not referenced anywhere in this listing.
85  const int pROWS = HEIGHT;
86  const int pCOLS = WIDTH;
87  const int pNPC1 = NPIX;
88 
89  xf::cv::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> imgInput(rows, cols);
90 // clang-format off
91  #pragma HLS stream variable=imgInput.data depth=2
92  // clang-format on
93  xf::cv::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> imgOutput(rows, cols);
94 // clang-format off
95  #pragma HLS stream variable=imgOutput.data depth=2
96 // clang-format on
97 
98 // clang-format off
99  #pragma HLS DATAFLOW
100  // clang-format on
101 
102  // Copy transform data from global memory to local memory:
103  float transform_matrix[9];
104 
105  for (unsigned int i = 0; i < 9; ++i) {
106 // clang-format off
107  #pragma HLS PIPELINE
108  // clang-format on
109  transform_matrix[i] = transform[i];
110  }
111 
     // NOTE(review): this converter is instantiated with TYPE and NPC1 while the
     // Mat objects and the output converter use XF_8UC1 and NPIX -- presumably
     // the macros expand to the same values; confirm.
112  xf::cv::Array2xfMat<INPUT_PTR_WIDTH, TYPE, HEIGHT, WIDTH, NPC1>(img_in, imgInput);
114  XF_USE_URAM>(imgInput, imgOutput, transform_matrix);
115  xf::cv::xfMat2Array<OUTPUT_PTR_WIDTH, XF_8UC1, HEIGHT, WIDTH, NPIX>(imgOutput, img_out);
116 }
Here is the caller graph for this function:

Variable Documentation

◆ const_tx_matrix_dim

const unsigned int const_tx_matrix_dim = 9

Definition at line 81 of file warp_transform.hpp.

◆ f

float float_bits_u::f

Definition at line 69 of file warp_transform_hw_common.hpp.

◆ i

unsigned int float_bits_u::i

Definition at line 68 of file warp_transform_hw_common.hpp.

◆ max_counter_cc

const unsigned long int max_counter_cc = 4000000

Definition at line 300 of file warp_transform_hw_common.hpp.