kernel/0376-WIN2030-16550-perf-Optimize-dma-performance.patch
2025-03-08 09:54:45 -05:00

193 lines
7.6 KiB
Diff

From cc9b3b13e10d5ae08fded6c0ff89c3d32e1029af Mon Sep 17 00:00:00 2001
From: Xiang Xu <xuxiang@eswincomputing.com>
Date: Fri, 3 Jan 2025 18:10:25 +0800
Subject: [PATCH 376/416] WIN2030-16550:perf:Optimize dma performance
Changelogs:
1. Derive msize from the snps,max-msize property and the slave-config maxburst values
2. Set the source and destination outstanding request limits (OSR_LMT) to the maximum value (0xF)
Signed-off-by: Xiang Xu <xuxiang@eswincomputing.com>
Change-Id: I5be0477a7ab59c20cf712298d23e221d998934cc
---
.../dts/eswin/eswin-win2030-die0-soc.dtsi | 2 +-
.../dma/dw-axi-dmac/dw-axi-dmac-platform.c | 39 +++++++++++++++++--
drivers/dma/dw-axi-dmac/dw-axi-dmac.h | 7 +++-
3 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/arch/riscv/boot/dts/eswin/eswin-win2030-die0-soc.dtsi b/arch/riscv/boot/dts/eswin/eswin-win2030-die0-soc.dtsi
index 2403d2c604fd..ecfff2bf0334 100644
--- a/arch/riscv/boot/dts/eswin/eswin-win2030-die0-soc.dtsi
+++ b/arch/riscv/boot/dts/eswin/eswin-win2030-die0-soc.dtsi
@@ -392,7 +392,7 @@ d0_aon_dmac: dma-controller-aon@0x518c0000 {
snps,data-width = <3>;
snps,block-size = <0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000 0x80000>;
snps,axi-max-burst-len = <32>;
- // snps,max-msize = <64>;
+ snps,max-msize = <64>;
#size-cells = <2>;
#address-cells = <2>;
dma-ranges = <0x0 0x80000000 0x0 0x80000000 0x100 0x0>;
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
index 78fbfd5d8f8c..152ef7167318 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -129,6 +129,8 @@ static inline void axi_chan_config_write(struct axi_dma_chan *chan,
config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS |
config->src_per << CH_CFG_H_SRC_PER_POS |
config->dst_per << CH_CFG_H_DST_PER_POS |
+ 0xF <<CH_CFG_H_SRC_OSR_LMT_POS |
+ 0xF <<CH_CFG_H_DST_OSR_LMT_POS |
config->prior << CH_CFG_H_PRIORITY_POS;
} else {
cfg_lo |= config->src_per << CH_CFG2_L_SRC_PER_POS |
@@ -136,6 +138,8 @@ static inline void axi_chan_config_write(struct axi_dma_chan *chan,
cfg_hi = config->tt_fc << CH_CFG2_H_TT_FC_POS |
config->hs_sel_src << CH_CFG2_H_HS_SEL_SRC_POS |
config->hs_sel_dst << CH_CFG2_H_HS_SEL_DST_POS |
+ 0xF <<CH_CFG2_H_SRC_OSR_LMT_POS |
+ 0xF <<CH_CFG2_H_DST_OSR_LMT_POS |
config->prior << CH_CFG2_H_PRIORITY_POS;
}
axi_chan_iowrite32(chan, CH_CFG_L, cfg_lo);
@@ -688,6 +692,8 @@ static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
size_t block_ts;
u32 ctllo, ctlhi;
u32 burst_len;
+ u32 src_maxburst;
+ u32 dst_maxburst;
axi_block_ts = chan->chip->dw->hdata->block_size[chan->id];
@@ -700,6 +706,8 @@ static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
return -EINVAL;
}
+ src_maxburst = chan->chip->dw->hdata->max_msize;
+ dst_maxburst = chan->chip->dw->hdata->max_msize;
switch (chan->direction) {
case DMA_MEM_TO_DEV:
reg_width = __ffs(chan->config.dst_addr_width);
@@ -708,6 +716,7 @@ static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
mem_width << CH_CTL_L_SRC_WIDTH_POS |
DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS |
DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS;
+ dst_maxburst = chan->config.dst_maxburst;
block_ts = len >> mem_width;
break;
case DMA_DEV_TO_MEM:
@@ -717,6 +726,7 @@ static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
mem_width << CH_CTL_L_DST_WIDTH_POS |
DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS;
+ src_maxburst = chan->config.src_maxburst;
block_ts = len >> reg_width;
break;
default:
@@ -753,6 +763,14 @@ static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS;
+ if(is_power_of_2(dst_maxburst) && is_power_of_2(src_maxburst))
+ {
+ dst_maxburst = order_base_2(dst_maxburst)? order_base_2(dst_maxburst) - 1 : 0;
+ src_maxburst = order_base_2(src_maxburst)? order_base_2(src_maxburst) - 1 : 0;
+ }else
+ dev_err(chan->chip->dev, "dst_burst or src_burst error!\n");
+ ctllo |= dst_maxburst << CH_CTL_L_DST_MSIZE_POS |
+ src_maxburst << CH_CTL_L_SRC_MSIZE_POS;
hw_desc->lli->ctl_lo = cpu_to_le32(ctllo);
set_desc_src_master(hw_desc, chan);
@@ -955,7 +973,7 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
size_t block_ts, max_block_ts, xfer_len;
struct axi_dma_hw_desc *hw_desc = NULL;
struct axi_dma_desc *desc = NULL;
- u32 xfer_width, reg, num;
+ u32 xfer_width, reg, num, max_burst_len;
u64 llp = 0;
u8 lms = 0; /* Select AXI0 master for LLI fetching */
@@ -963,6 +981,9 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+ max_burst_len = chan->chip->dw->hdata->max_msize;
+ max_burst_len = order_base_2(max_burst_len)? order_base_2(max_burst_len) - 1 : 0;
+
xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, len);
num = DIV_ROUND_UP(len, max_block_ts << xfer_width);
desc = axi_desc_alloc(num);
@@ -1013,8 +1034,8 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
}
hw_desc->lli->ctl_hi = cpu_to_le32(reg);
- reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
- DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+ reg = (max_burst_len << CH_CTL_L_DST_MSIZE_POS |
+ max_burst_len << CH_CTL_L_SRC_MSIZE_POS |
xfer_width << CH_CTL_L_DST_WIDTH_POS |
xfer_width << CH_CTL_L_SRC_WIDTH_POS |
DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
@@ -1482,6 +1503,18 @@ static int parse_device_properties(struct axi_dma_chip *chip)
chip->dw->hdata->axi_rw_burst_len = tmp;
}
+ /* snps,max-msize is an optional property; defaults to 4 when absent */
+ ret = device_property_read_u32(dev, "snps,max-msize", &tmp);
+ if (!ret) {
+ if (tmp > 1024)
+ return -EINVAL;
+ if (tmp < 1)
+ return -EINVAL;
+
+ chip->dw->hdata->max_msize = tmp;
+ }else
+ chip->dw->hdata->max_msize = 4;
+
return 0;
}
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
index 3bd0e65ed010..738954532c93 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -49,6 +49,7 @@ struct dw_axi_dma_hcfg {
u32 priority[DMAC_MAX_CHANNELS];
/* maximum supported axi burst length */
u32 axi_rw_burst_len;
+ u32 max_msize;
/* Register map for DMAX_NUM_CHANNELS <= 8 */
bool reg_map_8_channels;
bool restrict_axi_burst_len;
@@ -224,7 +225,7 @@ static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
#define UNUSED_CHANNEL 0x3F /* Set unused DMA channel to 0x3F */
#define DMA_APB_HS_SEL_BIT_SIZE 0x08 /* HW handshake bits per channel */
#define DMA_APB_HS_SEL_MASK 0xFF /* HW handshake select masks */
-#define MAX_BLOCK_SIZE 0x1000 /* 1024 blocks * 512 bytes data width */
+#define MAX_BLOCK_SIZE 0x200000 /* 1024 blocks * 512 bytes data width */
#define DMA_REG_MAP_CH_REF 0x08 /* Channel count to choose register map */
/* DMAC_CFG */
@@ -304,6 +305,8 @@ enum {
#define CH_CTL_L_SRC_MAST BIT(0)
/* CH_CFG_H */
+#define CH_CFG_H_DST_OSR_LMT_POS 27
+#define CH_CFG_H_SRC_OSR_LMT_POS 23
#define CH_CFG_H_PRIORITY_POS 17
#define CH_CFG_H_DST_PER_POS 12
#define CH_CFG_H_SRC_PER_POS 7
@@ -344,6 +347,8 @@ enum {
#define CH_CFG2_H_HS_SEL_SRC_POS 3
#define CH_CFG2_H_HS_SEL_DST_POS 4
#define CH_CFG2_H_PRIORITY_POS 15
+#define CH_CFG2_H_SRC_OSR_LMT_POS 23
+#define CH_CFG2_H_DST_OSR_LMT_POS 27
/**
* DW AXI DMA channel interrupts
*
--
2.48.1