2017-03-14 02:59:40

by Vinod Koul

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] dmaengine: Add DW AXI DMAC driver

On Tue, Feb 21, 2017 at 11:38:04PM +0300, Eugeniy Paltsev wrote:

> +static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
> +{
> + struct dw_axi_dma *dw = chan->chip->dw;
> + struct axi_dma_desc *desc;
> + dma_addr_t phys;
> +
> + desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);

GFP_NOWAIT please

> +static void axi_desc_put(struct axi_dma_desc *desc)
> +{
> + struct axi_dma_chan *chan = desc->chan;
> + struct dw_axi_dma *dw = chan->chip->dw;
> + struct axi_dma_desc *child, *_next;
> + unsigned int descs_put = 0;
> +
> + list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) {
> + list_del(&child->xfer_list);
> + dma_pool_free(dw->desc_pool, child, child->vd.tx.phys);
> + descs_put++;
> + }
> +
> + dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys);
> + descs_put++;
> +
> + chan->descs_allocated -= descs_put;

why not decrement chan->descs_allocated?

> +
> + dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
> + axi_chan_name(chan), descs_put, chan->descs_allocated);
> +}
> +
> +static void vchan_desc_put(struct virt_dma_desc *vdesc)
> +{
> + axi_desc_put(vd_to_axi_desc(vdesc));
> +}

well this has no logic and becomes a dummy fn, so why do we need it.

> +
> +static enum dma_status
> +dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
> + struct dma_tx_state *txstate)
> +{
> + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
> + enum dma_status ret;
> +
> + ret = dma_cookie_status(dchan, cookie, txstate);
> +
> + if (chan->is_paused && ret == DMA_IN_PROGRESS)
> + return DMA_PAUSED;

no residue calculation?

> +static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
> +{
> + struct axi_dma_desc *desc;
> + struct virt_dma_desc *vd;
> +
> + vd = vchan_next_desc(&chan->vc);

unnecessary empty line

> +static void dma_chan_free_chan_resources(struct dma_chan *dchan)
> +{
> + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
> +
> + /* ASSERT: channel is idle */
> + if (axi_chan_is_hw_enable(chan))
> + dev_err(dchan2dev(dchan), "%s is non-idle!\n",
> + axi_chan_name(chan));
> +
> + axi_chan_disable(chan);
> + axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
> +
> + vchan_free_chan_resources(&chan->vc);
> +
> + dev_vdbg(dchan2dev(dchan), "%s: %s: descriptor still allocated: %u\n",
> + __func__, axi_chan_name(chan), chan->descs_allocated);
> +
> + pm_runtime_put_sync_suspend(chan->chip->dev);

any reason why sync variant is used?

> +static struct dma_async_tx_descriptor *
> +dma_chan_prep_dma_sg(struct dma_chan *dchan,
> + struct scatterlist *dst_sg, unsigned int dst_nents,
> + struct scatterlist *src_sg, unsigned int src_nents,
> + unsigned long flags)
> +{
> + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
> + struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
> + size_t dst_len = 0, src_len = 0, xfer_len = 0;
> + dma_addr_t dst_adr = 0, src_adr = 0;
> + u32 src_width, dst_width;
> + size_t block_ts, max_block_ts;
> + u32 reg;
> + u8 lms = 0;
> +
> + dev_vdbg(chan2dev(chan), "%s: %s: sn: %d dn: %d flags: 0x%lx",
> + __func__, axi_chan_name(chan), src_nents, dst_nents, flags);
> +
> + if (unlikely(dst_nents == 0 || src_nents == 0))
> + return NULL;
> +
> + if (unlikely(dst_sg == NULL || src_sg == NULL))
> + return NULL;
> +
> + max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
> +
> + /*
> + * Loop until there is either no more source or no more destination
> + * scatterlist entry.
> + */
> + while ((dst_len || (dst_sg && dst_nents)) &&
> + (src_len || (src_sg && src_nents))) {
> + if (dst_len == 0) {
> + dst_adr = sg_dma_address(dst_sg);
> + dst_len = sg_dma_len(dst_sg);
> +
> + dst_sg = sg_next(dst_sg);
> + dst_nents--;
> + }
> +
> + /* Process src sg list */
> + if (src_len == 0) {
> + src_adr = sg_dma_address(src_sg);
> + src_len = sg_dma_len(src_sg);
> +
> + src_sg = sg_next(src_sg);
> + src_nents--;
> + }
> +
> + /* Min of src and dest length will be this xfer length */
> + xfer_len = min_t(size_t, src_len, dst_len);
> + if (xfer_len == 0)
> + continue;
> +
> + /* Take care for the alignment */
> + src_width = axi_chan_get_xfer_width(chan, src_adr,
> + dst_adr, xfer_len);
> + /*
> + * Actually src_width and dst_width can be different, but make
> + * them same to be simpler.
> + * TODO: REVISIT: Can we optimize it?
> + */
> + dst_width = src_width;

this is memcpy so you should assume default which give optimal perf. If
required you can have a configurable parameter to help people tune

> +
> + /*
> + * block_ts indicates the total number of data of width
> + * src_width to be transferred in a DMA block transfer.
> + * BLOCK_TS register should be set to block_ts -1
> + */
> + block_ts = xfer_len >> src_width;
> + if (block_ts > max_block_ts) {
> + block_ts = max_block_ts;
> + xfer_len = max_block_ts << src_width;
> + }
> +
> + desc = axi_desc_get(chan);
> + if (unlikely(!desc))
> + goto err_desc_get;
> +
> + write_desc_sar(desc, src_adr);
> + write_desc_dar(desc, dst_adr);
> + desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
> + desc->lli.ctl_hi = cpu_to_le32(CH_CTL_H_LLI_VALID);
> +
> + reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
> + DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
> + dst_width << CH_CTL_L_DST_WIDTH_POS |
> + src_width << CH_CTL_L_SRC_WIDTH_POS |
> + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
> + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
> + desc->lli.ctl_lo = cpu_to_le32(reg);
> +
> + set_desc_src_master(desc);
> + set_desc_dest_master(desc);
> +
> + /* Manage transfer list (xfer_list) */
> + if (!first) {
> + first = desc;
> + } else {
> + list_add_tail(&desc->xfer_list, &first->xfer_list);

and since you use vchan why do you need this list

> +/* Deep inside these burning buildings voices die to be heard */

??

> +#define CH_CTL_L_DST_MSIZE_POS 18
> +#define CH_CTL_L_SRC_MSIZE_POS 14

empty line here

> +#define CH_CTL_L_DST_MAST_POS 2
> +#define CH_CTL_L_DST_MAST BIT(CH_CTL_L_DST_MAST_POS)

do we need to define _POS and can't we do BIT(2)..?

> +enum {
> + DWAXIDMAC_IRQ_NONE = 0x0,
> + DWAXIDMAC_IRQ_BLOCK_TRF = BIT(0), /* block transfer complete */
> + DWAXIDMAC_IRQ_DMA_TRF = BIT(1), /* dma transfer complete */
> + DWAXIDMAC_IRQ_SRC_TRAN = BIT(3), /* source transaction complete */
> + DWAXIDMAC_IRQ_DST_TRAN = BIT(4), /* destination transaction complete */

Pls add these comments kernel-doc style for the enum

--
~Vinod


2017-03-28 19:29:36

by Eugeniy Paltsev

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] dmaengine: Add DW AXI DMAC driver

Hi Vinod!

Thanks for the response.
My comments are below.

On Tue, 2017-03-14 at 08:30 +0530, Vinod Koul wrote:
> On Tue, Feb 21, 2017 at 11:38:04PM +0300, Eugeniy Paltsev wrote:

> > +static void vchan_desc_put(struct virt_dma_desc *vdesc)
> > +{
> > + axi_desc_put(vd_to_axi_desc(vdesc));
> > +}
> well this has no logic and becomes a dummy fn, so why do we need it.

Both functions (vchan_desc_put and axi_desc_put) are used.
The first one is installed as the vchan callback; the second one is used
when we need to free descriptors after errors in the prepare function
(before we call vchan_tx_prep).

> > 
> > +
> > +static enum dma_status
> > +dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
> > +   struct dma_tx_state *txstate)
> > +{
> > + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
> > + enum dma_status ret;
> > +
> > + ret = dma_cookie_status(dchan, cookie, txstate);
> > +
> > + if (chan->is_paused && ret == DMA_IN_PROGRESS)
> > + return DMA_PAUSED;
> no residue calculation?

I don't think we need residue calculation in mem-to-mem transfers. I'm
planning to add residue calculation at once with DMA_SLAVE
functionality.

> > 
> > +static void dma_chan_free_chan_resources(struct dma_chan *dchan)
> > +{
> > + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
> > +
> > + /* ASSERT: channel is idle */
> > + if (axi_chan_is_hw_enable(chan))
> > + dev_err(dchan2dev(dchan), "%s is non-idle!\n",
> > + axi_chan_name(chan));
> > +
> > + axi_chan_disable(chan);
> > + axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
> > +
> > + vchan_free_chan_resources(&chan->vc);
> > +
> > + dev_vdbg(dchan2dev(dchan), "%s: %s: descriptor still
> > allocated: %u\n",
> > + __func__, axi_chan_name(chan), chan-
> > >descs_allocated);
> > +
> > + pm_runtime_put_sync_suspend(chan->chip->dev);
> any reason why sync variant is used?

I re-read the documentation and yes - I can use the async variant here
(and in the other parts of this driver).

> > + block_ts = xfer_len >> src_width;
> > + if (block_ts > max_block_ts) {
> > + block_ts = max_block_ts;
> > + xfer_len = max_block_ts << src_width;
> > + }
> > +
> > + desc = axi_desc_get(chan);
> > + if (unlikely(!desc))
> > + goto err_desc_get;
> > +
> > + write_desc_sar(desc, src_adr);
> > + write_desc_dar(desc, dst_adr);
> > + desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
> > + desc->lli.ctl_hi =
> > cpu_to_le32(CH_CTL_H_LLI_VALID);
> > +
> > + reg = (DWAXIDMAC_BURST_TRANS_LEN_4 <<
> > CH_CTL_L_DST_MSIZE_POS |
> > +        DWAXIDMAC_BURST_TRANS_LEN_4 <<
> > CH_CTL_L_SRC_MSIZE_POS |
> > +        dst_width << CH_CTL_L_DST_WIDTH_POS |
> > +        src_width << CH_CTL_L_SRC_WIDTH_POS |
> > +        DWAXIDMAC_CH_CTL_L_INC <<
> > CH_CTL_L_DST_INC_POS |
> > +        DWAXIDMAC_CH_CTL_L_INC <<
> > CH_CTL_L_SRC_INC_POS);
> > + desc->lli.ctl_lo = cpu_to_le32(reg);
> > +
> > + set_desc_src_master(desc);
> > + set_desc_dest_master(desc);
> > +
> > + /* Manage transfer list (xfer_list) */
> > + if (!first) {
> > + first = desc;
> > + } else {
> > + list_add_tail(&desc->xfer_list, &first-
> > >xfer_list);
> and since you use vchan why do you need this list

Each virtual descriptor encapsulates several hardware descriptors,
which belong to the same transfer.
This list (xfer_list) is used only for allocating/freeing these
descriptors and it doesn't affect the virtual DMA work logic.

-- 
 Eugeniy Paltsev