This patchset makes various improvements to the Broadcom SBA-RAID
driver and also adds SBA-RAID DT nodes for the Stingray SoC.
The patches are based on "[PATCH v2 0/7] FlexRM driver improvements"
and can also be found at sba-raid-imp-v1 branch of
https://github.com/Broadcom/arm64-linux.git
Anup Patel (6):
dma: bcm-sba-raid: Improve memory allocation in SBA RAID driver
dma: bcm-sba-raid: Peek mbox when we are left with no free requests
dma: bcm-sba-raid: pre-ack async tx descriptor
dma: bcm-sba-raid: Break sba_process_deferred_requests() into two
parts
dma: bcm-sba-raid: Add debugfs support
arm64: dts: Add SBA-RAID DT nodes for Stingray SoC
.../boot/dts/broadcom/stingray/stingray-fs4.dtsi | 64 +++
drivers/dma/bcm-sba-raid.c | 527 ++++++++++++---------
2 files changed, 364 insertions(+), 227 deletions(-)
--
2.7.4
This patch improves memory allocation in the SBA RAID driver in the
following ways:
1. Simplify struct sba_request to reduce memory consumption
2. Allocate SBA resources before registering the DMA device
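To make item 1 concrete, here is a minimal before/after sketch of the
allocation pattern (field names as used in this driver); the commands
now live in a trailing flexible array, so each request needs a single
allocation:

  /* Before: one big request array plus a separate cmds array per request */
  sba->reqs = devm_kcalloc(sba->dev, sba->max_req,
			   sizeof(*req), GFP_KERNEL);
  req->cmds = devm_kcalloc(sba->dev, sba->max_cmd_per_req,
			   sizeof(*req->cmds), GFP_KERNEL);

  /* After: one allocation per request, commands in the req->cmds[] tail */
  req = devm_kzalloc(sba->dev,
		     sizeof(*req) +
		     sba->max_cmd_per_req * sizeof(req->cmds[0]),
		     GFP_KERNEL);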
Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Scott Branden <[email protected]>
Reviewed-by: Ray Jui <[email protected]>
Reviewed-by: Vikram Prakash <[email protected]>
---
drivers/dma/bcm-sba-raid.c | 439 +++++++++++++++++++++++----------------------
1 file changed, 226 insertions(+), 213 deletions(-)
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index e41bbc7..6d15fed 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -48,7 +48,8 @@
#include "dmaengine.h"
-/* SBA command related defines */
+/* ====== Driver macros and defines ===== */
+
#define SBA_TYPE_SHIFT 48
#define SBA_TYPE_MASK GENMASK(1, 0)
#define SBA_TYPE_A 0x0
@@ -82,39 +83,41 @@
#define SBA_CMD_WRITE_BUFFER 0xc
#define SBA_CMD_GALOIS 0xe
-/* Driver helper macros */
+#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
+
#define to_sba_request(tx) \
container_of(tx, struct sba_request, tx)
#define to_sba_device(dchan) \
container_of(dchan, struct sba_device, dma_chan)
-enum sba_request_state {
- SBA_REQUEST_STATE_FREE = 1,
- SBA_REQUEST_STATE_ALLOCED = 2,
- SBA_REQUEST_STATE_PENDING = 3,
- SBA_REQUEST_STATE_ACTIVE = 4,
- SBA_REQUEST_STATE_RECEIVED = 5,
- SBA_REQUEST_STATE_COMPLETED = 6,
- SBA_REQUEST_STATE_ABORTED = 7,
+/* ===== Driver data structures ===== */
+
+enum sba_request_flags {
+ SBA_REQUEST_STATE_FREE = 0x001,
+ SBA_REQUEST_STATE_ALLOCED = 0x002,
+ SBA_REQUEST_STATE_PENDING = 0x004,
+ SBA_REQUEST_STATE_ACTIVE = 0x008,
+ SBA_REQUEST_STATE_RECEIVED = 0x010,
+ SBA_REQUEST_STATE_COMPLETED = 0x020,
+ SBA_REQUEST_STATE_ABORTED = 0x040,
+ SBA_REQUEST_STATE_MASK = 0x0ff,
+ SBA_REQUEST_FENCE = 0x100,
};
struct sba_request {
/* Global state */
struct list_head node;
struct sba_device *sba;
- enum sba_request_state state;
- bool fence;
+ u32 flags;
/* Chained requests management */
struct sba_request *first;
struct list_head next;
- unsigned int next_count;
atomic_t next_pending_count;
/* BRCM message data */
- void *resp;
- dma_addr_t resp_dma;
- struct brcm_sba_command *cmds;
struct brcm_message msg;
struct dma_async_tx_descriptor tx;
+ /* SBA commands */
+ struct brcm_sba_command cmds[0];
};
enum sba_version {
@@ -128,11 +131,11 @@ struct sba_device {
/* DT configuration parameters */
enum sba_version ver;
/* Derived configuration parameters */
- u32 max_req;
u32 hw_buf_size;
u32 hw_resp_size;
u32 max_pq_coefs;
u32 max_pq_srcs;
+ u32 max_req;
u32 max_cmd_per_req;
u32 max_xor_srcs;
u32 max_resp_pool_size;
@@ -152,7 +155,6 @@ struct sba_device {
void *cmds_base;
dma_addr_t cmds_dma_base;
spinlock_t reqs_lock;
- struct sba_request *reqs;
bool reqs_fence;
struct list_head reqs_alloc_list;
struct list_head reqs_pending_list;
@@ -161,10 +163,9 @@ struct sba_device {
struct list_head reqs_completed_list;
struct list_head reqs_aborted_list;
struct list_head reqs_free_list;
- int reqs_free_count;
};
-/* ====== SBA command helper routines ===== */
+/* ====== Command helper routines ===== */
static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
{
@@ -196,7 +197,7 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
}
-/* ====== Channel resource management routines ===== */
+/* ====== General helper routines ===== */
static struct sba_request *sba_alloc_request(struct sba_device *sba)
{
@@ -204,24 +205,20 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
struct sba_request *req = NULL;
spin_lock_irqsave(&sba->reqs_lock, flags);
-
req = list_first_entry_or_null(&sba->reqs_free_list,
struct sba_request, node);
- if (req) {
+ if (req)
list_move_tail(&req->node, &sba->reqs_alloc_list);
- req->state = SBA_REQUEST_STATE_ALLOCED;
- req->fence = false;
- req->first = req;
- INIT_LIST_HEAD(&req->next);
- req->next_count = 1;
- atomic_set(&req->next_pending_count, 1);
-
- sba->reqs_free_count--;
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ if (!req)
+ return NULL;
- dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
- }
+ req->flags = SBA_REQUEST_STATE_ALLOCED;
+ req->first = req;
+ INIT_LIST_HEAD(&req->next);
+ atomic_set(&req->next_pending_count, 1);
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
return req;
}
@@ -231,7 +228,8 @@ static void _sba_pending_request(struct sba_device *sba,
struct sba_request *req)
{
lockdep_assert_held(&sba->reqs_lock);
- req->state = SBA_REQUEST_STATE_PENDING;
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_PENDING;
list_move_tail(&req->node, &sba->reqs_pending_list);
if (list_empty(&sba->reqs_active_list))
sba->reqs_fence = false;
@@ -246,9 +244,10 @@ static bool _sba_active_request(struct sba_device *sba,
sba->reqs_fence = false;
if (sba->reqs_fence)
return false;
- req->state = SBA_REQUEST_STATE_ACTIVE;
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_ACTIVE;
list_move_tail(&req->node, &sba->reqs_active_list);
- if (req->fence)
+ if (req->flags & SBA_REQUEST_FENCE)
sba->reqs_fence = true;
return true;
}
@@ -258,7 +257,8 @@ static void _sba_abort_request(struct sba_device *sba,
struct sba_request *req)
{
lockdep_assert_held(&sba->reqs_lock);
- req->state = SBA_REQUEST_STATE_ABORTED;
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_ABORTED;
list_move_tail(&req->node, &sba->reqs_aborted_list);
if (list_empty(&sba->reqs_active_list))
sba->reqs_fence = false;
@@ -269,42 +269,34 @@ static void _sba_free_request(struct sba_device *sba,
struct sba_request *req)
{
lockdep_assert_held(&sba->reqs_lock);
- req->state = SBA_REQUEST_STATE_FREE;
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_FREE;
list_move_tail(&req->node, &sba->reqs_free_list);
if (list_empty(&sba->reqs_active_list))
sba->reqs_fence = false;
- sba->reqs_free_count++;
}
-static void sba_received_request(struct sba_request *req)
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_complete_request(struct sba_device *sba,
+ struct sba_request *req)
{
- unsigned long flags;
- struct sba_device *sba = req->sba;
-
- spin_lock_irqsave(&sba->reqs_lock, flags);
- req->state = SBA_REQUEST_STATE_RECEIVED;
- list_move_tail(&req->node, &sba->reqs_received_list);
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ lockdep_assert_held(&sba->reqs_lock);
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_COMPLETED;
+ list_move_tail(&req->node, &sba->reqs_completed_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
}
-static void sba_complete_chained_requests(struct sba_request *req)
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_received_request(struct sba_device *sba,
+ struct sba_request *req)
{
- unsigned long flags;
- struct sba_request *nreq;
- struct sba_device *sba = req->sba;
-
- spin_lock_irqsave(&sba->reqs_lock, flags);
-
- req->state = SBA_REQUEST_STATE_COMPLETED;
- list_move_tail(&req->node, &sba->reqs_completed_list);
- list_for_each_entry(nreq, &req->next, next) {
- nreq->state = SBA_REQUEST_STATE_COMPLETED;
- list_move_tail(&nreq->node, &sba->reqs_completed_list);
- }
+ lockdep_assert_held(&sba->reqs_lock);
+ req->flags = SBA_REQUEST_STATE_RECEIVED;
+ list_move_tail(&req->node, &sba->reqs_received_list);
if (list_empty(&sba->reqs_active_list))
sba->reqs_fence = false;
-
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
}
static void sba_free_chained_requests(struct sba_request *req)
@@ -332,8 +324,7 @@ static void sba_chain_request(struct sba_request *first,
list_add_tail(&req->next, &first->next);
req->first = first;
- first->next_count++;
- atomic_set(&first->next_pending_count, first->next_count);
+ atomic_inc(&first->next_pending_count);
spin_unlock_irqrestore(&sba->reqs_lock, flags);
}
@@ -383,26 +374,6 @@ static void sba_cleanup_pending_requests(struct sba_device *sba)
spin_unlock_irqrestore(&sba->reqs_lock, flags);
}
-/* ====== DMAENGINE callbacks ===== */
-
-static void sba_free_chan_resources(struct dma_chan *dchan)
-{
- /*
- * Channel resources are pre-alloced so we just free-up
- * whatever we can so that we can re-use pre-alloced
- * channel resources next time.
- */
- sba_cleanup_nonpending_requests(to_sba_device(dchan));
-}
-
-static int sba_device_terminate_all(struct dma_chan *dchan)
-{
- /* Cleanup all pending requests */
- sba_cleanup_pending_requests(to_sba_device(dchan));
-
- return 0;
-}
-
static int sba_send_mbox_request(struct sba_device *sba,
struct sba_request *req)
{
@@ -428,17 +399,27 @@ static int sba_send_mbox_request(struct sba_device *sba,
return 0;
}
-static void sba_issue_pending(struct dma_chan *dchan)
+static void sba_process_deferred_requests(struct sba_device *sba)
{
int ret;
+ u32 count;
unsigned long flags;
- struct sba_request *req, *req1;
- struct sba_device *sba = to_sba_device(dchan);
+ struct sba_request *req;
+ struct dma_async_tx_descriptor *tx;
spin_lock_irqsave(&sba->reqs_lock, flags);
- /* Process all pending request */
- list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node) {
+ /* Count pending requests */
+ count = 0;
+ list_for_each_entry(req, &sba->reqs_pending_list, node)
+ count++;
+
+ /* Process pending requests */
+ while (!list_empty(&sba->reqs_pending_list) && count) {
+ /* Get the first pending request */
+ req = list_first_entry(&sba->reqs_pending_list,
+ struct sba_request, node);
+
/* Try to make request active */
if (!_sba_active_request(sba, req))
break;
@@ -453,11 +434,102 @@ static void sba_issue_pending(struct dma_chan *dchan)
_sba_pending_request(sba, req);
break;
}
+
+ count--;
}
+ /* Count completed requests */
+ count = 0;
+ list_for_each_entry(req, &sba->reqs_completed_list, node)
+ count++;
+
+ /* Process completed requests */
+ while (!list_empty(&sba->reqs_completed_list) && count) {
+ req = list_first_entry(&sba->reqs_completed_list,
+ struct sba_request, node);
+ list_del_init(&req->node);
+ tx = &req->tx;
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ WARN_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ dma_cookie_complete(tx);
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+ dma_descriptor_unmap(tx);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ dma_run_dependencies(tx);
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /* If waiting for 'ack' then move to completed list */
+ if (!async_tx_test_ack(&req->tx))
+ _sba_complete_request(sba, req);
+ else
+ _sba_free_request(sba, req);
+
+ count--;
+ }
+
+ /* Re-check pending and completed work */
+ count = 0;
+ if (!list_empty(&sba->reqs_pending_list) ||
+ !list_empty(&sba->reqs_completed_list))
+ count = 1;
+
spin_unlock_irqrestore(&sba->reqs_lock, flags);
}
+static void sba_process_received_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /* Mark request as received */
+ _sba_received_request(sba, req);
+
+ /* Update request */
+ if (!atomic_dec_return(&req->first->next_pending_count))
+ _sba_complete_request(sba, req->first);
+ if (req->first != req)
+ _sba_free_request(sba, req);
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+/* ====== DMAENGINE callbacks ===== */
+
+static void sba_free_chan_resources(struct dma_chan *dchan)
+{
+ /*
+ * Channel resources are pre-alloced so we just free-up
+ * whatever we can so that we can re-use pre-alloced
+ * channel resources next time.
+ */
+ sba_cleanup_nonpending_requests(to_sba_device(dchan));
+}
+
+static int sba_device_terminate_all(struct dma_chan *dchan)
+{
+ /* Cleanup all pending requests */
+ sba_cleanup_pending_requests(to_sba_device(dchan));
+
+ return 0;
+}
+
+static void sba_issue_pending(struct dma_chan *dchan)
+{
+ struct sba_device *sba = to_sba_device(dchan);
+
+ /* Process deferred requests */
+ sba_process_deferred_requests(sba);
+}
+
static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
{
unsigned long flags;
@@ -506,6 +578,7 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
{
u64 cmd;
u32 c_mdata;
+ dma_addr_t resp_dma = req->tx.phys;
struct brcm_sba_command *cmdsp = cmds;
/* Type-B command to load dummy data into buf0 */
@@ -521,7 +594,7 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
cmdsp->cmd = cmd;
*cmdsp->cmd_dma = cpu_to_le64(cmd);
cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
- cmdsp->data = req->resp_dma;
+ cmdsp->data = resp_dma;
cmdsp->data_len = req->sba->hw_resp_size;
cmdsp++;
@@ -542,11 +615,11 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
- cmdsp->data = req->resp_dma;
+ cmdsp->data = resp_dma;
cmdsp->data_len = req->sba->hw_resp_size;
cmdsp++;
@@ -573,7 +646,7 @@ sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags)
* Force fence so that no requests are submitted
* until DMA callback for this request is invoked.
*/
- req->fence = true;
+ req->flags |= SBA_REQUEST_FENCE;
/* Fillup request message */
sba_fillup_interrupt_msg(req, req->cmds, &req->msg);
@@ -593,6 +666,7 @@ static void sba_fillup_memcpy_msg(struct sba_request *req,
{
u64 cmd;
u32 c_mdata;
+ dma_addr_t resp_dma = req->tx.phys;
struct brcm_sba_command *cmdsp = cmds;
/* Type-B command to load data into buf0 */
@@ -629,7 +703,7 @@ static void sba_fillup_memcpy_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -656,7 +730,8 @@ sba_prep_dma_memcpy_req(struct sba_device *sba,
req = sba_alloc_request(sba);
if (!req)
return NULL;
- req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
/* Fillup request message */
sba_fillup_memcpy_msg(req, req->cmds, &req->msg,
@@ -711,6 +786,7 @@ static void sba_fillup_xor_msg(struct sba_request *req,
u64 cmd;
u32 c_mdata;
unsigned int i;
+ dma_addr_t resp_dma = req->tx.phys;
struct brcm_sba_command *cmdsp = cmds;
/* Type-B command to load data into buf0 */
@@ -766,7 +842,7 @@ static void sba_fillup_xor_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -793,7 +869,8 @@ sba_prep_dma_xor_req(struct sba_device *sba,
req = sba_alloc_request(sba);
if (!req)
return NULL;
- req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
/* Fillup request message */
sba_fillup_xor_msg(req, req->cmds, &req->msg,
@@ -854,6 +931,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
u64 cmd;
u32 c_mdata;
unsigned int i;
+ dma_addr_t resp_dma = req->tx.phys;
struct brcm_sba_command *cmdsp = cmds;
if (pq_continue) {
@@ -947,7 +1025,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -974,7 +1052,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -1002,7 +1080,8 @@ sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
req = sba_alloc_request(sba);
if (!req)
return NULL;
- req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
/* Fillup request messages */
sba_fillup_pq_msg(req, dmaf_continue(flags),
@@ -1027,6 +1106,7 @@ static void sba_fillup_pq_single_msg(struct sba_request *req,
u64 cmd;
u32 c_mdata;
u8 pos, dpos = raid6_gflog[scf];
+ dma_addr_t resp_dma = req->tx.phys;
struct brcm_sba_command *cmdsp = cmds;
if (!dst_p)
@@ -1105,7 +1185,7 @@ static void sba_fillup_pq_single_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -1226,7 +1306,7 @@ static void sba_fillup_pq_single_msg(struct sba_request *req,
cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
if (req->sba->hw_resp_size) {
cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
- cmdsp->resp = req->resp_dma;
+ cmdsp->resp = resp_dma;
cmdsp->resp_len = req->sba->hw_resp_size;
}
cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -1255,7 +1335,8 @@ sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
req = sba_alloc_request(sba);
if (!req)
return NULL;
- req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
/* Fillup request messages */
sba_fillup_pq_single_msg(req, dmaf_continue(flags),
@@ -1370,40 +1451,10 @@ sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
/* ====== Mailbox callbacks ===== */
-static void sba_dma_tx_actions(struct sba_request *req)
-{
- struct dma_async_tx_descriptor *tx = &req->tx;
-
- WARN_ON(tx->cookie < 0);
-
- if (tx->cookie > 0) {
- dma_cookie_complete(tx);
-
- /*
- * Call the callback (must not sleep or submit new
- * operations to this channel)
- */
- if (tx->callback)
- tx->callback(tx->callback_param);
-
- dma_descriptor_unmap(tx);
- }
-
- /* Run dependent operations */
- dma_run_dependencies(tx);
-
- /* If waiting for 'ack' then move to completed list */
- if (!async_tx_test_ack(&req->tx))
- sba_complete_chained_requests(req);
- else
- sba_free_chained_requests(req);
-}
-
static void sba_receive_message(struct mbox_client *cl, void *msg)
{
- unsigned long flags;
struct brcm_message *m = msg;
- struct sba_request *req = m->ctx, *req1;
+ struct sba_request *req = m->ctx;
struct sba_device *sba = req->sba;
/* Error count if message has error */
@@ -1411,36 +1462,11 @@ static void sba_receive_message(struct mbox_client *cl, void *msg)
dev_err(sba->dev, "%s got message with error %d",
dma_chan_name(&sba->dma_chan), m->error);
- /* Mark request as received */
- sba_received_request(req);
-
- /* Wait for all chained requests to be completed */
- if (atomic_dec_return(&req->first->next_pending_count))
- goto done;
-
- /* Point to first request */
- req = req->first;
-
- /* Update request */
- if (req->state == SBA_REQUEST_STATE_RECEIVED)
- sba_dma_tx_actions(req);
- else
- sba_free_chained_requests(req);
-
- spin_lock_irqsave(&sba->reqs_lock, flags);
-
- /* Re-check all completed request waiting for 'ack' */
- list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node) {
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
- sba_dma_tx_actions(req);
- spin_lock_irqsave(&sba->reqs_lock, flags);
- }
-
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ /* Process received request */
+ sba_process_received_request(sba, req);
-done:
- /* Try to submit pending request */
- sba_issue_pending(&sba->dma_chan);
+ /* Process deferred requests */
+ sba_process_deferred_requests(sba);
}
/* ====== Platform driver routines ===== */
@@ -1450,13 +1476,13 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
int i, j, p, ret = 0;
struct sba_request *req = NULL;
- sba->resp_base = dma_alloc_coherent(sba->dma_dev.dev,
+ sba->resp_base = dma_alloc_coherent(sba->mbox_dev,
sba->max_resp_pool_size,
&sba->resp_dma_base, GFP_KERNEL);
if (!sba->resp_base)
return -ENOMEM;
- sba->cmds_base = dma_alloc_coherent(sba->dma_dev.dev,
+ sba->cmds_base = dma_alloc_coherent(sba->mbox_dev,
sba->max_cmds_pool_size,
&sba->cmds_dma_base, GFP_KERNEL);
if (!sba->cmds_base) {
@@ -1474,31 +1500,21 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
INIT_LIST_HEAD(&sba->reqs_aborted_list);
INIT_LIST_HEAD(&sba->reqs_free_list);
- sba->reqs = devm_kcalloc(sba->dev, sba->max_req,
- sizeof(*req), GFP_KERNEL);
- if (!sba->reqs) {
- ret = -ENOMEM;
- goto fail_free_cmds_pool;
- }
-
for (i = 0, p = 0; i < sba->max_req; i++) {
- req = &sba->reqs[i];
+ req = devm_kzalloc(sba->dev,
+ sizeof(*req) +
+ sba->max_cmd_per_req * sizeof(req->cmds[0]),
+ GFP_KERNEL);
+ if (!req) {
+ ret = -ENOMEM;
+ goto fail_free_cmds_pool;
+ }
INIT_LIST_HEAD(&req->node);
req->sba = sba;
- req->state = SBA_REQUEST_STATE_FREE;
+ req->flags = SBA_REQUEST_STATE_FREE;
INIT_LIST_HEAD(&req->next);
- req->next_count = 1;
atomic_set(&req->next_pending_count, 0);
- req->fence = false;
- req->resp = sba->resp_base + p;
- req->resp_dma = sba->resp_dma_base + p;
p += sba->hw_resp_size;
- req->cmds = devm_kcalloc(sba->dev, sba->max_cmd_per_req,
- sizeof(*req->cmds), GFP_KERNEL);
- if (!req->cmds) {
- ret = -ENOMEM;
- goto fail_free_cmds_pool;
- }
for (j = 0; j < sba->max_cmd_per_req; j++) {
req->cmds[j].cmd = 0;
req->cmds[j].cmd_dma = sba->cmds_base +
@@ -1510,20 +1526,18 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
memset(&req->msg, 0, sizeof(req->msg));
dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
req->tx.tx_submit = sba_tx_submit;
- req->tx.phys = req->resp_dma;
+ req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size;
list_add_tail(&req->node, &sba->reqs_free_list);
}
- sba->reqs_free_count = sba->max_req;
-
return 0;
fail_free_cmds_pool:
- dma_free_coherent(sba->dma_dev.dev,
+ dma_free_coherent(sba->mbox_dev,
sba->max_cmds_pool_size,
sba->cmds_base, sba->cmds_dma_base);
fail_free_resp_pool:
- dma_free_coherent(sba->dma_dev.dev,
+ dma_free_coherent(sba->mbox_dev,
sba->max_resp_pool_size,
sba->resp_base, sba->resp_dma_base);
return ret;
@@ -1532,9 +1546,9 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
static void sba_freeup_channel_resources(struct sba_device *sba)
{
dmaengine_terminate_all(&sba->dma_chan);
- dma_free_coherent(sba->dma_dev.dev, sba->max_cmds_pool_size,
+ dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size,
sba->cmds_base, sba->cmds_dma_base);
- dma_free_coherent(sba->dma_dev.dev, sba->max_resp_pool_size,
+ dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size,
sba->resp_base, sba->resp_dma_base);
sba->resp_base = NULL;
sba->resp_dma_base = 0;
@@ -1625,6 +1639,13 @@ static int sba_probe(struct platform_device *pdev)
sba->dev = &pdev->dev;
platform_set_drvdata(pdev, sba);
+ /* Number of channels equals number of mailbox channels */
+ ret = of_count_phandle_with_args(pdev->dev.of_node,
+ "mboxes", "#mbox-cells");
+ if (ret <= 0)
+ return -ENODEV;
+ mchans_count = ret;
+
/* Determine SBA version from DT compatible string */
if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba"))
sba->ver = SBA_VER_1;
@@ -1637,14 +1658,12 @@ static int sba_probe(struct platform_device *pdev)
/* Derived Configuration parameters */
switch (sba->ver) {
case SBA_VER_1:
- sba->max_req = 1024;
sba->hw_buf_size = 4096;
sba->hw_resp_size = 8;
sba->max_pq_coefs = 6;
sba->max_pq_srcs = 6;
break;
case SBA_VER_2:
- sba->max_req = 1024;
sba->hw_buf_size = 4096;
sba->hw_resp_size = 8;
sba->max_pq_coefs = 30;
@@ -1658,6 +1677,7 @@ static int sba_probe(struct platform_device *pdev)
default:
return -EINVAL;
}
+ sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL * mchans_count;
sba->max_cmd_per_req = sba->max_pq_srcs + 3;
sba->max_xor_srcs = sba->max_cmd_per_req - 1;
sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size;
@@ -1671,22 +1691,14 @@ static int sba_probe(struct platform_device *pdev)
sba->client.knows_txdone = false;
sba->client.tx_tout = 0;
- /* Number of channels equals number of mailbox channels */
- ret = of_count_phandle_with_args(pdev->dev.of_node,
- "mboxes", "#mbox-cells");
- if (ret <= 0)
- return -ENODEV;
- mchans_count = ret;
- sba->mchans_count = 0;
- atomic_set(&sba->mchans_current, 0);
-
/* Allocate mailbox channel array */
- sba->mchans = devm_kcalloc(&pdev->dev, sba->mchans_count,
+ sba->mchans = devm_kcalloc(&pdev->dev, mchans_count,
sizeof(*sba->mchans), GFP_KERNEL);
if (!sba->mchans)
return -ENOMEM;
/* Request mailbox channels */
+ sba->mchans_count = 0;
for (i = 0; i < mchans_count; i++) {
sba->mchans[i] = mbox_request_channel(&sba->client, i);
if (IS_ERR(sba->mchans[i])) {
@@ -1695,6 +1707,7 @@ static int sba_probe(struct platform_device *pdev)
}
sba->mchans_count++;
}
+ atomic_set(&sba->mchans_current, 0);
/* Find-out underlying mailbox device */
ret = of_parse_phandle_with_args(pdev->dev.of_node,
@@ -1723,15 +1736,15 @@ static int sba_probe(struct platform_device *pdev)
}
}
- /* Register DMA device with linux async framework */
- ret = sba_async_register(sba);
+ /* Prealloc channel resource */
+ ret = sba_prealloc_channel_resources(sba);
if (ret)
goto fail_free_mchans;
- /* Prealloc channel resource */
- ret = sba_prealloc_channel_resources(sba);
+ /* Register DMA device with linux async framework */
+ ret = sba_async_register(sba);
if (ret)
- goto fail_async_dev_unreg;
+ goto fail_free_resources;
/* Print device info */
dev_info(sba->dev, "%s using SBAv%d and %d mailbox channels",
@@ -1740,8 +1753,8 @@ static int sba_probe(struct platform_device *pdev)
return 0;
-fail_async_dev_unreg:
- dma_async_device_unregister(&sba->dma_dev);
+fail_free_resources:
+ sba_freeup_channel_resources(sba);
fail_free_mchans:
for (i = 0; i < sba->mchans_count; i++)
mbox_free_channel(sba->mchans[i]);
@@ -1753,10 +1766,10 @@ static int sba_remove(struct platform_device *pdev)
int i;
struct sba_device *sba = platform_get_drvdata(pdev);
- sba_freeup_channel_resources(sba);
-
dma_async_device_unregister(&sba->dma_dev);
+ sba_freeup_channel_resources(sba);
+
for (i = 0; i < sba->mchans_count; i++)
mbox_free_channel(sba->mchans[i]);
--
2.7.4
We should peek mbox channels when we are left with no free
sba_requests in sba_alloc_request()
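For reference, the peek helper added by this patch is just a loop over
all mailbox channels of the device (sketch, as in the diff below):

  static void sba_peek_mchans(struct sba_device *sba)
  {
	int mchan_idx;

	for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
		mbox_client_peek_data(sba->mchans[mchan_idx]);
  }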
Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Ray Jui <[email protected]>
Reviewed-by: Scott Branden <[email protected]>
---
drivers/dma/bcm-sba-raid.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index 6d15fed..321420b 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -199,6 +199,14 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
/* ====== General helper routines ===== */
+static void sba_peek_mchans(struct sba_device *sba)
+{
+ int mchan_idx;
+
+ for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
+ mbox_client_peek_data(sba->mchans[mchan_idx]);
+}
+
static struct sba_request *sba_alloc_request(struct sba_device *sba)
{
unsigned long flags;
@@ -210,8 +218,17 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
if (req)
list_move_tail(&req->node, &sba->reqs_alloc_list);
spin_unlock_irqrestore(&sba->reqs_lock, flags);
- if (!req)
+
+ if (!req) {
+ /*
+ * We have no more free requests so, we peek
+ * mailbox channels hoping few active requests
+ * would have completed which will create more
+ * room for new requests.
+ */
+ sba_peek_mchans(sba);
return NULL;
+ }
req->flags = SBA_REQUEST_STATE_ALLOCED;
req->first = req;
@@ -558,17 +575,15 @@ static enum dma_status sba_tx_status(struct dma_chan *dchan,
dma_cookie_t cookie,
struct dma_tx_state *txstate)
{
- int mchan_idx;
enum dma_status ret;
struct sba_device *sba = to_sba_device(dchan);
- for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
- mbox_client_peek_data(sba->mchans[mchan_idx]);
-
ret = dma_cookie_status(dchan, cookie, txstate);
if (ret == DMA_COMPLETE)
return ret;
+ sba_peek_mchans(sba);
+
return dma_cookie_status(dchan, cookie, txstate);
}
--
2.7.4
We should pre-ack the async tx descriptor at the time of allocating
an SBA request (just like other RAID drivers).
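The change itself is a one-line addition in sba_alloc_request(),
sketched here for context:

  dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
  async_tx_ack(&req->tx);	/* pre-ack, like other RAID offload drivers */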
Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Ray Jui <[email protected]>
Reviewed-by: Scott Branden <[email protected]>
---
drivers/dma/bcm-sba-raid.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index 321420b..db5e3db 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -236,6 +236,7 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
atomic_set(&req->next_pending_count, 1);
dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+ async_tx_ack(&req->tx);
return req;
}
--
2.7.4
This patch breaks sba_process_deferred_requests() into two parts,
sba_process_received_request() and _sba_process_pending_requests(),
for readability.
In addition, we remove the redundant SBA_REQUEST_STATE_RECEIVED state
and ensure that all requests in a chain are freed only after all of
them have been received.
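As a rough sketch of the resulting split (see the diff below),
sba_issue_pending() now only pushes pending requests to the mailbox
under reqs_lock, while completion handling moves into
sba_process_received_request() called from the mailbox callback:

  static void sba_issue_pending(struct dma_chan *dchan)
  {
	unsigned long flags;
	struct sba_device *sba = to_sba_device(dchan);

	spin_lock_irqsave(&sba->reqs_lock, flags);
	_sba_process_pending_requests(sba);
	spin_unlock_irqrestore(&sba->reqs_lock, flags);
  }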
Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Scott Branden <[email protected]>
---
drivers/dma/bcm-sba-raid.c | 130 ++++++++++++++++-----------------------------
1 file changed, 47 insertions(+), 83 deletions(-)
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index db5e3db..b92c756 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -97,9 +97,8 @@ enum sba_request_flags {
SBA_REQUEST_STATE_ALLOCED = 0x002,
SBA_REQUEST_STATE_PENDING = 0x004,
SBA_REQUEST_STATE_ACTIVE = 0x008,
- SBA_REQUEST_STATE_RECEIVED = 0x010,
- SBA_REQUEST_STATE_COMPLETED = 0x020,
- SBA_REQUEST_STATE_ABORTED = 0x040,
+ SBA_REQUEST_STATE_COMPLETED = 0x010,
+ SBA_REQUEST_STATE_ABORTED = 0x020,
SBA_REQUEST_STATE_MASK = 0x0ff,
SBA_REQUEST_FENCE = 0x100,
};
@@ -159,7 +158,6 @@ struct sba_device {
struct list_head reqs_alloc_list;
struct list_head reqs_pending_list;
struct list_head reqs_active_list;
- struct list_head reqs_received_list;
struct list_head reqs_completed_list;
struct list_head reqs_aborted_list;
struct list_head reqs_free_list;
@@ -306,17 +304,6 @@ static void _sba_complete_request(struct sba_device *sba,
sba->reqs_fence = false;
}
-/* Note: Must be called with sba->reqs_lock held */
-static void _sba_received_request(struct sba_device *sba,
- struct sba_request *req)
-{
- lockdep_assert_held(&sba->reqs_lock);
- req->flags = SBA_REQUEST_STATE_RECEIVED;
- list_move_tail(&req->node, &sba->reqs_received_list);
- if (list_empty(&sba->reqs_active_list))
- sba->reqs_fence = false;
-}
-
static void sba_free_chained_requests(struct sba_request *req)
{
unsigned long flags;
@@ -358,10 +345,6 @@ static void sba_cleanup_nonpending_requests(struct sba_device *sba)
list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node)
_sba_free_request(sba, req);
- /* Freeup all received request */
- list_for_each_entry_safe(req, req1, &sba->reqs_received_list, node)
- _sba_free_request(sba, req);
-
/* Freeup all completed request */
list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node)
_sba_free_request(sba, req);
@@ -417,22 +400,20 @@ static int sba_send_mbox_request(struct sba_device *sba,
return 0;
}
-static void sba_process_deferred_requests(struct sba_device *sba)
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_process_pending_requests(struct sba_device *sba)
{
int ret;
u32 count;
- unsigned long flags;
struct sba_request *req;
- struct dma_async_tx_descriptor *tx;
-
- spin_lock_irqsave(&sba->reqs_lock, flags);
-
- /* Count pending requests */
- count = 0;
- list_for_each_entry(req, &sba->reqs_pending_list, node)
- count++;
- /* Process pending requests */
+ /*
+ * Process few pending requests
+ *
+ * For now, we process (<number_of_mailbox_channels> * 8)
+ * number of requests at a time.
+ */
+ count = sba->mchans_count * 8;
while (!list_empty(&sba->reqs_pending_list) && count) {
/* Get the first pending request */
req = list_first_entry(&sba->reqs_pending_list,
@@ -443,11 +424,7 @@ static void sba_process_deferred_requests(struct sba_device *sba)
break;
/* Send request to mailbox channel */
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
ret = sba_send_mbox_request(sba, req);
- spin_lock_irqsave(&sba->reqs_lock, flags);
-
- /* If something went wrong then keep request pending */
if (ret < 0) {
_sba_pending_request(sba, req);
break;
@@ -455,20 +432,18 @@ static void sba_process_deferred_requests(struct sba_device *sba)
count--;
}
+}
- /* Count completed requests */
- count = 0;
- list_for_each_entry(req, &sba->reqs_completed_list, node)
- count++;
-
- /* Process completed requests */
- while (!list_empty(&sba->reqs_completed_list) && count) {
- req = list_first_entry(&sba->reqs_completed_list,
- struct sba_request, node);
- list_del_init(&req->node);
- tx = &req->tx;
+static void sba_process_received_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ unsigned long flags;
+ struct dma_async_tx_descriptor *tx;
+ struct sba_request *nreq, *first = req->first;
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ /* Process only after all chained requests are received */
+ if (!atomic_dec_return(&first->next_pending_count)) {
+ tx = &first->tx;
WARN_ON(tx->cookie < 0);
if (tx->cookie > 0) {
@@ -483,41 +458,31 @@ static void sba_process_deferred_requests(struct sba_device *sba)
spin_lock_irqsave(&sba->reqs_lock, flags);
- /* If waiting for 'ack' then move to completed list */
- if (!async_tx_test_ack(&req->tx))
- _sba_complete_request(sba, req);
- else
- _sba_free_request(sba, req);
-
- count--;
- }
-
- /* Re-check pending and completed work */
- count = 0;
- if (!list_empty(&sba->reqs_pending_list) ||
- !list_empty(&sba->reqs_completed_list))
- count = 1;
-
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
-}
-
-static void sba_process_received_request(struct sba_device *sba,
- struct sba_request *req)
-{
- unsigned long flags;
+ /* Free all requests chained to first request */
+ list_for_each_entry(nreq, &first->next, next)
+ _sba_free_request(sba, nreq);
+ INIT_LIST_HEAD(&first->next);
- spin_lock_irqsave(&sba->reqs_lock, flags);
+ /* The client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(tx))
+ _sba_complete_request(sba, first);
+ else
+ _sba_free_request(sba, first);
- /* Mark request as received */
- _sba_received_request(sba, req);
+ /* Cleanup completed requests */
+ list_for_each_entry_safe(req, nreq,
+ &sba->reqs_completed_list, node) {
+ if (async_tx_test_ack(&req->tx))
+ _sba_free_request(sba, req);
+ }
- /* Update request */
- if (!atomic_dec_return(&req->first->next_pending_count))
- _sba_complete_request(sba, req->first);
- if (req->first != req)
- _sba_free_request(sba, req);
+ /* Process pending requests */
+ _sba_process_pending_requests(sba);
- spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ }
}
/* ====== DMAENGINE callbacks ===== */
@@ -542,10 +507,13 @@ static int sba_device_terminate_all(struct dma_chan *dchan)
static void sba_issue_pending(struct dma_chan *dchan)
{
+ unsigned long flags;
struct sba_device *sba = to_sba_device(dchan);
- /* Process deferred requests */
- sba_process_deferred_requests(sba);
+ /* Process pending requests */
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ _sba_process_pending_requests(sba);
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
}
static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
@@ -1480,9 +1448,6 @@ static void sba_receive_message(struct mbox_client *cl, void *msg)
/* Process received request */
sba_process_received_request(sba, req);
-
- /* Process deferred requests */
- sba_process_deferred_requests(sba);
}
/* ====== Platform driver routines ===== */
@@ -1511,7 +1476,6 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
INIT_LIST_HEAD(&sba->reqs_alloc_list);
INIT_LIST_HEAD(&sba->reqs_pending_list);
INIT_LIST_HEAD(&sba->reqs_active_list);
- INIT_LIST_HEAD(&sba->reqs_received_list);
INIT_LIST_HEAD(&sba->reqs_completed_list);
INIT_LIST_HEAD(&sba->reqs_aborted_list);
INIT_LIST_HEAD(&sba->reqs_free_list);
--
2.7.4
This patch adds debugfs support to report driver stats, which in turn
will help debug hang situations.
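The stats are exposed through a devm-managed seq_file; a minimal
sketch of the debugfs hookup done in sba_probe() (names as in this
patch):

  sba->root = debugfs_create_dir(dev_name(sba->dev), NULL);
  sba->stats = debugfs_create_devm_seqfile(sba->dev, "stats", sba->root,
					   sba_debugfs_stats_show);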
Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Ray Jui <[email protected]>
Reviewed-by: Scott Branden <[email protected]>
---
drivers/dma/bcm-sba-raid.c | 82 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index b92c756..dd1e89e 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -36,6 +36,7 @@
*/
#include <linux/bitops.h>
+#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/list.h>
@@ -161,6 +162,9 @@ struct sba_device {
struct list_head reqs_completed_list;
struct list_head reqs_aborted_list;
struct list_head reqs_free_list;
+ /* DebugFS directory entries */
+ struct dentry *root;
+ struct dentry *stats;
};
/* ====== Command helper routines ===== */
@@ -485,6 +489,45 @@ static void sba_process_received_request(struct sba_device *sba,
}
}
+static void sba_write_stats_in_seqfile(struct sba_device *sba,
+ struct seq_file *file)
+{
+ unsigned long flags;
+ struct sba_request *req;
+ u32 free_count = 0, alloced_count = 0, pending_count = 0;
+ u32 active_count = 0, aborted_count = 0, completed_count = 0;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ list_for_each_entry(req, &sba->reqs_free_list, node)
+ free_count++;
+
+ list_for_each_entry(req, &sba->reqs_alloc_list, node)
+ alloced_count++;
+
+ list_for_each_entry(req, &sba->reqs_pending_list, node)
+ pending_count++;
+
+ list_for_each_entry(req, &sba->reqs_active_list, node)
+ active_count++;
+
+ list_for_each_entry(req, &sba->reqs_aborted_list, node)
+ aborted_count++;
+
+ list_for_each_entry(req, &sba->reqs_completed_list, node)
+ completed_count++;
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ seq_printf(file, "maximum requests = %d\n", sba->max_req);
+ seq_printf(file, "free requests = %d\n", free_count);
+ seq_printf(file, "alloced requests = %d\n", alloced_count);
+ seq_printf(file, "pending requests = %d\n", pending_count);
+ seq_printf(file, "active requests = %d\n", active_count);
+ seq_printf(file, "aborted requests = %d\n", aborted_count);
+ seq_printf(file, "completed requests = %d\n", completed_count);
+}
+
/* ====== DMAENGINE callbacks ===== */
static void sba_free_chan_resources(struct dma_chan *dchan)
@@ -1450,6 +1493,19 @@ static void sba_receive_message(struct mbox_client *cl, void *msg)
sba_process_received_request(sba, req);
}
+/* ====== Debugfs callbacks ====== */
+
+static int sba_debugfs_stats_show(struct seq_file *file, void *offset)
+{
+ struct platform_device *pdev = to_platform_device(file->private);
+ struct sba_device *sba = platform_get_drvdata(pdev);
+
+ /* Write stats in file */
+ sba_write_stats_in_seqfile(sba, file);
+
+ return 0;
+}
+
/* ====== Platform driver routines ===== */
static int sba_prealloc_channel_resources(struct sba_device *sba)
@@ -1721,10 +1777,30 @@ static int sba_probe(struct platform_device *pdev)
if (ret)
goto fail_free_mchans;
+ /* Check availability of debugfs */
+ if (!debugfs_initialized())
+ goto skip_debugfs;
+
+ /* Create debugfs root entry */
+ sba->root = debugfs_create_dir(dev_name(sba->dev), NULL);
+ if (IS_ERR_OR_NULL(sba->root)) {
+ ret = PTR_ERR_OR_ZERO(sba->root);
+ goto fail_free_resources;
+ }
+
+ /* Create debugfs stats entry */
+ sba->stats = debugfs_create_devm_seqfile(sba->dev, "stats", sba->root,
+ sba_debugfs_stats_show);
+ if (IS_ERR_OR_NULL(sba->stats)) {
+ ret = PTR_ERR_OR_ZERO(sba->stats);
+ goto fail_free_debugfs_root;
+ }
+skip_debugfs:
+
/* Register DMA device with linux async framework */
ret = sba_async_register(sba);
if (ret)
- goto fail_free_resources;
+ goto fail_free_debugfs_root;
/* Print device info */
dev_info(sba->dev, "%s using SBAv%d and %d mailbox channels",
@@ -1733,6 +1809,8 @@ static int sba_probe(struct platform_device *pdev)
return 0;
+fail_free_debugfs_root:
+ debugfs_remove_recursive(sba->root);
fail_free_resources:
sba_freeup_channel_resources(sba);
fail_free_mchans:
@@ -1748,6 +1826,8 @@ static int sba_remove(struct platform_device *pdev)
dma_async_device_unregister(&sba->dma_dev);
+ debugfs_remove_recursive(sba->root);
+
sba_freeup_channel_resources(sba);
for (i = 0; i < sba->mchans_count; i++)
--
2.7.4
This patch adds Broadcom SBA-RAID DT nodes for the Stingray SoC.
The Stingray SoC has a total of 32 SBA-RAID FlexRM rings and 8 CPUs,
so we create 8 SBA-RAID instances (one for each CPU). This way the
Linux DMAENGINE framework will have one SBA-RAID DMA device per CPU.
Signed-off-by: Anup Patel <[email protected]>
---
.../boot/dts/broadcom/stingray/stingray-fs4.dtsi | 64 ++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-fs4.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-fs4.dtsi
index 1f927c4..8bf1dc6 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray-fs4.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-fs4.dtsi
@@ -51,4 +51,68 @@
msi-parent = <&gic_its 0x4300>;
#mbox-cells = <3>;
};
+
+ raid0: raid@0 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 0 0x1 0xff00>,
+ <&raid_mbox 1 0x1 0xff00>,
+ <&raid_mbox 2 0x1 0xff00>,
+ <&raid_mbox 3 0x1 0xff00>;
+ };
+
+ raid1: raid@1 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 4 0x1 0xff00>,
+ <&raid_mbox 5 0x1 0xff00>,
+ <&raid_mbox 6 0x1 0xff00>,
+ <&raid_mbox 7 0x1 0xff00>;
+ };
+
+ raid2: raid@2 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 8 0x1 0xff00>,
+ <&raid_mbox 9 0x1 0xff00>,
+ <&raid_mbox 10 0x1 0xff00>,
+ <&raid_mbox 11 0x1 0xff00>;
+ };
+
+ raid3: raid@3 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 12 0x1 0xff00>,
+ <&raid_mbox 13 0x1 0xff00>,
+ <&raid_mbox 14 0x1 0xff00>,
+ <&raid_mbox 15 0x1 0xff00>;
+ };
+
+ raid4: raid@4 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 16 0x1 0xff00>,
+ <&raid_mbox 17 0x1 0xff00>,
+ <&raid_mbox 18 0x1 0xff00>,
+ <&raid_mbox 19 0x1 0xff00>;
+ };
+
+ raid5: raid@5 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 20 0x1 0xff00>,
+ <&raid_mbox 21 0x1 0xff00>,
+ <&raid_mbox 22 0x1 0xff00>,
+ <&raid_mbox 23 0x1 0xff00>;
+ };
+
+ raid6: raid@6 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 24 0x1 0xff00>,
+ <&raid_mbox 25 0x1 0xff00>,
+ <&raid_mbox 26 0x1 0xff00>,
+ <&raid_mbox 27 0x1 0xff00>;
+ };
+
+ raid7: raid@7 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 28 0x1 0xff00>,
+ <&raid_mbox 29 0x1 0xff00>,
+ <&raid_mbox 30 0x1 0xff00>,
+ <&raid_mbox 31 0x1 0xff00>;
+ };
};
--
2.7.4
On Wed, Jul 26, 2017 at 11:06:38AM +0530, Anup Patel wrote:
> This patchset makes various improvements to the Broadcom SBA-RAID
> driver and also adds SBA-RAID DT nodes for the Stingray SoC.
>
> The patches are based on "[PATCH v2 0/7] FlexRM driver improvements"
> and can also be found at sba-raid-imp-v1 branch of
> https://github.com/Broadcom/arm64-linux.git
and I can't apply this series without these?
>
> Anup Patel (6):
> dma: bcm-sba-raid: Improve memory allocation in SBA RAID driver
> dma: bcm-sba-raid: Peek mbox when we are left with no free requests
> dma: bcm-sba-raid: pre-ack async tx descriptor
> dma: bcm-sba-raid: Break sba_process_deferred_requests() into two
> parts
> dma: bcm-sba-raid: Add debugfs support
why are these tagged dma? I have told you explicitly to fix this!!
> arm64: dts: Add SBA-RAID DT nodes for Stingray SoC
>
> .../boot/dts/broadcom/stingray/stingray-fs4.dtsi | 64 +++
> drivers/dma/bcm-sba-raid.c | 527 ++++++++++++---------
> 2 files changed, 364 insertions(+), 227 deletions(-)
>
> --
> 2.7.4
>
--
~Vinod
On Wed, Jul 26, 2017 at 11:06:39AM +0530, Anup Patel wrote:
> This patch improves memory allocation in the SBA RAID driver in the
> following ways:
> 1. Simplify struct sba_request to reduce memory consumption
what is the simplification?? You need to document that
> 2. Allocate SBA resources before registering the DMA device
what is the motivation for that?
So, reading this log doesn't help me know what to expect in this patch.
>
> Signed-off-by: Anup Patel <[email protected]>
> Reviewed-by: Scott Branden <[email protected]>
> Reviewed-by: Ray Jui <[email protected]>
> Reviewed-by: Vikram Prakash <[email protected]>
> ---
> drivers/dma/bcm-sba-raid.c | 439 +++++++++++++++++++++++----------------------
> 1 file changed, 226 insertions(+), 213 deletions(-)
>
> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
> index e41bbc7..6d15fed 100644
> --- a/drivers/dma/bcm-sba-raid.c
> +++ b/drivers/dma/bcm-sba-raid.c
> @@ -48,7 +48,8 @@
>
> #include "dmaengine.h"
>
> -/* SBA command related defines */
> +/* ====== Driver macros and defines ===== */
why this noise, seems unrelated to the change!
> +
> #define SBA_TYPE_SHIFT 48
> #define SBA_TYPE_MASK GENMASK(1, 0)
> #define SBA_TYPE_A 0x0
> @@ -82,39 +83,41 @@
> #define SBA_CMD_WRITE_BUFFER 0xc
> #define SBA_CMD_GALOIS 0xe
>
> -/* Driver helper macros */
> +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
> +
> #define to_sba_request(tx) \
> container_of(tx, struct sba_request, tx)
> #define to_sba_device(dchan) \
> container_of(dchan, struct sba_device, dma_chan)
>
> -enum sba_request_state {
> - SBA_REQUEST_STATE_FREE = 1,
> - SBA_REQUEST_STATE_ALLOCED = 2,
> - SBA_REQUEST_STATE_PENDING = 3,
> - SBA_REQUEST_STATE_ACTIVE = 4,
> - SBA_REQUEST_STATE_RECEIVED = 5,
> - SBA_REQUEST_STATE_COMPLETED = 6,
> - SBA_REQUEST_STATE_ABORTED = 7,
> +/* ===== Driver data structures ===== */
> +
> +enum sba_request_flags {
> + SBA_REQUEST_STATE_FREE = 0x001,
> + SBA_REQUEST_STATE_ALLOCED = 0x002,
> + SBA_REQUEST_STATE_PENDING = 0x004,
> + SBA_REQUEST_STATE_ACTIVE = 0x008,
> + SBA_REQUEST_STATE_RECEIVED = 0x010,
> + SBA_REQUEST_STATE_COMPLETED = 0x020,
> + SBA_REQUEST_STATE_ABORTED = 0x040,
> + SBA_REQUEST_STATE_MASK = 0x0ff,
> + SBA_REQUEST_FENCE = 0x100,
how does this help with memory allocation?
> };
>
> struct sba_request {
> /* Global state */
> struct list_head node;
> struct sba_device *sba;
> - enum sba_request_state state;
> - bool fence;
> + u32 flags;
> /* Chained requests management */
> struct sba_request *first;
> struct list_head next;
> - unsigned int next_count;
> atomic_t next_pending_count;
> /* BRCM message data */
> - void *resp;
> - dma_addr_t resp_dma;
> - struct brcm_sba_command *cmds;
> struct brcm_message msg;
> struct dma_async_tx_descriptor tx;
> + /* SBA commands */
> + struct brcm_sba_command cmds[0];
> };
>
> enum sba_version {
> @@ -128,11 +131,11 @@ struct sba_device {
> /* DT configuration parameters */
> enum sba_version ver;
> /* Derived configuration parameters */
> - u32 max_req;
> u32 hw_buf_size;
> u32 hw_resp_size;
> u32 max_pq_coefs;
> u32 max_pq_srcs;
> + u32 max_req;
> u32 max_cmd_per_req;
> u32 max_xor_srcs;
> u32 max_resp_pool_size;
> @@ -152,7 +155,6 @@ struct sba_device {
> void *cmds_base;
> dma_addr_t cmds_dma_base;
> spinlock_t reqs_lock;
> - struct sba_request *reqs;
> bool reqs_fence;
> struct list_head reqs_alloc_list;
> struct list_head reqs_pending_list;
> @@ -161,10 +163,9 @@ struct sba_device {
> struct list_head reqs_completed_list;
> struct list_head reqs_aborted_list;
> struct list_head reqs_free_list;
> - int reqs_free_count;
> };
>
> -/* ====== SBA command helper routines ===== */
> +/* ====== Command helper routines ===== */
more noise..
>
> static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
> {
> @@ -196,7 +197,7 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
> ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
> }
>
> -/* ====== Channel resource management routines ===== */
> +/* ====== General helper routines ===== */
and it keeps getting more interesting, sigh!!!
>
> static struct sba_request *sba_alloc_request(struct sba_device *sba)
> {
> @@ -204,24 +205,20 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
> struct sba_request *req = NULL;
>
> spin_lock_irqsave(&sba->reqs_lock, flags);
> -
> req = list_first_entry_or_null(&sba->reqs_free_list,
> struct sba_request, node);
> - if (req) {
> + if (req)
> list_move_tail(&req->node, &sba->reqs_alloc_list);
> - req->state = SBA_REQUEST_STATE_ALLOCED;
> - req->fence = false;
> - req->first = req;
> - INIT_LIST_HEAD(&req->next);
> - req->next_count = 1;
> - atomic_set(&req->next_pending_count, 1);
> -
> - sba->reqs_free_count--;
> + spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + if (!req)
> + return NULL;
>
> - dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
> - }
> + req->flags = SBA_REQUEST_STATE_ALLOCED;
> + req->first = req;
> + INIT_LIST_HEAD(&req->next);
> + atomic_set(&req->next_pending_count, 1);
Can't fathom how this helps with memory allocation
>
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
>
> return req;
> }
> @@ -231,7 +228,8 @@ static void _sba_pending_request(struct sba_device *sba,
> struct sba_request *req)
> {
> lockdep_assert_held(&sba->reqs_lock);
> - req->state = SBA_REQUEST_STATE_PENDING;
> + req->flags &= ~SBA_REQUEST_STATE_MASK;
> + req->flags |= SBA_REQUEST_STATE_PENDING;
> list_move_tail(&req->node, &sba->reqs_pending_list);
> if (list_empty(&sba->reqs_active_list))
> sba->reqs_fence = false;
> @@ -246,9 +244,10 @@ static bool _sba_active_request(struct sba_device *sba,
> sba->reqs_fence = false;
> if (sba->reqs_fence)
> return false;
> - req->state = SBA_REQUEST_STATE_ACTIVE;
> + req->flags &= ~SBA_REQUEST_STATE_MASK;
> + req->flags |= SBA_REQUEST_STATE_ACTIVE;
> list_move_tail(&req->node, &sba->reqs_active_list);
> - if (req->fence)
> + if (req->flags & SBA_REQUEST_FENCE)
> sba->reqs_fence = true;
> return true;
> }
> @@ -258,7 +257,8 @@ static void _sba_abort_request(struct sba_device *sba,
> struct sba_request *req)
> {
> lockdep_assert_held(&sba->reqs_lock);
> - req->state = SBA_REQUEST_STATE_ABORTED;
> + req->flags &= ~SBA_REQUEST_STATE_MASK;
> + req->flags |= SBA_REQUEST_STATE_ABORTED;
> list_move_tail(&req->node, &sba->reqs_aborted_list);
> if (list_empty(&sba->reqs_active_list))
> sba->reqs_fence = false;
> @@ -269,42 +269,34 @@ static void _sba_free_request(struct sba_device *sba,
> struct sba_request *req)
> {
> lockdep_assert_held(&sba->reqs_lock);
> - req->state = SBA_REQUEST_STATE_FREE;
> + req->flags &= ~SBA_REQUEST_STATE_MASK;
> + req->flags |= SBA_REQUEST_STATE_FREE;
> list_move_tail(&req->node, &sba->reqs_free_list);
> if (list_empty(&sba->reqs_active_list))
> sba->reqs_fence = false;
> - sba->reqs_free_count++;
> }
>
> -static void sba_received_request(struct sba_request *req)
> +/* Note: Must be called with sba->reqs_lock held */
> +static void _sba_complete_request(struct sba_device *sba,
> + struct sba_request *req)
> {
> - unsigned long flags;
> - struct sba_device *sba = req->sba;
> -
> - spin_lock_irqsave(&sba->reqs_lock, flags);
> - req->state = SBA_REQUEST_STATE_RECEIVED;
> - list_move_tail(&req->node, &sba->reqs_received_list);
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + lockdep_assert_held(&sba->reqs_lock);
> + req->flags &= ~SBA_REQUEST_STATE_MASK;
> + req->flags |= SBA_REQUEST_STATE_COMPLETED;
> + list_move_tail(&req->node, &sba->reqs_completed_list);
> + if (list_empty(&sba->reqs_active_list))
> + sba->reqs_fence = false;
Ok, I am going to stop here; sorry, I can't review it further.
Please split stuff up into logical, incremental patches and resubmit...
--
~Vinod
On Wed, Jul 26, 2017 at 11:06:40AM +0530, Anup Patel wrote:
> We should peek mbox channels when we are left with no free
> sba_requests in sba_alloc_request()
and why in the world should we do that? How does that help??
>
> Signed-off-by: Anup Patel <[email protected]>
> Reviewed-by: Ray Jui <[email protected]>
> Reviewed-by: Scott Branden <[email protected]>
> ---
> drivers/dma/bcm-sba-raid.c | 25 ++++++++++++++++++++-----
> 1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
> index 6d15fed..321420b 100644
> --- a/drivers/dma/bcm-sba-raid.c
> +++ b/drivers/dma/bcm-sba-raid.c
> @@ -199,6 +199,14 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
>
> /* ====== General helper routines ===== */
>
> +static void sba_peek_mchans(struct sba_device *sba)
> +{
> + int mchan_idx;
> +
> + for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
> + mbox_client_peek_data(sba->mchans[mchan_idx]);
> +}
> +
> static struct sba_request *sba_alloc_request(struct sba_device *sba)
> {
> unsigned long flags;
> @@ -210,8 +218,17 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
> if (req)
> list_move_tail(&req->node, &sba->reqs_alloc_list);
> spin_unlock_irqrestore(&sba->reqs_lock, flags);
> - if (!req)
> +
> + if (!req) {
> + /*
> + * We have no more free requests so, we peek
> + * mailbox channels hoping few active requests
> + * would have completed which will create more
> + * room for new requests.
> + */
> + sba_peek_mchans(sba);
> return NULL;
> + }
>
> req->flags = SBA_REQUEST_STATE_ALLOCED;
> req->first = req;
> @@ -558,17 +575,15 @@ static enum dma_status sba_tx_status(struct dma_chan *dchan,
> dma_cookie_t cookie,
> struct dma_tx_state *txstate)
> {
> - int mchan_idx;
> enum dma_status ret;
> struct sba_device *sba = to_sba_device(dchan);
>
> - for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
> - mbox_client_peek_data(sba->mchans[mchan_idx]);
> -
> ret = dma_cookie_status(dchan, cookie, txstate);
> if (ret == DMA_COMPLETE)
> return ret;
>
> + sba_peek_mchans(sba);
> +
> return dma_cookie_status(dchan, cookie, txstate);
> }
>
> --
> 2.7.4
>
--
~Vinod
On Wed, Jul 26, 2017 at 11:06:42AM +0530, Anup Patel wrote:
> This patch breaks sba_process_deferred_requests() into two parts
> sba_process_received_request() and _sba_process_pending_requests()
> for readability.
>
> In addition,
that should be a separate patch then... no?
> we remove redundant SBA_REQUEST_STATE_RECEIVED state
this should be one more...
> and ensure that all requests in a chained request should be freed
> only after all have been received.
and then this, right?
>
> Signed-off-by: Anup Patel <[email protected]>
> Reviewed-by: Scott Branden <[email protected]>
> ---
> drivers/dma/bcm-sba-raid.c | 130 ++++++++++++++++-----------------------------
> 1 file changed, 47 insertions(+), 83 deletions(-)
>
> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
> index db5e3db..b92c756 100644
> --- a/drivers/dma/bcm-sba-raid.c
> +++ b/drivers/dma/bcm-sba-raid.c
> @@ -97,9 +97,8 @@ enum sba_request_flags {
> SBA_REQUEST_STATE_ALLOCED = 0x002,
> SBA_REQUEST_STATE_PENDING = 0x004,
> SBA_REQUEST_STATE_ACTIVE = 0x008,
> - SBA_REQUEST_STATE_RECEIVED = 0x010,
> - SBA_REQUEST_STATE_COMPLETED = 0x020,
> - SBA_REQUEST_STATE_ABORTED = 0x040,
> + SBA_REQUEST_STATE_COMPLETED = 0x010,
> + SBA_REQUEST_STATE_ABORTED = 0x020,
so we changed this in patch 1, only to change it here. Why....!!!!
so let me stop here again and repeat myself about splitting stuff up,
blah blah, good patches, blah blah, read the Documentation, blah blah.. and
hope someone's listening :(
> SBA_REQUEST_STATE_MASK = 0x0ff,
> SBA_REQUEST_FENCE = 0x100,
> };
> @@ -159,7 +158,6 @@ struct sba_device {
> struct list_head reqs_alloc_list;
> struct list_head reqs_pending_list;
> struct list_head reqs_active_list;
> - struct list_head reqs_received_list;
> struct list_head reqs_completed_list;
> struct list_head reqs_aborted_list;
> struct list_head reqs_free_list;
> @@ -306,17 +304,6 @@ static void _sba_complete_request(struct sba_device *sba,
> sba->reqs_fence = false;
> }
>
> -/* Note: Must be called with sba->reqs_lock held */
> -static void _sba_received_request(struct sba_device *sba,
> - struct sba_request *req)
> -{
> - lockdep_assert_held(&sba->reqs_lock);
> - req->flags = SBA_REQUEST_STATE_RECEIVED;
> - list_move_tail(&req->node, &sba->reqs_received_list);
> - if (list_empty(&sba->reqs_active_list))
> - sba->reqs_fence = false;
> -}
> -
> static void sba_free_chained_requests(struct sba_request *req)
> {
> unsigned long flags;
> @@ -358,10 +345,6 @@ static void sba_cleanup_nonpending_requests(struct sba_device *sba)
> list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node)
> _sba_free_request(sba, req);
>
> - /* Freeup all received request */
> - list_for_each_entry_safe(req, req1, &sba->reqs_received_list, node)
> - _sba_free_request(sba, req);
> -
> /* Freeup all completed request */
> list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node)
> _sba_free_request(sba, req);
> @@ -417,22 +400,20 @@ static int sba_send_mbox_request(struct sba_device *sba,
> return 0;
> }
>
> -static void sba_process_deferred_requests(struct sba_device *sba)
> +/* Note: Must be called with sba->reqs_lock held */
> +static void _sba_process_pending_requests(struct sba_device *sba)
> {
> int ret;
> u32 count;
> - unsigned long flags;
> struct sba_request *req;
> - struct dma_async_tx_descriptor *tx;
> -
> - spin_lock_irqsave(&sba->reqs_lock, flags);
> -
> - /* Count pending requests */
> - count = 0;
> - list_for_each_entry(req, &sba->reqs_pending_list, node)
> - count++;
>
> - /* Process pending requests */
> + /*
> + * Process few pending requests
> + *
> + * For now, we process (<number_of_mailbox_channels> * 8)
> + * number of requests at a time.
> + */
> + count = sba->mchans_count * 8;
> while (!list_empty(&sba->reqs_pending_list) && count) {
> /* Get the first pending request */
> req = list_first_entry(&sba->reqs_pending_list,
> @@ -443,11 +424,7 @@ static void sba_process_deferred_requests(struct sba_device *sba)
> break;
>
> /* Send request to mailbox channel */
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> ret = sba_send_mbox_request(sba, req);
> - spin_lock_irqsave(&sba->reqs_lock, flags);
> -
> - /* If something went wrong then keep request pending */
> if (ret < 0) {
> _sba_pending_request(sba, req);
> break;
> @@ -455,20 +432,18 @@ static void sba_process_deferred_requests(struct sba_device *sba)
>
> count--;
> }
> +}
>
> - /* Count completed requests */
> - count = 0;
> - list_for_each_entry(req, &sba->reqs_completed_list, node)
> - count++;
> -
> - /* Process completed requests */
> - while (!list_empty(&sba->reqs_completed_list) && count) {
> - req = list_first_entry(&sba->reqs_completed_list,
> - struct sba_request, node);
> - list_del_init(&req->node);
> - tx = &req->tx;
> +static void sba_process_received_request(struct sba_device *sba,
> + struct sba_request *req)
> +{
> + unsigned long flags;
> + struct dma_async_tx_descriptor *tx;
> + struct sba_request *nreq, *first = req->first;
>
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + /* Process only after all chained requests are received */
> + if (!atomic_dec_return(&first->next_pending_count)) {
> + tx = &first->tx;
>
> WARN_ON(tx->cookie < 0);
> if (tx->cookie > 0) {
> @@ -483,41 +458,31 @@ static void sba_process_deferred_requests(struct sba_device *sba)
>
> spin_lock_irqsave(&sba->reqs_lock, flags);
>
> - /* If waiting for 'ack' then move to completed list */
> - if (!async_tx_test_ack(&req->tx))
> - _sba_complete_request(sba, req);
> - else
> - _sba_free_request(sba, req);
> -
> - count--;
> - }
> -
> - /* Re-check pending and completed work */
> - count = 0;
> - if (!list_empty(&sba->reqs_pending_list) ||
> - !list_empty(&sba->reqs_completed_list))
> - count = 1;
> -
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> -}
> -
> -static void sba_process_received_request(struct sba_device *sba,
> - struct sba_request *req)
> -{
> - unsigned long flags;
> + /* Free all requests chained to first request */
> + list_for_each_entry(nreq, &first->next, next)
> + _sba_free_request(sba, nreq);
> + INIT_LIST_HEAD(&first->next);
>
> - spin_lock_irqsave(&sba->reqs_lock, flags);
> + /* The client is allowed to attach dependent operations
> + * until 'ack' is set
> + */
> + if (!async_tx_test_ack(tx))
> + _sba_complete_request(sba, first);
> + else
> + _sba_free_request(sba, first);
>
> - /* Mark request as received */
> - _sba_received_request(sba, req);
> + /* Cleanup completed requests */
> + list_for_each_entry_safe(req, nreq,
> + &sba->reqs_completed_list, node) {
> + if (async_tx_test_ack(&req->tx))
> + _sba_free_request(sba, req);
> + }
>
> - /* Update request */
> - if (!atomic_dec_return(&req->first->next_pending_count))
> - _sba_complete_request(sba, req->first);
> - if (req->first != req)
> - _sba_free_request(sba, req);
> + /* Process pending requests */
> + _sba_process_pending_requests(sba);
>
> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + spin_unlock_irqrestore(&sba->reqs_lock, flags);
> + }
> }
>
> /* ====== DMAENGINE callbacks ===== */
> @@ -542,10 +507,13 @@ static int sba_device_terminate_all(struct dma_chan *dchan)
>
> static void sba_issue_pending(struct dma_chan *dchan)
> {
> + unsigned long flags;
> struct sba_device *sba = to_sba_device(dchan);
>
> - /* Process deferred requests */
> - sba_process_deferred_requests(sba);
> + /* Process pending requests */
> + spin_lock_irqsave(&sba->reqs_lock, flags);
> + _sba_process_pending_requests(sba);
> + spin_unlock_irqrestore(&sba->reqs_lock, flags);
> }
>
> static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
> @@ -1480,9 +1448,6 @@ static void sba_receive_message(struct mbox_client *cl, void *msg)
>
> /* Process received request */
> sba_process_received_request(sba, req);
> -
> - /* Process deferred requests */
> - sba_process_deferred_requests(sba);
> }
>
> /* ====== Platform driver routines ===== */
> @@ -1511,7 +1476,6 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
> INIT_LIST_HEAD(&sba->reqs_alloc_list);
> INIT_LIST_HEAD(&sba->reqs_pending_list);
> INIT_LIST_HEAD(&sba->reqs_active_list);
> - INIT_LIST_HEAD(&sba->reqs_received_list);
> INIT_LIST_HEAD(&sba->reqs_completed_list);
> INIT_LIST_HEAD(&sba->reqs_aborted_list);
> INIT_LIST_HEAD(&sba->reqs_free_list);
> --
> 2.7.4
>
--
~Vinod
On Wed, Jul 26, 2017 at 10:33 PM, Vinod Koul <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 11:06:38AM +0530, Anup Patel wrote:
>> This patchset does various improvments to Broadcom SBA-RAID
>> driver and also adds SBA-RAID DT nodes for Stingray SOC.
>>
>> The patches are based on "[PATCH v2 0/7] FlexRM driver improvements"
>> and can also be found at sba-raid-imp-v1 branch of
>> https://github.com/Broadcom/arm64-linux.git
>
> and I can't apply this series without these?
The dependency is only because of the last DT patch for Stingray.
We are going to send a separate patchset for the Stingray DT changes,
so I will drop PATCH6 here and make it part of that Stingray DT patchset.
>
>>
>> Anup Patel (6):
>> dma: bcm-sba-raid: Improve memory allocation in SBA RAID driver
>> dma: bcm-sba-raid: Peek mbox when we are left with no free requests
>> dma: bcm-sba-raid: pre-ack async tx descriptor
>> dma: bcm-sba-raid: Break sba_process_deferred_requests() into two
>> parts
>> dma: bcm-sba-raid: Add debugfs support
>
> why are these tagged dma, I have told you explicitly to fix this!!
Sure, I will remove the "dma:" tag in the next revision.
Regards,
Anup
On Wed, Jul 26, 2017 at 10:39 PM, Vinod Koul <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 11:06:39AM +0530, Anup Patel wrote:
>> This patch improves memory allocation in SBA RAID driver in
>> following ways:
>> 1. Simplify struct sba_request to reduce memory consumption
>
> what is the simplification?? You need to document that
OK, I will make it a separate patch with a detailed commit description.
>
>> 2. Allocate sba resources before registering dma device
>
> what is the motivation for that
>
> So, reading this log doesn't help me to know what to expect in this patch
OK, this also requires a separate patch with a detailed commit description.
>
>>
>> Signed-off-by: Anup Patel <[email protected]>
>> Reviewed-by: Scott Branden <[email protected]>
>> Reviewed-by: Ray Jui <[email protected]>
>> Reviewed-by: Vikram Prakash <[email protected]>
>> ---
>> drivers/dma/bcm-sba-raid.c | 439 +++++++++++++++++++++++----------------------
>> 1 file changed, 226 insertions(+), 213 deletions(-)
>>
>> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
>> index e41bbc7..6d15fed 100644
>> --- a/drivers/dma/bcm-sba-raid.c
>> +++ b/drivers/dma/bcm-sba-raid.c
>> @@ -48,7 +48,8 @@
>>
>> #include "dmaengine.h"
>>
>> -/* SBA command related defines */
>> +/* ====== Driver macros and defines ===== */
>
> why this noise, seems unrelated to the change!
This is just minor beautification. Again, I will put this
in a separate patch.
>
>> +
>> #define SBA_TYPE_SHIFT 48
>> #define SBA_TYPE_MASK GENMASK(1, 0)
>> #define SBA_TYPE_A 0x0
>> @@ -82,39 +83,41 @@
>> #define SBA_CMD_WRITE_BUFFER 0xc
>> #define SBA_CMD_GALOIS 0xe
>>
>> -/* Driver helper macros */
>> +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
>> +
>> #define to_sba_request(tx) \
>> container_of(tx, struct sba_request, tx)
>> #define to_sba_device(dchan) \
>> container_of(dchan, struct sba_device, dma_chan)
>>
>> -enum sba_request_state {
>> - SBA_REQUEST_STATE_FREE = 1,
>> - SBA_REQUEST_STATE_ALLOCED = 2,
>> - SBA_REQUEST_STATE_PENDING = 3,
>> - SBA_REQUEST_STATE_ACTIVE = 4,
>> - SBA_REQUEST_STATE_RECEIVED = 5,
>> - SBA_REQUEST_STATE_COMPLETED = 6,
>> - SBA_REQUEST_STATE_ABORTED = 7,
>> +/* ===== Driver data structures ===== */
>> +
>> +enum sba_request_flags {
>> + SBA_REQUEST_STATE_FREE = 0x001,
>> + SBA_REQUEST_STATE_ALLOCED = 0x002,
>> + SBA_REQUEST_STATE_PENDING = 0x004,
>> + SBA_REQUEST_STATE_ACTIVE = 0x008,
>> + SBA_REQUEST_STATE_RECEIVED = 0x010,
>> + SBA_REQUEST_STATE_COMPLETED = 0x020,
>> + SBA_REQUEST_STATE_ABORTED = 0x040,
>> + SBA_REQUEST_STATE_MASK = 0x0ff,
>> + SBA_REQUEST_FENCE = 0x100,
>
> how does this help in mem alloctn?
>
>> };
>>
>> struct sba_request {
>> /* Global state */
>> struct list_head node;
>> struct sba_device *sba;
>> - enum sba_request_state state;
>> - bool fence;
>> + u32 flags;
>> /* Chained requests management */
>> struct sba_request *first;
>> struct list_head next;
>> - unsigned int next_count;
>> atomic_t next_pending_count;
>> /* BRCM message data */
>> - void *resp;
>> - dma_addr_t resp_dma;
>> - struct brcm_sba_command *cmds;
>> struct brcm_message msg;
>> struct dma_async_tx_descriptor tx;
>> + /* SBA commands */
>> + struct brcm_sba_command cmds[0];
>> };
>>
>> enum sba_version {
>> @@ -128,11 +131,11 @@ struct sba_device {
>> /* DT configuration parameters */
>> enum sba_version ver;
>> /* Derived configuration parameters */
>> - u32 max_req;
>> u32 hw_buf_size;
>> u32 hw_resp_size;
>> u32 max_pq_coefs;
>> u32 max_pq_srcs;
>> + u32 max_req;
>> u32 max_cmd_per_req;
>> u32 max_xor_srcs;
>> u32 max_resp_pool_size;
>> @@ -152,7 +155,6 @@ struct sba_device {
>> void *cmds_base;
>> dma_addr_t cmds_dma_base;
>> spinlock_t reqs_lock;
>> - struct sba_request *reqs;
>> bool reqs_fence;
>> struct list_head reqs_alloc_list;
>> struct list_head reqs_pending_list;
>> @@ -161,10 +163,9 @@ struct sba_device {
>> struct list_head reqs_completed_list;
>> struct list_head reqs_aborted_list;
>> struct list_head reqs_free_list;
>> - int reqs_free_count;
>> };
>>
>> -/* ====== SBA command helper routines ===== */
>> +/* ====== Command helper routines ===== */
>
> more noise..
>
>>
>> static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
>> {
>> @@ -196,7 +197,7 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
>> ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
>> }
>>
>> -/* ====== Channel resource management routines ===== */
>> +/* ====== General helper routines ===== */
>
> and it keeps getting more interesting, sigh!!!
>
>>
>> static struct sba_request *sba_alloc_request(struct sba_device *sba)
>> {
>> @@ -204,24 +205,20 @@ static struct sba_request *sba_alloc_request(struct sba_device *sba)
>> struct sba_request *req = NULL;
>>
>> spin_lock_irqsave(&sba->reqs_lock, flags);
>> -
>> req = list_first_entry_or_null(&sba->reqs_free_list,
>> struct sba_request, node);
>> - if (req) {
>> + if (req)
>> list_move_tail(&req->node, &sba->reqs_alloc_list);
>> - req->state = SBA_REQUEST_STATE_ALLOCED;
>> - req->fence = false;
>> - req->first = req;
>> - INIT_LIST_HEAD(&req->next);
>> - req->next_count = 1;
>> - atomic_set(&req->next_pending_count, 1);
>> -
>> - sba->reqs_free_count--;
>> + spin_unlock_irqrestore(&sba->reqs_lock, flags);
>> + if (!req)
>> + return NULL;
>>
>> - dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
>> - }
>> + req->flags = SBA_REQUEST_STATE_ALLOCED;
>> + req->first = req;
>> + INIT_LIST_HEAD(&req->next);
>> + atomic_set(&req->next_pending_count, 1);
>
> Can't fathom how this helps w/ mem allocation
This is to reduce the duration for which "sba->reqs_lock" is held.
I will also make this a separate patch.
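
For reference, roughly this is how sba_alloc_request() looks after the
change (condensed from the hunk quoted above): only the free-list
manipulation is done under reqs_lock, and the per-request initialization
happens after the lock is dropped.

static struct sba_request *sba_alloc_request(struct sba_device *sba)
{
	unsigned long flags;
	struct sba_request *req;

	/* Hold reqs_lock only for the list manipulation */
	spin_lock_irqsave(&sba->reqs_lock, flags);
	req = list_first_entry_or_null(&sba->reqs_free_list,
				       struct sba_request, node);
	if (req)
		list_move_tail(&req->node, &sba->reqs_alloc_list);
	spin_unlock_irqrestore(&sba->reqs_lock, flags);
	if (!req)
		return NULL;

	/* Initialize the request outside the lock */
	req->flags = SBA_REQUEST_STATE_ALLOCED;
	req->first = req;
	INIT_LIST_HEAD(&req->next);
	atomic_set(&req->next_pending_count, 1);
	dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);

	return req;
}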
>
>>
>> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
>> + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
>>
>> return req;
>> }
>> @@ -231,7 +228,8 @@ static void _sba_pending_request(struct sba_device *sba,
>> struct sba_request *req)
>> {
>> lockdep_assert_held(&sba->reqs_lock);
>> - req->state = SBA_REQUEST_STATE_PENDING;
>> + req->flags &= ~SBA_REQUEST_STATE_MASK;
>> + req->flags |= SBA_REQUEST_STATE_PENDING;
>> list_move_tail(&req->node, &sba->reqs_pending_list);
>> if (list_empty(&sba->reqs_active_list))
>> sba->reqs_fence = false;
>> @@ -246,9 +244,10 @@ static bool _sba_active_request(struct sba_device *sba,
>> sba->reqs_fence = false;
>> if (sba->reqs_fence)
>> return false;
>> - req->state = SBA_REQUEST_STATE_ACTIVE;
>> + req->flags &= ~SBA_REQUEST_STATE_MASK;
>> + req->flags |= SBA_REQUEST_STATE_ACTIVE;
>> list_move_tail(&req->node, &sba->reqs_active_list);
>> - if (req->fence)
>> + if (req->flags & SBA_REQUEST_FENCE)
>> sba->reqs_fence = true;
>> return true;
>> }
>> @@ -258,7 +257,8 @@ static void _sba_abort_request(struct sba_device *sba,
>> struct sba_request *req)
>> {
>> lockdep_assert_held(&sba->reqs_lock);
>> - req->state = SBA_REQUEST_STATE_ABORTED;
>> + req->flags &= ~SBA_REQUEST_STATE_MASK;
>> + req->flags |= SBA_REQUEST_STATE_ABORTED;
>> list_move_tail(&req->node, &sba->reqs_aborted_list);
>> if (list_empty(&sba->reqs_active_list))
>> sba->reqs_fence = false;
>> @@ -269,42 +269,34 @@ static void _sba_free_request(struct sba_device *sba,
>> struct sba_request *req)
>> {
>> lockdep_assert_held(&sba->reqs_lock);
>> - req->state = SBA_REQUEST_STATE_FREE;
>> + req->flags &= ~SBA_REQUEST_STATE_MASK;
>> + req->flags |= SBA_REQUEST_STATE_FREE;
>> list_move_tail(&req->node, &sba->reqs_free_list);
>> if (list_empty(&sba->reqs_active_list))
>> sba->reqs_fence = false;
>> - sba->reqs_free_count++;
>> }
>>
>> -static void sba_received_request(struct sba_request *req)
>> +/* Note: Must be called with sba->reqs_lock held */
>> +static void _sba_complete_request(struct sba_device *sba,
>> + struct sba_request *req)
>> {
>> - unsigned long flags;
>> - struct sba_device *sba = req->sba;
>> -
>> - spin_lock_irqsave(&sba->reqs_lock, flags);
>> - req->state = SBA_REQUEST_STATE_RECEIVED;
>> - list_move_tail(&req->node, &sba->reqs_received_list);
>> - spin_unlock_irqrestore(&sba->reqs_lock, flags);
>> + lockdep_assert_held(&sba->reqs_lock);
>> + req->flags &= ~SBA_REQUEST_STATE_MASK;
>> + req->flags |= SBA_REQUEST_STATE_COMPLETED;
>> + list_move_tail(&req->node, &sba->reqs_completed_list);
>> + if (list_empty(&sba->reqs_active_list))
>> + sba->reqs_fence = false;
>
> Ok am going to stop here, sorry can't review it further.
>
> Please split stuff up, make logical incremental patchsets and resubmit...
OK, I will split this into incremental patchsets and try to
make it easier to review.
Thanks,
Anup
On Wed, Jul 26, 2017 at 10:40 PM, Vinod Koul <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 11:06:40AM +0530, Anup Patel wrote:
>> We should peek mbox channels when we are left with no free
>> sba_requests in sba_alloc_request()
>
> and why in the world should we do that, how does that help??
When setting up a RAID array on several NVMe disks, we observed
that sba_alloc_request() starts failing (because no free requests are left)
and RAID array setup becomes very slow.
Peeking the mbox channels when no free requests are left
improves the performance of RAID array setup.
This change is inspired by mv_chan_alloc_slot() implemented
in drivers/dma/mv_xor.c.
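
For reference, the peek fallback added by this patch is just a loop over
all the mailbox channels, and sba_alloc_request() calls it right before
returning NULL when the free list is empty (condensed from the patch):

/* Poll every mailbox channel for completed messages */
static void sba_peek_mchans(struct sba_device *sba)
{
	int mchan_idx;

	for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
		mbox_client_peek_data(sba->mchans[mchan_idx]);
}

/* In sba_alloc_request(), when the free list turns out to be empty: */
	if (!req) {
		/*
		 * No free requests left, so peek the mailbox channels
		 * hoping a few active requests have completed, which
		 * will create room for new requests.
		 */
		sba_peek_mchans(sba);
		return NULL;
	}

The next allocation attempt then has a much better chance of finding a
free request.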
Regards,
Anup
On Wed, Jul 26, 2017 at 10:45 PM, Vinod Koul <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 11:06:42AM +0530, Anup Patel wrote:
>> This patch breaks sba_process_deferred_requests() into two parts
>> sba_process_received_request() and _sba_process_pending_requests()
>> for readability.
>>
>> In addition,
>
> that should be a separate patch then... no?
>
>> we remove redundant SBA_REQUEST_STATE_RECEIVED state
>
> this should be one more...
OK, I will make this a separate patch.
>
>> and ensure that all requests in a chained request should be freed
>> only after all have been received.
>
> and then this, right?
OK.
>
>>
>> Signed-off-by: Anup Patel <[email protected]>
>> Reviewed-by: Scott Branden <[email protected]>
>> ---
>> drivers/dma/bcm-sba-raid.c | 130 ++++++++++++++++-----------------------------
>> 1 file changed, 47 insertions(+), 83 deletions(-)
>>
>> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
>> index db5e3db..b92c756 100644
>> --- a/drivers/dma/bcm-sba-raid.c
>> +++ b/drivers/dma/bcm-sba-raid.c
>> @@ -97,9 +97,8 @@ enum sba_request_flags {
>> SBA_REQUEST_STATE_ALLOCED = 0x002,
>> SBA_REQUEST_STATE_PENDING = 0x004,
>> SBA_REQUEST_STATE_ACTIVE = 0x008,
>> - SBA_REQUEST_STATE_RECEIVED = 0x010,
>> - SBA_REQUEST_STATE_COMPLETED = 0x020,
>> - SBA_REQUEST_STATE_ABORTED = 0x040,
>> + SBA_REQUEST_STATE_COMPLETED = 0x010,
>> + SBA_REQUEST_STATE_ABORTED = 0x020,
>
> so we changed this is patch 1, only to change it here. why....!!!!
>
> so let me stop here again and repeat myself again about splitting stuff up,
> blah blah, good patches, blah blah, read the Documentation blah blah.. and
> hope someones listening :(
OK, I will address your comments.
Regards,
Anup
On Thu, Jul 27, 2017 at 09:42:33AM +0530, Anup Patel wrote:
> On Wed, Jul 26, 2017 at 10:39 PM, Vinod Koul <[email protected]> wrote:
> > On Wed, Jul 26, 2017 at 11:06:39AM +0530, Anup Patel wrote:
> >> drivers/dma/bcm-sba-raid.c | 439 +++++++++++++++++++++++----------------------
> >> 1 file changed, 226 insertions(+), 213 deletions(-)
> >>
> >> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
> >> index e41bbc7..6d15fed 100644
> >> --- a/drivers/dma/bcm-sba-raid.c
> >> +++ b/drivers/dma/bcm-sba-raid.c
> >> @@ -48,7 +48,8 @@
> >>
> >> #include "dmaengine.h"
> >>
> >> -/* SBA command related defines */
> >> +/* ====== Driver macros and defines ===== */
> >
> > why this noise, seems unrelated to the change!
>
> This is just minor beautification. Again, I will put this
> in a separate patch.
Well you can't shove garlands under an unrelated change. By all means throw
the whole garden out there, but please as a separate patch
>
> >
> >> +
> >> #define SBA_TYPE_SHIFT 48
> >> #define SBA_TYPE_MASK GENMASK(1, 0)
> >> #define SBA_TYPE_A 0x0
> >> @@ -82,39 +83,41 @@
> >> #define SBA_CMD_WRITE_BUFFER 0xc
> >> #define SBA_CMD_GALOIS 0xe
> >>
> >> -/* Driver helper macros */
> >> +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
> >> +
> >> #define to_sba_request(tx) \
> >> container_of(tx, struct sba_request, tx)
> >> #define to_sba_device(dchan) \
> >> container_of(dchan, struct sba_device, dma_chan)
> >>
> >> -enum sba_request_state {
> >> - SBA_REQUEST_STATE_FREE = 1,
> >> - SBA_REQUEST_STATE_ALLOCED = 2,
> >> - SBA_REQUEST_STATE_PENDING = 3,
> >> - SBA_REQUEST_STATE_ACTIVE = 4,
> >> - SBA_REQUEST_STATE_RECEIVED = 5,
> >> - SBA_REQUEST_STATE_COMPLETED = 6,
> >> - SBA_REQUEST_STATE_ABORTED = 7,
> >> +/* ===== Driver data structures ===== */
> >> +
> >> +enum sba_request_flags {
> >> + SBA_REQUEST_STATE_FREE = 0x001,
> >> + SBA_REQUEST_STATE_ALLOCED = 0x002,
> >> + SBA_REQUEST_STATE_PENDING = 0x004,
> >> + SBA_REQUEST_STATE_ACTIVE = 0x008,
> >> + SBA_REQUEST_STATE_RECEIVED = 0x010,
> >> + SBA_REQUEST_STATE_COMPLETED = 0x020,
> >> + SBA_REQUEST_STATE_ABORTED = 0x040,
> >> + SBA_REQUEST_STATE_MASK = 0x0ff,
> >> + SBA_REQUEST_FENCE = 0x100,
> >
> > how does this help in mem alloctn?
??
--
~Vinod
On Thu, Jul 27, 2017 at 10:25:25AM +0530, Anup Patel wrote:
> On Wed, Jul 26, 2017 at 10:40 PM, Vinod Koul <[email protected]> wrote:
> > On Wed, Jul 26, 2017 at 11:06:40AM +0530, Anup Patel wrote:
> >> We should peek mbox channels when we are left with no free
> >> sba_requests in sba_alloc_request()
> >
> > and why in the world should we do that, how does that help??
>
> When setting up a RAID array on several NVMe disks, we observed
> that sba_alloc_request() starts failing (because no free requests are left)
> and RAID array setup becomes very slow.
>
> Peeking the mbox channels when no free requests are left
> improves the performance of RAID array setup.
How about documenting this tribal knowledge in the changelog? Changelogs are
very useful; 6 months down the line, you will struggle to remember why this
was changed.
>
> This change is inspired by mv_chan_alloc_slot() implemented
> in drivers/dma/mv_xor.c.
>
> Regards,
> Anup
--
~Vinod
On Fri, Jul 28, 2017 at 8:45 AM, Vinod Koul <[email protected]> wrote:
> On Thu, Jul 27, 2017 at 10:25:25AM +0530, Anup Patel wrote:
>> On Wed, Jul 26, 2017 at 10:40 PM, Vinod Koul <[email protected]> wrote:
>> > On Wed, Jul 26, 2017 at 11:06:40AM +0530, Anup Patel wrote:
>> >> We should peek mbox channels when we are left with no free
>> >> sba_requests in sba_alloc_request()
>> >
>> > and why in the world should we do that, how does that help??
>>
>> When setting up a RAID array on several NVMe disks, we observed
>> that sba_alloc_request() starts failing (because no free requests are left)
>> and RAID array setup becomes very slow.
>>
>> Peeking the mbox channels when no free requests are left
>> improves the performance of RAID array setup.
>
> How about documenting this tribal knowledge in the changelog? Changelogs are
> very useful; 6 months down the line, you will struggle to remember why this
> was changed.
Sure, I will have a detailed commit description for this.
Regards,
Anup
On Fri, Jul 28, 2017 at 8:43 AM, Vinod Koul <[email protected]> wrote:
> On Thu, Jul 27, 2017 at 09:42:33AM +0530, Anup Patel wrote:
>> On Wed, Jul 26, 2017 at 10:39 PM, Vinod Koul <[email protected]> wrote:
>> > On Wed, Jul 26, 2017 at 11:06:39AM +0530, Anup Patel wrote:
>
>> >> drivers/dma/bcm-sba-raid.c | 439 +++++++++++++++++++++++----------------------
>> >> 1 file changed, 226 insertions(+), 213 deletions(-)
>> >>
>> >> diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
>> >> index e41bbc7..6d15fed 100644
>> >> --- a/drivers/dma/bcm-sba-raid.c
>> >> +++ b/drivers/dma/bcm-sba-raid.c
>> >> @@ -48,7 +48,8 @@
>> >>
>> >> #include "dmaengine.h"
>> >>
>> >> -/* SBA command related defines */
>> >> +/* ====== Driver macros and defines ===== */
>> >
>> > why this noise, seems unrelated to the change!
>>
>> This is just minor beautification. Again, I will put this
>> in a separate patch.
>
> Well you can't shove garlands under an unrelated change. By all means throw
> the whole garden out there, but please as a separate patch
Sure, I will have a separate patch for this beautification.
>
>>
>> >
>> >> +
>> >> #define SBA_TYPE_SHIFT 48
>> >> #define SBA_TYPE_MASK GENMASK(1, 0)
>> >> #define SBA_TYPE_A 0x0
>> >> @@ -82,39 +83,41 @@
>> >> #define SBA_CMD_WRITE_BUFFER 0xc
>> >> #define SBA_CMD_GALOIS 0xe
>> >>
>> >> -/* Driver helper macros */
>> >> +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
>> >> +
>> >> #define to_sba_request(tx) \
>> >> container_of(tx, struct sba_request, tx)
>> >> #define to_sba_device(dchan) \
>> >> container_of(dchan, struct sba_device, dma_chan)
>> >>
>> >> -enum sba_request_state {
>> >> - SBA_REQUEST_STATE_FREE = 1,
>> >> - SBA_REQUEST_STATE_ALLOCED = 2,
>> >> - SBA_REQUEST_STATE_PENDING = 3,
>> >> - SBA_REQUEST_STATE_ACTIVE = 4,
>> >> - SBA_REQUEST_STATE_RECEIVED = 5,
>> >> - SBA_REQUEST_STATE_COMPLETED = 6,
>> >> - SBA_REQUEST_STATE_ABORTED = 7,
>> >> +/* ===== Driver data structures ===== */
>> >> +
>> >> +enum sba_request_flags {
>> >> + SBA_REQUEST_STATE_FREE = 0x001,
>> >> + SBA_REQUEST_STATE_ALLOCED = 0x002,
>> >> + SBA_REQUEST_STATE_PENDING = 0x004,
>> >> + SBA_REQUEST_STATE_ACTIVE = 0x008,
>> >> + SBA_REQUEST_STATE_RECEIVED = 0x010,
>> >> + SBA_REQUEST_STATE_COMPLETED = 0x020,
>> >> + SBA_REQUEST_STATE_ABORTED = 0x040,
>> >> + SBA_REQUEST_STATE_MASK = 0x0ff,
>> >> + SBA_REQUEST_FENCE = 0x100,
>> >
>> > how does this help in mem alloctn?
>
> ??
Ahh, I missed addressing this comment.
Currently, we have a separate "bool" fence flag and a separate
"state" variable in sba_request. We have merged these two things
into a common "u32 flags" field in sba_request. In future, we can
use more bits of "u32 flags" as required without disturbing
struct sba_request.
I will make this a separate patch.
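
For reference, the merged layout is roughly as below (condensed from the
PATCH1 hunk quoted above): the request state lives in the bits covered by
SBA_REQUEST_STATE_MASK, and attribute bits such as SBA_REQUEST_FENCE sit
above it, so state transitions only rewrite the masked bits.

enum sba_request_flags {
	SBA_REQUEST_STATE_FREE		= 0x001,
	SBA_REQUEST_STATE_ALLOCED	= 0x002,
	SBA_REQUEST_STATE_PENDING	= 0x004,
	SBA_REQUEST_STATE_ACTIVE	= 0x008,
	SBA_REQUEST_STATE_RECEIVED	= 0x010,
	SBA_REQUEST_STATE_COMPLETED	= 0x020,
	SBA_REQUEST_STATE_ABORTED	= 0x040,
	SBA_REQUEST_STATE_MASK		= 0x0ff,	/* state bits */
	SBA_REQUEST_FENCE		= 0x100,	/* attribute bit */
};

/* Note: Must be called with sba->reqs_lock held */
static void _sba_pending_request(struct sba_device *sba,
				 struct sba_request *req)
{
	lockdep_assert_held(&sba->reqs_lock);
	/* Replace only the state bits; FENCE (and any future
	 * attribute bits) survive the state transition.
	 */
	req->flags &= ~SBA_REQUEST_STATE_MASK;
	req->flags |= SBA_REQUEST_STATE_PENDING;
	list_move_tail(&req->node, &sba->reqs_pending_list);
	if (list_empty(&sba->reqs_active_list))
		sba->reqs_fence = false;
}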
I agree, I have covered many changes in PATCH1,
which makes it hard for you to review.
Thanks,
Anup