/*
   <:copyright-BRCM:2013:DUAL/GPL:standard
   
      Copyright (c) 2013 Broadcom 
      All Rights Reserved
   
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2, as published by
   the Free Software Foundation (the "GPL").
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   
   A copy of the GPL is available at http://www.broadcom.com/licenses/GPLv2.php, or by
   writing to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
   
   :>
 */

/******************************************************************************/
/*                                                                            */
/* File Description:                                                          */
/*                                                                            */
/* This file contains the implementation of the Runner CPU ring interface     */
/*                                                                            */
/******************************************************************************/

/*****************************************************************************/
/*                                                                           */
/* Include files                                                             */
/*                                                                           */
/*****************************************************************************/
#ifdef RDP_SIM
#define INTERN_PRINT bdmf_trace
#define ____cacheline_aligned 
#else
#define INTERN_PRINT printk
#endif

#include <bdmf_interface.h>
#include "rdp_cpu_ring.h"
#include "rdd_cpu_rx.h"
#include "rdp_drv_proj_cntr.h"
#include "rdd_runner_proj_defs.h"
#include "rdp_cpu_ring_inline.h"
#include "rdp_cpu_feed_ring.h"
#include "rdp_mm.h"
#include "rdp_drv_xpm.h"
#include "bdmf_system.h"
#include "rdp_drv_shell.h"
#include "xrdp_drv_rnr_regs_ag.h"

/* Non-zero once the ring shell commands have been registered by
 * rdp_cpu_ring_create_ring_ex(); prevents registering them twice. */
static uint32_t init_shell = 0;
int stats_reason[2][rdpa_cpu_reason__num_of]  = {}; /* reason statistics for US/DS */
EXPORT_SYMBOL(stats_reason);

/* Size in bytes of one ring read or write index (indices are uint16_t). */
#define RW_INDEX_SIZE (sizeof(uint16_t))
/* Base value written to the CPU TX sync FIFO read/write pointers: table address
 * plus the FIFO word offset multiplied by 4 (presumably words to bytes - confirm). */
#define SYNC_FIFO_ADDRESS (IMAGE_CPU_TX_SYNC_FIFO_TABLE_ADDRESS + CPU_TX_SYNC_FIFO_ENTRY_FIFO_WORD_OFFSET * 4)

/* Host-side descriptor for every ring; a slot whose num_of_entries is 0 is
 * treated as "ring does not exist" throughout this file. */
ring_descriptor_t ____cacheline_aligned host_ring[D_NUM_OF_RING_DESCRIPTORS]  = {};
EXPORT_SYMBOL(host_ring);

/* Forward declaration: used by rdp_recycle_buf_to_xpm() before its definition. */
void rdp_buffer_recycle_non_feed(uint32_t token_id);

int cpu_ring_shell_list_rings(void *shell_priv, int start_from)
{
    uint32_t cntr;
    uint32_t first = 0, last = D_NUM_OF_RING_DESCRIPTORS;
    uint16_t read_idx = 0, write_idx = 0;

    rdp_cpu_shell_print(shell_priv, "CPU RX Ring Descriptors\n");
    rdp_cpu_shell_print(shell_priv, "------------------------------\n");

    if (start_from != -1)
    {
        first = start_from;
        last = first + 1;
    }

    for (cntr = first; cntr < last; cntr++)
    {
        char *ring_type;
        if (!host_ring[cntr].num_of_entries) 
            continue;

        ring_type = "RX";

        rdp_cpu_get_read_idx(host_ring[cntr].ring_id, host_ring[cntr].type, &read_idx);
        rdp_cpu_get_write_idx(host_ring[cntr].ring_id, host_ring[cntr].type, &write_idx);
#if defined(CONFIG_RNR_FEED_RING)
        if (host_ring[cntr].type == rdpa_ring_feed)
        {
            ring_type = "Feed";
#ifdef CPU_RING_DEBUG
            rdp_cpu_shell_print(shell_priv, "Feed Ring allocation failures: = %d:\n", host_ring[cntr].stats_buff_err);
#endif            
        }
        else  
#endif        
        if (host_ring[cntr].type == rdpa_ring_recycle)
            ring_type = "Recycle";
        else  if (host_ring[cntr].type == rdpa_ring_cpu_tx)
            ring_type = "Cpu_tx";

        rdp_cpu_shell_print(shell_priv, "CPU %s Ring Queue = %d:\n", ring_type, cntr);
        rdp_cpu_shell_print(shell_priv, "\tCPU %s Ring Queue id= %d\n",ring_type, host_ring[cntr].ring_id);
        rdp_cpu_shell_print(shell_priv, "\tNumber of entries = %d\n", host_ring[cntr].num_of_entries);
        rdp_cpu_shell_print(shell_priv, "\tSize of entry = %d bytes\n", host_ring[cntr].size_of_entry);
        rdp_cpu_shell_print(shell_priv, "\tAllocated Packet size = %d bytes\n", host_ring[cntr].packet_size);
        rdp_cpu_shell_print(shell_priv, "\tRing Base address = 0x%px\n", host_ring[cntr].base);

        rdp_cpu_shell_print(shell_priv, "\tRing Write index = %d shadow = %d\n", write_idx, host_ring[cntr].shadow_write_idx);
        rdp_cpu_shell_print(shell_priv, "\tRing Read index = %d shadow = %d\n", read_idx, host_ring[cntr].shadow_read_idx);
        rdp_cpu_shell_print(shell_priv, "\tCurrently Queued = %d\n", rdp_cpu_ring_get_queued(cntr));
        rdp_cpu_shell_print(shell_priv, "\tLowest Filling Level = %d\n", host_ring[cntr].lowest_filling_level);

#if defined(CONFIG_BCM_RUNNER_FEED_RING_DYNAMIC)
        if (host_ring[cntr].type == rdpa_ring_feed)
        {
            rdp_cpu_shell_print(shell_priv, "\tCurrent Allocated Buffers = %d\n", (int)atomic_read(&allocated_buffers));
            rdp_cpu_shell_print(shell_priv, "\tMax Allocated Buffers = %d\n", max_allocated_buffers);
            rdp_cpu_shell_print(shell_priv, "\tFeed Ring Max Allowed Buffers = %d\n", feed_ring_max_buffers);
            rdp_cpu_shell_print(shell_priv, "\tFeed Ring Low Threshold = %d\n", feed_ring_low_threshold);
            rdp_cpu_shell_print(shell_priv, "\tFeed Min Buffers in Ring = %d\n", feed_min_buffers_in_ring);
        }
#endif
        rdp_cpu_shell_print(shell_priv, "-------------------------------\n");
        rdp_cpu_shell_print(shell_priv, "\n\n");
    }

    return 0;
}

/*
 * Print a physical address given as two 32-bit halves, followed by the
 * combined address and the value PHYS_TO_UNCACHED() yields for it.
 *
 * shell_priv - opaque session handle forwarded to rdp_cpu_shell_print()
 * addr_hi    - upper half of the address
 * addr_low   - lower half of the address
 */
void rdp_cpu_ring_print_phys_addr(void *shell_priv, uint32_t addr_hi, uint32_t addr_low)
{
    bdmf_phys_addr_t combined;

    /* Merge the two halves into one physical address */
    SET_ADDR_HIGH_LOW(addr_hi, addr_low, combined);

    rdp_cpu_shell_print(shell_priv, "\ttype: absolute address\n");

    /* Raw halves first, in low/high order */
    rdp_cpu_shell_print(shell_priv, "\tpacket DDR phys low: 0x%x\n", addr_low);
    rdp_cpu_shell_print(shell_priv, "\tpacket DDR phys hi: 0x%x\n", addr_hi);

    /* Then the merged value and its uncached mapping */
    rdp_cpu_shell_print(shell_priv, "\tpacket DDR phys: 0x%px\n",
        (uintptr_t *)combined);
    rdp_cpu_shell_print(shell_priv, "\tpacket DDR virt uncached address: 0x%px\n",
        (void *)PHYS_TO_UNCACHED(combined));
}

/*
 * Decode and print the fields of an (already byte-swapped) CPU RX descriptor.
 *
 * shell_priv - opaque session handle forwarded to rdp_cpu_shell_print()
 * pdPtr      - descriptor to decode
 *
 * The source is classified as CPU (vport inside the CPU vport range), LAN
 * (is_emac / is_src_lan flag set) or WAN (everything else).
 */
static void cpu_ring_pd_print_fields(void *shell_priv, CPU_RX_DESCRIPTOR_STRUCT *pdPtr)
{
    int from_cpu_vport;
    int from_lan;

    rdp_cpu_shell_print(shell_priv, "descriptor fields:\n");

    /* Buffer location: absolute DDR address or FPM index */
    if (pdPtr->abs)
    {
        rdp_cpu_ring_print_phys_addr(shell_priv, pdPtr->host_buffer_data_ptr_hi, pdPtr->host_buffer_data_ptr_low);
    }
    else
    {
        rdp_cpu_shell_print(shell_priv, "\ttype: fpm\n");
        rdp_cpu_shell_print(shell_priv, "\tfpm_id: 0x%x\n", pdPtr->fpm_idx);
    }
    rdp_cpu_shell_print(shell_priv, "\tpacket len: %d\n", pdPtr->plen);

    /* Classify the source; the LAN flag only matters when the vport is not a CPU vport */
    from_cpu_vport = (pdPtr->vport >= RDD_CPU_VPORT_FIRST && pdPtr->vport <= RDD_CPU_VPORT_LAST);
#if defined(RDP_UFC)
    from_lan = pdPtr->is_emac;
#else
    from_lan = pdPtr->is_src_lan;
#endif

    if (from_cpu_vport)
    {
#if !defined(G9991_COMMON)
        rdp_cpu_shell_print(shell_priv, "\tsource: CPU/WLAN\n");
#else
        rdp_cpu_shell_print(shell_priv, "\tsource: CPU\n");
#endif
    }
    else if (from_lan)
    {
        rdp_cpu_shell_print(shell_priv, "\tsource: LAN\n");
    }
    else
    {
        rdp_cpu_shell_print(shell_priv, "\tsource: WAN\n");
    }

    /* Fields printed for every source */
    rdp_cpu_shell_print(shell_priv, "\tdata offset: %d\n", pdPtr->data_offset);
    rdp_cpu_shell_print(shell_priv, "\treason: %d\n", pdPtr->reason);

    if (!from_cpu_vport)
    {
        /* LAN and WAN share the port line; only WAN adds the flow id */
        rdp_cpu_shell_print(shell_priv, "\tsource port: %d\n", pdPtr->vport);
        if (!from_lan)
            rdp_cpu_shell_print(shell_priv, "\tWAN flow id: %d\n", pdPtr->wan_flow_id);
        return;
    }

    /* CPU-sourced packets carry the extended set of fields */
    rdp_cpu_shell_print(shell_priv, "\tssid: %d\n", pdPtr->ssid);
    rdp_cpu_shell_print(shell_priv, "\tvport: %d\n", pdPtr->vport);

    rdp_cpu_shell_print(shell_priv, "\tis_rx_offload: %d\n", pdPtr->is_rx_offload);
    rdp_cpu_shell_print(shell_priv, "\tis_exception: %d\n", pdPtr->is_exception);
    rdp_cpu_shell_print(shell_priv, "\tis_ucast: %d\n", pdPtr->is_ucast);
    rdp_cpu_shell_print(shell_priv, "\ttx_prio: %d\n", pdPtr->mcast_tx_prio);
    rdp_cpu_shell_print(shell_priv, "\tcolor: %d\n", pdPtr->color);
#if !defined(G9991_COMMON)
    rdp_cpu_shell_print(shell_priv, "\tdst_ssid_vector / metadata: 0x%x\n", pdPtr->dst_ssid_vector);
    rdp_cpu_shell_print(shell_priv, "\twl_metadata: 0x%x\n", pdPtr->word_32[CPU_RX_DESCRIPTOR_CPU_RX_METADATA_UNION_WORD_OFFSET]); /* wl_metadata takes all 32 bits */
#endif
}

/*
 * Decode and print the fields of an (already byte-swapped) CPU TX ring
 * descriptor.
 *
 * shell_priv - opaque session handle forwarded to rdp_cpu_shell_print()
 * pdPtr      - descriptor to decode
 */
static void cpu_tx_pd_print_fields(void *shell_priv, RING_CPU_TX_DESCRIPTOR_STRUCT *pdPtr)
{
    int lan_flag;

    /* The LAN indication lives in a differently named field on UFC builds */
#if defined(RDP_UFC)
    lan_flag = pdPtr->is_emac;
#else
    lan_flag = pdPtr->lan;
#endif

    rdp_cpu_shell_print(shell_priv, "Cpu TX descriptor fields:\n");

    rdp_cpu_shell_print(shell_priv, "\tis_egress %d\n", pdPtr->is_egress);
    rdp_cpu_shell_print(shell_priv, "\tfirst_level_q: %d\n", pdPtr->first_level_q);
    rdp_cpu_shell_print(shell_priv, "\tpacket_length: %d\n", pdPtr->packet_length);

    /* sk_buff pointer, printed as a hi/low address pair */
    rdp_cpu_shell_print(shell_priv, "\tsk_buf_ptr address:\n");
    rdp_cpu_ring_print_phys_addr(shell_priv, pdPtr->sk_buf_ptr_high, pdPtr->sk_buf_ptr_low);

    rdp_cpu_shell_print(shell_priv, "\tcolor: %d\n", pdPtr->color);
    rdp_cpu_shell_print(shell_priv, "\tdo_not_recycle: %d\n", pdPtr->do_not_recycle);
    rdp_cpu_shell_print(shell_priv, "\tflag_1588: %d\n", pdPtr->flag_1588);
    rdp_cpu_shell_print(shell_priv, "\tlan: %d\n", lan_flag);
    rdp_cpu_shell_print(shell_priv, "\twan_flow_source_port: %d\n", pdPtr->wan_flow_source_port);
    rdp_cpu_shell_print(shell_priv, "\tfpm_fallback: %d\n", pdPtr->fpm_fallback);

    rdp_cpu_shell_print(shell_priv, "\tsbpm_copy: %d\n", pdPtr->sbpm_copy);
    rdp_cpu_shell_print(shell_priv, "\ttarget_mem_0: %d\n", pdPtr->target_mem_0);
    rdp_cpu_shell_print(shell_priv, "\tabs: %d\n", pdPtr->abs);
    rdp_cpu_shell_print(shell_priv, "\tlag_index: %d\n", pdPtr->lag_index);

    /* Packet buffer pointer; the low word doubles as an FPM buffer number */
    rdp_cpu_shell_print(shell_priv, "\tpkt_buf_ptr address\n");
    rdp_cpu_ring_print_phys_addr(shell_priv, pdPtr->pkt_buf_ptr_high, pdPtr->pkt_buf_ptr_low_or_fpm_bn0);
}

/*
 * Decode and print the fields of an (already byte-swapped) recycle ring
 * descriptor.
 *
 * shell_priv - opaque session handle forwarded to rdp_cpu_shell_print()
 * pdPtr      - descriptor to decode
 */
static void cpu_recycle_pd_print_fields(void *shell_priv, CPU_RECYCLE_DESCRIPTOR *pdPtr)
{
    uint32_t skb_hi = pdPtr->skb_ptr_hi;
    uint32_t skb_low = pdPtr->skb_ptr_low;

    rdp_cpu_shell_print(shell_priv, "Recycle descriptor fields:\n");

    /* Buffer pointer stored in the descriptor, as a hi/low pair */
    rdp_cpu_ring_print_phys_addr(shell_priv, skb_hi, skb_low);

    rdp_cpu_shell_print(shell_priv, "\tpacket type: %d\n", pdPtr->abs);
#if defined(CONFIG_RNR_FEED_RING)
    rdp_cpu_shell_print(shell_priv, "\tfrom_feed_ring: 0x%x\n", pdPtr->from_feed_ring);
#endif
    rdp_cpu_shell_print(shell_priv, "\treserved: 0x%x\n", pdPtr->reserved0);
}

/*
 * Dump one descriptor of a ring: raw words, byte-swapped words, and the
 * decoded fields for the ring's type (data, recycle, feed or cpu_tx).
 *
 * shell_priv - opaque session handle forwarded to rdp_cpu_shell_print()
 * ring_id    - index into host_ring
 * pdIndex    - descriptor index inside that ring
 *
 * Returns 0 on success, -1 when ring_id is outside host_ring, the ring is not
 * configured, or pdIndex is not smaller than the ring's number of entries.
 * In those cases nothing is read from the ring memory.
 */
int cpu_ring_shell_print_pd(void *shell_priv, uint32_t ring_id, uint32_t pdIndex)
{
    /* Number of slots in host_ring, derived from the array itself */
    const uint32_t num_rings = sizeof(host_ring) / sizeof(host_ring[0]);

    if (ring_id >= num_rings)
    {
        rdp_cpu_shell_print(shell_priv, "Invalid ring id %u\n", (unsigned int)ring_id);
        return -1;
    }

    /* A slot with no entries or no descriptor memory has nothing to dump */
    if (!host_ring[ring_id].num_of_entries || !host_ring[ring_id].base)
    {
        rdp_cpu_shell_print(shell_priv, "Ring %u is not configured\n", (unsigned int)ring_id);
        return -1;
    }

    if (pdIndex >= host_ring[ring_id].num_of_entries)
    {
        rdp_cpu_shell_print(shell_priv, "Descriptor index %u out of range, ring %u has %u entries\n",
            (unsigned int)pdIndex, (unsigned int)ring_id, (unsigned int)host_ring[ring_id].num_of_entries);
        return -1;
    }

    if (host_ring[ring_id].type == rdpa_ring_data)
    {
        CPU_RX_DESCRIPTOR_STRUCT host_ring_desc;

        /* Work on a local copy so the live descriptor is never modified */
        memcpy(&host_ring_desc, &((CPU_RX_DESCRIPTOR_STRUCT *)host_ring[ring_id].base)[pdIndex],
            sizeof(CPU_RX_DESCRIPTOR_STRUCT));

        rdp_cpu_shell_print(shell_priv, "descriptor unswapped: %08x %08x %08x %08x\n",
            host_ring_desc.word_32[0], host_ring_desc.word_32[1], host_ring_desc.word_32[2],
            host_ring_desc.word_32[3]);

        swap_4_words(host_ring_desc.word_32, host_ring_desc.word_32);

        rdp_cpu_shell_print(shell_priv, "descriptor swapped  : %08x %08x %08x %08x\n",
            host_ring_desc.word_32[0], host_ring_desc.word_32[1], host_ring_desc.word_32[2],
            host_ring_desc.word_32[3]);

        cpu_ring_pd_print_fields(shell_priv, &host_ring_desc);
    }
    else if (host_ring[ring_id].type == rdpa_ring_recycle)
    {
        CPU_RECYCLE_DESCRIPTOR host_rc_desc;

        memcpy(&host_rc_desc, &((CPU_RECYCLE_DESCRIPTOR *)host_ring[ring_id].base)[pdIndex],
            sizeof(CPU_RECYCLE_DESCRIPTOR));

        /* Label fixed: this branch dumps a recycle descriptor, not a feed one */
        rdp_cpu_shell_print(shell_priv, "recycle descriptor unswapped: %08x %08x\n",
            host_rc_desc.word_32[0], host_rc_desc.word_32[1]);

        host_rc_desc.dword_64[0] = swap4bytes64(host_rc_desc.dword_64[0]);

        rdp_cpu_shell_print(shell_priv, "descriptor swapped  : %08x %08x\n",
            host_rc_desc.word_32[0], host_rc_desc.word_32[1]);

        cpu_recycle_pd_print_fields(shell_priv, &host_rc_desc);
    }
#if defined(CONFIG_RNR_FEED_RING)
    else if (host_ring[ring_id].type == rdpa_ring_feed)
    {
        CPU_FEED_DESCRIPTOR host_feed_desc;

        memcpy(&host_feed_desc, &((CPU_FEED_DESCRIPTOR *)host_ring[ring_id].base)[pdIndex],
            sizeof(CPU_FEED_DESCRIPTOR));
        rdp_cpu_shell_print(shell_priv, "feed descriptor unswapped: %08x %08x\n",
            host_feed_desc.word0, host_feed_desc.word1);

        host_feed_desc.word0 = swap4bytes(host_feed_desc.word0);
        host_feed_desc.word1 = swap4bytes(host_feed_desc.word1);

        rdp_cpu_shell_print(shell_priv, "descriptor swapped  : %08x %08x\n",
            host_feed_desc.word0, host_feed_desc.word1);

        rdp_cpu_feed_pd_print_fields(shell_priv, &host_feed_desc);
    }
#endif
    else if (host_ring[ring_id].type == rdpa_ring_cpu_tx)
    {
        RING_CPU_TX_DESCRIPTOR_STRUCT cpu_tx_desc = {};
        RING_CPU_TX_DESCRIPTOR_STRUCT cpu_tx_swapped;

        rdp_cpu_shell_print(shell_priv, "host_ring[%d].base %px, size %d\n", ring_id, host_ring[ring_id].base, (int)sizeof(RING_CPU_TX_DESCRIPTOR_STRUCT));
        rdp_cpu_shell_print(shell_priv, "hostpd[%d].address %px\n", pdIndex, &((RING_CPU_TX_DESCRIPTOR_STRUCT *)host_ring[ring_id].base)[pdIndex]);
        memcpy(&cpu_tx_desc, &((RING_CPU_TX_DESCRIPTOR_STRUCT *)host_ring[ring_id].base)[pdIndex],
            sizeof(RING_CPU_TX_DESCRIPTOR_STRUCT));
        rdp_cpu_shell_print(shell_priv, "CPU TX descriptor unswapped: %08x %08x %08x %08x\n",
            ((uint32_t *)&cpu_tx_desc)[0], ((uint32_t *)&cpu_tx_desc)[1], ((uint32_t *)&cpu_tx_desc)[2], ((uint32_t *)&cpu_tx_desc)[3]);

        /* Swap into a second copy; the raw copy stays as read from the ring */
        swap_4_words((uint32_t *)&cpu_tx_desc, cpu_tx_swapped.word_32);

        rdp_cpu_shell_print(shell_priv, "descriptor swapped  : %08x %08x %08x %08x\n",
            cpu_tx_swapped.word_32[0], cpu_tx_swapped.word_32[1], cpu_tx_swapped.word_32[2], cpu_tx_swapped.word_32[3]);

        cpu_tx_pd_print_fields(shell_priv, &cpu_tx_swapped);
    }

    return 0;
}

#if defined(CONFIG_RUNNER_GDX_SUPPORT) && defined(RDP_UFC)
/*
 * Shell handler: read and print the CPU RX GDX queue counter and the GDX
 * queue occupancy drop counter, together with the status of each read.
 *
 * session - shell session to print to
 * parm    - unused (command takes no parameters)
 * n_parms - unused
 *
 * Always returns 0; read failures are reported through the printed rc.
 */
static int bdmf_cpu_gdx_cntrs(bdmf_session_handle session, const bdmfmon_cmd_parm_t parm[], uint16_t n_parms)
{
    /* Zeroed so that a failed read prints 0 instead of stack garbage */
    uint64_t temp_cntr_arr[6] = {};
    int32_t rc;

    rc = drv_cntr_counter_read(CNTR_GROUP_DHD_CTR, DHD_CTR_GROUP_CPU_RX_GDX_QUEUE_CNT, temp_cntr_arr);
    rdp_cpu_shell_print(session, "CPU_RX_GDX_QUEUE_CNT: %u, rc %d\n", (uint32_t)temp_cntr_arr[0], rc);

    /* Clear the slot again so a failing second read cannot show the first value */
    temp_cntr_arr[0] = 0;
    rc = drv_cntr_counter_read(CNTR_GROUP_VARIOUS, COUNTER_CPU_RX_GDX_QUEUE_OCCUPANCY_DROP, temp_cntr_arr);
    rdp_cpu_shell_print(session, "CPU_RX_GDX_QUEUE_OCCUPANCY_DROP: %u, rc %d\n", (uint32_t)temp_cntr_arr[0], rc);

    return 0;
}
#endif
/*
 * Shell handler for the "show available rings" command.
 *
 * With exactly one parameter, that value selects the ring to show;
 * otherwise -1 is passed on, which makes cpu_ring_shell_list_rings()
 * walk every ring.
 */
static int bdmf_cpu_ring_shell_list_rings(bdmf_session_handle session, const bdmfmon_cmd_parm_t parm[], uint16_t n_parms)
{
    int start_from = (n_parms == 1) ? (int)(uint32_t)parm[0].value.unumber : -1;

    return cpu_ring_shell_list_rings(session, start_from);
}

/*
 * Register the "driver/cpur" shell directory and its commands:
 *   sar  - show available rings
 *   gdx  - show CPU_RX_GDX counters (GDX + UFC builds only)
 *   vrpd - view a ring packet descriptor (feed ring builds only)
 *
 * The "driver" directory is reused when it already exists.
 *
 * Returns 0 on success, 1 when a directory could not be created.
 *
 * NOTE(review): both ring_id ranges end at D_NUM_OF_RING_DESCRIPTORS and the
 * descriptor range ends at FEED_RING_SIZE; if the shell treats the upper bound
 * as inclusive these admit an index one past the end - confirm.
 */
int ring_make_shell_commands(void)
{
    bdmfmon_handle_t driver_dir, cpu_dir;

    /* Look the parent directory up first, create it only when missing */
    driver_dir = bdmfmon_dir_find (NULL, "driver");
    if (!driver_dir)
        driver_dir = bdmfmon_dir_add(NULL, "driver", "Device Drivers", BDMF_ACCESS_ADMIN, NULL);
    if (!driver_dir)
        return (1);

    cpu_dir = bdmfmon_dir_add(driver_dir, "cpur", "CPU Ring Interface Driver", BDMF_ACCESS_ADMIN, NULL);
    if (!cpu_dir)
        return (1);

    MAKE_BDMF_SHELL_CMD( cpu_dir, "sar",   "Show available rings", bdmf_cpu_ring_shell_list_rings,
        BDMFMON_MAKE_PARM_RANGE( "ring_id", "ring id", BDMFMON_PARM_NUMBER, BDMFMON_PARM_FLAG_OPTIONAL, 0, D_NUM_OF_RING_DESCRIPTORS) );
#if defined(CONFIG_RUNNER_GDX_SUPPORT) && defined(RDP_UFC)
    MAKE_BDMF_SHELL_CMD_NOPARM( cpu_dir, "gdx",   "Show CPU_RX_GDX counters", bdmf_cpu_gdx_cntrs);
#endif
#if defined(CONFIG_RNR_FEED_RING)
    MAKE_BDMF_SHELL_CMD( cpu_dir, "vrpd",     "View Ring packet descriptor", bdmf_cpu_ring_shell_print_pd,
        BDMFMON_MAKE_PARM_RANGE( "ring_id", "ring id", BDMFMON_PARM_NUMBER, 0, 0, D_NUM_OF_RING_DESCRIPTORS ),
        BDMFMON_MAKE_PARM_RANGE( "descriptor", "packet descriptor index ", BDMFMON_PARM_NUMBER, 0, 0, FEED_RING_SIZE) );
#endif

    return 0;
}

/*
 * Delete a previously created ring and release its resources.
 *
 * ring_id - index into host_ring
 *
 * Order of teardown: data buffers attached to the ring, buffers still held in
 * the local buff_cache, the buff_cache array itself, then the descriptor
 * memory. The slot is marked free by clearing num_of_entries.
 *
 * Returns 0 on success, -1 when the ring does not exist, or the error
 * returned by rdp_cpu_ring_buffers_free() (the ring is left untouched then).
 */
int rdp_cpu_ring_delete_ring(uint32_t ring_id)
{
    ring_descriptor_t *pDescriptor;
    int rc;

    pDescriptor = &host_ring[ring_id];
    if (!pDescriptor->num_of_entries)
    {
        INTERN_PRINT("ERROR:deleting ring_id %d which does not exists!", ring_id);
        return -1;
    }

    rc = rdp_cpu_ring_buffers_free(pDescriptor);
    if (rc)
    {
        INTERN_PRINT("ERROR: failed free ring buffers ring_id %d, err %d\n", ring_id, rc);
        return rc;
    }

    /* free any buffers in buff_cache */
    while (pDescriptor->buff_cache_cnt)
    {
        pDescriptor->databuf_free(pDescriptor->buff_cache[--pDescriptor->buff_cache_cnt], 0, pDescriptor);
    }

    /* free buff_cache; clear the pointer so a stale value is never reused or freed twice */
    if (pDescriptor->buff_cache)
    {
        CACHED_FREE(pDescriptor->buff_cache);
        pDescriptor->buff_cache = NULL;
    }

    /* free the ring of descriptors; clear the pointer for the same reason */
    if (pDescriptor->base)
    {
        rdp_mm_aligned_free(pDescriptor->base, pDescriptor->ring_ddr_size);
        pDescriptor->base = NULL;
    }
    pDescriptor->num_of_entries = 0;

    return 0;
}
EXPORT_SYMBOL(rdp_cpu_ring_delete_ring);

/*
 * Create a ring without asking for the address of its read/write indices.
 *
 * Convenience wrapper: forwards every argument to
 * rdp_cpu_ring_create_ring_ex() with rw_idx_addr set to NULL and returns
 * its result unchanged.
 */
int rdp_cpu_ring_create_ring(uint32_t ring_id, uint8_t ring_type, uint32_t entries, bdmf_phys_addr_t *ring_head,
    uint32_t packetSize, RING_CB_FUNC *ringCb, uint32_t ring_prio)
{
    int status;

    status = rdp_cpu_ring_create_ring_ex(ring_id, ring_type, entries, ring_head,
        NULL, packetSize, ringCb, ring_prio);

    return status;
}
EXPORT_SYMBOL(rdp_cpu_ring_create_ring);

/*
 * Create a CPU ring: fill in the host descriptor, allocate the buffer cache
 * and the descriptor memory, initialise the ring buffers and, on first use,
 * register the ring shell commands.
 *
 * ring_id     - ring identifier; must be below RING_ID_NUM_OF, and for data
 *               rings not above DATA_RING_ID_LAST
 * ring_type   - one of the rdpa_ring_* types
 * entries     - number of descriptors; non-zero, and a power of two for
 *               cpu_tx rings
 * ring_head   - out: physical base address of the descriptor memory
 * rw_idx_addr - out, optional: physical address of the read/write indices;
 *               only written for recycle and feed rings
 * packetSize  - stored in the descriptor as packet_size
 * ringCb      - optional: overrides data_dump and supplies buff_mem_context
 * ring_prio   - stored in the descriptor as ring_prio
 *
 * Returns 0 on success, -1 on parameter or allocation failure, 1 when the
 * ring was created but the shell commands could not be registered.
 *
 * When an allocation fails the slot is released again, so a later create for
 * the same ring is not rejected as "already exists".
 */
int rdp_cpu_ring_create_ring_ex(uint32_t ring_id, uint8_t ring_type, uint32_t entries, bdmf_phys_addr_t *ring_head,
    bdmf_phys_addr_t *rw_idx_addr, uint32_t packetSize, RING_CB_FUNC *ringCb, uint32_t ring_prio)
{
    ring_descriptor_t *pDescriptor;
    int32_t is_feed_ring = 0;
    int32_t has_inline_indices;
    uint32_t descr_idx;

    if ((ring_id >= RING_ID_NUM_OF) || ((ring_type == rdpa_ring_data) && (ring_id > DATA_RING_ID_LAST)))
    {
        INTERN_PRINT("ERROR: ring_id %d out of range(%d)", ring_id, RING_ID_NUM_OF);
        return -1;
    }
#if defined(CONFIG_RNR_FEED_RING)
    if (ring_type == rdpa_ring_feed)
        pDescriptor = &host_ring[FEED_RING_ID];
    else
#endif
        pDescriptor = &host_ring[ring_id];

    /* Index of the slot really in use; for a feed ring this is FEED_RING_ID,
     * which is not necessarily equal to ring_id. Used for cleanup below. */
    descr_idx = (uint32_t)(pDescriptor - host_ring);

    if (pDescriptor->num_of_entries)
    {
        INTERN_PRINT("ERROR: ring_id %d already exists! must be deleted first",ring_id);
        return -1;
    }

    if (!entries)
    {
        INTERN_PRINT("ERROR: can't create ring with 0 packets\n");
        return -1;
    }

    /* for cpu_tx ring, ring size has to be power of 2 */
    if ((ring_type == rdpa_ring_cpu_tx) && (((entries - 1) & entries) != 0))
    {
        INTERN_PRINT("ERROR: can't create ring with %d packets, that's not power of 2\n", entries);
        return -1;
    }

    /*set ring parameters*/
    pDescriptor->ring_id = ring_id;
    pDescriptor->num_of_entries = entries;
    pDescriptor->num_of_entries_mask = pDescriptor->num_of_entries - 1;
    pDescriptor->ring_prio = ring_prio;

    /* NOTE(review): size_of_entry keeps its previous value for a ring_type
     * that matches none of the cases below - confirm that cannot happen. */
#if defined(CONFIG_RNR_FEED_RING)
    if (ring_type == rdpa_ring_feed)
        pDescriptor->size_of_entry = sizeof(CPU_FEED_DESCRIPTOR);
    else
#endif
    if (ring_type == rdpa_ring_recycle)
        pDescriptor->size_of_entry = sizeof(CPU_RECYCLE_DESCRIPTOR);
    else if (ring_type == rdpa_ring_cpu_tx)
        pDescriptor->size_of_entry = sizeof(RING_CPU_TX_DESCRIPTOR_STRUCT);
    else if (ring_type == rdpa_ring_data)
        pDescriptor->size_of_entry = sizeof(CPU_RX_DESCRIPTOR_STRUCT);

    INTERN_PRINT("Creating CPU ring for queue number %d with %d packets descriptor=0x%px, size_of_entry %d\n",
        ring_id, entries, pDescriptor, pDescriptor->size_of_entry);

    pDescriptor->buff_cache_cnt = 0;
    pDescriptor->packet_size = packetSize;
    pDescriptor->type = ring_type;
    if (ring_type == rdpa_ring_cpu_tx)
        bdmf_fastlock_init(&pDescriptor->ring_lock);

    pDescriptor->databuf_alloc  = rdp_databuf_alloc;
    pDescriptor->databuf_free   = rdp_databuf_free;
    pDescriptor->data_dump = rdp_packet_dump;

    if (ringCb) /* overwrite if needed */
    {
        pDescriptor->data_dump = ringCb->data_dump;
        pDescriptor->buff_mem_context = ringCb->buff_mem_context;
    }

    /* Local cache of data buffer pointers, refilled in bulk by the databuf_alloc callbacks */
    pDescriptor->buff_cache = (uint8_t **)(CACHED_MALLOC_ATOMIC(sizeof(uint8_t *) * MAX_BUFS_IN_CACHE));
    if (pDescriptor->buff_cache == NULL)
    {
        INTERN_PRINT("failed to allocate memory for cache of data buffers\n");
        /* Nothing else was allocated yet: just release the slot again */
        pDescriptor->num_of_entries = 0;
        return -1;
    }

    /*allocate ring descriptors - must be non-cacheable memory*/
#if defined(CONFIG_RNR_FEED_RING)
    is_feed_ring = (ring_type == rdpa_ring_feed);
#endif
    /* Recycle and feed rings keep their read/write indices right after the descriptors */
    has_inline_indices = (ring_type == rdpa_ring_recycle) || is_feed_ring;

    if (has_inline_indices)
    {
        /* room for both indices, total rounded up to a multiple of 64 bytes */
        pDescriptor->ring_ddr_size = (((pDescriptor->size_of_entry * entries) + RW_INDEX_SIZE * 2 + 63) >> 6) << 6;
    }
    else
    {
        pDescriptor->ring_ddr_size = pDescriptor->size_of_entry * entries;
    }

    pDescriptor->base = rdp_mm_aligned_alloc(pDescriptor->ring_ddr_size, &pDescriptor->base_phys);
    if (pDescriptor->base == NULL)
    {
        INTERN_PRINT("%s:%d:Failed to allocate memory for ring descriptor\n", __FUNCTION__, __LINE__);
        /* Tear down the slot that was actually populated above */
        rdp_cpu_ring_delete_ring(descr_idx);
        return -1;
    }

    if (has_inline_indices)
    {
        pDescriptor->read_idx = (uint16_t *)((uintptr_t)pDescriptor->base + (pDescriptor->size_of_entry * entries));
        pDescriptor->write_idx = (uint16_t *)((uintptr_t)pDescriptor->read_idx + RW_INDEX_SIZE);
        *pDescriptor->read_idx = 0;
        *pDescriptor->write_idx = 0;
        if (rw_idx_addr)
            *rw_idx_addr = pDescriptor->base_phys + (pDescriptor->size_of_entry * entries);
    }

    /* NOTE(review): on failure here the ring stays allocated and marked as
     * existing; whether it is safe to delete a partially initialised ring
     * depends on rdp_cpu_ring_buffers_free() - confirm before adding cleanup. */
    if (rdp_cpu_ring_buffers_init(pDescriptor, ring_id))
        return -1;

    /*feed ring lowest filling level is the lowest number of entries seen during ring lifetime.*/
    pDescriptor->lowest_filling_level = (uint16_t)entries;

    *ring_head = pDescriptor->base_phys;

    INTERN_PRINT("Done initializing Ring %d Base=0x%px num of entries= %d RDD Base=%lx descriptor=0x%px\n",
        ring_id, pDescriptor->base, pDescriptor->num_of_entries, (unsigned long)pDescriptor->base_phys, pDescriptor);

    /* Shell commands are registered once, together with the first ring */
    if (!init_shell)
    {
        if (ring_make_shell_commands())
        {
            INTERN_PRINT("Failed to create ring bdmf shell commands\n");
            return 1;
        }
        init_shell = 1;
    }

    return 0;
}
EXPORT_SYMBOL(rdp_cpu_ring_create_ring_ex);

/**********************************************************************
 *   _rdp_cpu_tx_ring_indices_alloc
 *      Helper: allocate the read and write indices of one cpu_tx ring,
 *      zero them, and reset the matching index entries (and, for the
 *      high/low priority rings, the sync FIFO pointers) in the runner
 *      tables.
 *
 *      pDescriptor - host descriptor that receives read_idx/write_idx
 *      ring_id     - TX_HIGH_PRIO_RING_ID, TX_LOW_PRIO_RING_ID or, on
 *                    CONFIG_CPU_TX_MCORE builds, TX_MCORE_RING_ID
 *
 *      On allocation failure an error is logged, read_idx/write_idx are
 *      set to NULL and the runner tables are left untouched.
 *
 **********************************************************************/
static void _rdp_cpu_tx_ring_indices_alloc(ring_descriptor_t *pDescriptor, ring_id_t ring_id)
{
    uint16_t *buffer_ptr;
    bdmf_phys_addr_t phy_addr = 0;
    uint8_t  idx = 0;

    /* One allocation holds both indices: [0] = read, [1] = write */
    buffer_ptr = rdp_mm_aligned_alloc((sizeof(uint16_t) * 2), &phy_addr);
    if (!buffer_ptr)
    {
        INTERN_PRINT("%s:%d:Failed to allocate cpu_tx ring indices for ring %d\n", __FUNCTION__, __LINE__, (int)ring_id);
        pDescriptor->read_idx = NULL;
        pDescriptor->write_idx = NULL;
        return;
    }
    pDescriptor->read_idx = buffer_ptr;
    pDescriptor->write_idx = &buffer_ptr[1];

    /* indices initialization */
    *pDescriptor->read_idx = 0;
    *pDescriptor->write_idx = 0;

#if defined(CONFIG_CPU_TX_MCORE)
    if (ring_id == TX_MCORE_RING_ID)
    {
        /* The mcore ring has its own indices table and no sync FIFO entry */
        RDD_CPU_TX_RING_INDICES_READ_IDX_WRITE_G(0, RDD_CPU_TX_MCORE_RING_INDICES_VALUES_TABLE_ADDRESS_ARR, 0);
        RDD_CPU_TX_RING_INDICES_WRITE_IDX_WRITE_G(0, RDD_CPU_TX_MCORE_RING_INDICES_VALUES_TABLE_ADDRESS_ARR, 0);
        return;
    }
#endif

    /* Table slot: 0 = high priority, 1 = low priority (any other id falls back to 0) */
    if (ring_id == TX_HIGH_PRIO_RING_ID)
        idx = 0;
    else if (ring_id == TX_LOW_PRIO_RING_ID)
        idx = 1;

    RDD_CPU_TX_RING_INDICES_READ_IDX_WRITE_G(0, RDD_CPU_TX_RING_INDICES_VALUES_TABLE_ADDRESS_ARR, idx);
    RDD_CPU_TX_RING_INDICES_WRITE_IDX_WRITE_G(0, RDD_CPU_TX_RING_INDICES_VALUES_TABLE_ADDRESS_ARR, idx);

    /* initiate SYNC_FIFO_TABLE both write and read pointers should point FIFO head*/
    RDD_CPU_TX_SYNC_FIFO_ENTRY_WRITE_PTR_WRITE_G((SYNC_FIFO_ADDRESS + idx * CPU_TX_SYNC_FIFO_ENTRY_BYTE_SIZE),
        RDD_CPU_TX_SYNC_FIFO_TABLE_ADDRESS_ARR, idx);
    RDD_CPU_TX_SYNC_FIFO_ENTRY_READ_PTR_WRITE_G((SYNC_FIFO_ADDRESS + idx * CPU_TX_SYNC_FIFO_ENTRY_BYTE_SIZE),
        RDD_CPU_TX_SYNC_FIFO_TABLE_ADDRESS_ARR, idx);
}

/**********************************************************************
 *    rdp_cpu_tx_rings_indices_alloc
 *      Allocate and initialise the read/write indices of the cpu_tx
 *      rings: TX_HIGH_PRIO_RING_ID, TX_LOW_PRIO_RING_ID and, on
 *      CONFIG_CPU_TX_MCORE builds, TX_MCORE_RING_ID.
 *      The per-ring work is done by _rdp_cpu_tx_ring_indices_alloc().
 *
 **********************************************************************/
void rdp_cpu_tx_rings_indices_alloc(void)
{
    /* Same order as before: high priority, low priority, then mcore */
    _rdp_cpu_tx_ring_indices_alloc(&host_ring[TX_HIGH_PRIO_RING_ID], TX_HIGH_PRIO_RING_ID);
    _rdp_cpu_tx_ring_indices_alloc(&host_ring[TX_LOW_PRIO_RING_ID], TX_LOW_PRIO_RING_ID);
#if defined(CONFIG_CPU_TX_MCORE)
    _rdp_cpu_tx_ring_indices_alloc(&host_ring[TX_MCORE_RING_ID], TX_MCORE_RING_ID);
#endif
}
EXPORT_SYMBOL(rdp_cpu_tx_rings_indices_alloc);

#ifdef CPU_RING_DEBUG
/*
 * Set the dump_enable flag of one ring (debug builds only).
 *
 * queue   - index into host_ring
 * enabled - new value of the ring's dump_enable flag
 */
void rdp_cpu_dump_data_cb(bdmf_index queue, bdmf_boolean enabled)
{
    ring_descriptor_t *ring = &host_ring[queue];

    ring->dump_enable = enabled;
}
EXPORT_SYMBOL(rdp_cpu_dump_data_cb);
#endif
 
/*
 * Debug helper: return the number of entries configured for a ring.
 *
 * ring_idx - index into host_ring
 *
 * Returns num_of_entries of that slot (0 for a ring that does not exist).
 */
int rdp_cpu_ring_get_queue_size(uint32_t ring_idx)
{
    int entries = host_ring[ring_idx].num_of_entries;

    return entries;
}
 
/*
 * Number of entries between the read and the write index of a ring.
 *
 * rd        - ring descriptor; only num_of_entries is used
 * read_idx  - current read index
 * write_idx - current write index
 *
 * When the write index has wrapped around behind the read index, the
 * distance is taken across the end of the ring.
 */
uint32_t rdp_cpu_packets_count(ring_descriptor_t *rd, uint16_t read_idx, uint16_t write_idx)
{
    /* No wrap: plain distance */
    if (read_idx <= write_idx)
        return (uint32_t)(write_idx - read_idx);

    /* Wrapped: tail of the ring after read_idx plus the head up to write_idx */
    return (uint32_t)((rd->num_of_entries - read_idx) + write_idx);
}

/*
 * Debug helper: number of entries currently queued in a ring. Not meant to be
 * called at runtime.
 * TODO: add a mutex to protect against packets being read from another
 * context while the indices are sampled.
 *
 * ring_idx - index into host_ring
 *
 * Returns 0 for a ring with no entries, otherwise the distance between the
 * read and write indices as computed by rdp_cpu_packets_count().
 */
int rdp_cpu_ring_get_queued(uint32_t ring_idx)
{
    ring_descriptor_t *ring = &host_ring[ring_idx];
    uint16_t rd_idx = 0;
    uint16_t wr_idx = 0;

    /* Indices are sampled before the existence check, as the callers expect */
    rdp_cpu_get_read_idx(ring->ring_id, ring->type, &rd_idx);
    rdp_cpu_get_write_idx(ring->ring_id, ring->type, &wr_idx);

    if (ring->num_of_entries != 0)
        return rdp_cpu_packets_count(ring, rd_idx, wr_idx);

    return 0;
}

int	rdp_cpu_ring_flush(uint32_t ring_id)
{
    ring_descriptor_t *pDescriptor = &host_ring[ring_id];
    int rc;
#if defined(CONFIG_RNR_FEED_RING) 
    if (host_ring[ring_id].type != rdpa_ring_feed)
#endif
    {
        rc = rdp_cpu_ring_buffers_free(pDescriptor);
        if (rc)
        {
            INTERN_PRINT("ERROR: failed free ring buffers ring_id %d, err %d\n", ring_id, rc);
            return rc;
        }
    }
    INTERN_PRINT("cpu Ring %d has been flushed\n", ring_id);

    return 0;
}

/*
 * Tell whether a ring has pending entries, judged from the host's shadow
 * copies of the indices only (the real indices are not read).
 *
 * ring_id - index into host_ring
 *
 * Returns 1 when the shadow read and write indices differ, 0 otherwise.
 */
int rdp_cpu_ring_not_empty(uint32_t ring_id)
{
    const uint32_t shadow_rd = host_ring[ring_id].shadow_read_idx;
    const uint32_t shadow_wr = host_ring[ring_id].shadow_write_idx;

    if (shadow_rd == shadow_wr)
        return 0;

    return 1;
}

/*
 * Tell whether a ring is (nearly) full.
 *
 * ring_id - index into host_ring
 *
 * Returns non-zero when fewer than 10 entries remain free, i.e. when
 * num_of_entries minus the currently queued count is below 10.
 */
int rdp_cpu_ring_is_full(uint32_t ring_id)
{
    return ((host_ring[ring_id].num_of_entries - rdp_cpu_ring_get_queued(ring_id)) < 10) ? 1 : 0;
}

/*
 * Read the next available packet from a ring via _ReadPacketFromRing().
 *
 * ring_id - index into host_ring
 * info    - out: filled in by _ReadPacketFromRing()
 *
 * Returns the status of _ReadPacketFromRing(); 0 means a packet was read.
 * On builds without CONFIG_BCM_CACHE_COHERENCY the data cache is invalidated
 * over the packet payload (info->data + info->data_offset, info->size bytes)
 * after a successful read.
 *
 * Note carried over from the original comment: the descriptor is not
 * prefetched into the cache because cache line sizes differ between
 * platforms (16 bytes on 6838, 32 bytes on 68500), and packet descriptors
 * are always accessed uncached.
 */
inline int rdp_cpu_ring_read_packet_refill(uint32_t ring_id, rdpa_cpu_rx_info_t *info)
{
    uint32_t status = _ReadPacketFromRing(&host_ring[ring_id], info);

    if (!status)
    {
#ifndef CONFIG_BCM_CACHE_COHERENCY
        bdmf_dcache_inv((unsigned long)(info->data + info->data_offset), info->size);
#endif
    }

    return status;
}

/*
 * Recycle ring service step: release the host buffer referenced by the
 * descriptor at the ring's shadow read index and advance that index by one.
 *
 * ring_descr - recycle ring descriptor
 *
 * Returns BDMF_ERR_NO_MORE when the shadow read and write indices are equal
 * (nothing to process), BDMF_ERR_OK after one buffer has been released.
 * Only the shadow read index is updated here.
 */
static inline int _rdp_cpu_ring_recycle_free_host_buf(ring_descriptor_t *ring_descr)
{
    volatile CPU_RECYCLE_DESCRIPTOR *cpu_recycle_descr;
    CPU_RECYCLE_DESCRIPTOR rx_desc;
    uintptr_t phys_ptr;
    void *data_ptr;
    uint32_t read_idx = ring_descr->shadow_read_idx;
    uint32_t write_idx =ring_descr->shadow_write_idx;

    /* Ring drained up to the last known write index */
    if (unlikely(read_idx == write_idx))
    {
        return BDMF_ERR_NO_MORE;
    }

    cpu_recycle_descr = &((CPU_RECYCLE_DESCRIPTOR *)ring_descr->base)[read_idx];

#ifdef CONFIG_BCM_CACHE_COHERENCY
    /* A read barrier is required before accessing the descriptor */
    dma_rmb();
#endif

    /* Take a byte-swapped local copy, then rebuild the buffer address from its hi/low parts */
    rx_desc.dword_64[0] = swap4bytes64(cpu_recycle_descr->dword_64[0]);
    SET_ADDR_HIGH_LOW(rx_desc.skb_ptr_hi, rx_desc.skb_ptr_low, phys_ptr);

    data_ptr = (void *)RDD_PHYS_TO_VIRT(phys_ptr);
    /* The two conditional blocks below form one if / else-if / else chain
     * whose arms depend on the build configuration; the final braces are
     * the fallback arm (or the only arm when neither block is compiled). */
#if !defined(RDP_SIM) && defined(CONFIG_RNR_FEED_RING)
    /* Buffer marked as coming from the feed ring: hand it to the recycle callback */
    if (rx_desc.from_feed_ring)
        sysb_recycle_function_cb(PDATA_TO_PFKBUFF(data_ptr, BCM_PKT_HEADROOM));
    else
#endif
#if defined(CONFIG_BCM_FKB_LITE ) && defined(CONFIG_RNR_FEED_RING)
    /* Pointer tagged as an FKB: recycle the FKB itself */
    if (IS_FKBUFF_PTR(data_ptr)) {
        FkBuff_t * fxb = PNBUFF_2_FKBUFF(data_ptr);
        sysb_recycle_function_cb(fxb);
    }
    else
#endif
    {
      /* Anything else is released as a system buffer */
      bdmf_sysb_free(data_ptr);
#if defined(CONFIG_BCM_FKB_LITE)  && 0 /* DO FOLLOWING IF WE DONT WANT RECYCLE FKBLITE */
      if (IS_FKBUFF_PTR(data_ptr))
        atomic_dec(&allocated_buffers);
#endif
    }

    /* Advance the shadow read index, wrapping at the end of the ring */
    ring_descr->shadow_read_idx = (++read_idx) % ring_descr->num_of_entries;

    return BDMF_ERR_OK;
}

/*
 * Release up to "budget" buffers from a recycle ring.
 *
 * ring_id - index into host_ring of the recycle ring
 * budget  - maximum number of descriptors to process
 *
 * The shadow write index is refreshed once up front; processing stops early
 * when the per-descriptor helper reports an error (ring drained). The real
 * read index is then advanced by the number of descriptors processed.
 * Outside the simulator, bottom halves are disabled for the duration.
 *
 * Returns the number of descriptors processed.
 */
int rdp_cpu_ring_recycle_free_host_buf(int ring_id, int budget)
{
    ring_descriptor_t *ring = &host_ring[ring_id];
    int freed = 0;

    /*do not invoke SOFT_IRQTX before freeing budget*/
#ifndef RDP_SIM
    local_bh_disable();
#endif

    /* Update with real value*/
    rdp_cpu_get_write_idx(ring->ring_id, rdpa_ring_recycle, &ring->shadow_write_idx);

    /* The helper is only called while budget remains; a non-zero status ends the loop */
    while (freed < budget)
    {
        if (_rdp_cpu_ring_recycle_free_host_buf(ring))
            break;
        freed++;
    }

    rdp_cpu_inc_read_idx(ring_id, rdpa_ring_recycle, freed);

#ifndef RDP_SIM
    local_bh_enable();
#endif

    return freed;
}

EXPORT_SYMBOL(rdp_cpu_ring_recycle_free_host_buf);
/*
 * Return a buffer to XPM.
 *
 * pdata - virtual address of the buffer
 *
 * The address is translated to a token id with drv_xpm_buffer_virt_to_id();
 * when the translation yields -1 nothing is recycled.
 */
void rdp_recycle_buf_to_xpm(void *pdata)
{
    uint32_t token_id = drv_xpm_buffer_virt_to_id(pdata);

    if (likely(token_id != (uint32_t)-1))
    {
        rdp_buffer_recycle_non_feed(token_id);
    }
}

#if defined(RDP_SIM) && defined(CONFIG_CPU_RX_FROM_XPM)
/*
 * Simulator-only variant: recycle the buffer behind a received packet to XPM
 * and then wake the FPM refill task unconditionally.
 *
 * info - RX info whose data pointer identifies the buffer
 */
void rdp_recycle_buf_to_xpm_sim(rdpa_cpu_rx_info_t *info)
{
    rdp_recycle_buf_to_xpm(PDATA_TO_PFKBUFF(info->data, BCM_PKT_HEADROOM));

    /* wake up refill task anyway */
    rdp_runner_cpu_task_wakeup(get_runner_idx(CPU_RINGS_FPM_REFILL_CORE_NUMBER),
        CPU_RINGS_FPM_REFILL_THREAD_NUMBER);
}
#endif
/*
 * Free an XPM buffer identified by its token id.
 *
 * token_id - token of the buffer to free
 *
 * The token info is looked up first; when it is missing or not marked
 * is_xpm_valid, an error is traced and nothing is freed.
 */
void rdp_buffer_recycle_non_feed(uint32_t token_id)
{
    ddr_token_info_t *token_info = drv_xpm_ddr_token_info_get(token_id);

    if (unlikely(!(token_info && token_info->is_xpm_valid)))
    {
        BDMF_TRACE_ERR("is_xpm_valid == 0, token_id %d\n", token_id);
        return;
    }

    /* pool id should be set in the token info */
    drv_xpm_free_buffer(0, token_id, token_info);
}

EXPORT_SYMBOL(rdp_recycle_buf_to_xpm);
/* Callback Functions */

/*
 * Default data_dump callback: dump a received packet under the label
 * "Queue-<ringId>".
 *
 * ringId - ring the packet was read from; also used in the label
 * info   - RX info of the packet
 *
 * The dump itself is only performed in kernel builds.
 */
void rdp_packet_dump(uint32_t ringId, rdpa_cpu_rx_info_t *info)
{
    /* "Queue-" (6) + up to 10 digits of a 32-bit value + NUL fits in 24 bytes;
     * the previous 10-byte buffer overflowed for ring ids of 1000 and above. */
    char name[24];

    snprintf(name, sizeof(name), "Queue-%u", (unsigned int)ringId);
#ifdef __KERNEL__
    rdpa_cpu_rx_dump_packet(name, rdpa_cpu_host, ringId, info, 0);
#endif
}
EXPORT_SYMBOL(rdp_packet_dump);

/* BPM */
/*
 * Default databuf_alloc callback: hand out one data buffer.
 *
 * pDescriptor - ring whose local buffer cache is used
 *
 * Buffers are popped from the ring's buff_cache. When the cache is empty it
 * is refilled with up to MAX_BUFS_IN_CACHE buffers from
 * bdmf_sysb_databuf_alloc() at the ring's priority.
 *
 * Returns a buffer pointer, or NULL when the refill produced nothing.
 */
void *rdp_databuf_alloc(ring_descriptor_t *pDescriptor)
{
    uint32_t refilled;

    /* Fast path: something is still cached */
    if (likely(pDescriptor->buff_cache_cnt))
        return (void *)(pDescriptor->buff_cache[--pDescriptor->buff_cache_cnt]);

    /* refill the local cache from global pool */
    refilled = bdmf_sysb_databuf_alloc((void **)pDescriptor->buff_cache, MAX_BUFS_IN_CACHE, pDescriptor->ring_prio, 0);
    if (!refilled)
        return NULL;

    pDescriptor->buff_cache_cnt = refilled;
    return (void *)(pDescriptor->buff_cache[--pDescriptor->buff_cache_cnt]);
}
EXPORT_SYMBOL(rdp_databuf_alloc);

/*
 * Default databuf_free callback: return a data buffer through
 * bdmf_sysb_databuf_free().
 *
 * pBuf        - buffer to free
 * context     - forwarded unchanged to bdmf_sysb_databuf_free()
 * pDescriptor - not used by this implementation
 */
void rdp_databuf_free(void *pBuf, uint32_t context, ring_descriptor_t *pDescriptor)
{
    (void)pDescriptor;

    bdmf_sysb_databuf_free(pBuf, context);
}
EXPORT_SYMBOL(rdp_databuf_free);

/* Kmem_Cache */

/*
 * kmem_cache based databuf_alloc callback: hand out one data buffer.
 *
 * pDescriptor - ring whose local buffer cache is used; buff_mem_context must
 *               hold the struct kmem_cache to allocate from
 *
 * Buffers are popped from the ring's buff_cache. When the cache is empty it
 * is refilled with up to MAX_BUFS_IN_CACHE buffers taken from the kmem cache
 * with GFP_ATOMIC; each new buffer is cache-invalidated over packet_size
 * bytes. Refilling stops at the first failed allocation, so only valid
 * pointers are stored and counted.
 *
 * Returns a buffer pointer, or NULL when not a single buffer could be
 * allocated.
 */
void *rdp_databuf_alloc_cache(ring_descriptor_t *pDescriptor)
{
    uint32_t alloc_cnt;

    if (likely(pDescriptor->buff_cache_cnt))
    {
        return (void *)(pDescriptor->buff_cache[--pDescriptor->buff_cache_cnt]);
    }

    /* refill the local cache from global pool */
    for (alloc_cnt = 0; alloc_cnt < MAX_BUFS_IN_CACHE; alloc_cnt++)
    {
        uint8_t *datap;

        /* allocate from kernel directly */
        datap = kmem_cache_alloc((struct kmem_cache *)(pDescriptor->buff_mem_context), GFP_ATOMIC);

        /* Atomic allocations may fail: keep what was obtained so far */
        if (!datap)
            break;

        /* do a cache invalidate of the buffer */
        bdmf_dcache_inv((unsigned long)datap, pDescriptor->packet_size);

        pDescriptor->buff_cache[alloc_cnt] = datap;
    }

    if (!alloc_cnt)
        return NULL;

    pDescriptor->buff_cache_cnt = alloc_cnt;
    return (void *)(pDescriptor->buff_cache[--pDescriptor->buff_cache_cnt]);
}
EXPORT_SYMBOL(rdp_databuf_alloc_cache);

/*
 * kmem_cache based databuf_free callback: return a buffer to the kmem cache
 * stored in the ring's buff_mem_context.
 *
 * pBuf        - buffer to free
 * context     - not used by this implementation
 * pDescriptor - ring whose buff_mem_context identifies the kmem cache
 */
void rdp_databuf_free_cache(void *pBuf, uint32_t context, ring_descriptor_t *pDescriptor)
{
    struct kmem_cache *pool = (struct kmem_cache *)(pDescriptor->buff_mem_context);

    kmem_cache_free(pool, pBuf);
}
EXPORT_SYMBOL(rdp_databuf_free_cache);

/*
 * Synchronise a ring's read index through rdp_cpu_ring_desc_read_idx_sync(),
 * but only when the ring's accum_inc is non-zero.
 *
 * ring_id - index into host_ring
 */
void rdp_cpu_ring_read_idx_ddr_sync(uint32_t ring_id)
{
    ring_descriptor_t *ring = &host_ring[ring_id];

    /* Nothing accumulated, nothing to sync */
    if (!ring->accum_inc)
        return;

    rdp_cpu_ring_desc_read_idx_sync(ring);
}

/*
 * Wake up a runner task.
 *
 * runner_id - runner core to address
 * task_id   - task (thread) on that core to wake
 *
 * Simulator builds go through rdp_cpu_runner_wakeup(); all other builds
 * write the runner's CPU wakeup register.
 */
void rdp_runner_cpu_task_wakeup(uint32_t runner_id, uint32_t task_id)
{
#ifdef RDP_SIM
    rdp_cpu_runner_wakeup(runner_id, task_id);
#else
    ag_drv_rnr_regs_cfg_cpu_wakeup_set(runner_id, task_id);
#endif
}
