#include <soc.h>
#include <dram/memcntlr.h>
#include <util.h>

extern mc_info_t meminfo;

/*
 * Working storage shared by the calibration routines in this file.
 *   init_*  : starting PI codes used as the origin of a scan
 *   raw_*   : pass/fail bitmaps produced by a scan (bit n set = step n passed)
 *   res_*   : the setting finally chosen from the corresponding raw map
 * For the two-element raw_*_pi arrays, dpi_pi_scan() stores the positive-direction
 * scan in [0] and the negative-direction scan in [1].
 */
typedef struct {
    int init_dck_pi;            // not read by the code visible in this file (its only use is commented out)
    int init_dqs_pi;            // set by dpi_write_leveling(), read by dpi_pi_scan()
    int init_dq_pi;             // read by dpi_pi_scan()
    int init_cs_pi;             // read by dpi_pi_scan()
    u32_t raw_dqs_pi[2];        // [0] is also used by dpi_write_leveling() for its 32-step map
    u32_t raw_dck_pi[2];
    u32_t raw_dq_pi[2];
    u32_t raw_cs_pi[2];
    // RX delay
    u32_t raw_dqs_in_pos[9];    // only indices 0..7 are written by dpi_dq_delay_tap_scan()
    u32_t raw_dqs_in_neg[9];    // only indices 0..7 are written by dpi_dq_delay_tap_scan()
    u32_t raw_dq_dly[8];        // 16-step maps, one per dqN_dly_sel field
    u32_t raw_dm_dly[4];        // 16-step maps, one per dm_dly_sel_N field
    u32_t raw_rd_fifo;          // 32-step map written by dpi_rx_map_scan()
    int res_dck_pi;
    int res_dqs_pi;
    int res_dq_pi;
    int res_cs_pi;
    u32_t res_dqs_in_pos[9];    // only indices 0..7 are written
    u32_t res_dqs_in_neg[9];    // only indices 0..7 are written
    u32_t res_dq_dly[8];
    u32_t res_dm_dly[4];
    u32_t res_rd_dly;           // not referenced by the code visible in this file
} PI_CALI_T;


// Masks applied to the raw PI scan maps in dpi_pi_scan(). All are currently
// "keep every bit"; the values in the trailing comments are earlier settings.
#define TEST_MASK0              (0xFFFFFFFF)//(0x01FFFE00)
#define TEST_MASK1              (0xFFFFFFFF)//(0x007FFE00)
#define TEST_MASK2              (0xFFFFFFFF)//(0x000FFFFF)
#define TEST_MASK3              (0xFFFFFFFF)//(0xFFFFF000)

// Memory test ("ordeal") geometry: the pattern is one 128-byte unit that is
// replicated over ORDEAL_LEN bytes starting at ORDEAL_ADDR.
#define ORDEAL_UNIT_SIZE    (128)       // 128 Byte
#define ORDEAL_UNIT_LEN     (ORDEAL_UNIT_SIZE/4)       // 32 words
#define ORDEAL_ADDR         (0x80001000)
#define ORDEAL_LEN          (0x800)
// Write back and invalidate the whole D-cache through the SoC BIOS hook.
#define _cache_flush()      _soc.bios.dcache_writeback_invalidate_all()

// One unit of test pattern; filled by prb7_pattern_gen(), read by ocp_memory_ordeal().
u32_t pat_ary[ORDEAL_UNIT_LEN] __attribute__ ((aligned(128)));

/*
 * Fill ary[0 .. ORDEAL_UNIT_LEN-1] with the memory test pattern.
 *
 * The source is a 127-bit sequence (the function name suggests PRBS-7) stored
 * MSB-first in seq[]: sequence bit 0 is bit 31 of seq[0]; the 128th stored bit
 * is never used. Every output word is assembled from four byte lanes. Lane k
 * (output bits 8k+7 .. 8k) consumes eight consecutive sequence bits per word,
 * first bit into the lane's MSB, starting at sequence offset 32*k and wrapping
 * from bit 126 back to bit 0.
 *
 * ary must have room for ORDEAL_UNIT_LEN words. Always returns 0.
 */
MEMCNTLR_SECTION
int prb7_pattern_gen(u32_t *ary)
{
    unsigned int seq[4];
    unsigned int cursor[4];     // next sequence bit to consume, per byte lane
    int word, lane, bit;

    seq[0] = 0xfe041851;
    seq[1] = 0xe459d4fa;
    seq[2] = 0x1c49b5bd;
    seq[3] = 0x8d2ee654;

    cursor[0] = 0;  // dq[07:00]
    cursor[1] = 32; // dq[15:08]
    cursor[2] = 64; // dq[23:16]
    cursor[3] = 96; // dq[31:24]

    for (word = 0; word < ORDEAL_UNIT_LEN; word++) {
        unsigned int dq = 0;    // dq[31:00]

        for (lane = 0; lane < 4; lane++) {
            unsigned int lane_byte = 0;

            for (bit = 7; bit >= 0; bit--) {
                unsigned int pos = cursor[lane];
                // The shift count is reduced modulo 32 so it is always in
                // 0..31; shifting by (31 - pos) directly would be a negative
                // shift (undefined behaviour) for pos >= 32.
                unsigned int v = (seq[pos / 32] >> (31 - (pos % 32))) & 0x1u;

                lane_byte |= v << bit;
                cursor[lane] = (126 == pos) ? 0 : (pos + 1);
            }
            // unsigned arithmetic: 0xFF << 24 does not fit in a signed int
            dq |= lane_byte << (8 * lane);
        }
        ary[word] = dq;
    }
    return 0;
}

/*
 * CPU-driven write/read-back memory test.
 *
 * Copies pat_ary (one ORDEAL_UNIT_SIZE-byte unit) repeatedly to memory
 * starting at sAddr, calling _cache_flush() after every unit, then walks the
 * same region from its last word back to its first and compares it with the
 * pattern.
 *
 * sAddr : start address of the region under test
 * len   : region length in bytes; only whole ORDEAL_UNIT_SIZE units are tested,
 *         any remainder is ignored
 * Returns 0 when every word matched, otherwise the OR of all (expected ^ read)
 * differences, i.e. a mask of the data bits that failed at least once.
 */
u32_t ocp_memory_ordeal(u32_t sAddr, u32_t len)
{
    volatile u32_t *dst, *src;
    // Only whole units are written, so only whole units may be verified.
    // Starting the read-back at sAddr+len-4 for a len that is not a multiple
    // of the unit size would compare against memory that was never written.
    const u32_t span = len - (len % ORDEAL_UNIT_SIZE);
    u32_t l;
    u32_t res = 0;

    /* write pass */
    dst = (volatile u32_t *)sAddr;
    src = pat_ary;
    for (l = span; l >= ORDEAL_UNIT_SIZE; l -= ORDEAL_UNIT_SIZE) {
        // assume: cache line size: 4 ways * 32Byte
        UNROLLING_MCPY_128BYTE(src, dst);
        dst += ORDEAL_UNIT_LEN;
        // cache flush
        _cache_flush();
    }

    /* verify pass: last word of the region first, pattern walked backwards */
    dst = (volatile u32_t *)(sAddr + span - 4);
    for (l = span; l >= ORDEAL_UNIT_SIZE; l -= ORDEAL_UNIT_SIZE) {
        for (src = (volatile u32_t *)(pat_ary + ORDEAL_UNIT_LEN - 1);
             (u32_t)src >= (u32_t)pat_ary;
             src--, dst--) {
            res |= *src ^ *dst;
            //printf("DD: dst(0x%08x)=0x%08x, pat(0x%08x)=0x%08x, res=0x%08x\n", dst, *dst, src, *src, res);
        }
    }
    return res;
}

/*
 * Memory test driven by the controller's BSTC engine instead of the CPU.
 *
 * The region [sAddr, sAddr+len) is processed in chunks. For every chunk the
 * command SRAM is loaded with wr_ent_cnt write commands followed by the same
 * number of read commands for the same addresses, the write-data SRAM and the
 * expected-data SRAM (at BSTC_SRAM_RG_BASE) are loaded with an incrementing
 * word pattern seeded from sAddr, and the engine is started with compare
 * enabled.
 *
 * Returns 0 when no chunk reported an error, 1 as soon as BER is non-zero.
 * NOTE(review): the chunk size is fixed by burst and wr_ent_cnt, so a len that
 * is not a multiple of it makes the last chunk run past sAddr+len.
 */
MEMCNTLR_SECTION
u32_t bstc_sequential_test(u32_t sAddr, u32_t len) {
    u32_t con_addr, test_addr;
    u32_t cnt, data_cnt=0;
    const u32_t burst=4;
    u32_t pat;

    // Start from all-zero like cmd below: only some fields are assigned before
    // the whole word is written to BCR, and an uninitialised local would push
    // stack garbage into the remaining bits.
    BCR_T bcr = {.v=0};
    BSTC_CMD_T cmd = {{0}};

    BSTC_INFO_T info = { .col_size = get_col_size(),
                         .bnk_size = get_bnk_size(),
                         .wr_ent_cnt = BSTC_SRAM_CMD_ENTY/2,
                         .rd_ent_cnt = BSTC_SRAM_CMD_ENTY/2};

    printf("DD: (%s) sAddr 0x%x, len 0x%x\n", __FUNCTION__, sAddr, len);
    for (test_addr=sAddr, pat=test_addr; test_addr<(sAddr+len);)
    {
        mc_enable_bstc();

        // set BCR
        bcr.f.rd_ex_cnt = burst*info.rd_ent_cnt*(RXI310_BUS_WIDTH/32);
        bcr.f.at_err_stop = 1;
        bcr.f.dis_msk = 1;
        bcr.f.loop = 0; // loop mode off
        bcr.f.cmp = 1;
        bcr.f.stop = 0;
        BCRrv = bcr.v;

        // set loop count
        RMOD_BCT(loop_cnt, 0);
        con_addr = BSTC_SRAM_CMD_BASE;
        for (cnt=0; cnt<info.wr_ent_cnt; cnt++, con_addr+=0x8) {
            // set CMD_SRAM
            cmd.f.bank = BSTC_VA2BNK(info, test_addr);
            cmd.f.col = BSTC_VA2COL(info, test_addr);
            cmd.f.row = BSTC_VA2ROW(info, test_addr);
            cmd.f.bl = burst;
            cmd.f.cmd = BSTC_WR_CMD;    // write
            BSTC_CMD_WRITE(con_addr, cmd);
            //printf("DD: con_addr 0x%x test_addr 0x%x\n", con_addr, test_addr);

            // set CMD_SRAM, the matching read goes into the second half of the command SRAM
            cmd.f.cmd = BSTC_RD_CMD;    // read
            BSTC_CMD_WRITE(con_addr+info.wr_ent_cnt*8, cmd);
            //printf("DD: con_addr 0x%x test_addr 0x%x\n", con_addr, test_addr);
            test_addr+= burst*(RXI310_BUS_WIDTH/8);
        }

        // prepare WD (write data) and RG (expected read data) with the same pattern
        con_addr = BSTC_SRAM_WD_BASE;
        for (cnt=0; cnt<(info.wr_ent_cnt*burst); cnt++) {
            for (data_cnt=0; data_cnt<BSTC_SRAM_WD_BUSW; data_cnt+=32, con_addr+=4, pat+=4) { // 32-bit
                REG32(con_addr)=pat;
                REG32(con_addr+(BSTC_SRAM_RG_BASE-BSTC_SRAM_WD_BASE))=pat;
                //printf("DD: 0x%x = 0x%x (0x%x)\n", con_addr, pat, REG32(con_addr));
            }
        }

        // kick off BSTC
        printf("II: kick off BSTC WRITE READ, addr=0x%x... ", test_addr);
        RMOD_CCR(btt, 1, init, 0, dpit, 0);

        // busy-wait until btt reads back non-zero
        while (0==RFLD_CCR(btt)){puts("*\b");};
        puts("done\n");

        // disable BSTC
        mc_disable_bstc();

        // check BST and BER
        if (0!=BERrv) {
            BST_T bst = {.v=BSTrv};
            printf("EE: data received=%d, error count=%d, first error addr.=0x%x\n",
                bst.f.rd_in_st ,bst.f.err_cnt, bst.f.err_fst_th);
            printf("EE: error bit 0x%x\n", BERrv);
            return 1;
        }
    }
    return 0;
}

// Memory test used by every scan below. Arguments are ignored; the test always
// covers ORDEAL_LEN bytes at ORDEAL_ADDR. Evaluates to 0 on pass.
#define memory_ordeal(...)      ocp_memory_ordeal(ORDEAL_ADDR, ORDEAL_LEN)
//#define memory_ordeal(...)      bstc_sequential_test(ORDEAL_ADDR, ORDEAL_LEN)

// Write `en` to odt_force_sel and odt_force_sig of all four READ_CTRL_2_x
// registers. The debug message is printed for en==0 as well.
#define RX_ODT_ALWAYS_ON(en)    ({  RMOD_READ_CTRL_2_0(odt_force_sel, en, odt_force_sig, en);\
                                    RMOD_READ_CTRL_2_1(odt_force_sel, en, odt_force_sig, en);\
                                    RMOD_READ_CTRL_2_2(odt_force_sel, en, odt_force_sig, en);\
                                    RMOD_READ_CTRL_2_3(odt_force_sel, en, odt_force_sig, en);\
                                    DBG_PRINTF("RX ODT always on\n"); })

// The APPLY_*_PI macros write the same PI code `v` to every selector of one
// signal group.
// dpi_post_pi_sel0/1: DCK0/1
#define APPLY_DCK_PI(v)     ({  RMOD_PLL_PI0(dpi_post_pi_sel0, v, dpi_post_pi_sel1, v); })

// dpi_post_pi_sel2/3/4/5: DQS0/1/2/3
#define APPLY_DQS_PI(v)     ({  RMOD_PLL_PI0(dpi_post_pi_sel2, v, dpi_post_pi_sel3, v);\
                                RMOD_PLL_PI1(dpi_post_pi_sel4, v, dpi_post_pi_sel5, v); })

// dpi_post_pi_sel6/7/8/9: DQ0/1/2/3
#define APPLY_DQ_PI(v)      ({  RMOD_PLL_PI1(dpi_post_pi_sel6, v);\
                                RMOD_PLL_PI2(dpi_post_pi_sel7, v, dpi_post_pi_sel8, v, dpi_post_pi_sel9, v); })

// dpi_post_pi_sel10/11: CS0/CS1
#define APPLY_CS_PI(v)      ({  RMOD_PLL_PI2(dpi_post_pi_sel10, v);\
                                RMOD_PLL_PI3(dpi_post_pi_sel11, v); })

// The APPLY_*_RSN macros write the same value `v` to every *_rd_str_num field
// of one group in the AFIFO_STR_x registers.
// dqs threshold
#define APPLY_DQS_RSN(v)    ({ RMOD_AFIFO_STR_0(dqs_rd_str_num_0, v, dqs_rd_str_num_1, v,   \
                                                    dqs_rd_str_num_2, v, dqs_rd_str_num_3, v);})

// dq threshold
#define APPLY_DQ_RSN(v)     ({ RMOD_AFIFO_STR_0(dq_rd_str_num_0, v, dq_rd_str_num_1, v,   \
                                                    dq_rd_str_num_2, v, dq_rd_str_num_3, v);})

// cmd threshold
#define APPLY_CMD_RSN(v)    ({ RMOD_AFIFO_STR_1(cmd_rd_str_num, v); })

// rx threshold
#define APPLY_RX_RSN(v)     ({ RMOD_AFIFO_STR_2(rx_rd_str_num_0, v, rx_rd_str_num_1, v,   \
                                                    rx_rd_str_num_2, v, rx_rd_str_num_3, v);})

/*
 * Write leveling: sweep the DQS PI code over 0..31 and pick the code just
 * after the first 0 -> 1 transition of the write-leveling read-back.
 *
 * MR1 'level' is switched on for the sweep and off afterwards, each time
 * followed by mc_rxi310_init(). On FPGA builds the hardware is not touched
 * during the sweep and a fixed map (steps 4..31 pass) is used instead.
 *
 * info : receives the 32-step map in raw_dqs_pi[0] and the chosen code in both
 *        res_dqs_pi and init_dqs_pi.
 * Always returns 0.
 *
 * NOTE(review): when the map contains no 0 -> 1 transition the search loop
 * runs to completion and the chosen code becomes 32, one past the 0..31 range
 * that was swept. That value is still applied; confirm what the PI field does
 * with it.
 */
MEMCNTLR_SECTION
u32_t dpi_write_leveling(PI_CALI_T *info) {
    u32_t __attribute__((unused)) j, res=0, reg=1;
    // turn on MR1
    RMOD_MR1(level, 1);
    mc_rxi310_init();

    /* scan 1st DQS PI */
    printf("II: Write Leveling ... \n");
    for (j=0; j<32; j++) {
    #ifndef PROJECT_ON_FPGA
        APPLY_DQS_PI(j);
        // enable write leveling
        RMOD_WRLVL_CTRL(wrlvl_en, 1, wrlvl_trig, 1);
        // read result; 1u keeps the shift defined for j==31
        reg = WRLVL_RD_DATArv;
        if (0 != reg) res |= 1u<<j;
    #else
        if (j > 3) res |= 1u<<j;
    #endif
    }

    // turn off MR1
    RMOD_MR1(level, 0);
    mc_rxi310_init();

    // check the result: stop at the first j where bit j is 0 and bit j+1 is 1
    for (j=0; j<31; j++) {
        if (0x2==(0x3&(res>>j))) {
            break;
        }
    }

    info->raw_dqs_pi[0]=res;
    info->res_dqs_pi=info->init_dqs_pi=j+1;

    printf("II: result = 0x%08x, dqs_pi = %d\n", info->raw_dqs_pi[0], info->res_dqs_pi);
    /* scan 1st DQS PI end */

    APPLY_DQS_PI(info->res_dqs_pi);
    return 0;
}

// ADD_x_RSN / SUB_x_RSN: read the first *_rd_str_num field of a group, add or
// subtract one, and write the result back to the group. They expand to nothing
// on FPGA builds.
#ifndef PROJECT_ON_FPGA
// DQS
#define ADD_DQS_RSN()           ({ u32_t _num; _num=RFLD_AFIFO_STR_0(dqs_rd_str_num_0)+1; APPLY_DQS_RSN(_num);})
#define SUB_DQS_RSN()           ({ u32_t _num; _num=RFLD_AFIFO_STR_0(dqs_rd_str_num_0)-1; APPLY_DQS_RSN(_num);})
// DQ
#define ADD_DQ_RSN()            ({ u32_t _num; _num=RFLD_AFIFO_STR_0(dq_rd_str_num_0)+1; APPLY_DQ_RSN(_num);})
#define SUB_DQ_RSN()            ({ u32_t _num; _num=RFLD_AFIFO_STR_0(dq_rd_str_num_0)-1; APPLY_DQ_RSN(_num);})
// CS (uses cmd_ex_rd_str_num, not cmd_rd_str_num)
#define ADD_CS_RSN()            ({ u32_t _num; _num=RFLD_AFIFO_STR_1(cmd_ex_rd_str_num)+1; RMOD_AFIFO_STR_1(cmd_ex_rd_str_num, _num);})
#define SUB_CS_RSN()            ({ u32_t _num; _num=RFLD_AFIFO_STR_1(cmd_ex_rd_str_num)-1; RMOD_AFIFO_STR_1(cmd_ex_rd_str_num, _num);})
// CMD

#else
#define ADD_DQS_RSN()
#define SUB_DQS_RSN()
#define ADD_DQ_RSN()
#define SUB_DQ_RSN()
#define ADD_CS_RSN()
#define SUB_CS_RSN()
#endif
// POS_SCAN_CHECK(t, v): evaluates to the next PI code after v when stepping
// upwards. At v >= 31 the code wraps to 0 and ADD_<t>_RSN() is invoked;
// otherwise the result is v+1. t is one of DQS, DQ, CS.
#define POS_SCAN_CHECK(t, v)    ({ int _v=v;\
                                   if(31<=_v) { ADD_##t##_RSN(); _v=0; }\
                                   else { _v+=1; } _v;})
// NEG_SCAN_CHECK(t, v): the downward counterpart. At v <= 0 the code wraps to
// 31 and SUB_<t>_RSN() is invoked; otherwise the result is v-1.
#define NEG_SCAN_CHECK(t, v)    ({ int _v=v;\
                                   if(0>=_v) { SUB_##t##_RSN(); _v=31; }\
                                   else { _v-=1; } _v;})
                                   
/*
 * Locate the pass window in a 32-step scan map.
 *
 * d : bitmap, bit n set = step n passed
 * u : receives the index of the highest set bit (window upper bound)
 * l : receives the index of the lowest set bit (window lower bound)
 * Returns 0 on success. Returns 1, with *u and *l left at 0, when d has no
 * bit set.
 *
 * Cleared bits between the lowest and highest set bit are not treated as a
 * window boundary. A map with a single set bit yields *u == *l.
 */
MEMCNTLR_SECTION
inline u32_t _find_upper_and_lower(u32_t d, int *u, int *l) {
    int i;
    int s=-1, e=-1;
    *u=*l=0;
    for(i=0; i<32; i++) {
        if(0==((d>>i)&0x1)) continue;   // only set bits move the bounds
        if(-1==s) s=i;
        e=i;
    }
    if((-1==s)||(-1==e)) {
        ERR_PRINTF("upper %d, lower %d\n", e, s);
        return 1;
    }

    *u=e, *l=s;
    return 0;
}

/*
 * Scan the DCK, CS, DQS and DQ phase-interpolator (PI) codes with the memory
 * test and derive a centre value for each.
 *
 * Every stage runs a 32-step scan in the positive direction and a 32-step scan
 * in the negative direction. Bit idx of the resulting map is set when
 * memory_ordeal() passed at step idx. The stepping macros wrap the PI code at
 * 0/31 and adjust the matching AFIFO read-start threshold when they do.
 *
 * info : init_dqs_pi, init_dq_pi and init_cs_pi are read as scan origins;
 *        raw_dck_pi, raw_cs_pi, raw_dqs_pi, raw_dq_pi and res_dck_pi,
 *        res_cs_pi, res_dqs_pi, res_dq_pi are written.
 * Returns 0 on success, 1 when the DCK map has no usable window, 2 when the CS
 * map has no usable window.
 *
 * NOTE(review): the two early error returns leave RX ODT forced on.
 * NOTE(review): res_dck_pi and res_cs_pi are computed but never written to the
 * PI registers here; only the DQS and DQ results are applied.
 * NOTE(review): the return value of _find_upper_and_lower() is ignored in the
 * DQS and DQ stages, so an empty map silently yields upper == lower == 0.
 */
MEMCNTLR_SECTION
u32_t dpi_pi_scan(PI_CALI_T *info) {
    // RX ODT always on
#ifndef PROJECT_ON_FPGA
    RX_ODT_ALWAYS_ON(1);
#endif
    u32_t res;
    int idx, I, J, K, L, upper, lower;

    /* scan DCK PI map (positive) */
    INFO_PRINTF("scan DCK PI (positive) \n");

    //I = info->init_dck_pi;
    J = info->init_dqs_pi;
    K = info->init_dq_pi;
    L = info->init_cs_pi;

    // turn on CS use cmd_ex_rd_str_num
#ifndef PROJECT_ON_FPGA
    u32_t reg;
    reg = AFIFO_STR_SELrv | (0x3<<28);
    AFIFO_STR_SELrv = reg;
#endif

    // DQS, DQ and CS are stepped together with DCK so their offsets to the clock stay fixed
    for(idx=0, I=0, res=0; idx<32; idx++, I++) {
        J=POS_SCAN_CHECK(DQS, J);
        K=POS_SCAN_CHECK(DQ, K);
        L=POS_SCAN_CHECK(CS, L);
#ifndef PROJECT_ON_FPGA
        APPLY_DCK_PI(I);
        APPLY_DQS_PI(J);
        APPLY_DQ_PI(K);
        APPLY_CS_PI(L);
#endif
        // Record the step only when the memory test passes. (An additional
        // unconditional set of this bit used to follow, which made the
        // positive DCK map all-ones regardless of the test result.)
        // 1u keeps the shift defined for idx==31.
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dck_pi[0] = res&TEST_MASK0;

    /* scan DCK PI map (negative) */
    INFO_PRINTF("scan DCK PI (negative) \n");
    for(idx=31, I=31, res=0; idx>=0; idx--, I--) {
        J=NEG_SCAN_CHECK(DQS, J);
        K=NEG_SCAN_CHECK(DQ, K);
        L=NEG_SCAN_CHECK(CS, L);
#ifndef PROJECT_ON_FPGA
        APPLY_DCK_PI(I);
        APPLY_DQS_PI(J);
        APPLY_DQ_PI(K);
        APPLY_CS_PI(L);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dck_pi[1] = res&TEST_MASK0;

    // a DCK code is accepted only if it passed in both directions
    res = info->raw_dck_pi[0]&info->raw_dck_pi[1];
    if(0!=_find_upper_and_lower(res, &upper, &lower)) return 1;

    info->res_dck_pi=(upper+lower)/2;
    INFO_PRINTF("res=0x%08x, range is %02d-%02d-%02d\n", res, lower, info->res_dck_pi, upper);

    /* scan CS PI map (positive) */
    INFO_PRINTF("scan CS PI (positive) \n");
    L = info->init_cs_pi;
    for(idx=0, res=0; idx<32; idx++) {
        L=POS_SCAN_CHECK(CS, L);
#ifndef PROJECT_ON_FPGA
        APPLY_CS_PI(L);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_cs_pi[0] = res&TEST_MASK1;

    /* scan CS PI map (negative) */
    INFO_PRINTF("scan CS PI (negative) \n");
    for(idx=31, res=0; idx>=0; idx--) {
        L=NEG_SCAN_CHECK(CS, L);
#ifndef PROJECT_ON_FPGA
        APPLY_CS_PI(L);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_cs_pi[1] = res&TEST_MASK1;

    res = info->raw_cs_pi[0]&info->raw_cs_pi[1];
    if(0!=_find_upper_and_lower(res, &upper, &lower)) return 2;

    // map indices are relative to the scan origin; shift them back to PI codes
    upper+=info->init_cs_pi;
    lower+=info->init_cs_pi;
    info->res_cs_pi=(upper+lower)/2;
    INFO_PRINTF("res=0x%08x, range is %02d-%02d-%02d\n", res, lower, info->res_cs_pi, upper);

    /* scan DQS map (positive) */
    //      [   WR   ]
    // DCK
    // DQS      -->
    INFO_PRINTF("scan DQS map (positive) \n");
#ifndef PROJECT_ON_FPGA
    // read DQS/DQ, CMD, RX FIFO threshold and +1
    u32_t dqs_th = RFLD_AFIFO_STR_0(dqs_rd_str_num_0)+1;
    u32_t dq_th  = RFLD_AFIFO_STR_0(dq_rd_str_num_0) +1;
    u32_t cmd_th = RFLD_AFIFO_STR_1(cmd_rd_str_num)  +1;
    u32_t rx_th  = RFLD_AFIFO_STR_2(rx_rd_str_num_0) +1;

    APPLY_DQS_RSN(dqs_th);
    APPLY_DQ_RSN(dq_th);
    APPLY_CMD_RSN(cmd_th);
    APPLY_RX_RSN(rx_th);
#endif
    // the DQS scan starts from the DCK centre
    J = info->res_dck_pi;
    K = info->init_dq_pi;
    for(idx=0, res=0; idx<32; idx++) {
        J=POS_SCAN_CHECK(DQS, J);
        K=POS_SCAN_CHECK(DQ, K);
#ifndef PROJECT_ON_FPGA
        APPLY_DQS_PI(J);
        APPLY_DQ_PI(K);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dqs_pi[0] = res & TEST_MASK2;

    /* scan DQS map (negative) */
    //      [   WR   ]
    // DCK
    // DQS  <--

    INFO_PRINTF("scan DQS map (negative) \n");
    J = info->res_dck_pi;
    K = info->init_dq_pi;
#ifndef PROJECT_ON_FPGA
    // read DQS/DQ, CMD, RX FIFO threshold and -1
    dqs_th = RFLD_AFIFO_STR_0(dqs_rd_str_num_0)-1;
    dq_th  = RFLD_AFIFO_STR_0(dq_rd_str_num_0) -1;
    cmd_th = RFLD_AFIFO_STR_1(cmd_rd_str_num)  -1;
    rx_th  = RFLD_AFIFO_STR_2(rx_rd_str_num_0) -1;

    APPLY_DQS_RSN(dqs_th);
    APPLY_DQ_RSN(dq_th);
    APPLY_CMD_RSN(cmd_th);
    APPLY_RX_RSN(rx_th);
#endif
    for(idx=31, res=0; idx>=0; idx--) {
        J=NEG_SCAN_CHECK(DQS, J);
        K=NEG_SCAN_CHECK(DQ, K);
#ifndef PROJECT_ON_FPGA
        APPLY_DQS_PI(J);
        APPLY_DQ_PI(K);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dqs_pi[1] = res & TEST_MASK3;

    // check boundary
    _find_upper_and_lower(info->raw_dqs_pi[0], &upper, &lower);
    DBG_PRINTF("res=0x%08x, range is %02d-%02d\n", info->raw_dqs_pi[0], lower+info->res_dck_pi, upper+info->res_dck_pi);
    res = upper+info->res_dck_pi; // remember the positive-side upper bound


    _find_upper_and_lower(info->raw_dqs_pi[1], &upper, &lower);
    DBG_PRINTF("res=0x%08x, range is %02d-%02d\n", info->raw_dqs_pi[1], lower-info->res_dck_pi, upper-info->res_dck_pi);
    lower=lower-info->res_dck_pi;
    upper=res;

    info->res_dqs_pi=(lower+upper)/2;
    // check
    // RDC note (garbled in the original source): during the PI positive/negative
    // scan, check the minimum AFIFO_STR_0/1/2 value (crt_spec); if it is > 1,
    // presumably all values are to be reduced by (min value - 1) -- TODO confirm

    INFO_PRINTF("res=%08x%08x, range is %02d-%02d-%02d\n", info->raw_dqs_pi[0], info->raw_dqs_pi[1], lower, info->res_dqs_pi, upper);
#ifndef PROJECT_ON_FPGA
    // apply DQS PI
    APPLY_DQS_PI(info->res_dqs_pi);
#endif

    /* scan DQ PI map (positive) */
    INFO_PRINTF("scan DQ map (positive) \n");
#ifndef PROJECT_ON_FPGA
    // read DQS/DQ, CMD, RX FIFO threshold and +1
    dqs_th = RFLD_AFIFO_STR_0(dqs_rd_str_num_0)+1;
    dq_th  = RFLD_AFIFO_STR_0(dq_rd_str_num_0) +1;
    cmd_th = RFLD_AFIFO_STR_1(cmd_rd_str_num)  +1;
    rx_th  = RFLD_AFIFO_STR_2(rx_rd_str_num_0) +1;

    APPLY_DQS_RSN(dqs_th);
    APPLY_DQ_RSN(dq_th);
    APPLY_CMD_RSN(cmd_th);
    APPLY_RX_RSN(rx_th);
#endif
    // fix CK PI & DQS PI from scan map, init DQ PI =DQS PI
    K=info->res_dqs_pi;
    for(idx=0, res=0; idx<32; idx++) {
        K=POS_SCAN_CHECK(DQ, K);
#ifndef PROJECT_ON_FPGA
        APPLY_DQ_PI(K);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dq_pi[0] = res & TEST_MASK2;

    /* scan DQ PI map (negative) */
    INFO_PRINTF("scan DQ map (negative) \n");
#ifndef PROJECT_ON_FPGA
    // read DQS/DQ, CMD, RX FIFO threshold and -1
    dqs_th = RFLD_AFIFO_STR_0(dqs_rd_str_num_0)-1;
    dq_th  = RFLD_AFIFO_STR_0(dq_rd_str_num_0) -1;
    cmd_th = RFLD_AFIFO_STR_1(cmd_rd_str_num)  -1;
    rx_th  = RFLD_AFIFO_STR_2(rx_rd_str_num_0) -1;

    APPLY_DQS_RSN(dqs_th);
    APPLY_DQ_RSN(dq_th);
    APPLY_CMD_RSN(cmd_th);
    APPLY_RX_RSN(rx_th);
#endif
    K=info->res_dqs_pi;
    for(idx=31, res=0; idx>=0; idx--) {
        K=NEG_SCAN_CHECK(DQ, K);
#ifndef PROJECT_ON_FPGA
        APPLY_DQ_PI(K);
#endif
        if(0==memory_ordeal())
            res|=1u<<idx;
    }
    info->raw_dq_pi[1] = res & TEST_MASK3;

    // check boundary (the DQ maps are printed here, not the DQS maps)
    _find_upper_and_lower(info->raw_dq_pi[0], &upper, &lower);
    DBG_PRINTF("res=0x%08x, range is %02d-%02d\n", info->raw_dq_pi[0], lower+info->res_dqs_pi, upper+info->res_dqs_pi);
    res = upper+info->res_dqs_pi; // remember the positive-side upper bound


    _find_upper_and_lower(info->raw_dq_pi[1], &upper, &lower);
    DBG_PRINTF("res=0x%08x, range is %02d-%02d\n", info->raw_dq_pi[1], lower-info->res_dqs_pi, upper-info->res_dqs_pi);
    lower=lower-info->res_dqs_pi;
    upper=res;

    info->res_dq_pi=(lower+upper)/2;
    // the middle value of the printed range is the DQ result just computed
    INFO_PRINTF("res=%08x%08x, range is %02d-%02d-%02d\n", info->raw_dq_pi[0], info->raw_dq_pi[1], lower, info->res_dq_pi, upper);
    // check

#ifndef PROJECT_ON_FPGA
    // apply DQ PI
    APPLY_DQ_PI(info->res_dq_pi);
#endif

#ifndef PROJECT_ON_FPGA
    // disable RX ODT always on
    RX_ODT_ALWAYS_ON(0);
#endif
    return 0;
}

/*
 * Sweep the DQS enable timing (tm_dqs_en / tm_dqs_en_ftun) over 0..0x7E with
 * the DQS P/N ODT selections swapped, then restore the ODT selections.
 *
 * For each step the combined index is split into tm_dqs_en (idx>>2) and
 * tm_dqs_en_ftun (idx&3) and written to all four READ_CTRL_0_x registers; the
 * code then polls dqs_int_3 for a bounded number of iterations and clears the
 * DQS interrupt status. No result of the sweep is stored or returned by the
 * visible code. The whole body is compiled out on FPGA builds.
 * Always returns 0.
 */
MEMCNTLR_SECTION
u32_t dpi_dqs_en_scan(void) {

#ifndef PROJECT_ON_FPGA
    RMOD_DPI_CTRL_0(dqsen_mode, 0x0);
    // RX ODT always on
    // NOTE(review): the comment above says "on" but the macro is called with 0,
    // the same value used to disable it at the end of this function -- confirm intent
    RX_ODT_ALWAYS_ON(0);
    
    // save the current DQS P/N ODT selections so they can be restored below
    u32_t dqs_psel_0 = DQS_P_ODT_SEL_0rv;
    u32_t dqs_psel_1 = DQS_P_ODT_SEL_1rv;
    u32_t dqs_nsel_0 = DQS_N_ODT_SEL_0rv;
    u32_t dqs_nsel_1 = DQS_N_ODT_SEL_1rv;

    // unbalance DQS ODT setting, reverse P and N
    DQS_P_ODT_SEL_0rv = dqs_nsel_0;
    DQS_P_ODT_SEL_1rv = dqs_nsel_1;
    DQS_N_ODT_SEL_0rv = dqs_psel_0;
    DQS_N_ODT_SEL_1rv = dqs_psel_1;
    
    u32_t idx, tde, tdef, cnt;
    for(idx=0; idx<0x7F; idx++) {
        tde=idx>>2;     // coarse part
        tdef=idx&0x3;   // fine-tune part
        RMOD_READ_CTRL_0_0(tm_dqs_en, tde, tm_dqs_en_ftun, tdef);
        RMOD_READ_CTRL_0_1(tm_dqs_en, tde, tm_dqs_en_ftun, tdef);
        RMOD_READ_CTRL_0_2(tm_dqs_en, tde, tm_dqs_en_ftun, tdef);
        RMOD_READ_CTRL_0_3(tm_dqs_en, tde, tm_dqs_en_ftun, tdef);
        // check dqs_init, if 0
        // bounded wait: give up once cnt exceeds 0x1000 while dqs_int_3 still reads 1
        cnt=0;
        while(1==RFLD_INT_STATUS_0(dqs_int_3)) {
            if(++cnt>0x1000) break;
        }
        // clear the DQS interrupt flags before the next step
        RMOD_INT_CTRL(write_en_2, 0x1, fw_clr_dqs_int, 0x1);
        RMOD_INT_STATUS_0(dqs_int_3, 0x0, dqs_int_2, 0x0, dqs_int_1, 0x0, dqs_int_0, 0x0);
    }
    
    // restore unbalance DQS ODT setting
    DQS_P_ODT_SEL_0rv = dqs_psel_0;
    DQS_P_ODT_SEL_1rv = dqs_psel_1;
    DQS_N_ODT_SEL_0rv = dqs_nsel_0;
    DQS_N_ODT_SEL_1rv = dqs_nsel_1;    
#endif    

#ifndef PROJECT_ON_FPGA
// disable RX ODT always on
    RX_ODT_ALWAYS_ON(0);
#endif
    return 0;
}

// Set write_en_1 and fw_set_rd_dly in DPI_CTRL_1; issued after every delay
// change in the CHECK_* scans below.
#define SYNC_RD_DLY()               RMOD_DPI_CTRL_1(write_en_1, 0x1, fw_set_rd_dly, 0x1)
// Write value v to field f of registers <r>_0 .. <r>_3.
#define APPLY_DQS_IN_DLY(r, f, v)   ({ RMOD_##r##_0(f, v); RMOD_##r##_1(f, v); RMOD_##r##_2(f, v); RMOD_##r##_3(f, v); })
// Sweep field f over 0..15, run the memory test at each step and evaluate to a
// 16-bit pass map (bit n set = step n passed). Leaves the field at 0xA.
#define CHECK_DQS_IN_DLY(r, f)      ({ u32_t __i, __r;   \
                                       for(__i=0, __r=0; __i<16; __i++) { \
                                           APPLY_DQS_IN_DLY(r, f, __i); \
                                           SYNC_RD_DLY();   \
                                           if(0==memory_ordeal()) __r|=1<<__i; }\
                                       APPLY_DQS_IN_DLY(r, f, 0xA); \
                                       __r; })

// Same as APPLY_DQS_IN_DLY: write value v to field f of registers <r>_0 .. <r>_3.
#define APPLY_DQ_DLY(r, f, v)       ({ RMOD_##r##_0(f, v); RMOD_##r##_1(f, v); RMOD_##r##_2(f, v); RMOD_##r##_3(f, v); })
// Same sweep as CHECK_DQS_IN_DLY, but the field is left at 0x0 afterwards.
#define CHECK_DQ_DLY(r, f)          ({  u32_t __i, __r;   \
                                        for(__i=0, __r=0; __i<16; __i++) { \
                                            APPLY_DQ_DLY(r, f, __i); \
                                            SYNC_RD_DLY();   \
                                            if(0==memory_ordeal()) __r|=1<<__i; }\
                                        APPLY_DQ_DLY(r, f, 0x0); \
                                        __r; })

// Write value v to field f of DQ_DLY_1.
#define APPLY_DM_DLY(f, v)          ({ RMOD_DQ_DLY_1(f, v); })
// Sweep field f of DQ_DLY_1 over 0..15 and evaluate to a 16-bit pass map.
// Leaves the field at 0x0.
#define CHECK_DM_DLY(f)             ({  u32_t __i, __r;   \
                                        for(__i=0, __r=0; __i<16; __i++) { \
                                            RMOD_DQ_DLY_1(f, __i); \
                                            SYNC_RD_DLY();   \
                                            if(0==memory_ordeal()) __r|=1<<__i; }\
                                        APPLY_DM_DLY(f, 0x0); \
                                        __r; })
                                        
/*
 * Pick the centre tap of a 16-step delay scan map.
 *
 * raw_dq : bitmap, bit n set = tap n passed; only bits 0..15 are examined
 * Returns (lowest passing tap + highest passing tap) / 2. A map with a single
 * passing tap returns that tap.
 *
 * When no tap passed, an error is logged and 1 is returned.
 * NOTE(review): that error value cannot be told apart from a legitimate result
 * of tap 1, and the callers in this file apply it without checking.
 */
MEMCNTLR_SECTION
int _find_dq_result(u32_t raw_dq) {
    int i;
    int s=-1, e=-1;
    for(i=0; i<16; i++) {
        if(0==((raw_dq>>i)&0x1)) continue;  // only passing taps move the bounds
        if(-1==s) s=i;
        e=i;
    }
    if((-1==s)||(-1==e)) {
        ERR_PRINTF("upper %d, lower %d\n", e, s);
        return 1;
    }
    return (s+e)/2;
}

/*
 * Per-bit delay tap calibration.
 *
 * Phase 1 sweeps each of the eight positive-edge and eight negative-edge
 * fw_rd_dly_*_sel fields over 16 taps with the memory test, picks the centre
 * of each pass map with _find_dq_result() and writes it back.
 * Phase 2 zeroes the dqs_dly_sel / dqs_oe_dly_sel fields, sweeps the eight
 * dqN_dly_sel fields and the four dm_dly_sel_N fields the same way, and writes
 * back the dqN_dly_sel results.
 *
 * info : raw_dqs_in_pos/neg[0..7], raw_dq_dly[0..7], raw_dm_dly[0..3] and the
 *        matching res_* entries are written.
 * cal_set_mode is saved on entry and restored on exit. The whole body is
 * compiled out on FPGA builds. Always returns 0.
 *
 * NOTE(review): res_dm_dly[] is computed but never written back to the
 * dm_dly_sel_N fields, which stay at the 0x0 left by CHECK_DM_DLY -- confirm
 * whether that is intended.
 */
MEMCNTLR_SECTION
u32_t dpi_dq_delay_tap_scan(PI_CALI_T *info) {
#ifndef PROJECT_ON_FPGA
    // remember cal_set_mode so it can be restored at the end
    u8_t csm = RFLD_DPI_CTRL_0(cal_set_mode);
    
    // make sure  turn off "delay follow dq0"
    RMOD_READ_CTRL_1(rd_dly_follow_dq0, 0x0);
    RMOD_DPI_CTRL_0(fw_set_mode, 0x2, cal_set_mode, 0x3);
    
    // RX delay for each DQ scan
    // each CHECK_DQS_IN_DLY sweeps one field over 16 taps and leaves it at 0xA
    info->raw_dqs_in_pos[0] = CHECK_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_0);
    info->raw_dqs_in_pos[1] = CHECK_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_1);
    info->raw_dqs_in_pos[2] = CHECK_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_2);
    info->raw_dqs_in_pos[3] = CHECK_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_3);
    info->raw_dqs_in_pos[4] = CHECK_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_4);
    info->raw_dqs_in_pos[5] = CHECK_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_5);
    info->raw_dqs_in_pos[6] = CHECK_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_6);
    info->raw_dqs_in_pos[7] = CHECK_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_7);
    info->raw_dqs_in_neg[0] = CHECK_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_0);
    info->raw_dqs_in_neg[1] = CHECK_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_1);
    info->raw_dqs_in_neg[2] = CHECK_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_2);
    info->raw_dqs_in_neg[3] = CHECK_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_3);
    info->raw_dqs_in_neg[4] = CHECK_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_4);
    info->raw_dqs_in_neg[5] = CHECK_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_5);
    info->raw_dqs_in_neg[6] = CHECK_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_6);
    info->raw_dqs_in_neg[7] = CHECK_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_7);
    
    // reduce every pass map to its centre tap
    info->res_dqs_in_pos[0] = _find_dq_result(info->raw_dqs_in_pos[0]);
    info->res_dqs_in_pos[1] = _find_dq_result(info->raw_dqs_in_pos[1]);
    info->res_dqs_in_pos[2] = _find_dq_result(info->raw_dqs_in_pos[2]);
    info->res_dqs_in_pos[3] = _find_dq_result(info->raw_dqs_in_pos[3]);
    info->res_dqs_in_pos[4] = _find_dq_result(info->raw_dqs_in_pos[4]);
    info->res_dqs_in_pos[5] = _find_dq_result(info->raw_dqs_in_pos[5]);
    info->res_dqs_in_pos[6] = _find_dq_result(info->raw_dqs_in_pos[6]);
    info->res_dqs_in_pos[7] = _find_dq_result(info->raw_dqs_in_pos[7]);
    info->res_dqs_in_neg[0] = _find_dq_result(info->raw_dqs_in_neg[0]);
    info->res_dqs_in_neg[1] = _find_dq_result(info->raw_dqs_in_neg[1]);
    info->res_dqs_in_neg[2] = _find_dq_result(info->raw_dqs_in_neg[2]);
    info->res_dqs_in_neg[3] = _find_dq_result(info->raw_dqs_in_neg[3]);
    info->res_dqs_in_neg[4] = _find_dq_result(info->raw_dqs_in_neg[4]);
    info->res_dqs_in_neg[5] = _find_dq_result(info->raw_dqs_in_neg[5]);
    info->res_dqs_in_neg[6] = _find_dq_result(info->raw_dqs_in_neg[6]);
    info->res_dqs_in_neg[7] = _find_dq_result(info->raw_dqs_in_neg[7]);
    
    // write the chosen taps back
    APPLY_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_0, info->res_dqs_in_pos[0]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_1, info->res_dqs_in_pos[1]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_2, info->res_dqs_in_pos[2]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_0, fw_rd_dly_pos_sel_3, info->res_dqs_in_pos[3]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_4, info->res_dqs_in_pos[4]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_5, info->res_dqs_in_pos[5]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_6, info->res_dqs_in_pos[6]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_1, fw_rd_dly_pos_sel_7, info->res_dqs_in_pos[7]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_0, info->res_dqs_in_neg[0]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_1, info->res_dqs_in_neg[1]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_2, info->res_dqs_in_neg[2]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_2, fw_rd_dly_neg_sel_3, info->res_dqs_in_neg[3]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_4, info->res_dqs_in_neg[4]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_5, info->res_dqs_in_neg[5]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_6, info->res_dqs_in_neg[6]);
    APPLY_DQS_IN_DLY(DQS_IN_DLY_3, fw_rd_dly_neg_sel_7, info->res_dqs_in_neg[7]);
    
    // scan TX delay tap
    RMOD_DPI_CTRL_0(fw_set_mode, 0x2, cal_set_mode, 0x3);
    RMOD_DQ_DLY_1(dqs_dly_sel_0, 0x0, dqs_dly_sel_1, 0x0, dqs_dly_sel_2, 0x0, dqs_dly_sel_3, 0x0);
    RMOD_DQ_DLY_2(dqs_oe_dly_sel_0, 0x0, dqs_oe_dly_sel_1, 0x0, dqs_oe_dly_sel_2, 0x0, dqs_oe_dly_sel_3, 0x0);
    
    // each CHECK_DQ_DLY / CHECK_DM_DLY sweeps one field over 16 taps and leaves it at 0x0
    info->raw_dq_dly[0] = CHECK_DQ_DLY(DQ_DLY_0, dq0_dly_sel);
    info->raw_dq_dly[1] = CHECK_DQ_DLY(DQ_DLY_0, dq1_dly_sel);
    info->raw_dq_dly[2] = CHECK_DQ_DLY(DQ_DLY_0, dq2_dly_sel);
    info->raw_dq_dly[3] = CHECK_DQ_DLY(DQ_DLY_0, dq3_dly_sel);
    info->raw_dq_dly[4] = CHECK_DQ_DLY(DQ_DLY_0, dq4_dly_sel);
    info->raw_dq_dly[5] = CHECK_DQ_DLY(DQ_DLY_0, dq5_dly_sel);
    info->raw_dq_dly[6] = CHECK_DQ_DLY(DQ_DLY_0, dq6_dly_sel);
    info->raw_dq_dly[7] = CHECK_DQ_DLY(DQ_DLY_0, dq7_dly_sel);
    
    info->raw_dm_dly[0] = CHECK_DM_DLY(dm_dly_sel_0);
    info->raw_dm_dly[1] = CHECK_DM_DLY(dm_dly_sel_1);
    info->raw_dm_dly[2] = CHECK_DM_DLY(dm_dly_sel_2);
    info->raw_dm_dly[3] = CHECK_DM_DLY(dm_dly_sel_3);
    
    info->res_dq_dly[0] = _find_dq_result(info->raw_dq_dly[0]);
    info->res_dq_dly[1] = _find_dq_result(info->raw_dq_dly[1]);
    info->res_dq_dly[2] = _find_dq_result(info->raw_dq_dly[2]);
    info->res_dq_dly[3] = _find_dq_result(info->raw_dq_dly[3]);
    info->res_dq_dly[4] = _find_dq_result(info->raw_dq_dly[4]);
    info->res_dq_dly[5] = _find_dq_result(info->raw_dq_dly[5]);
    info->res_dq_dly[6] = _find_dq_result(info->raw_dq_dly[6]);
    info->res_dq_dly[7] = _find_dq_result(info->raw_dq_dly[7]);
    
    info->res_dm_dly[0] = _find_dq_result(info->raw_dm_dly[0]);
    info->res_dm_dly[1] = _find_dq_result(info->raw_dm_dly[1]);
    info->res_dm_dly[2] = _find_dq_result(info->raw_dm_dly[2]);
    info->res_dm_dly[3] = _find_dq_result(info->raw_dm_dly[3]);
    
    // only the DQ results are written back; see the NOTE in the header about DM
    APPLY_DQ_DLY(DQ_DLY_0, dq0_dly_sel, info->res_dq_dly[0]);
    APPLY_DQ_DLY(DQ_DLY_0, dq1_dly_sel, info->res_dq_dly[1]);
    APPLY_DQ_DLY(DQ_DLY_0, dq2_dly_sel, info->res_dq_dly[2]);
    APPLY_DQ_DLY(DQ_DLY_0, dq3_dly_sel, info->res_dq_dly[3]);
    APPLY_DQ_DLY(DQ_DLY_0, dq4_dly_sel, info->res_dq_dly[4]);
    APPLY_DQ_DLY(DQ_DLY_0, dq5_dly_sel, info->res_dq_dly[5]);
    APPLY_DQ_DLY(DQ_DLY_0, dq6_dly_sel, info->res_dq_dly[6]);
    APPLY_DQ_DLY(DQ_DLY_0, dq7_dly_sel, info->res_dq_dly[7]);
    
    // restore cal_set_mode
    RMOD_DPI_CTRL_0(cal_set_mode, csm);
#endif
    return 0;
}

// APPLY_TM_ODT_EN_OSD(v): split the 4-bit value v into tm_odt_en_odd (bit 3),
// tm_odt_en_sel (bits 2:1) and tm_odt_en_dly (bit 0) and write the three fields
// to all four READ_CTRL_2_x registers. v is evaluated exactly once.
#define APPLY_TM_ODT_EN_OSD(v)  ({  u32_t _osd=(v); \
                                    u32_t _odd=_osd>>3, _sel=(_osd>>1)&0x3, _dly=_osd&0x1; \
                                    RMOD_READ_CTRL_2_0(tm_odt_en_odd, _odd, tm_odt_en_sel, _sel, tm_odt_en_dly, _dly); \
                                    RMOD_READ_CTRL_2_1(tm_odt_en_odd, _odd, tm_odt_en_sel, _sel, tm_odt_en_dly, _dly); \
                                    RMOD_READ_CTRL_2_2(tm_odt_en_odd, _odd, tm_odt_en_sel, _sel, tm_odt_en_dly, _dly); \
                                    RMOD_READ_CTRL_2_3(tm_odt_en_odd, _odd, tm_odt_en_sel, _sel, tm_odt_en_dly, _dly); })
// APPLY_TM_ODT_EN(v): write the same 32-bit value to all four READ_CTRL_6_x
// registers. Wrapped in a statement expression like the other macros in this
// file so that it behaves as a single statement (e.g. under an unbraced if),
// and v is evaluated exactly once.
#define APPLY_TM_ODT_EN(v)      ({  u32_t _en=(v); \
                                    READ_CTRL_6_0rv=_en; READ_CTRL_6_1rv=_en; \
                                    READ_CTRL_6_2rv=_en; READ_CTRL_6_3rv=_en; })

/*
 * Scan the RX FIFO setting and exercise the RX ODT enable window.
 *
 * Part 1 sweeps tm_rd_fifo over 0..31, runs the memory test at each step and
 * stores the 32-bit pass map in info->raw_rd_fifo.
 * Part 2 turns forced ODT off and steps the READ_CTRL_6_x value (tm_odt_en)
 * together with the tm_odt_en_odd/sel/dly fields through two sequences
 * (steps a-d and e-h), running the memory test after every change. The test
 * results of part 2 are not recorded by this function.
 *
 * All four READ_CTRL_6_x registers are restored to their entry values on exit.
 * The whole body is compiled out on FPGA builds. Always returns 0.
 *
 * NOTE(review): the tm_odt_en_odd/sel/dly fields of READ_CTRL_2_x are left at
 * the last value written (0x4) and are not restored.
 */
MEMCNTLR_SECTION
u32_t dpi_rx_map_scan(PI_CALI_T *info) {
#ifndef PROJECT_ON_FPGA
    // check RX FIFO map
    printf("II: check RX FIFO map\n");
    RMOD_DPI_CTRL_0(fw_set_mode, 0x2);
    u32_t j, res;
    for(j=0, res=0; j<32; j++) {
        RMOD_READ_CTRL_1(tm_rd_fifo, j);
        SYNC_RD_DLY();
        // 1u keeps the shift defined for j==31
        if(0==memory_ordeal()) res|=1u<<j;
    }
    info->raw_rd_fifo = res;


    // check RX ODT map
    printf("II: check RX ODT map\n");

    // ODT NOT always on
    RX_ODT_ALWAYS_ON(0);
    // APPLY_TM_ODT_EN overwrites all four registers, so all four are saved
    u32_t rc6_dv0 = READ_CTRL_6_0rv;
    u32_t rc6_dv1 = READ_CTRL_6_1rv;
    u32_t rc6_dv2 = READ_CTRL_6_2rv;
    u32_t rc6_dv3 = READ_CTRL_6_3rv;
    APPLY_TM_ODT_EN(0xFFFFFFFF);

    u32_t tmp, tm_odt_en;

    for(j=0, res=0; j<32; j++) {
        // ones from bit j up to bit 30; bit 31 is added for steps a/b only
        tmp = (0xFFFFFFFF<<j)&0x7FFFFFFF;
        tm_odt_en = tmp|(0x80000000);
        // step a
        APPLY_TM_ODT_EN_OSD(0x0);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step b
        APPLY_TM_ODT_EN_OSD(0x2);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step c
        tm_odt_en = tmp;
        APPLY_TM_ODT_EN_OSD(0x9);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step d
        APPLY_TM_ODT_EN_OSD(0xB);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();
    }

    tmp = 0xFFFFFFFF;
    for(j=0, res=0; j<32; j++) {
        // step e
        tm_odt_en = tmp;
        APPLY_TM_ODT_EN_OSD(0x0);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step f
        // Shifting a 32-bit value by 32 is undefined, so the last step
        // (all bits shifted out) is spelled out explicitly.
        tm_odt_en = (j<31) ? (tmp>>(j+1)) : 0;
        APPLY_TM_ODT_EN_OSD(0xC);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step g
        APPLY_TM_ODT_EN_OSD(0x8);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();

        // step h
        APPLY_TM_ODT_EN_OSD(0x4);
        APPLY_TM_ODT_EN(tm_odt_en);
        memory_ordeal();
    }
    // restore every register that the scan overwrote
    READ_CTRL_6_0rv = rc6_dv0;
    READ_CTRL_6_1rv = rc6_dv1;
    READ_CTRL_6_2rv = rc6_dv2;
    READ_CTRL_6_3rv = rc6_dv3;
#endif
    return 0;
}

/*
 * "Short" calibration: program fixed DPI values without running any scan.
 *
 * Writes 0x3 to the four READ_CTRL_0_x registers and 0x4 to READ_CTRL_1,
 * forces ODT on in the four READ_CTRL_2_x registers, and loads fixed values
 * into DPI_CTRL_0 and DPI_CTRL_1. Always returns 0.
 */
MEMCNTLR_SECTION
u32_t dpi_static_setting(void) {
    puts("II: do DPI shortK\n");
    READ_CTRL_0_0rv = 0x3;
    READ_CTRL_0_1rv = 0x3;
    READ_CTRL_0_2rv = 0x3;
    READ_CTRL_0_3rv = 0x3;
    // Read FIFO
    READ_CTRL_1rv = 0x4;

    puts("II: ODT force on\n");
    // same register writes as RX_ODT_ALWAYS_ON(1), without its debug print
    RMOD_READ_CTRL_2_0(odt_force_sel, 1, odt_force_sig, 1);
    RMOD_READ_CTRL_2_1(odt_force_sel, 1, odt_force_sig, 1);
    RMOD_READ_CTRL_2_2(odt_force_sel, 1, odt_force_sig, 1);
    RMOD_READ_CTRL_2_3(odt_force_sel, 1, odt_force_sig, 1);
    
    DPI_CTRL_0rv = 0x082e2002;
    DPI_CTRL_1rv = 0x0000000C;
    
    // restore DPI_CTRL_0rv
    // NOTE(review): no code follows the comment above; nothing is restored here
    return 0;
}

// Disabled (compiled out) top-level calibration entry point, kept for reference.
#if 0
/*
 * Run the DPI calibration sequence selected by meminfo.dpi_opt->dpi_long_cali.
 * Long calibration generates the test pattern and runs write leveling, the PI
 * scan and the delay tap scan; the other path detects the DRAM size and runs
 * write leveling and the PI scan with force_rstn asserted. Always returns 0;
 * the return values of the individual steps are ignored.
 */
MEMCNTLR_SECTION
u32_t dpi_calibration(void) {
    PI_CALI_T info;
    memset((char *)&info, 0, sizeof(PI_CALI_T));
    if(1==meminfo.dpi_opt->dpi_long_cali){
        puts("II: do DPI longK\n");
        // the pattern must exist before the first memory_ordeal() call
        prb7_pattern_gen(pat_ary);
        memory_ordeal();
#ifndef PROJECT_ON_FPGA    
        dpi_write_leveling(&info);
        dpi_pi_scan(&info);
        dpi_dq_delay_tap_scan(&info);
#endif
    } else {
        mc_dram_size_detect();
        
        // set force rst_n = 1
        RMOD_CRT_CTL(force_rstn, 1);
        
        dpi_write_leveling(&info);
        dpi_pi_scan(&info);
        
        // set force rst_n = 0
        RMOD_CRT_CTL(force_rstn, 0);
    }
    return 0;
}
#endif
