 /*
 --- DMA machine ---
This is a fetch-execute CPU in which the fetch function is performed by one DMA channel,
which loads DMA control block images from RAM into another DMA channel. The 'program'
that is loaded consists of a carefully crafted series of memory-copy commands which
together act as a general-purpose computer. The design is made easier by several
transport-triggered actions in the DMA subsystem. These include an adder in the
'channel sniffer' and atomic SET/CLEAR/XOR write functions on all SFRs.

# ====================================
# === Register write functions =======
# Each peripheral register block is allocated 4 kB of address space,
# with registers accessed using one of 4 methods, selected by
# address decode:
# Addr + 0x0000 : normal read write access
# Addr + 0x1000 : atomic XOR on write
# Addr + 0x2000 : atomic bitmask set on write
# Addr + 0x3000 : atomic bitmask clear on write

======================================
DMA channels can only get to GPIO through PAD  override registers
//force enable output and output 1
iobank0_hw->io[2].ctrl = 0x3300 ;
// force enable and output zero
iobank0_hw->io[2].ctrl = 0x3200 ;

*/

#include "hardware/gpio.h"
#include "hardware/timer.h"
#include "pico/stdlib.h"
#include "hardware/uart.h"
#include "stdio.h"
#include <string.h>
#include <pico/multicore.h>
#include "hardware/sync.h"
#include "hardware/dma.h"
#include "hardware/gpio.h"
#include "hardware/structs/iobank0.h"


// ==========================================
// === protothreads setup
// ==========================================
// protothreads header
#include "pt_cornell_rp2040_v1.1.h"

// === global thread+DMA communication
// These variables are read and/or written by DMA transfers built in
// DMA_machine(), so they must be volatile: the compiler cannot see the
// DMA accesses.
// constant zero source -- dont change
// (copied into the sniffer data register to clear the accumulator)
volatile int dma_var_0 = 0 ;
// constant one source -- dont change
// (loaded into the sniffer data register before the add test)
volatile int dma_var_1 = 1 ;
// add test: passed through the sniffer, then overwritten with the sum
volatile int dma_var_2 = 0 ;
// or test: dma_var_3 is loaded into the sniffer data register,
// dma_var_4 is written to its atomic-SET alias, result goes to dma_var_5
volatile int dma_var_3 = 6 ;
volatile int dma_var_4 = 3 ;
volatile int dma_var_5 = 0 ;
// mult test: dma_var_6 is passed through the sniffer 4 times and the
// accumulated value is stored in dma_var_7 (overwriting its initial value).
// NOTE(review): dma_var_8 is not referenced by the DMA program visible in
// this file -- confirm whether the product was meant to land here.
volatile int dma_var_6 = 10 ;
volatile int dma_var_7 = 3 ;
volatile int dma_var_8 = 0 ;
// signal from cpu: written by the serial thread, read by the DMA program's
// conditional-skip sequence as a byte offset added to a block address
volatile int dma_flag = 0 ;


// ==========================================
// === DMA machine setup
// ==========================================
//  DMA control block size in words:
//  read address, write address, transfer count, control word
#define length_of_block 4
// capacity of the DMA program, in blocks
#define max_blocks 100
// the main DMA block program list: max_blocks images of length_of_block words
int DMA_blocks[max_blocks * length_of_block];
// base address of DMA block list being defined
int * DMA_blocks_addr = &DMA_blocks[0] ;
// counter for the current block to create
int N = 0;
// /dev/null: write target for transfers whose only purpose is a side effect
int bit_bucket ;

// A macro to assign a sequence label value.
// It just yields the index of the next block to be built,
// e.g.  label1 = branch_label() ;
// Inserting this statement gives a name to a jump target.
#define branch_label() N
// convert a label (block index) to the address of that block image
#define branch_label_to_addr(label) ( DMA_blocks + (label)*length_of_block )
// All address helpers below scale by length_of_block so they stay
// consistent with the array declaration and with build_block().
// address of current block (the next one build_block will fill)
#define current_block_addr (&DMA_blocks[length_of_block*(N)])
// address of the next block
#define next_block_addr (&DMA_blocks[length_of_block*(N+1)])
// address of the block 2 ahead
#define next_block2_addr (&DMA_blocks[length_of_block*(N+2)])
// address of the block 3 ahead
#define next_block3_addr (&DMA_blocks[length_of_block*(N+3)])
// address of the block 4 ahead
#define next_block4_addr (&DMA_blocks[length_of_block*(N+4)])
// address of an arbitrary block; the parameter is deliberately not called N
// so it cannot be confused with the global block counter
#define block_addr(index)    (&DMA_blocks[length_of_block*(index)])

// Macro to build a DMA block.
// inputs: read addr, write addr, count, ctrl word (one int each)
// Stores one control block image in DMA_blocks at the current block
// count N, then increments N:
//   DMA_blocks[N*length_of_block + 0] = read address
//   DMA_blocks[N*length_of_block + 1] = write address
//   DMA_blocks[N*length_of_block + 2] = transfer count
//   DMA_blocks[N*length_of_block + 3] = control word
// Every argument is parenthesized so that an expression argument
// (e.g. ptr + k) is evaluated as a whole before the (int) cast is applied.
// Arguments are expanded before N is incremented, so helpers such as
// next_block_addr used as an argument are relative to the block being built.
// No bounds check is performed: the caller must keep N < max_blocks.
#define build_block(read_addr, write_addr, count, ctrl) \
    do { \
        DMA_blocks[length_of_block*N]     = (int)(read_addr); \
        DMA_blocks[length_of_block*N + 1] = (int)(write_addr); \
        DMA_blocks[length_of_block*N + 2] = (count); \
        DMA_blocks[length_of_block*N + 3] = (ctrl); \
        N++ ; \
    } while(0)

// Macros for defining the DMA control word.
// This duplicates some of the SDK.
// Function-like macros parenthesize their argument so that expression
// arguments (e.g. a | b) are masked as a whole.
//
// data_width is 0==byte 1==short 2==int; placed in bits 2:3
#define DMA_DATA_WIDTH(data_width) (((data_width) & 0x03)<<2)
// give this channel more access if several channels are on
#define DMA_HIGH_PRI  (1<<1)
// turn on the channel
#define DMA_EN  1
// increment (or, if omitted, keep constant) the write and read address;
// a constant address is useful for peripheral write/read
#define DMA_WR_INC  (1<<5)
#define DMA_RD_INC  (1<<4)
// enable this channel for sniffer
#define SNIFF_EN (1<<23)
// turn off channel done IRQ
#define DMA_IRQ_QUIET  (1<<21)
// More trigger info: page 114 (datasheet reference from the original note)
// 0x0 to 0x3a == select DREQ n as TREQ
// 0x3b == Select Timer 0 as TREQ
// 0x3c == Select Timer 1 as TREQ
// 0x3d == Select Timer 2 as TREQ (Optional)
// 0x3e == Select Timer 3 as TREQ (Optional)
// 0x3f == Permanent request, for unpaced transfers.
// bits 15:20 trigger request source
#define DMA_TREQ(trigger_source)  (((trigger_source) & 0x3f)<<15)
// When this channel completes, it will trigger the channel
// indicated by CHAIN_TO. Disable by setting CHAIN_TO = (this channel).
// bits 11:14 next channel #
#define DMA_CHAIN_TO(next_ch) (((next_ch) & 0x0f)<<11)

// === sniffer used as an accumulator ===
// The sniffer can also compute CRC, parity, and bit-inversion;
// sniffer_add is the mode value passed to dma_sniffer_enable() for addition.
#define sniffer_add 0x0f
// Atomic-write aliases of the sniffer data register, written as
// DMA base (0x50000000) + alias offset + register offset (0x438):
//   +0x1000 XOR on write, +0x2000 bitmask set, +0x3000 bitmask clear
#define DMA_SNIFF_DATA_XOR   (0x50000000 + 0x1000 + 0x438)
#define DMA_SNIFF_DATA_SET   (0x50000000 + 0x2000 + 0x438)
#define DMA_SNIFF_DATA_CLR   (0x50000000 + 0x3000 + 0x438)
// logic recipes:
//   OR : load A into the sniffer data, then write B to the SET alias
//   AND: load B into the sniffer data, then write NOT(A) to the CLR alias

// Global constants used as DMA source data by the program built in
// DMA_machine(). They live in RAM so a DMA block can read them by address.
// gpio 2: values written to iobank0_hw->io[2].ctrl (pad override fields)
int pin_on = 0x3300 ; // force output enable and drive 1 (see file header)
int pin_off = 0x3200 ; // force output enable and drive 0 (see file header)
// seven 'on' words followed by one 'off' word.
// NOTE(review): not referenced by the DMA program visible in this file.
int pin_burst[8] = {0x3300, 0x3300, 0x3300, 0x3300, 0x3300, 0x3300, 0x3300, 0x3200} ;
// Address of DMA channel 1's READ_ADDR register, stored as data:
// channel 2 copies this value into channel 0's write_addr to rewind the
// fetch channel's write pointer after each block is loaded.
int DMA1_addr = (DMA_BASE + DMA_CH1_READ_ADDR_OFFSET) ;
// (the value needed is the register's address, not its contents)
int * DMA1_addr_ptr = &DMA1_addr ;
// jump target for flag==0; assigned in DMA_machine() and read by the DMA
// conditional-skip sequence, which adds dma_flag to it in the sniffer
int * jump_zero_addr ;

// Build the DMA 'program' in the DMA_blocks array, configure the three
// channels that execute it, and start the machine.
//
// Roles of the channels, as configured below:
//   DMA0 -- fetch unit / program counter: copies one 4-word block image from
//           DMA_blocks into channel 1's registers, starting at read_addr
//   DMA1 -- execute unit: performs the transfer described by the loaded image,
//           then chains to DMA2
//   DMA2 -- rewinds DMA0's write address to channel 1's read_addr register,
//           then chains to DMA0 so the next block is fetched
//
// Takes no arguments and returns nothing. Side effects: resets N, fills
// DMA_blocks, sets jump_zero_addr, enables the sniffer on channel 1 and
// triggers channel 0 (the machine is running when this returns).
void DMA_machine(void){
    // Control words used by the program. Every block is a single unpaced
    // 32-bit transfer with fixed read/write addresses that chains to DMA2.
    const int ctrl_copy =
        DMA_CHAIN_TO(2) | DMA_TREQ( DREQ_FORCE) | DMA_DATA_WIDTH(DMA_SIZE_32) | DMA_EN ;
    // same, with the transfer also passed through the sniffer
    const int ctrl_sniff = ctrl_copy | SNIFF_EN ;
    // same, with the channel-done IRQ suppressed (used by the jump blocks)
    const int ctrl_jump = ctrl_copy | DMA_IRQ_QUIET ;

    // init block count
    N = 0 ;
    // ================================================
    // define blocks to be executed
    // Build the DMA program
    // ================================================
    // build_block(read_addr, write_addr, count, ctrl)
    // ================================================
    // Each of these DMA control blocks will be loaded by
    // the DMA0 fetch channel into DMA1, then the data will be moved,
    // then the DMA0 channel is fixed up and restarted to fetch the next block

    // TWO pulses on gpio 2
    // =0= set the pin
    build_block(&pin_on, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;
    // =1= clear the pin
    build_block(&pin_off, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;
    // =2= set the pin
    build_block(&pin_on, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;
    // =3= clear the pin
    build_block(&pin_off, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;

    // enable sniffer add operation for DMA channel 1
    // -- done once, here at setup time
    dma_sniffer_enable(1, sniffer_add, true);

    // === add two variables by transport-triggered operation in sniff reg
    // =4= load a var to sniff data reg: dma_hw->sniff_data
    build_block(&dma_var_1, &dma_hw->sniff_data, 1, ctrl_copy) ;
    // =5= pass another var thru the sniffer to the bit_bucket
    build_block(&dma_var_2, &bit_bucket, 1, ctrl_sniff) ;
    // =6= store the sniff data reg back to var_2
    build_block(&dma_hw->sniff_data, &dma_var_2, 1, ctrl_copy) ;

    // === OR two variables by transport-triggered operation in sniff reg
    // =7= load a var to sniff data reg: dma_hw->sniff_data
    build_block(&dma_var_3, &dma_hw->sniff_data, 1, ctrl_copy) ;
    // =8= write another var to the atomic-SET alias of the sniff data reg
    build_block(&dma_var_4, DMA_SNIFF_DATA_SET, 1, ctrl_copy) ;
    // =9= store the sniff data reg to var_5
    build_block(&dma_hw->sniff_data, &dma_var_5, 1, ctrl_copy) ;

    // === mult a variable by a constant by transport-triggered operation
    // in this case, times 4 (the transfer count of block 11);
    // by making the count a variable, you can do general mult
    // =10= clear sniff data reg (without the clear you get a MAC operation)
    build_block(&dma_var_0, &dma_hw->sniff_data, 1, ctrl_copy) ;
    // =11= pass the var thru the sniffer to the bit_bucket 4 times
    build_block(&dma_var_6, &bit_bucket, 4, ctrl_sniff) ;
    // =12= store the sniff data reg to var_7
    build_block(&dma_hw->sniff_data, &dma_var_7, 1, ctrl_copy) ;

    // === conditional skip
    // dma_flag holds a byte offset: (blocks to skip) * 16, because one block
    // image is 4 words of 4 bytes. The serial thread does the scaling.
    // =13= read flag to sniffer
    build_block(&dma_flag, &dma_hw->sniff_data, 1, ctrl_copy) ;
    // =14= form target block address by adding the flag==0 target address.
    // The flag==0 target is the block right after the three-block jump
    // sequence, i.e. two blocks past the one about to be built. Computing it
    // from N keeps the jump correct if blocks are inserted above.
    jump_zero_addr = block_addr(N + 2) ;
    build_block(&jump_zero_addr, &bit_bucket, 1, ctrl_sniff) ;
    // =15= move sniffer data (flag==0 target + offset) to the read addr of
    // DMA0 to force the next fetch from the new location
    build_block(&dma_hw->sniff_data, &dma_hw->ch[0].read_addr, 1, ctrl_jump) ;

    // === TARGET if dma_flag offset selects +0 blocks
    // =16= set pin
    build_block(&pin_on, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;
    // === TARGET if offset selects +1 block
    // =17= set pin
    build_block(&pin_on, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;
    // === TARGET if offset selects +2 blocks
    // =18= clear the pin
    build_block(&pin_off, &iobank0_hw->io[2].ctrl, 1, ctrl_copy) ;

    // === TARGET if offset selects +3 blocks
    // =19= unconditional jump to start of program:
    // push the DMA_blocks[0] address into the program counter (DMA0 read pointer)
    build_block(&DMA_blocks_addr, &dma_hw->ch[0].read_addr, 1, ctrl_jump) ;

    // === END OF DMA PROGRAM ===
    //
    // ======================================================
    // execution machine -- no part of program below here!
    // ======================================================
    // NOTE(review): DMA_blocks and jump_zero_addr are not volatile; consider a
    // compiler/memory barrier before the trigger below so the stores above
    // are guaranteed to be complete when the DMA starts -- confirm.

    // === DMA2 module to reset write address of dma0
    // always set to the address of DMA1's read_addr register
    dma_channel_config c2 = dma_channel_get_default_config(2);
    channel_config_set_transfer_data_size(&c2, DMA_SIZE_32);
    channel_config_set_read_increment(&c2, false);
    channel_config_set_write_increment(&c2, false);
    channel_config_set_irq_quiet(&c2, true);
    channel_config_set_enable(&c2, true);
    channel_config_set_chain_to(&c2, 0) ;
    // unpaced, full speed transfer
    channel_config_set_dreq(&c2,  DREQ_FORCE);
    //
    dma_channel_configure(2, &c2,
        &dma_hw->ch[0].write_addr , // write_addr: reset chan 0 to write to channel 1
        DMA1_addr_ptr ,  // read_addr: variable holding the address of channel 1
        1, // one word per transfer
        false) ; // not started here; triggered by chaining from channel 1

    // === The DMA0 fetch module
    // this is the program counter and fetch unit
    // when this code is executed it starts the DMAcpu machine!
    // NOTE that this module could be paced by a timer or other TREQ
    //    for DDS, or other operations
    dma_channel_config c0 = dma_channel_get_default_config(0);
    channel_config_set_transfer_data_size(&c0, DMA_SIZE_32);
    channel_config_set_read_increment(&c0, true);
    channel_config_set_write_increment(&c0, true);
    channel_config_set_irq_quiet(&c0, true);
    channel_config_set_enable(&c0, true);
    channel_config_set_chain_to(&c0, 1) ;
    // unpaced, full speed transfer
    channel_config_set_dreq(&c0,  DREQ_FORCE);
    //
    dma_channel_configure(0, &c0,
        &dma_hw->ch[1].read_addr , // write_addr: dma channel 1 registers
        DMA_blocks_addr ,  // read_addr: start of the DMA blocks list
        4, // four words per DMA block
        true) ; // triggered to start machine running
}

// ==================================================
// === user's serial input thread
// ==================================================
// serial_read and serial_write do not block any thread
// except this one.
//
// Loop: prompt, read one line, parse an integer skip count 0..3, publish it
// to the DMA program through dma_flag, then print the DMA result variables.
static PT_THREAD (protothread_serial(struct pt *pt))
{
    PT_BEGIN(pt);
      // parsed user input; static to follow this file's convention for
      // protothread locals
      static int flag_in ;
      //
      while(1) {
        // print prompt: the value is the number of blocks to skip
        sprintf(pt_serial_out_buffer, "input 0-3: ");
        // spawn a thread to do the non-blocking write
        serial_write ;

        // spawn a thread to do the non-blocking serial read
        serial_read ;
        // Convert the input string to a number. Parse into a private variable
        // so the DMA program never sees an unscaled or half-updated value.
        // Anything unparsable or outside 0..3 falls back to 0: the flag is
        // added to a block address by the DMA program, so an unchecked value
        // would send the fetch channel outside the program.
        if ((sscanf(pt_serial_in_buffer,"%d ", &flag_in) != 1) ||
            (flag_in < 0) || (flag_in > 3)) {
            flag_in = 0 ;
        }
        // scale to a byte offset (one DMA block image is 16 bytes) and
        // publish with a single store
        dma_flag = flag_in * 16 ;

        printf("%d %d %d\n\r", dma_var_2, dma_var_5, dma_var_7) ;

        // NEVER exit while
      } // END WHILE(1)
  PT_END(pt);
} // serial thread

// ==================================================
// === toggle25 thread
// ==================================================
// Heartbeat: configures gpio 25 (the on-board LED) as an output, turns it
// on, then inverts it after every 0.1 second yield.
static PT_THREAD (protothread_toggle25(struct pt *pt))
{
    PT_BEGIN(pt);
    // value last written to the LED; static so it persists between schedules
    static bool led_is_on = false ;

    // one-time pin setup, LED initially on
    gpio_init(25) ;
    gpio_set_dir(25, GPIO_OUT) ;
    gpio_put(25, true);

    for (;;) {
        // give the other threads 0.1 second
        PT_YIELD_usec(100000) ;
        // flip the recorded state and drive the pin with it
        led_is_on = !led_is_on ;
        gpio_put(25, led_is_on);
        // NEVER exit loop
    }
  PT_END(pt);
} // blink thread


// ========================================
// === core 1 main
// ========================================
// Entry point intended for core 1: adds no threads of its own and just
// starts the protothreads scheduler.
// NOTE(review): the launch call in main() is currently commented out, so
// this function is not started by the code visible in this file.
// Declared with (void) so the definition is a proper prototype.
void core1_main(void){

  //  === add threads  ====================
  // for core 1 (none at present)
  //
  // === initialize the scheduler ==========
  pt_schedule_start ;
  // NEVER exits
  // ======================================
}

// ========================================
// === core 0 main
// ========================================
// Brings up stdio, builds and starts the DMA machine, registers the serial
// and LED threads, then hands control to the protothreads scheduler.
// Declared with (void) so the definition is a proper prototype.
int main(void){
  // start the serial i/o
  stdio_init_all() ;
  // announce the threader version on system reset
  printf("\n\rProtothreads RP2040 v1.1 two-core\n\r");

  // build the DMA program and trigger the fetch channel;
  // the DMA machine is running from here on
  DMA_machine() ;

  // start core 1 threads (currently disabled)
  //multicore_reset_core1();
  //multicore_launch_core1(&core1_main);

  // === config threads ========================
  // for core 0
  pt_add_thread(protothread_serial);
  pt_add_thread(protothread_toggle25);
  //
  // === initialize the scheduler ===============
  pt_schedule_start ;
  // NEVER exits
  // ===========================================
} // end main
///////////
// end ////
///////////
///////////
// end ////
///////////