/**
example 1 -- isopotentials, insulators
======================================
 * HARDWARE CONNECTIONS
 *  - GPIO 16 ---> VGA Hsync
 *  - GPIO 17 ---> VGA Vsync
 *  - GPIO 18 ---> 330 ohm resistor ---> VGA Red
 *  - GPIO 19 ---> 330 ohm resistor ---> VGA Green
 *  - GPIO 20 ---> 330 ohm resistor ---> VGA Blue
 *  - RP2040 GND ---> VGA GND
 *
 * RESOURCES USED
 *  - PIO state machines 0, 1, and 2 on PIO instance 0
 *  - 4 DMA channels 
 *  - 2 x 153.6 kBytes of RAM (for pixel color data) double buffered
 *
 * Protothreads v1.4
 * Threads:
 * core 0:
 * -- Laplace and Graphics 
 * core 1:
 * -- Serial i/o for restart
 */
// ==========================================
// === VGA graphics library
// ==========================================
#include "vga16_graphics_v3.h"
#include <stdio.h>
#include <stdlib.h> 
#include "pico/float.h"
#include <math.h>
#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "hardware/clocks.h"
#include "hardware/vreg.h"


// ==========================================
// === protothreads globals
// ==========================================
#include "hardware/sync.h"
#include "hardware/timer.h"
#include "pico/multicore.h"
#include "string.h"
// protothreads header
#include "pt_cornell_rp2040_v1_4.h"

// color map -- low to high
// blue - green - yellow - orange - red - pink - white
//char color_map[16] = {0, 4, 5, 6, 7, 1, 2, 3, 11, 10, 9, 8, 12, 13, 14, 15} ;
char color_map[16] = {0, 1, 2, 4, 5, 6, 7,  3, 11, 10, 9, 8, 12, 13, 14, 15} ;

// finite diff grid
// all computed u are positive
// s is a status flag 
// 1==normal initernal point -- update this point
// 0== fixed potential conductor -- dont update, but use value for neighbors
// -1== insulator -- value is not used -- center value is used instead
#define Nx 170
#define Ny 170
// the poterntial
float u[Nx][Ny] ;
// charge sources
float f[Nx][Ny] ;
// grid type `1==solve ; 
// 0==isopotential (Dirichlet boundary); -1==insulator (Neumann boundary)
signed char s[Nx][Ny] ;

// SOR factor omega
// omega ~ 2/(1+sin(pi/N)
// for n>10 sin(pi/N) ~ (pi/N)
// so for n=50 omega = 2/(1+0.062) = 1.88
// n=100 omega = 1.94
//float omega = 1.94f ; //1.88f ;
// n = 150
float omega = 1.8f ;

// restart the computation
int restart_flag = false ;

// number of iterations
int iter_count ;

// total chage per time step
float delta_u, max_du;

// starts a threaad to compute vector fiweld
char draw_field = false ;
// pixel dup size
int sq_size = 2 ;
// cemtering
int x_pos = 320 - Nx;
// actually relax the solution
char relax = true ;
// voltage if dirichlet
float boundary_u = 7.0f ;
// neumann vs dirichlet
char neumann = true ;


// for priunting
 char pstr[50] ;

// ==================================================
// === graphics demo -- RUNNING on core 0
// ==================================================
static PT_THREAD (protothread_graphics(struct pt *pt)) {
    PT_BEGIN(pt);
    static float tempx, tempy ;
    static unsigned int i, j;
    static unsigned int phase ;
    int junk ;

    // Draw some filled rectangles
    fillRect(0, 0, 640, 480, BLACK); // erase
    fillRect(64, 0, 176, 50, BLUE); // blue box
    fillRect(250, 0, 176, 50, YELLOW); // red box
    fillRect(435, 0, 176, 50, GREEN); // green box

    // Write some text
    drawTextTiny8(65, 0, "Raspberry Pi Pico 2", WHITE, BLUE) ;
    drawTextGLCD(65, 10, "Floating Point Poisson demo", WHITE, BLUE) ;
    drawTextGLCD(65, 20, "Bruce Land brl4@cornell.edu", WHITE, BLUE) ;
    drawTextGLCD(65, 30, "Hunter Adams vha3@cornell.edu", WHITE, BLUE) ;
    //
    drawTextGLCD(438, 10, "Protothreads rp2040/2350 1.4", BLACK, GREEN) ;
    drawTextGLCD(438, 20, "Two CPU; clock 300 Mhz", BLACK, GREEN) ;
    drawTextGLCD(438, 30, "vga16_v3 driver", BLACK, GREEN) ;
    drawTextGLCD(438, 40, "Double Buffer off", BLACK, GREEN) ;
    //
    drawTextVGA437(260, 5, "Poisson SOR Solver", BLACK, YELLOW) ;
    //
    sprintf(pstr, "omega %5.3f ",omega);
    drawTextGLCD(270, 30, pstr, BLACK, YELLOW) ;
    // 
    sprintf(pstr, "Grid Size %3d x %3d ", Nx, Ny);
    drawTextGLCD(270, 40, pstr, BLACK, YELLOW) ;

    for (i=0; i<16; i++){
      fillRect(20*i+50, 460, 20, 20, color_map[i] ) ;
    }
    drawTextTiny8(75, 450, "BLACK is insulator; WHITE is I-source", GREEN, BLACK) ;
     
    // dup the static content into both buffers
    //copy_buffer_to_other();

    // set boundary conditions to Neumann insulate
    if (neumann){
      for(i=0; i<Nx; i++){
        s[i][0] = -1 ;
        s[i][Ny-1] = -1 ;
      } 
      for(j=0; j<Ny; j++){
        s[0][j] = -1 ;
        s[Nx-1][j] = -1 ;
      } 
      drawTextGLCD(270, 20, "Neumann boundary      ", BLACK, YELLOW) ;
    // awet to Dirichlet
    } else {
      for(i=0; i<Nx; i++){
        s[i][0] = 0 ;
        s[i][Ny-1] = 0;
        u[i][0] = boundary_u ;
        u[i][Ny-1] = boundary_u;
      } 
      for(j=0; j<Ny; j++){
        s[0][j] = 0;
        s[Nx-1][j] = 0 ;
        u[0][j] = boundary_u;
        u[Nx-1][j] = boundary_u ;
      } 
      //printf("%f", boundary_u) ;
      sprintf(pstr, "Dirichlet boundary %5.3f  ", boundary_u);
      drawTextGLCD(270, 20, pstr, BLACK, YELLOW) ;
    }
    //
    // init interior to compute state
    // and u to 50% of range
    for(int i=1 ;i<Nx-1; i++){
      for (int j=1 ;j<Ny-1; j++){
        s[i][j] = 1 ;
        u[i][j] = 7.0f ;
      }
    }

    // add a couple of horizontal interior conductors (Dirichlet boundary)
    // for(i=(Nx>>2); i<(Nx-(Nx>>2)); i++){
    //   u[i][(Ny>>2)-8] = 14.0f ;
    //   s[i][(Ny>>2)-8] = 0 ;
    //   u[i][(Ny>>2)+8] = 1.0f;
    //   s[i][(Ny>>2)+8] = 0 ;
    // } 

    // add a couple of vwertical interior conductors (Dirichlet boundary)
    // for(j=100; j<150; j++){
    //   u[80][j] = 11.0f ;
    //   s[80][j] = 0 ;
    //   u[90][j] = 3.0f;
    //   s[90][j] = 0 ;
    // } 

    //add a couple of horizontal interior sources
    //MUST sum to zeero!
    for(i=(70); i<100; i++){
      f[i][120] = -1.50f ;
      f[i][130] = 1.50f;
    } 
    //add a dipole interior point sources
    f[50][40] =  5.0f ;
    f[50][80] =  -5.0f ;
    f[85][40] = -5.0f ;
    f[85][80] =  5.0f ;
    f[120][40] =  5.0f ;
    f[120][80] =  -5.0f ;

    // add a small block of insulator
    for(int i=40 ;i<130; i++){
      for (int j=88 ;j<90; j++){
        s[i][j] = -1 ;
      }
    }

    sleep_ms(20) ;
    //
    while(true) {

        // signal from serial thread to restart THIS thread
        if(restart_flag){
          restart_flag = false ;
          iter_count = 0 ;
          delta_u = 0 ;
          max_du = 0 ;
          relax = true ;
          //
          PT_RESTART(pt) ;
        }
        
        // wait for frame sync and inter4ation
        //PT_YIELD_UNTIL(pt, draw_start_signal() && relax);
        PT_YIELD_UNTIL(pt, relax);
        //
        uint64_t start_time = PT_GET_TIME_usec();

        // count until error is low enough
        // sice this is a sum over 10000 cells
        // 1.0 means a per cell absdolute error of 1e-4
        if (max_du > 0.001f) iter_count++ ;
        // accumulater errors
        delta_u = 0 ;
        max_du = 0 ;

        // SOR //
        // red/black ordering 
        // send start to the other core
        PT_FIFO_FLUSH ;
        multicore_fifo_push_blocking(true);

        // pahse 0 means 1 3 5; ohase 1 meeans 2 4 6
         phase = 0 ;
        for(i=0 ;i<Nx; i++){
          phase = (~phase) & 1 ;
          for ( j=phase ;j<Ny; j+=2){
            if (s[i][j]==1){
              float u_old = u[i][j] ;
              // enforce zero gradient for insulator walls
              if(s[i-1][j] == -1) u[i-1][j] = u[i][j] ;
              if(s[i+1][j] == -1) u[i+1][j] = u[i][j] ;
              if(s[i][j-1] == -1) u[i][j-1] = u[i][j] ;
              if(s[i][j+1] == -1) u[i][j+1] = u[i][j] ;
              //
              float u_temp = 0.25f * (u[i+1][j] + u[i-1][j] + u[i][j+1] + u[i][j-1] + f[i][j]);
              u[i][j] = (1 - omega) * u_old + omega * u_temp ; 
              // compute  delta sum
              delta_u = fabs( u[i][j] - u_old) ; /// (u_old + 0.01f);
              max_du = (delta_u > max_du)? delta_u : max_du ;
            }
            // insulators are blacxk, conductors white
            // all other colors are voltages
            // fillRect(60+sq_size*i, 60+sq_size*j, 
            //     sq_size, sq_size, (
            //       s[i][j]==-1)? 0 : (s[i][j]==0)? 15 : color_map[(short)u[i][j]]);
            short c = (s[i][j]==-1)? 0 : (f[i][j]!=0)? 15 : 
                        (u[i][j]<0)? 0 :  (u[i][j]>15)? 15 : color_map[(short)u[i][j]] ;
            drawPixel(x_pos+sq_size*i, 60+sq_size*j, c);
            drawPixel(x_pos+sq_size*i+1, 60+sq_size*j, c);
            drawPixel(x_pos+sq_size*i, 60+sq_size*j+1, c);
            drawPixel(x_pos+sq_size*i+1, 60+sq_size*j+1, c);
          }
          //PT_YIELD(pt) ;
        }

        // wait for core 1
         multicore_fifo_pop_blocking() ;
       
        // interation doc
        sprintf(pstr, "Iteration Time = %5.2f mSec", (float)(PT_GET_TIME_usec()-start_time)/1000.0f);
        drawTextTiny8(390, 440, pstr, GREEN, BLACK) ;
        //
        sprintf(pstr, "Iteration to max error <1e-3:");
        drawTextTiny8(390, 450, pstr, GREEN, BLACK) ;
        //
        sprintf(pstr, "       Count = %3d ", iter_count);
        drawTextTiny8(390, 460, pstr, GREEN, BLACK) ;
        //
        sprintf(pstr, "max element error = %6.6f ", max_du);
        drawTextTiny8(390, 470, pstr, GREEN, BLACK) ;
    }
    PT_END(pt);
} // graphics thread

// ==================================================
// core 1 compute
// ==================================================
static PT_THREAD (protothread_core1_compute(struct pt *pt))
{
  PT_BEGIN(pt);
  static unsigned int i, j ;
  static unsigned int phase ;
  //static int junk ;

  // wait or signsl from core 0
  multicore_fifo_pop_blocking() ;

  phase = 1 ;
        for(i=0 ;i<Nx; i++){
          phase = (~phase) & 1 ;
          for ( j=phase ;j<Ny; j+=2){
            if (s[i][j]==1){
              float u_old = u[i][j] ;
              // enforce zero gradient for insulator walls
              if(s[i-1][j] == -1) u[i-1][j] = u[i][j] ;
              if(s[i+1][j] == -1) u[i+1][j] = u[i][j] ;
              if(s[i][j-1] == -1) u[i][j-1] = u[i][j] ;
              if(s[i][j+1] == -1) u[i][j+1] = u[i][j] ;
              //
              float u_temp = 0.25f * (u[i+1][j] + u[i-1][j] + u[i][j+1] + u[i][j-1] + f[i][j]);
              u[i][j] = (1 - omega) * u_old + omega * u_temp ; 
              // compute  delta sum
              delta_u = fabs( u[i][j] - u_old) ; /// (u_old + 0.01f);
              max_du = (delta_u > max_du)? delta_u : max_du ;
            }
            // insulators are blacxk, conductors white
            // all other colors are voltages
            // fillRect(60+sq_size*i, 60+sq_size*j, 
            //     sq_size, sq_size, (
            //       s[i][j]==-1)? 0 : (s[i][j]==0)? 15 : color_map[(short)u[i][j]]);
            short c = (s[i][j]==-1)? 0 : (f[i][j]!=0)? 15 : 
                        (u[i][j]<0)? 0 :  (u[i][j]>15)? 15 : color_map[(short)u[i][j]] ;
            drawPixel(x_pos+sq_size*i, 60+sq_size*j, c);
            drawPixel(x_pos+sq_size*i+1, 60+sq_size*j, c);
            drawPixel(x_pos+sq_size*i, 60+sq_size*j+1, c);
            drawPixel(x_pos+sq_size*i+1, 60+sq_size*j+1, c);
          }
          //PT_YIELD(pt) ;
        }
        // tell core 0 that core 1 is done
        // send done to the other core
        multicore_fifo_push_blocking(true);

        PT_END(pt);
} // core 1 compute thread

// ==================================================
// === draw field from serial thread
// ==================================================
// compute e-field
void drawField(void)
{
    static int i, j ;
    #define field_step 4

        for(i=field_step ;i<Nx-field_step; i+=field_step){
          for (j=field_step ;j<Ny-field_step; j+=field_step){
            // a computed grid point
            if (s[i][j]==1){
              // get the gradient and fiddle with scaling
              float ux =30.0f*( u[i+1][j] - u[i-1][j] );
              float uy = 30.0f *(u[i][j+1] - u[i][j-1] );
              float mag = ux*ux + uy*uy;
              if(mag > 25.0f ){
                ux = ux/10.0f ;
                uy = uy/10.0f ;
              }
              drawLine(x_pos+sq_size*i, 60+sq_size*j, 
                x_pos+sq_size*i+(short)ux, 60+sq_size*j+(short)uy, BLACK);
            }
          }
        }
} // 

// ==================================================
// === user's serial input thread on core 0
// ==================================================
// serial_read an serial_write do not block any thread
// except this one
static PT_THREAD (protothread_serial(struct pt *pt))
{
    PT_BEGIN(pt);
      // tokenized strings from command line
      static char *cmd, *arg1, *arg2, *arg3, *arg4;
      static int i, j ;
      //
      while(1) {
        // print prompt
        sprintf(pt_serial_out_buffer, "cmd: ");
        // spawn a thread to do the non-blocking write
        serial_write ;

        // spawn a thread to do the non-blocking serial read
         serial_read ;
         
         //
         // tokenize the input string
        //  --- NOTE that strtok is not re-entrant!
        //  ---   Use the reentrant, thread-safe, version if you need to use it
        //   ---  in moe than one thread!
        cmd = strtok(pt_serial_in_buffer, "  ");
        arg1 = strtok(NULL, "  ");
        arg2 = strtok(NULL, "  ");
        arg3 = strtok(NULL, "  ");
        arg4 = strtok(NULL, "  ");

         // tell the graphics thread to restart itself
         if(strcmp(cmd,"r")==0){
          restart_flag = true ;
          relax = true ;
          printf(" restart\n\r");
         }

         if(strcmp(cmd,"c")==0){
          //restart_flag = true ;
          relax = true ;
          printf("continue\n\r ");
         }
         
        if(strcmp(cmd,"omega")==0){
          sscanf(arg1,"%f", &omega);   
         // sprintf(pstr, "Omega = %5.3f  ", omega);
          //drawTextTiny8(390, 430, pstr, GREEN, BLACK) ;
         //sprintf(pstr, "omega %5.3f ",omega);
        //drawTextGLCD(270, 30, pstr, BLACK, YELLOW) ;
          restart_flag = true ;
        }

        if(strcmp(cmd,"field")==0){
          //draw_field = true ; 
          relax = false ; 
          drawField() ;
          //restart_flag = true ;
        }

        if(strcmp(cmd,"bound")==0){
          if (strcmp(arg1,"neumann")==0){    
            neumann = true ;
            omega = 1.8 ;
            restart_flag = true ;
          }
        //
          if (strcmp(arg1,"dirichlet")==0){
            sscanf(arg2, "%f", &boundary_u) ;
            if (boundary_u < 0.0f) boundary_u = 0.1f ;
            if (boundary_u > 14.0f) boundary_u = 14.1f ;
            if (floor(boundary_u) == boundary_u) boundary_u += 0.1f ;
            neumann = false ;
            omega = 1.7 ;
            restart_flag = true ;
          }
        }
        // NEVER exit while//
      } // END WHILE(1)
  PT_END(pt);
} // serial thread//

// ========================================
// === core 1 main -- started in main below
// ========================================
void core1_main(){ 
  //
  //  === add threads  ====================
  // for core 1
  
  pt_add_thread(protothread_core1_compute) ;
  //
  // === initalize the scheduler ==========
  pt_schedule_start ;
  // NEVER exits
  // ======================================
}

// ========================================
// === core 0 main ===
// ========================================
int main(){
  // set the clock
  vreg_set_voltage(VREG_VOLTAGE_1_30 );
  set_sys_clock_khz(300000, true); // 171us
  // wait for clopck to settle
  sleep_ms(20);
  // start the serial i/o
  stdio_init_all() ;
  // announce the threader version on system reset
  printf("\n\rProtothreads RP2040/2350 v1.4 two-core\n\r");

  // Initialize the VGA screen
  initVGA() ;
     
  // start core 1 threads
  multicore_reset_core1();
  multicore_launch_core1(&core1_main);

  // === config threads ========================
  // for core 0
  // core 1 needs to start first -- so give it time
  sleep_ms(20);
  pt_add_thread(protothread_graphics);
  pt_add_thread(protothread_serial) ;
  //pt_add_thread(protothread_draw_field);
  
  // === initalize the scheduler ===============
  pt_schedule_start ;
  // NEVER exits
  // ===========================================
} // end main