
 /*
 * This program implements the 
 * Sliding Windowed Infinite Fourier Transform (SWIFT)
 * and the slightly lower noise version, alpha-SWIFT
 *  ---- animate the complex resonator -------
 * 
 * Logan L. Grado, Matthew D. Johnson, and Theoden I. Netoff
 *  Digital Object Identifier 10.1109/MSP.2017.2718039
 *  https://ieeexplore.ieee.org/document/8026592
 * 
 * A bank of digital, complex, resonaters each with the appropriate window
 * copmputes a complete fourier transform at each input sample time.
 * 
 * for a freq of 0.02 :
 * approx Bandwidth      dk time 
 * 0.025 of center freq  dk=10*period
 * 0.05 of center freq   dk=6*period
 * 0.10 of center freq   dk=3*period
 * 0.20 of center freq   dk=1.5*period
 * 0.40 of center freq   dk=.75*period

 * HARDWARE CONNECTIONS
 *  - GPIO 16 ---> VGA Hsync
 *  - GPIO 17 ---> VGA Vsync
 *  - GPIO 18 ---> 470 ohm resistor ---> VGA Green lo-bit |__ both wired to VGA Green 
 *  - GPIO 19 ---> 330 ohm resistor ---> VGA Green hi_bit |   
 *  - GPIO 20 ---> 330 ohm resistor ---> VGA Blue
 *  - GPIO 21 ---> 330 ohm resistor ---> VGA Red
 *  - RP2040 GND ---> VGA GND
 * 
 *  - GPIO 28 ADC input 
 * in this example, input cam from a laptop through a high-pass biased to Vdd/2
 * 
 * -- SPI for DAC
 * #define PIN_CS   5
 * define PIN_SCK  6
 * define PIN_MOSI 7
 * define SPI_PORT spi0
 *
 * RESOURCES USED
 *  - PIO state machines 0, 1, and 2 on PIO instance 0
 *  - DMA channels 0, 1, 2, and 3
 *  - 153.6 kBytes of RAM (for pixel color data)
 *
 */

// ==========================================
// === VGA graphics library
// ==========================================
#include "vga16_graphics_v3.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// pico specific
#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "hardware/adc.h"
#include "hardware/spi.h"

//
//#include "hardware/vreg.h"
//#include "hardware/clocks.h"
//
// ==========================================
// === protothreads globals
// ==========================================
#include "hardware/sync.h"
#include "hardware/timer.h"
#include "pico/multicore.h"
#include "string.h"
// protothreads header
#include "pt_cornell_rp2040_v1_4.h"

// unprotected global to halt display
static int enable_dds = true ;
// restart flat
int restart_graphics = false ;

// ===========================================
// dSP definitions 
// ===========================================
// The Sliding Windowed Infinite Fourier Transform -- with alpha-SWIFT
// Logan L. Grado, Matthew D. Johnson, and Theoden I. Netoff
// Digital Object Identifier 10.1109/MSP.2017.2718039
//file:///home/bruce/Downloads/10078055.pdf

float Fs = 30 ;

// === SWIFT setup
// can be any number, limited by the speed of the cpu
#define N_FREQ  5 //200

// A list of the frequencys to transform
// need not be uniform in omega units
float F[N_FREQ] = {0.01f, 0.015f, 0.02f, 0.03f, 0.05f};
// for bandwidth testing
//float F[N_FREQ] = {0.018f, 0.0195f, 0.02f, 0.022f, 0.024f};
float f_in, delta_f_in ;

#define DDS_freq 2 // bin number

// the complex spectral estimate
// frequencies are normailsed and run 0 to pi
// at 10khz sample rate that is 0 to 5kz
float Xr[N_FREQ], Xi[N_FREQ] ;
// additive components of the alpha-SWIFT
float slow_Xr[N_FREQ], slow_Xi[N_FREQ] ;
float fast_Xr[N_FREQ], fast_Xi[N_FREQ] ;

// and the mag of X
float Fmag[N_FREQ] ;
// and the log mag
float Fmag_log[N_FREQ] ;
// lowpassed mag
float lp_mag[N_FREQ]  ;

// the per-sample complex phase shift at each frequecy
// omega = 2*pi*F/Fs so OHM = cos(omega) + j*sin(omega)
float slow_OHMr[N_FREQ], slow_OHMi[N_FREQ] ;
float fast_OHMr[N_FREQ], fast_OHMi[N_FREQ] ;

// the decay rate derived from number of samples 
// value will be exp(-1/(number of samples))
float dk = 500.0f ; // the raw decay time in samples
float dk_rate[N_FREQ] ;
float fast_dk_rate ;

// ouput test -- the mean compensates for drift
//float test_out, test_out_mean ;

//  dds will be input to thiis version
// to calibrate independent of input source
unsigned int accum1, accum2 ;
unsigned int inc1, inc2 ;
float sine_table[256] ;

// at each sample time:
// and for each different frequency omega (or F)
// Xn(omega) = exp(-1/(number of samples)) * exp(j*omega) * X(n-1)(omega) + new sample

//====================================
// === magnitude approx good to about +/-2%
// see https://en.wikipedia.org/wiki/Alpha_max_plus_beta_min_algorithm
#define min(a,b) (((a)<(b))?(a):(b))
#define max(a,b) (((a)>(b))?(a):(b))
//====================================
float magnitude(float real, float img){
  float mmax, mmin ;
  float c1 = 0.89820f ;
  float c2 = 0.48597f ;
  
  mmin = min(fabsf(real), fabsf(img)) ; 
  mmax = max(fabsf(real), fabsf(img)) ;
  // 
  return max(mmax, ((mmax*c1) + (mmin*c2) )); 
}


// ==================================================
// === graphics demo -- RUNNING on core 0
// ==================================================
static PT_THREAD (protothread_graphics(struct pt *pt)) {
    PT_BEGIN(pt);

    // DMA channel -- needed to check for DMA done
    //static int ADC_data_chan ;
    static char str[50] ;
    static int t ;
    static int draw_pos=150, draw_interval=16 ;
    static float sample ;

    // Draw some filled rectangles
    fillRect(20, 1, 400, 50, LIGHT_BLUE); // blue box
    drawTextVGA437(100, 20, "FFT spectrogram and SWIFT", WHITE, LIGHT_BLUE) ;

    fillRect(435, 1, 200, 50, YELLOW); // green box
    // Write some text
     // Draw some filled rectangles
    fillRect(0, 0, 640, 480, BLACK); // erase
    fillRect(64, 0, 176, 50, BLUE); // blue box
    fillRect(250, 0, 176, 50, YELLOW); // red box
    fillRect(435, 0, 176, 50, GREEN); // green box

    // Write some text
    drawTextTiny8(65, 0, "Raspberry Pi Pico 2", WHITE, BLUE) ;
    drawTextGLCD(65, 10, "SWIFT spectrum", WHITE, BLUE) ;
    drawTextGLCD(65, 20, "Bruce Land brl4@cornell.edu", WHITE, BLUE) ;
    drawTextGLCD(65, 30, "Hunter Adams vha3@cornell.edu", WHITE, BLUE) ;
    //
    drawTextGLCD(438, 10, "Protothreads rp2040/2350 1.4", BLACK, GREEN) ;
    drawTextGLCD(438, 20, "One CPU; clock 150 Mhz", BLACK, GREEN) ;
    drawTextGLCD(438, 30, "vga16_v3 driver", BLACK, GREEN) ;
    drawTextGLCD(438, 40, "Double Buffer 60 Hz", BLACK, GREEN) ;
    //
    drawTextVGA437(260, 5, "SWIFT Animation", BLACK, YELLOW) ;
    // decay time const
    sprintf(str, "Decay time  %3.0f timesteps", dk) ;
    drawTextGLCD(260, 20, str, BLACK, YELLOW) ;
    // DDS frew
    sprintf(str, "Input F %4.3f radian/step", F[DDS_freq] ) ;
    drawTextGLCD(260, 30, str, BLACK, YELLOW) ;
    sprintf(str, "  --%4.0f steps/cycle ", 6.283f/F[DDS_freq]) ;
    drawTextGLCD(260, 40, str, BLACK, YELLOW) ;
    //
   // drawHLine(0,479-200,640, GREEN);
    setCursor(200, 469-200) ;
    setTextColor2(BLACK, YELLOW);
    writeString("SWIFT Animation ");
    
    copy_buffer_to_other() ;

    // === congure the ADC =========
    // ===  and start a buffer fill
    //ADC_setup() ;

    // set up F, decay and phase shift arrays
   // #define freq_spacing 20.0f //20.0f
    #define fast_dk 2.0f // also 5-15 seems ok
    //
    for(int i=0; i<N_FREQ; i++){

      slow_Xr[i]=0 ; slow_Xi[i]=0 ;
      fast_Xr[i]=0 ; fast_Xi[i]=0 ;

      //decay rate will be exp(-1/(number of samples))
       dk_rate[i] = exp(-1.0f/dk);

       // dk_rate sets the bandwidth of a bin bigger is narrrower bandwidth
       // different uses willl require setting this
       // for speech 3.0*Fs/F seems good
       // dk_rate[i] = exp(-1.0f/(3.0f*Fs/F[i])); //10 is better freq resolution
      //
      fast_dk_rate = exp(-1.0f/fast_dk);
      //
      // omega = 2*pi*F/Fs so OHM = cos(omega) + j*sin(omega)
      float omega = F[i] ;
      slow_OHMr[i] = dk_rate[i] * cos(omega) ;
      slow_OHMi[i] = dk_rate[i] * sin(omega); 
      fast_OHMr[i] = fast_dk_rate * cos(omega) ;
      fast_OHMi[i] = fast_dk_rate * sin(omega);      
    }

    // dds F = (omega)*Fs/(2pi)
    inc1 = F[DDS_freq] /6.283f * pow(2,32) ;
    t=0 ;
    draw_pos = 150;

    //
    while(true) {
        
      PT_YIELD_UNTIL(pt, draw_start_signal()); 
      //
      // restaert logic 
      if (restart_graphics){
            // reset the flag
            restart_graphics = false ;       
            // restarts the current thread at PT_BEGIN(pt);
            // as if it has not executed before
            PT_RESTART(pt) ;
        }

      //clearLowFrame(50, BLACK) ;
      clearRect(0, 55, 140, 420, BLACK) ;
      // dont plot every time step
      if ((t & 0x07) == 0) { draw_pos++; }
      // only erase waveforms whn the screen is full
      if (draw_pos>= 635) {
          draw_pos = 150;
          clearLowFrame(50, BLACK) ;
          copy_buffer_to_other();
      }
      // keep time
      t++ ;

    // make the sine wave to send to all filters
    if(enable_dds) {
      accum1 += inc1 ;
      // update main waveform
      //sine_table[accum1>>24] ;
      sample = sine_table[accum1>>24] ; 
    }
    else{sample = 0.0f; }
    //
    // The actual SWIFT calc
    // muilt X times OHM and add input
    // complex product
    //xy	=	(a+ib)(c+id)	
	  // =	(ac-bd)+i(ad+bc).	

    for(int i=0; i<N_FREQ; i++){   //N_FREQ
      float temp ;
      // the slow arm of the swift
      temp = (slow_Xr[i] * slow_OHMr[i] - slow_Xi[i] * slow_OHMi[i]) + sample ; //-
      slow_Xi[i] = (slow_Xr[i] * slow_OHMi[i] + slow_Xi[i] * slow_OHMr[i]) ;//-
      slow_Xr[i] = temp ;
      //
      // the fast arm of the swift
      temp = (fast_Xr[i] * fast_OHMr[i] - fast_Xi[i] * fast_OHMi[i]) + sample ; //-
      fast_Xi[i] = (fast_Xr[i] * fast_OHMi[i] + fast_Xi[i] * fast_OHMr[i]) ;//-
      fast_Xr[i] = temp ;
      //
      // combine the two to make X
      Xr[i] = slow_Xr[i] - fast_Xr[i] ;
      Xi[i] = slow_Xi[i] - fast_Xi[i] ;
    }
       
    // print the time step
    sprintf(str, "time step %d    ", t) ;
    drawTextTiny8(240, 440, str, WHITE, BLACK) ;

    // draw input real, and complex resonator
    // draw time course
    #define Xscale 0.1f
    for(int i=0; i<N_FREQ; i++){
      int y_offset = 100 + 75 * i ;
      // reference circle
      drawCircle(100, y_offset, 30, GREEN);
      // resonator vector
      drawLine(100, y_offset, 100+(int)(Xr[i]*Xscale), y_offset+(int)(Xi[i]*Xscale), WHITE );
      drawLine(100, y_offset+1, 100+(int)(Xr[i]*Xscale), y_offset+1+(int)(Xi[i]*Xscale), WHITE );
      fillCircle(100+(int)(Xr[i]*Xscale), y_offset+(int)(Xi[i]*Xscale), 2, WHITE) ;
      // input
      drawLine(100, y_offset, 100+(int)(sample*25.0f), y_offset, YELLOW) ;
      drawLine(100, y_offset+1, 100+(int)(sample*25.0f), y_offset+1, YELLOW) ;
      // freq annotation 
      sprintf(str, "F=%5.4f", F[i]) ;
      drawTextTiny8(140, y_offset-30, str, (i==DDS_freq)? CYAN:WHITE, BLACK) ;
      // magnitude annotation
      lp_mag[i] = 0.99f * lp_mag[i] + 0.01f * magnitude(Xr[i], Xi[i]);
      sprintf(str, "mag=%5.0f", lp_mag[i] ) ;
      drawTextTiny8(140, y_offset-20, str, WHITE, BLACK) ;    
      // the waveforms
      drawPixel(draw_pos, y_offset+(int)(Xr[i]*Xscale), GREEN);
      
    }
   
    //
   } 

   PT_END(pt);
} // graphics thread


// ==================================================
// === user's serial input thread on core 1
// ==================================================
// serial_read an serial_write do not block any thread
// except this one
static PT_THREAD (protothread_serial(struct pt *pt))
{
    PT_BEGIN(pt);
      
      static char *cmd, *arg1, *arg2, *arg3, *arg4;

      while(1) {
        // prompt the user for input
        sprintf(pt_serial_out_buffer, "cmd> ") ;
        // spawn a thread to do the non-blocking serial write
        // to print the pt_serial_out_buffer string
        // this output routine YIELDs between each character printed.
        serial_write ; 
        // wait for user
        serial_read ;
        // tokenize the input string
        //  --- NOTE that strtok is not re-entrant!
        //  ---   Use the reentrant, thread-safe, version if you need to use it
        //   ---  in moe than one thread!
        cmd = strtok(pt_serial_in_buffer, "  ");
        arg1 = strtok(NULL, "  ");
        arg2 = strtok(NULL, "  ");
        arg3 = strtok(NULL, "  ");
        arg4 = strtok(NULL, "  ");

        //printf("%s\r\n", cmd) ;
        sleep_ms(10);

        // blink on time
        if(strcmp(cmd,"ddson")==0){
          // start sdds at zero amp
          accum1 = 0 ;
          enable_dds = true ;
        }
        //
        else if(strcmp(cmd,"ddsoff")==0){
          enable_dds = false ;
        }
        //
        else if(strcmp(cmd,"dk")==0){
          sscanf(arg1,"%f", &dk);  
          restart_graphics = true ;
        }
        //
        else if(strcmp(cmd,"f")==0){
          sscanf(arg1,"%f", &f_in); 
          sscanf(arg2,"%f", &delta_f_in);  
            F[0] = f_in - 2.0f * delta_f_in ; 
            F[1] = f_in - 1.0f * delta_f_in ; 
            F[2] = f_in - 0.0f * delta_f_in ; 
            F[3] = f_in + 1.0f * delta_f_in ; 
            F[4] = f_in + 2.0f * delta_f_in ; 
          restart_graphics = true ;
        }
        //
        // restart graphics thread
        else if(strcmp(cmd,"restart")==0){
          restart_graphics = true ;  
        }

        else{
          printf("Huh?\n\r") ;
        }

        // NEVER exit while
      } // END WHILE(1)
  PT_END(pt);
} // serial thread


// ========================================
// === core 1 main -- started in main below
// ========================================
void core1_main(){ 
  //
  //  === add threads  ====================
  // for core 1
 // pt_add_thread(protothread_serial) ;
  //pt_add_thread(protothread_graphics);
  //
  // === initalize the scheduler ==========
  pt_schedule_start ;
  // NEVER exits
  // ======================================
}

// ========================================
// === core 0 main
// ========================================
int main(){
  // set the clock
  //set_sys_clock_khz(250000, true); // 171us
  // start the serial i/o
  stdio_init_all() ;
  // announce the threader version on system reset
  printf("\n\rProtothreads RP2040/2350 v1.4 two-core\n\r");

  // Initialize the VGA screen
  initVGA() ;

  // start core 1 threads
  multicore_reset_core1();
  multicore_launch_core1(&core1_main);

  // dds table for testing
  for (int i=0; i<256; i++) {
    // sine table is in natural +1/-1 range
    sine_table[i] =  1.0f*(sin(2*3.1416*(float)i/256)) ;
  }

  // === config threads ========================
  // for core 0
  pt_add_thread(protothread_graphics);
  pt_add_thread(protothread_serial);
  //
  // === initalize the scheduler ===============
  pt_schedule_start ;
  // NEVER exits
  // ===========================================
} // end main