
 /*
 * This program implements the 
 * Sliding Windowed Infinite Fourier Transform (SWIFT)
 * and the slightly lower noise version, alpha-SWIFT
 * 
 * Logan L. Grado, Matthew D. Johnson, and Theoden I. Netoff
 *  Digital Object Identifier 10.1109/MSP.2017.2718039
 *  https://ieeexplore.ieee.org/document/8026592
 * 
 * A bank of digital, complex, resonaters each with the appropriate window
 * copmputes a complete fourier transform at each input sample time.
 * Sample rate in 10 KHz.

 * HARDWARE CONNECTIONS
 *  - GPIO 16 ---> VGA Hsync
 *  - GPIO 17 ---> VGA Vsync
 *  - GPIO 18 ---> 470 ohm resistor ---> VGA Green lo-bit |__ both wired to VGA Green 
 *  - GPIO 19 ---> 330 ohm resistor ---> VGA Green hi_bit |   
 *  - GPIO 20 ---> 330 ohm resistor ---> VGA Blue
 *  - GPIO 21 ---> 330 ohm resistor ---> VGA Red
 *  - RP2040 GND ---> VGA GND
 * 
 *  - GPIO 26 ADC input 
 * 
 * -- SPI for DAC
 * #define PIN_CS   5
 * define PIN_SCK  6
 * define PIN_MOSI 7
 * define SPI_PORT spi0
 *
 * RESOURCES USED
 *  - PIO state machines 0, 1, and 2 on PIO instance 0
 *  - DMA channels 0, 1, 2, and 3
 *  - 153.6 kBytes of RAM (for pixel color data)
 *
 */

// ==========================================
// === VGA graphics library
// ==========================================
#include "vga16_graphics_v3.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// pico specific
#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "hardware/adc.h"
#include "hardware/spi.h"

//
//#include "hardware/vreg.h"
//#include "hardware/clocks.h"
//
// ==========================================
// === protothreads globals
// ==========================================
#include "hardware/sync.h"
#include "hardware/timer.h"
#include "pico/multicore.h"
#include "string.h"
// protothreads header
#include "pt_cornell_rp2040_v1_4.h"

// unprotected global to halt display
static int run_stop = 1 ;

// ==========================================
// === set up timer ISR  used in this pgm
// ==========================================
// === timer alarm ========================
// !! modifiying alarm zero trashes the cpu 
//        and causes LED  4 long - 4 short
// !! DO NOT USE alarm 0
// This low-level setup is ocnsiderably faster to execute
// than the hogh-level callback

#define ALARM_NUM 1
#define ALARM_IRQ TIMER0_IRQ_1
// ISR interval will be 100 uSec
// 10 KHz
volatile int alarm_period = 100 ;
float Fs = 10000 ;
//
// the actual ISR
void compute_sample(void);
//
static void alarm_irq(void) {
    // mark ISR entry
    gpio_put(15,1);
    // Clear the alarm irq
    hw_clear_bits(&timer_hw->intr, 1u << ALARM_NUM);
    // arm the next interrupt
    // Write the lower 32 bits of the target time to the alarm to arm it
    timer_hw->alarm[ALARM_NUM] = timer_hw->timerawl + alarm_period ;
    //
    // The actual computation
    compute_sample();

    // mark ISR exit
    gpio_put(15,0);
}

// set up the timer alarm ISR
static void alarm_in_us(uint32_t delay_us) {
    // Enable the interrupt for our alarm (the timer outputs 4 alarm irqs)
    hw_set_bits(&timer_hw->inte, 1u << ALARM_NUM);
    // Set irq handler for alarm irq
    irq_set_exclusive_handler(ALARM_IRQ, alarm_irq);
    // Enable the alarm irq
    irq_set_enabled(ALARM_IRQ, true);
    // Enable interrupt in block and at processor
    // Alarm is only 32 bits 
    uint64_t target = timer_hw->timerawl + delay_us;
    // Write the lower 32 bits of the target time to the alarm which
    // will arm it
    timer_hw->alarm[ALARM_NUM] = (uint32_t) target;   
}

// ===========================================
// ADC setup 
// ===========================================

float adc_sample ;
float adc_mean ;
void ADC_setup(void){
  adc_init();
  adc_gpio_init(28);
  // p26 is ADC input 0 - gpio 28 is 2
  adc_select_input(2);
  // free run
  adc_run(1);
  // result is in adc_hw->result
}

// ========================================
// === spi setup 
// =======================================
//SPI configurations
#define PIN_CS   5
#define PIN_SCK  6
#define PIN_MOSI 7
#define SPI_PORT spi0

// constant to tell SPI DAC what to do
// prepend to each 12-bit sample
#define DAC_config_chan_A 0b0011000000000000
// B-channel, 1x, active
#define DAC_config_chan_B 0b1011000000000000
uint16_t DAC_data ; 
void spi_setup(void){
    // Initialize SPI channel (channel, baud rate set to 20MHz)
    // connected to spi DAC
    spi_init(SPI_PORT, 20000000) ;
    // Format (channel, data bits per transfer, polarity, phase, order)
    spi_set_format(SPI_PORT, 16, 0, 0, 0);
    // Map SPI signals to GPIO ports
    //gpio_set_function(PIN_MISO, GPIO_FUNC_SPI);
    gpio_set_function(PIN_SCK, GPIO_FUNC_SPI);
    gpio_set_function(PIN_MOSI, GPIO_FUNC_SPI);
    gpio_set_function(PIN_CS, GPIO_FUNC_SPI) ;
}
//

// ===========================================
// dSP definitions 
// ===========================================
// The Sliding Windowed Infinite Fourier Transform -- with alpha-SWIFT
// Logan L. Grado, Matthew D. Johnson, and Theoden I. Netoff
// Digital Object Identifier 10.1109/MSP.2017.2718039
//file:///home/bruce/Downloads/10078055.pdf

// === SWIFT setup
// can be any number, limited by the speed of the cpu
#define N_FREQ  200   

// A list of the frequencys to transform
// need not be uniform
float F[N_FREQ] ;

// the complex spectral estimate
// frequencies are normailsed and run 0 to pi
// at 10khz sample rate that is 0 to 5kz
float Xr[N_FREQ], Xi[N_FREQ] ;
// additive components of the alpha-SWIFT
float slow_Xr[N_FREQ], slow_Xi[N_FREQ] ;
float fast_Xr[N_FREQ], fast_Xi[N_FREQ] ;

// and the mag of X
float Fmag[N_FREQ] ;
// and the log mag
float Fmag_log[N_FREQ] ;

// the per-sample complex phase shift at each frequecy
// omega = 2*pi*F/Fs so OHM = cos(omega) + j*sin(omega)
float slow_OHMr[N_FREQ], slow_OHMi[N_FREQ] ;
float fast_OHMr[N_FREQ], fast_OHMi[N_FREQ] ;

// the decay rate derived from number of samples 
// value will be exp(-1/(number of samples))
float dk_rate[N_FREQ] ;
float fast_dk_rate ;

// spectyrogram
int time_column = 12 ;

// ouput test -- the mean compensates for drift
float test_out, test_out_mean ;

//  dds to calibrate independent of input source
unsigned int accum1, accum2 ;
unsigned int inc1, inc2 ;
float sine_table[256] ;

// at each sample time:
// and for each different frequency omega (or F)
// Xn(omega) = exp(-1/(number of samples)) * exp(j*omega) * X(n-1)(omega) + new sample



//====================================
// === magnitude approx good to about +/-2%
// see https://en.wikipedia.org/wiki/Alpha_max_plus_beta_min_algorithm
#define min(a,b) (((a)<(b))?(a):(b))
#define max(a,b) (((a)>(b))?(a):(b))
//====================================
float magnitude(float real, float img){
  float mmax, mmin ;
  float c1 = 0.89820f ;
  float c2 = 0.48597f ;
  
  mmin = min(fabsf(real), fabsf(img)) ; 
  mmax = max(fabsf(real), fabsf(img)) ;
  // 
  return max(mmax, ((mmax*c1) + (mmin*c2) )); 
}


// ==================================================
// === graphics demo -- RUNNING on core 0
// ==================================================
static PT_THREAD (protothread_graphics(struct pt *pt)) {
    PT_BEGIN(pt);

    // DMA channel -- needed to check for DMA done
    //static int ADC_data_chan ;

    // Draw some filled rectangles
    fillRect(20, 1, 400, 50, LIGHT_BLUE); // blue box
    drawTextVGA437(100, 20, "FFT spectrogram and SWIFT", WHITE, LIGHT_BLUE) ;

    fillRect(435, 1, 200, 50, YELLOW); // green box
    // Write some text
     // Draw some filled rectangles
    fillRect(0, 0, 640, 480, BLACK); // erase
    fillRect(64, 0, 176, 50, BLUE); // blue box
    fillRect(250, 0, 176, 50, YELLOW); // red box
    fillRect(435, 0, 176, 50, GREEN); // green box

    // Write some text
    drawTextTiny8(65, 0, "Raspberry Pi Pico 2", WHITE, BLUE) ;
    drawTextGLCD(65, 10, "SWIFT spectrum", WHITE, BLUE) ;
    drawTextGLCD(65, 20, "Bruce Land brl4@cornell.edu", WHITE, BLUE) ;
    drawTextGLCD(65, 30, "Hunter Adams vha3@cornell.edu", WHITE, BLUE) ;
    //
    drawTextGLCD(438, 10, "Protothreads rp2040/2350 1.4", BLACK, GREEN) ;
   // drawTextGLCD(438, 20, "Two CPU; clock 300 Mhz", BLACK, GREEN) ;
    drawTextGLCD(438, 30, "vga16_v3 driver", BLACK, GREEN) ;
    drawTextGLCD(438, 40, "Double Buffer off", BLACK, GREEN) ;
    //
    drawTextVGA437(260, 5, "SWIFT Transform", BLACK, YELLOW) ;
    //
   
    setCursor(80, 122) ;
    setTextSize(1) ;
    setTextColor2(BLACK, YELLOW);
    //writeString(" Spectrum 0-6.4 KHz ") ;
    setCursor(350, 122) ;
    //writeString(" Log Spectrum 0-6.4 KHz ") ;

    drawHLine(0,479,10, WHITE);
    setCursor(1, 469) ;
    setTextSize(1) ;
    setTextColor2(WHITE, BLACK);
    writeString("0");
    //
    drawHLine(0,479-50,10, WHITE);
    setCursor(1, 469-50) ;
    writeString("1k");
    //
    drawHLine(0,479-100,10, WHITE);
    setCursor(1, 469-100) ;
    writeString("2k");
    //
    drawHLine(0,479-150,10, WHITE);
    setCursor(1, 469-150) ;
    writeString("3k");
    //
    drawHLine(0,479-200,10, WHITE);
    setCursor(1, 469-200) ;
    writeString("4k");

    drawHLine(0,479-200,640, GREEN);
    setCursor(200, 469-200) ;
    setTextColor2(BLACK, YELLOW);
    writeString("SWIFT Spectogram ");
    

    //copy_buffer_to_other() ;

    // === congure the ADC =========
    // ===  and start a buffer fill
    ADC_setup() ;

    // set up F, decay and phase shift arrays
    #define freq_spacing 20.0f //20.0f
    #define dk 50.0f // 50.0
    #define fast_dk 15 
    for(int i=0; i<N_FREQ; i++){
      F[i] = freq_spacing * (float)i ;
      //decay rate will be exp(-1/(number of samples))
       //dk_rate[i] = exp(-1.0f/dk);
       // dk_rate sets the bandwidth of a bin bigger is narrrower bandwidth
       // different uses willl require setting this
       // for speech 3.0*Fs/F seems good
      dk_rate[i] = exp(-1.0f/(3.0f*Fs/F[i])); //10 is better freq resolution
      //
      fast_dk_rate = exp(-1.0f/fast_dk);
      //
      // omega = 2*pi*F/Fs so OHM = cos(omega) + j*sin(omega)
      float omega = 6.28318f * F[i]/Fs ;
      slow_OHMr[i] = dk_rate[i] * cos(omega) ;
      slow_OHMi[i] = dk_rate[i] * sin(omega); 
      fast_OHMr[i] = fast_dk_rate * cos(omega) ;
      fast_OHMi[i] = fast_dk_rate * sin(omega);      
    }

    // dds
    inc1 = 1000 * pow(2,32 )/ Fs ;

    while(true) {
        
        // A brief nap
        PT_YIELD_usec(20000) ;

        // clear spectrum
        // comput mag
        // plot spectrum
        // === plot single SWIFT spectrum for testing
        int max_bin = 0 ;
        float max_value = 0 ;
       for(int i=2; i<N_FREQ; i++){
          //erase a point
         //drawPixel(i*2+40, -(short)Fmag[i]+160, BLACK) ;
         fillRect(i*2+40, -(short)Fmag[i]+180, 2, 2, BLACK) ;
         fillRect(i*2+40, -(short)Fmag_log[i]+240, 2, 2, BLACK) ;
         //
         // the scale factors will change depending on recording conditions
         Fmag[i] = magnitude(Xr[i], Xi[i]) * 0.005f;
         Fmag_log[i] = log(Fmag[i]) * 10.0f;
         if (Fmag_log[i]<-2) Fmag_log[i] = -2.0f ;
         //
         //drawPixel(i*2+40, -(short)Fmag[i]+160, GREEN) ;
         fillRect(i*2+40, -(short)Fmag[i]+180, 2, 2, GREEN) ;
         fillRect(i*2+40, -(short)Fmag_log[i]+240, 2, 2, CYAN) ;
         // find max bin
         if(Fmag[i] > max_value){
            max_value = Fmag[i] ;
            max_bin = i ;
         }

        }
        char str[80] ;
        sprintf(str, "max bin freq = %5.1f   ", F[max_bin]);
        drawTextTiny8(100, 60, str, WHITE, BLACK);
        //
        // Spectrogram -- draw full spectrum, then  move right
        // wrap the screen
        // at right edge of screen, reset to left edge
        time_column++ ;
        if (time_column == 636) time_column = 12;
        //drawHLine(0, 80,5,WHITE) ;
        for(int i=4; i<N_FREQ; i++){
          // bound color to 0 to 15
         // may need to rescale for diffferent levels
         int color = Fmag[i]*0.9f ; //Fmag_log
         if (color > 15) color = 15;
         if (Fmag[i] < 0.01f*Fmag[max_bin] ) color = 0 ;         
          drawPixel(time_column, 480-i, color ) ;
          //drawVLine(time_column, 480-2*i, 2, fr_disp) ;
        } 
   }
   PT_END(pt);
} // graphics thread


// ==================================================
// === user's serial input thread on core 1
// ==================================================
// serial_read an serial_write do not block any thread
// except this one
static PT_THREAD (protothread_serial(struct pt *pt))
{
    PT_BEGIN(pt);
      
      //
      //for (int i = 0; i < N_WAVE; i++) {
       //   printf("%f \n\r", s1x14_to_float(window[i])) ; //m        
      //}

      while(1) {
        // print prompt
        sprintf(pt_serial_out_buffer, "input 0=stop 1=run: ");
        // spawn a thread to do the non-blocking write
        serial_write ;

        // spawn a thread to do the non-blocking serial read
         serial_read ;
        // convert input string to number
        sscanf(pt_serial_in_buffer,"%d ", &run_stop) ;


        // NEVER exit while
      } // END WHILE(1)
  PT_END(pt);
} // serial thread

// ==================================================
// === dsp ISR -- 
// ==================================================
// 
void compute_sample(void){
    // DDS
    // make the sine wave to send to filter
    //accum1 += inc1 ;
    // update main waveform
    //sine_table[accum1>>24] ;
   //adc_sample = sine_table[accum1>>24] ; ;

    // convert ADC to float
    adc_sample = (float) (adc_hw->result) ;
    
    // low pass to get mean
    // then subtract the mean (highpass)
    // high pass at around 0.1 second, so one Hz
    adc_mean = 0.999f * adc_mean + 0.001 * adc_sample ;
    adc_sample -= adc_mean ;

    //
    // muilt X times OHM and add input
    // complex product
    //xy	=	(a+ib)(c+id)	
	  // =	(ac-bd)+i(ad+bc).	

    // zero the output sum
    test_out = 0.0f ;

    // don't output very low frequencies
    // bin  3 is 3x20Hz
    for(int i=3; i<N_FREQ; i++){
      float temp ;
      // the slow arm of the swift
      temp = (slow_Xr[i] * slow_OHMr[i] - slow_Xi[i] * slow_OHMi[i]) + adc_sample ; //-
      slow_Xi[i] = (slow_Xr[i] * slow_OHMi[i] + slow_Xi[i] * slow_OHMr[i]) ;//-
      slow_Xr[i] = temp ;
      //
      // the fast arm of the swift
      temp = (fast_Xr[i] * fast_OHMr[i] - fast_Xi[i] * fast_OHMi[i]) + adc_sample ; //-
      fast_Xi[i] = (fast_Xr[i] * fast_OHMi[i] + fast_Xi[i] * fast_OHMr[i]) ;//-
      fast_Xr[i] = temp ;
      //
      // combine the two to make X
      Xr[i] = slow_Xr[i] - fast_Xr[i] ;
      Xi[i] = slow_Xi[i] - fast_Xi[i] ;

      // now test output -- simple sum output for testing
      test_out += Xr[i] ; // 
      //
    }
    //
    // scale may need to change depending on source amp
    // offset converts signed to offset binary
    DAC_data = DAC_config_chan_A | (((int)(test_out*0.2f) + 2048) & 0xfff)   ; 
    spi0_hw->dr = DAC_data ;
    DAC_data = DAC_config_chan_B | (((int)(test_out*0.2f) + 2048) & 0xfff)   ; 
    // NOTE === nonblocking SPI write ===
    spi0_hw->dr = DAC_data ;
    //blocking out: spi_write16_blocking(SPI_PORT, &DAC_data, 1) ;
    
    
}

// ========================================
// === core 1 main -- started in main below
// ========================================
void core1_main(){ 
  //
  //  === add threads  ====================
  // for core 1
  pt_add_thread(protothread_serial) ;
  pt_add_thread(protothread_graphics);
  //
  // === initalize the scheduler ==========
  pt_schedule_start ;
  // NEVER exits
  // ======================================
}

// ========================================
// === core 0 main
// ========================================
int main(){
  // set the clock
  //set_sys_clock_khz(250000, true); // 171us
  // start the serial i/o
  stdio_init_all() ;
  // announce the threader version on system reset
  printf("\n\rProtothreads RP2040/2350 v1.4 two-core\n\r");

  // Initialize the VGA screen
  initVGA() ;

  // the spi for DAC
  spi_setup() ;

  //
  // fire off interrupt
  alarm_in_us(alarm_period);

  gpio_init(15) ;	
     gpio_set_dir(15, GPIO_OUT) ;

  // start core 1 threads
  multicore_reset_core1();
  multicore_launch_core1(&core1_main);

  // dds table for testing
  for (int i=0; i<256; i++) {
    // sine table is in natural +1/-1 range
    sine_table[i] =  1000.0f + 1000.0f*(sin(2*3.1416*(float)i/256)) ;
  }

  // === config threads ========================
  // for core 0
  //pt_add_thread(protothread_graphics);
  //pt_add_thread(protothread_toggle25);
  //
  // === initalize the scheduler ===============
  pt_schedule_start ;
  // NEVER exits
  // ===========================================
} // end main