# MicroPython micro-benchmarks: cost of calling an inline-assembler
# function, and speed of a simple loop as bytecode, native, viper and
# Thumb assembler code.
#
# Requires a MicroPython port that provides the inline Thumb assembler
# (@micropython.asm_thumb) and the native/viper code emitters.
#
# Unused here; kept commented out as in the original:
# from rp2 import PIO, asm_pio
# from machine import Pin
import array
import time

# Shared parameter block handed to the assembler routines.
# Passing an array to an asm_thumb function passes its address in r0,
# so element i of this 32-bit array lives at byte offset 4*i.
#   a[1] = multiplier used by asm_mult_loop
#   a[2] = scratch slot that asm_mult_loop stores each product into
#   a[3] = number of iterations for the assembler loops
asm_mem = array.array('i', [1, 35, 3, 1000000])

###################################
### first, find out how fast a function call is


# Empty assembler routine: measures pure call/return overhead.
# NOTE: no docstring here -- the body of an asm_thumb function may only
# contain assembler statements.
@micropython.asm_thumb
def null_fn(r0):
    pass


# now use the assembler routine
#@micropython.native
def timing_null():
    """Return the time in microseconds of one null_fn() call.

    The cost of the two back-to-back ticks_us() reads is measured first
    and subtracted from the result.
    """
    t0 = time.ticks_us()
    t1 = time.ticks_us()
    # ticks_us() wraps around, so intervals must go through ticks_diff()
    ovhd = time.ticks_diff(t1, t0)  # timing overhead
    t0 = time.ticks_us()
    null_fn(asm_mem)
    t1 = time.ticks_us()
    return time.ticks_diff(t1, t0) - ovhd


# call three times and report time
t_single_call1 = timing_null()
t_single_call2 = timing_null()
t_single_call3 = timing_null()
print('single fn =', t_single_call1, t_single_call2, t_single_call3)

#########################################
### now time multiple calls in a loop
# The three variants below are deliberately identical apart from the
# code emitter; their loop shape and the literal 10000 are part of what
# is being measured, so they are left exactly as written.


def loop_fun(asm_mem):
    c = 0
    while c<10000:
        null_fn(asm_mem)
        c += 1


@micropython.native
def loop_fun_native(asm_mem):
    c = 0
    while c<10000:
        null_fn(asm_mem)
        c += 1


@micropython.viper
def loop_fun_viper(asm_mem):
    c = 0
    while c<10000:
        null_fn(asm_mem)
        c += 1


print('========')
# Get the timing overhead
t0 = time.ticks_us()
t1 = time.ticks_us()
ovhd = time.ticks_diff(t1, t0)
print('timing overhead=', ovhd, 'uSec')

# Time the looped function: per-call time of null_fn inside each loop,
# after subtracting the author's measured per-iteration loop overhead
# (5.2 uSec bytecode, 1.75 uSec native, 0.12 uSec viper).
t0 = time.ticks_us()
loop_fun(asm_mem)
t1 = time.ticks_us()
print('multiple fun_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/10000-5.2)

t0 = time.ticks_us()
loop_fun_native(asm_mem)
t1 = time.ticks_us()
print('multiple fun_native_time=', (time.ticks_diff(t1, t0)-ovhd)/10000-1.75)

t0 = time.ticks_us()
loop_fun_viper(asm_mem)
t1 = time.ticks_us()
print('multiple fun_viper_time=', (time.ticks_diff(t1, t0)-ovhd)/10000-0.12)

#========================================
# now recode to run a loop of
# retrieve-multiply-store
# assembler is 1 cycle/instruction
# except for load, store and branch which are two


# Runs a[3] iterations of: a[2] = counter * a[1]; returns the last product.
@micropython.asm_thumb
def asm_mult_loop(r0):
    ldr(r7, [r0, 12])   # get a[3], the iteration count
    mov(r6, 0)          # initialize counter
    ldr(r2, [r0, 4])    # get a[1] to mult by
    ### compute loop
    label(loop_pt)
    mov(r1, r6)         # mult by counter
    mul(r1, r2)         # counter*a[1] into r1
    str(r1, [r0, 8])    # product -> a[2]
    add(r6, 1)          # increment the counter
    cmp(r6, r7)         # reached the limit?
    # blt, not ble: the counter starts at 0, so ble would run the body
    # a[3]+1 times while the caller divides the elapsed time by a[3].
    blt(loop_pt)        # back to loop_pt
    mov(r0, r1)         # r0 is the return value


# use the assembler loop routine
def timing_mult_loop(ovhd):
    """Time asm_mult_loop and print the average time per loop pass.

    ovhd -- timing overhead in microseconds to subtract from the
            measured interval.
    """
    t0 = time.ticks_us()
    asm_mult_loop(asm_mem)
    t1 = time.ticks_us()
    # subtract the timing overhead, then average over the iterations
    print('========')
    print('asm_multiply_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/asm_mem[3],
          'count=', asm_mem[3])


# Author's measurement note: at 130 MHz clock rate, and
# a loop count of 8 cycles, one
# pass through the loop should take
# 61.5 nSec. Actual time is 61.3 to 64.
#
# NOTE(review): the original layout was lost; this call is assumed to be
# live (the note above quotes measured results) -- confirm.
timing_mult_loop(ovhd)

#########################################
# timing simple loop


def speed_bytecode():
    x = 0
    for i in range(100000):
        x += 2


@micropython.native
def speed_native():
    x = 0
    for i in range(100000):
        x += 2


# Note: the viper variant runs ten times more iterations than the two
# above; the report below divides by the matching count.
@micropython.viper
def speed_viper():
    x = 0
    for i in range(1000000):
        x += 2


# Runs a[3] iterations of x += 2 and returns x.
@micropython.asm_thumb
def speed_asm(r0):
    ldr(r7, [r0, 12])   # get a[3], the iteration count
    mov(r6, 0)          # initialize counter
    mov(r5, 0)          # initialize variable x
    ### compute loop
    label(loop_pt)
    add(r5, 2)          # x = x + 2
    add(r6, 1)          # increment the counter
    cmp(r6, r7)         # reached the limit?
    # blt, not ble: see asm_mult_loop -- keeps the pass count at a[3].
    blt(loop_pt)        # back to loop_pt
    mov(r0, r5)         # r0 is the return value


# Get the timing overhead
t0 = time.ticks_us()
t1 = time.ticks_us()
ovhd = time.ticks_diff(t1, t0)
print('========')
print('timing overhead=', ovhd, 'uSec')

# Time each simple-loop variant and report microseconds per iteration
# (no loop-overhead correction is applied here).
t0 = time.ticks_us()
speed_bytecode()
t1 = time.ticks_us()
print('bytecode_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/100000, 'usec')

t0 = time.ticks_us()
speed_native()
t1 = time.ticks_us()
print('native_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/100000)

t0 = time.ticks_us()
speed_viper()
t1 = time.ticks_us()
print('viper_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/1000000)

t0 = time.ticks_us()
speed_asm(asm_mem)
t1 = time.ticks_us()
# divide by the count the routine actually read, not a separate literal
print('asm_loop_time=', (time.ticks_diff(t1, t0)-ovhd)/asm_mem[3])