pico_divider_test.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. // make test-div64_64.bin && qemu-system-arm -M lm3s6965evb -cpu cortex-m3 -nographic -serial null -monitor null -semihosting -kernel test-div64_64.bin
  2. #include <stdio.h>
  3. #include <math.h>
  4. #include "pico/divider.h"
  5. #include "pico/stdlib.h"
  6. #ifdef TURBO
  7. #include "hardware/vreg.h"
  8. #endif
  9. typedef uint64_t ui64;
  10. typedef int64_t i64;
  11. typedef uint32_t ui32;
  12. typedef int32_t i32;
  13. void test_mulib_divu64u64(ui64*y,ui64*x,ui64*q,ui64*r) {
  14. *q = divmod_u64u64_rem(*y, *x, r);
  15. }
  16. void test_mulib_divs64s64( i64*y, i64*x, i64*q, i64*r) {
  17. *q = divmod_s64s64_rem(*y, *x, r);
  18. }
  19. ui32 hwdiv_data[4];
  20. void hwdiv_sim() {
  21. hwdiv_data[2]=hwdiv_data[0]/hwdiv_data[1];
  22. hwdiv_data[3]=hwdiv_data[0]%hwdiv_data[1];
  23. // ostr("HWS: ");
  24. // o8hex(hwdiv_data[0]); osp();
  25. // o8hex(hwdiv_data[1]); osp();
  26. // o8hex(hwdiv_data[2]); osp();
  27. // o8hex(hwdiv_data[3]); onl();
  28. }
  29. ui64 ntests=0;
  30. #ifdef uart_default
  31. void o1ch(int c) {
  32. uart_putc(uart_default, c);
  33. }
  34. void ostr(char*p) { while(*p) o1ch(*p++); }
  35. void onl() {ostr("\r\n");}
  36. void osp() {o1ch(' ');}
  37. void ostrnl(char*p) { ostr(p); onl();}
  38. void o1hex(int u) {u&=0x0f; if(u>=10) o1ch(u-10+'A'); else o1ch(u+'0');}
  39. void o2hex(int u) {o1hex(u>> 4); o1hex(u);}
  40. void o4hex(int u) {o2hex(u>> 8); o2hex(u);}
  41. void o8hex(int u) {o4hex(u>>16); o4hex(u);}
  42. void o16hex(ui64 u) {o8hex(u>>32); o8hex(u);}
  43. unsigned int odig(unsigned int*pv,unsigned int d,int zf) {
  44. char c='0';
  45. unsigned int v=*pv;
  46. while(v>=d) v-=d,c++;
  47. if(zf==1&&c=='0') osp();
  48. else o1ch(c),zf=0;
  49. *pv=v;
  50. return zf;
  51. }
  52. void odec(int u) {
  53. unsigned int v=u;
  54. int zf=1;
  55. if(u<0) o1ch('-'),v=-v;
  56. zf=odig(&v,1000000000,zf);
  57. zf=odig(&v,100000000,zf);
  58. zf=odig(&v,10000000,zf);
  59. zf=odig(&v,1000000,zf);
  60. zf=odig(&v,100000,zf);
  61. zf=odig(&v,10000,zf);
  62. zf=odig(&v,1000,zf);
  63. zf=odig(&v,100,zf);
  64. zf=odig(&v,10,zf);
  65. zf=odig(&v,1,0);
  66. }
  67. #endif
  68. int xdigval(int c) {
  69. if(c>='0'&&c<='9') return c-'0';
  70. if(c>='A'&&c<='F') return c-'A'+10;
  71. if(c>='a'&&c<='f') return c-'a'+10;
  72. return -1;
  73. }
  74. ui64 seed;
  75. ui64 rnd64() {
  76. if(seed&1) seed=(seed>>1)^0x800000000000000dULL;
  77. else seed= seed>>1;
  78. return seed;
  79. }
  80. unsigned int rnd32() {
  81. return rnd64();
  82. }
  83. //#define RANDOMISE
  84. //#define rfn "/dev/random"
  85. #ifdef uart_default
  86. void test_divu64u64(ui64 y,ui64 x) {
  87. ui64 q,r;
  88. test_mulib_divu64u64(&y,&x,&q,&r);
  89. #if !PICO_ON_DEVICE
  90. if (!x) return;
  91. #endif
  92. if(q==y/x&&r==y%x) ;
  93. else {
  94. ostr("U ");
  95. o16hex(y); osp();
  96. o16hex(x); osp();
  97. o16hex(q); osp();
  98. o16hex(r);
  99. ostr(" : ");
  100. o16hex(y/x); osp();
  101. o16hex(y%x); onl();
  102. }
  103. ntests++;
  104. }
  105. void test_divs64s64(i64 y,i64 x) {
  106. i64 q,r;
  107. #if !PICO_ON_DEVICE
  108. if (y == INT64_MIN) return;
  109. #endif
  110. test_mulib_divs64s64(&y,&x,&q,&r);
  111. #if !PICO_ON_DEVICE
  112. if (!x) return;
  113. #endif
  114. if(q==y/x&&r==y%x) ;
  115. else {
  116. ostr("S ");
  117. o16hex(y); osp();
  118. o16hex(x); osp();
  119. o16hex(q); osp();
  120. o16hex(r);
  121. ostr(" : ");
  122. o16hex(y/x); osp();
  123. o16hex(y%x); onl();
  124. }
  125. ntests++;
  126. }
  127. // for all x and y consisting of a single run of 1:s, test a region around (x,y)
  128. void test_special() {
  129. int i0,j0,i1,j1,dy,dx;
  130. ui64 y,x;
  131. for(i0=0;i0<64;i0++) {
  132. y=0;
  133. for(i1=i0;i1<65;i1++) {
  134. for(j0=0;j0<64;j0++) {
  135. x=0;
  136. for(j1=j0;j1<65;j1++) {
  137. #define A 2
  138. for(dy=-A;dy<=A;dy++) {
  139. for(dx=-A;dx<=A;dx++) {
  140. test_divu64u64( y+dy, x+dx);
  141. test_divs64s64( y+dy, x+dx);
  142. test_divs64s64( y+dy,-x-dx);
  143. test_divs64s64(-y-dy, x+dx);
  144. test_divs64s64(-y-dy,-x-dx);
  145. }
  146. }
  147. x|=1ULL<<j1;
  148. }
  149. }
  150. y|=1ULL<<i1;
  151. }
  152. odec(i0+1); ostr(" "); odec(i1+1); ostr(" specials\n");
  153. }
  154. }
  155. void test_random() {
  156. int i,j;
  157. ui64 y,x,m;
  158. for(i=0;;i++) {
  159. for(j=0;j<200000;j++) {
  160. m=1ULL<<(rnd32()%48+15); m+=m-1; y=rnd64()&m;
  161. m=1ULL<<(rnd32()%48+15); m+=m-1; x=rnd64()&m;
  162. test_divu64u64( y, x);
  163. test_divs64s64( y, x);
  164. test_divs64s64( y,-x);
  165. test_divs64s64(-y, x);
  166. test_divs64s64(-y,-x);
  167. }
  168. odec(i+1); ostr("M\n");
  169. }
  170. }
  171. #endif
  172. uint32_t __attribute__((naked)) time_32(uint32_t a, uint32_t b, uint32_t (*func)(uint32_t a, uint32_t b)) {
  173. asm(
  174. ".syntax unified\n"
  175. "push {r4, r5, lr}\n"
  176. "ldr r4, =#0xe000e018\n"
  177. "ldr r5, [r4]\n"
  178. "blx r2\n"
  179. "ldr r0, [r4]\n"
  180. "subs r5, r0\n"
  181. "lsls r0, r5, #8\n"
  182. "asrs r0, #8\n"
  183. "pop {r4, r5, pc}\n"
  184. );
  185. }
  186. uint32_t __attribute__((naked)) time_64(uint64_t a, uint64_t b, uint64_t (*func64)(uint64_t a, uint64_t b)) {
  187. asm(
  188. ".syntax unified\n"
  189. "push {r4-r6, lr}\n"
  190. "ldr r6, [sp, #16]\n"
  191. "ldr r4, =#0xe000e018\n"
  192. "ldr r5, [r4]\n"
  193. "blx r6\n"
  194. "ldr r0, [r4]\n"
  195. "subs r5, r0\n"
  196. "lsls r0, r5, #8\n"
  197. "asrs r0, #8\n"
  198. "pop {r4-r6, pc}\n"
  199. );
  200. }
  201. uint32_t compiler_div_s32(uint32_t a, uint32_t b) {
  202. return ((int32_t)a) / (int32_t)b;
  203. }
  204. uint32_t pico_div_s32(uint32_t a, uint32_t b) {
  205. return div_s32s32(a, b);
  206. }
  207. uint32_t compiler_div_u32(uint32_t a, uint32_t b) {
  208. return a/b;
  209. }
  210. uint32_t pico_div_u32(uint32_t a, uint32_t b) {
  211. return div_u32u32(a, b);
  212. }
  213. uint64_t compiler_div_s64(uint64_t a, uint64_t b) {
  214. return ((int64_t)a) / (int64_t)b;
  215. }
  216. uint64_t pico_div_s64(uint64_t a, uint64_t b) {
  217. return div_s64s64(a, b);
  218. }
  219. uint64_t compiler_div_u64(uint64_t a, uint64_t b) {
  220. return a/b;
  221. }
  222. uint64_t pico_div_u64(uint64_t a, uint64_t b) {
  223. return div_u64u64(a, b);
  224. }
  225. void perf_test() {
  226. *(volatile unsigned int *)0xe000e010=5; // enable SYSTICK at core clock
  227. for(int bit = 30; bit>=0; bit--) {
  228. int div = 1u << (31-bit);
  229. const int N = 1000;
  230. int tc = 0, tp = 0;
  231. for (int i = 0; i < N; i++) {
  232. int a = rnd32();
  233. int b;
  234. do {
  235. b = rnd32() / div;
  236. } while (b == 0);
  237. tc += time_32(a, b, compiler_div_s32);
  238. tp += time_32(a, b, pico_div_s32);
  239. }
  240. printf(" S32 %d %f\t%f\n", bit, tc / 1000.0, tp / 1000.0);
  241. }
  242. for(int bit = 30; bit>=0; bit--) {
  243. int div = 1u << (31-bit);
  244. const int N = 1000;
  245. int tc = 0, tp = 0;
  246. for (int i = 0; i < N; i++) {
  247. int a = rnd32();
  248. int b;
  249. do {
  250. b = rnd32() / div;
  251. } while (b == 0);
  252. tc += time_32(a, b, compiler_div_u32);
  253. tp += time_32(a, b, pico_div_u32);
  254. }
  255. printf(" U32 %d %f\t%f\n", bit, tc / 1000.0, tp / 1000.0);
  256. }
  257. for(int extra = 0; extra <= 48; extra+=16)
  258. {
  259. for(int bit = 62; bit>=0; bit--) {
  260. int64_t div = 1ull << (62-bit);
  261. const int N = 1000;
  262. int tc = 0, tp = 0;
  263. for (int i = 0; i < N; i++) {
  264. int64_t a = rnd64() / (1u << extra);
  265. int64_t b;
  266. do {
  267. b = ((int64_t)rnd64()) / div;
  268. } while (b == 0);
  269. tc += time_64(a, b, compiler_div_s64);
  270. tp += time_64(a, b, pico_div_s64);
  271. }
  272. printf(" S64 %d %d %f\t%f\n", extra, bit, tc / 1000.0, tp / 1000.0);
  273. }
  274. for(int bit = 62; bit>=0; bit--) {
  275. int64_t div = 1ull << (62-bit);
  276. const int N = 1000;
  277. int tc = 0, tp = 0;
  278. for (int i = 0; i < N; i++) {
  279. uint64_t a = rnd64();
  280. uint64_t b;
  281. do {
  282. b = rnd64() / div;
  283. } while (b == 0);
  284. tc += time_64(a, b, compiler_div_u64);
  285. tp += time_64(a, b, pico_div_u64);
  286. }
  287. printf(" U64 %d %d %f\t%f\n", extra, bit, tc / 1000.0, tp / 1000.0);
  288. }
  289. }
  290. }
  291. int main() {
  292. #ifndef uart_default
  293. #warning test/pico_divider requires a default uart
  294. #else
  295. #ifdef TURBO
  296. vreg_set_voltage(VREG_VOLTAGE_MAX);
  297. set_sys_clock_khz(48000*8, true);
  298. #endif
  299. setup_default_uart();
  300. #ifdef RANDOMISE
  301. int u;
  302. ifh=sys_host(SYS_OPEN,(int)rfn,0,strlen(rfn));
  303. u=sys_host(SYS_READ,ifh,(int)&seed,sizeof(seed));
  304. if(u) {ostrnl("Error reading random stream"); return 16;}
  305. sys_host(SYS_CLOSE,ifh,0,0);
  306. #else
  307. seed=12233524287791987605ULL;
  308. #endif
  309. perf_test();
  310. ostr("begin\n");
  311. test_divu64u64( 38, 6);
  312. test_divs64s64( 38, 6);
  313. test_divs64s64( 38,-6);
  314. test_divs64s64(-38, 6);
  315. test_divs64s64(-38,-6);
  316. test_divu64u64(1234567890123ULL,6);
  317. test_divu64u64(0x0000000100000000ULL,6);
  318. test_divu64u64(0xffffffffffffffffULL,6);
  319. test_special();
  320. o16hex(ntests);
  321. ostr(" special tests done; starting random tests\n");
  322. test_divu64u64(0xf123456789abcdefULL,0x0000000100000000ULL);
  323. test_divu64u64(0xf123456789abcdefULL,0x00000001ffffffffULL);
  324. test_divu64u64(0xf123456789abcdefULL,0x00000003ffffffffULL);
  325. test_divu64u64(0xf123456789abcdefULL,0x00000007ffffffffULL);
  326. test_divu64u64(0xf123456789abcdefULL,0x0000000fffffffffULL);
  327. test_divu64u64(0xf123456789abcdefULL,0x0000001fffffffffULL);
  328. test_divu64u64(0xf123456789abcdefULL,0x0000003fffffffffULL);
  329. test_divu64u64(0xf123456789abcdefULL,0x0000007fffffffffULL);
  330. test_divu64u64(0xf123456789abcdefULL,0x000000ffffffffffULL);
  331. test_divu64u64(0xf123456789abcdefULL,0x000001ffffffffffULL);
  332. test_divu64u64(0xf123456789abcdefULL,0x000003ffffffffffULL);
  333. test_divu64u64(0xf123456789abcdefULL,0x000007ffffffffffULL);
  334. test_divu64u64(0xf123456789abcdefULL,0x00000fffffffffffULL);
  335. test_divu64u64(0xf123456789abcdefULL,0x00001fffffffffffULL);
  336. test_divu64u64(0xf123456789abcdefULL,0x00003fffffffffffULL);
  337. test_divu64u64(0xf123456789abcdefULL,0x00007fffffffffffULL);
  338. test_divu64u64(0xf123456789abcdefULL,0x0000ffffffffffffULL);
  339. test_random();
  340. ostr("END\n");
  341. return 0;
  342. #endif
  343. }