När man läser om CPU:er brukar man få reda på om de är "out-of-order" eller "in-order", dvs. om de själva kan ändra på ordningen vid körning eller om det krävs en kompilator för detta. Dagens PC har alla out-of-order så det är svårt att hitta tydliga skillnader. Den sista med in-order var original Atom.
Mer spännande är det för ARM och Raspberry Pi. Jag har gjort några tester på Pi 3 med denna kod:
Med A-53 så har vi in-order så utan optimering fungerar enbart Sum1.
Med -O1, -O2 eller -O3 går det bättre.
RPi1, RPi2 och RPi3 har in-order medan RPi4 har "deeply-out-of-order".
Tinker Board gen 1 har A-17 som är out-of-order men inte "deeply".
Tinker Board gen 2 har A-53 dvs in-order.
Banana Pi har in-order.
Någon som vill testa denna kod?
Mer spännande är det för ARM och Raspberry Pi. Jag har gjort några tester på Pi 3 med denna kod:
Kod:
#include <cmath>
#include <ctime>
#include <iostream>

// Micro-benchmark: times identity() and sum1()..sum5() with clock() and prints
// the elapsed milliseconds of each call.
//
// Every sumN() first adds the loop indices 0..kLoopIterations-1 to its
// argument and then executes a fixed sequence of integer and floating-point
// statements whose results are discarded. The sequences differ between the
// functions in which operations they use and in how the dependent statements
// are ordered; that ordering is kept exactly as written.
//
// NOTE(review): the statement sequences run ONCE per call, after the loop, not
// once per iteration, so the measured time is dominated by the identical loop
// in every sumN(). If they were meant to be the loop body, the loop needs
// braces -- confirm the intent before changing what is measured.
//
// All locals are given defined starting values and integer division goes
// through safe_div()/safe_mod(), because the sequences do reach zero divisors
// (see sum3) and an integer division by zero is undefined behaviour that can
// trap. Floating-point divisions are left as plain '/': on IEEE 754 targets a
// zero divisor yields an infinity instead of trapping.

// Number of loop iterations every sumN() performs.
static const unsigned int kLoopIterations = 2000000u;

// Integer division that yields 0 for a zero divisor instead of undefined
// behaviour.
static int safe_div(int dividend, int divisor) {
    return divisor == 0 ? 0 : dividend / divisor;
}

// Integer remainder that yields 0 for a zero divisor instead of undefined
// behaviour.
static int safe_mod(int dividend, int divisor) {
    return divisor == 0 ? 0 : dividend % divisor;
}

// Adds 0 + 1 + ... + (kLoopIterations - 1) to num and returns the result.
// The sum (about 2.0e12) does not fit in a 32-bit int, so it is accumulated in
// unsigned arithmetic, where wrap-around is well defined. The conversion back
// to int is modular on two's-complement targets (guaranteed from C++20,
// implementation-defined before that).
static int add_loop_indices(int num) {
    unsigned int total = static_cast<unsigned int>(num);
    for (unsigned int i = 0; i < kLoopIterations; ++i) {
        total += i;
    }
    return static_cast<int>(total);
}

// Returns its argument unchanged; used as the baseline for the timing
// overhead.
int identity(int x) {
    return x;
}

// Loop plus a sequence of integer statements (divide, remainder, multiply,
// add). Returns num plus the loop-index sum.
int sum1(int num) {
    int a = 3;
    int b = 1, c = 1, d = 1, e = 1, f = 1, g = 1, h = 1, k = 1, l = 1, m = 1;
    int n = 1, o = 1, p = 1, q = 1, r = 1, s = 1, t = 1, u = 1, v = 1, w = 1;
    int x = 1, y = 1, z = 1, z1 = 1, z2 = 1, z3 = 1, z4 = 1, z5 = 1, z6 = 1;

    num = add_loop_indices(num);

    t = safe_div(r, a);
    s = safe_div(o, a);
    r = safe_div(a, a);
    p = 2 * a;
    o = safe_div(b, a);
    h = safe_div(b, a);
    g = safe_mod(h, a);
    f = safe_div(b, a);
    e = safe_mod(c, a);
    d = safe_div(c, a);
    b = 3 * a;
    c = safe_div(a, a);
    p = k + a + d;
    q = p + b + m;
    n = k + a;
    m = 1 + b + l;
    l = 3 * a + k;
    k = a + b + 3;
    t = safe_div(r, a);
    u = safe_div(t, a);
    v = safe_div(h, a);
    w = safe_div(b, z6);
    x = safe_div(c, a);
    y = safe_div(c, a) + p;
    z = 3 * a;
    z1 = safe_div(z5, a);
    z2 = k + a + d;
    z3 = p + b + m;
    z4 = k + a;
    z5 = 1 + b + l;
    a = a + b;

    return num;
}

// Loop plus integer statements followed by float statements (sqrt,
// reciprocal, int-to-float conversion). Returns num plus the loop-index sum.
int sum2(int num) {
    int a1 = 3;
    int b1 = 1, c1 = 1, d1 = 1, e1 = 1, f1 = 1, g1 = 1, h1 = 1, k1 = 1, l1 = 1;
    int m1 = 1, n1 = 1, o1 = 1, p1 = 1, q1 = 1, r1 = 1, s1 = 1, t1 = 1;
    float x1 = 1.0f, y1 = 1.0f, z1 = 1.0f, u1 = 1.0f, w1 = 1.0f;
    float x3 = 1.0f, y3 = 1.0f, z3 = 1.0f, u3 = 1.0f, w3 = 1.0f;
    float x5 = 1.0f, y5 = 1.0f, z5 = 1.0f, u5 = 1.0f, w5 = 1.0f;

    num = add_loop_indices(num);

    t1 = safe_div(r1, s1);
    s1 = safe_div(o1, p1);
    r1 = safe_div(a1, b1);
    p1 = 2 * a1;
    o1 = safe_div(a1, b1);
    h1 = safe_div(b1, a1);
    g1 = safe_div(h1, a1);
    f1 = safe_div(b1, c1);
    e1 = safe_div(c1, a1);
    d1 = safe_div(c1, b1);
    b1 = 3 * a1;
    c1 = safe_div(a1, b1);  // 3 / 9 == 0
    p1 = k1 + a1 + d1;
    q1 = p1 + b1 + m1;
    n1 = k1 + a1;
    m1 = 1 + b1 + l1;
    l1 = 3 * a1 + k1;
    k1 = a1 + b1 + 3;
    x1 = std::sqrt(w1);
    y1 = 1 / z1;
    z1 = static_cast<float>(c1);
    u1 = 1 / x1;
    w1 = static_cast<float>(h1);
    x3 = std::sqrt(w1);
    y3 = 1 / z1;  // z1 is 0.0f here (from c1); yields +inf on IEEE 754 targets
    z3 = static_cast<float>(c1);
    u3 = 1 / x1;
    w3 = static_cast<float>(h1);
    x5 = std::sqrt(w1);
    y5 = 1 / z3;  // z3 is 0.0f here (from c1); yields +inf on IEEE 754 targets
    z5 = static_cast<float>(m1);
    u5 = 1 / x1;
    w5 = static_cast<float>(e1);
    a1 = a1 + b1;

    return num;
}

// Loop plus integer statements followed by float statements, ordered so that
// each statement tends to depend on the ones just before it. Returns num plus
// the loop-index sum.
int sum3(int num) {
    int j1 = 3;
    int j2 = 1, j3 = 1, j4 = 1, j5 = 1, j6 = 1, j7 = 1, j8 = 1, j9 = 1, j10 = 1;
    int j11 = 1, j12 = 1, j13 = 1, j14 = 1, j15 = 1, j16 = 1, j17 = 1, j18 = 1;
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s6 = 1.0f, s7 = 1.0f, s8 = 1.0f, s9 = 1.0f, s10 = 1.0f;
    float s11 = 1.0f, s12 = 1.0f, s13 = 1.0f, s14 = 1.0f, s15 = 1.0f;

    num = add_loop_indices(num);

    j18 = safe_div(j17, j1);
    j17 = safe_div(j16, j3);
    j16 = safe_div(j15, j4);
    j15 = safe_div(j1, j3);
    j2 = safe_div(j1, j1);
    j3 = safe_div(j2, j1);  // 1 / 3 == 0
    j4 = safe_div(j2, j2);
    j5 = safe_div(j1, j2);
    j6 = safe_div(j1, j3);  // divisor is 0 here; this is why the guard exists
    j7 = 3 * j1;
    j8 = safe_div(j1, j7);
    j9 = j1 + j2 + j3;
    j10 = j1 + j9 + j5;
    j11 = j1 + j10 + j3;
    j12 = j1 + j2;
    j13 = 1 + j8 + j4;
    j14 = 3 * j1 + j11;
    s1 = static_cast<float>(std::sqrt(10.0));
    s2 = 1 / s1;
    s3 = static_cast<float>(j7);
    s4 = 1 / s3;
    s5 = static_cast<float>(j1);
    s6 = static_cast<float>(std::sqrt(10.0));
    s7 = 1 / s1;
    s8 = static_cast<float>(j7);
    s9 = 1 / s3;
    s10 = static_cast<float>(j3);
    s11 = static_cast<float>(std::sqrt(10.0));
    s12 = 1 / s1;
    s13 = static_cast<float>(j7);
    s14 = 1 / s3;
    s15 = static_cast<float>(j8);
    j1 = j1 + j2;

    return num;
}

// Same kinds of statements as sum3, but with integer and float statements
// interleaved and reordered. Returns num plus the loop-index sum.
int sum4(int num) {
    int j1 = 3;
    int j2 = 1, j3 = 1, j4 = 1, j5 = 1, j6 = 1, j7 = 1, j8 = 1, j9 = 1, j10 = 1;
    int j11 = 1, j12 = 1, j13 = 1, j14 = 1, j15 = 1, j16 = 1, j17 = 1, j18 = 1;
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s6 = 1.0f, s7 = 1.0f, s8 = 1.0f, s9 = 1.0f, s10 = 1.0f;
    float s11 = 1.0f, s12 = 1.0f, s13 = 1.0f, s14 = 1.0f, s15 = 1.0f;

    num = add_loop_indices(num);

    j7 = 3 * j1;
    j3 = safe_div(j2, j1);
    j4 = safe_div(j2, j2);
    j6 = safe_div(j1, j3);
    j13 = 1 + j8 + j4;
    j8 = safe_div(j1, j7);
    j9 = j1 + j2 + j3;
    j10 = j1 + j9 + j5;
    j11 = j1 + j10 + j3;
    j12 = j1 + j2;
    j5 = safe_div(j1, j2);
    j14 = 3 * j1;
    j2 = safe_div(j1, j1);
    s11 = static_cast<float>(std::sqrt(5.0 + j2));
    s9 = 1 / s3;
    s2 = 1 / s1;
    s3 = static_cast<float>(j7);
    s4 = 1 / s3;
    j18 = safe_div(j17, j1);
    j17 = safe_div(j16, j3);
    j16 = safe_div(j15, j4);
    j15 = safe_div(j1, j3);
    s6 = static_cast<float>(std::sqrt(10.0));
    s7 = 1 / s1;
    s8 = static_cast<float>(j7);
    s5 = static_cast<float>(j1);
    s10 = static_cast<float>(j3);
    s1 = static_cast<float>(std::sqrt(10.0 + j2));
    s12 = 1 / s1;
    s13 = static_cast<float>(j7);
    s14 = 1 / s3;
    s15 = static_cast<float>(j8);
    j1 = j1 + j2;

    return num;
}

// Loop plus float-only statements (sqrt, reciprocal, divide, multiply, add).
// Returns num plus the loop-index sum.
int sum5(int num) {
    // s6, s9, s11 and s18 were declared by the original but never touched, so
    // they are omitted here.
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s7 = 1.0f, s8 = 1.0f, s10 = 1.0f, s12 = 1.0f, s13 = 1.0f;
    float s14 = 1.0f, s15 = 1.0f, s16 = 1.0f, s17 = 1.0f, s19 = 1.0f;
    float s20 = 1.0f, s21 = 1.0f, s22 = 1.0f, s23 = 1.0f, s24 = 1.0f;
    float s25 = 1.0f, s26 = 1.0f, s27 = 1.0f, s28 = 1.0f, s29 = 1.0f;
    float s30 = 1.0f;

    num = add_loop_indices(num);

    s30 = static_cast<float>(std::sqrt(5.0));
    s29 = 1 / s3;
    s28 = 1 / s1;
    s27 = std::sqrt(s30);
    s26 = static_cast<float>(std::sqrt(10.0));
    s25 = static_cast<float>(std::sqrt(5.0));
    s24 = 1 / s3;
    s23 = 1 / s1;
    s22 = 1 / s30;
    s21 = 1 / s3;
    s20 = static_cast<float>(std::sqrt(10.0));
    s19 = s20 / s30;
    s17 = std::sqrt(s20);
    s16 = s15 + s23;
    s7 = 1 / s1;
    s8 = s7 * s7;
    s5 = s1 + s1;
    s10 = s3 + s1;
    s1 = static_cast<float>(std::sqrt(10.0));
    s12 = 1 / s1;
    s13 = s2 / s3;
    s14 = 1 / s3;
    s15 = 5.0f;
    s2 = s1 / s5;
    s3 = s10 / s17;
    s4 = s5 / s2;

    return num;
}

// Calls action(arg) once and returns the processor time it consumed, in
// milliseconds, as measured by clock().
double time_it(int (*action)(int), int arg) {
    const std::clock_t start_time = std::clock();
    // Storing the result in a volatile keeps an optimising compiler from
    // discarding the call whose duration is being measured.
    volatile int sink = action(arg);
    const std::clock_t finish_time = std::clock();
    static_cast<void>(sink);
    return 1000.0 * static_cast<double>(finish_time - start_time) / CLOCKS_PER_SEC;
}

// Times action(arg) and prints one result line: "<label>(<arg>) takes <ms> mseconds."
static void report(const char* label, int (*action)(int), int arg) {
    const double elapsed_ms = time_it(action, arg);
    std::cout << label << '(' << arg << ") takes " << elapsed_ms << " mseconds." << '\n';
}

// Runs every benchmark once with the argument 100. Going through report()
// ties each printed label to the function that is actually timed (the original
// printed "Sum5" while timing sum4).
int main() {
    const int arg = 100;
    report("Identity", identity, arg);
    report("Sum1", sum1, arg);
    report("Sum2", sum2, arg);
    report("Sum3", sum3, arg);
    report("Sum4", sum4, arg);
    report("Sum5", sum5, arg);
    return 0;
}
Med A-53 så har vi in-order så utan optimering fungerar enbart Sum1.
Med -O1, -O2 eller -O3 går det bättre.
RPi1, RPi2 och RPi3 har in-order medan RPi4 har "deeply-out-of-order".
Tinker Board gen 1 har A-17 som är out-of-order men inte "deeply".
Tinker Board gen 2 har A-53 dvs in-order.
Banana Pi har in-order.
Någon som vill testa denna kod?