2019-02-02

winafl中基于插桩的覆盖率反馈原理

最近winafl增加了对Intel PT的支持，但目前只支持x64，且覆盖率计算不全（比如条件跳转等），所以它现在还是不如直接用插桩去hook的方式来得准确完整。这里主要想分析的也是基于DynamoRIO插桩的覆盖率反馈原理。

之前曾有人在《初识 Fuzzing 工具 WinAFL》（https://paper.seebug.org/323/#32）中“3.2.2 插桩模块”一节中简单分析过其插桩原理，但没有找到我想要的答案，因此只好自己动手分析下源码。

比如，我想知道：

通过循环调用fuzzing的目标函数来提高速度，但DynamoRIO的覆盖率信息是如何同步给fuzzer主进程的？
具体是如何实现寄存器环境的记录与恢复，从而实现目标函数的不断循环？
覆盖率信息是如何记录与分析的？

覆盖率信息记录与分析原理

第3个问题发现已经有人分析过afl，可以参见这里《AFL内部实现细节小记》（http://rk700.github.io/2017/12/28/afl-internals/），简单总结下:

AFL在编译源码时，为每个代码块生成一个随机数，代表位置地址；
在二元组中记录分支跳转的源地址与目标地址，将两者异或的结果作为该分支的key，保存每个分支的执行次数，用1字节来储存；
保存分支的执行次数实际上是一张大小为64K的哈希表，位于共享内存中，方便target进程与fuzzer进程之间共享，对应的伪代码如下：
cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;

fuzzer进程通过buckets哈希桶来归类这些分支执行次数，如下结构定义，左边为执行次数，右边为记录值trace_bits：

/* Bucketing table for edge hit counts: the index is the raw 8-bit execution
   count of a branch and the value is the bucket flag it collapses to, so all
   counts inside one range (e.g. 4..7) yield the same recorded value.
   The `[a ... b]` initializers are the GNU C range-designator extension. */
static const u8 count_class_lookup8[256] = {
  [0]           = 0, 
  [1]           = 1, 
  [2]           = 2, 
  [3]           = 4, 
  [4 ... 7]     = 8, 
  [8 ... 15]    = 16,
  [16 ... 31]   = 32,
  [32 ... 127]  = 64,
  [128 ... 255] = 128
};

对于是否触发新路径，主要通过计算各分支的trace_bits的hash值（算法：u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);，其中MAP_SIZE与HASH_CONST均为常量）是否发生变化来实现的。

覆盖信息的传递原理

先在fuzzer进程中创建命名管道，其中fuzzer_id为随机值：

//afl-fuzz.c
 pipe_name = (char *)alloc_printf("\\\\.\\pipe\\afl_pipe_%s", fuzzer_id);
   
 pipe_handle = CreateNamedPipe(
   pipe_name,                // pipe name
PIPE_ACCESS_DUPLEX |     // read/write access 
FILE_FLAG_OVERLAPPED,    // overlapped mode 
   0,
   1,                        // max. instances
   512,                      // output buffer size
   512,                      // input buffer size
   20000,                    // client time-out
   NULL);                    // default security attribute

创建drrun进程去运行目标程序并Hook，在childpid_(%fuzzer_id%).txt的文件中记录子进程id，即目标进程ID，然后等待管道连接，并通过读取上述txt文件以获取目标进程id，主要用来后面超时中断进程的：

//afl-fuzz.c    
    pidfile = alloc_printf("childpid_%s.txt", fuzzer_id);
	if (persist_dr_cache) {
		cmd = alloc_printf(
			"%s\\drrun.exe -pidfile %s -no_follow_children -persist -persist_dir \"%s\\drcache\" -c winafl.dll %s -fuzzer_id %s -drpersist -- %s",
			dynamorio_dir, pidfile, out_dir, client_params, fuzzer_id, target_cmd);
	} else {
		cmd = alloc_printf(
			"%s\\drrun.exe -pidfile %s -no_follow_children -c winafl.dll %s -fuzzer_id %s -- %s",
			dynamorio_dir, pidfile, client_params, fuzzer_id, target_cmd);
	}
......
  if(!CreateProcess(NULL, cmd, NULL, NULL, inherit_handles, CREATE_SUSPENDED, NULL, NULL, &si, &pi)) {
    FATAL("CreateProcess failed, GLE=%d.\n", GetLastError());
  }
......
      if(!OverlappedConnectNamedPipe(pipe_handle, &pipe_overlapped)) {
      FATAL("ConnectNamedPipe failed, GLE=%d.\n", GetLastError());
  }

  watchdog_enabled = 0;

  if(drioless == 0) {
    //by the time pipe has connected the pidfile must have been created
    fp = fopen(pidfile, "rb");
    if(!fp) {
      FATAL("Error opening pidfile.txt");
    }
    fseek(fp,0,SEEK_END);
    pidsize = ftell(fp);
    fseek(fp,0,SEEK_SET);
    buf = (char *)malloc(pidsize+1);
    fread(buf, pidsize, 1, fp);
    buf[pidsize] = 0;
    fclose(fp);
    remove(pidfile);
    child_pid = atoi(buf);
    free(buf);
    ck_free(pidfile);
  }
  else {
    child_pid = pi.dwProcessId;
  }

在插桩模块winafl.dll中打开前面创建的命名管道，然后通过管道与fuzzer主进程进行交互：

//winafl.c
static void
setup_pipe() {
    pipe = CreateFile(
         options.pipe_name,   // pipe name
         GENERIC_READ |  // read and write access
         GENERIC_WRITE,
         0,              // no sharing
         NULL,           // default security attributes
         OPEN_EXISTING,  // opens existing pipe
         0,              // default attributes
         NULL);          // no template file

    if (pipe == INVALID_HANDLE_VALUE) DR_ASSERT_MSG(false, "error connecting to pipe");
}
......
/*
 * Reads one command byte from the pipe shared with the fuzzer.
 *
 * Returns the byte that was read. If ReadFile fails or delivers no data,
 * returns 0, which is not one of the command letters exchanged over the
 * pipe, so callers fall into their "unrecognized command" handling instead
 * of acting on an indeterminate value.
 */
char ReadCommandFromPipe()
{
	DWORD num_read = 0;
	char result = 0;

	/* Previously the ReadFile result was ignored and `result` was returned
	   uninitialized when the read failed (e.g. the fuzzer side closed the
	   pipe). */
	if (!ReadFile(pipe, &result, 1, &num_read, NULL) || num_read != 1)
		return 0;

	return result;
}

/* Sends the single command byte `cmd` over the pipe. The outcome of the
 * write is not reported to the caller. */
void WriteCommandToPipe(char cmd)
{
	DWORD bytes_out = 0;

	WriteFile(pipe, &cmd, sizeof(cmd), &bytes_out, NULL);
}

当插桩模块winafl.dll监测到程序首次运行至目标函数入口时，pre_fuzz_handler函数会被执行，然后通过管道写入'P'命令，代表开始进入目标函数，afl-fuzz.exe进程收到命令后，会向目标进程写入管道命令'F'，并监测超时时间和循环调用次数。afl-fuzz.exe与目标进程正是通过读写管道命令来交互的，主要有'P'（目标进程通知fuzzer已进入目标函数）、'F'（fuzzer通知目标进程继续执行目标函数）、'K'（目标函数正常返回，由post_fuzz_handler写入）、'C'（崩溃）、'Q'（退出进程）。覆盖信息通过文件映射方法（内存共享）写入winafl_data.afl_area：

//winafl.c
pre_fuzz_handler(void *wrapcxt, INOUT void **user_data)
{
    ......
    if(!options.debug_mode) {
		WriteCommandToPipe('P');
		command = ReadCommandFromPipe();

        if(command != 'F') {
            if(command == 'Q') {
                dr_exit_process(0);
            } else {
                DR_ASSERT_MSG(false, "unrecognized command received over pipe");
            }
        }
    } else {
        debug_data.pre_hanlder_called++;
        dr_fprintf(winafl_data.log, "In pre_fuzz_handler\n");
    }
......
    memset(winafl_data.afl_area, 0, MAP_SIZE);	// 用于存储覆盖率信息

    if(options.coverage_kind == COVERAGE_EDGE || options.thread_coverage) {
        void **thread_data = (void **)drmgr_get_tls_field(drcontext, winafl_tls_field);
        thread_data[0] = 0;
        thread_data[1] = winafl_data.afl_area;	//如果开启-thread_coverage选项，则会将覆盖率信息写入线程TLS中
    }
}

//winafl.c
static void
setup_shmem() {
   HANDLE map_file;

   map_file = OpenFileMapping(
                   FILE_MAP_ALL_ACCESS,   // read/write access
                   FALSE,                 // do not inherit the name
                   options.shm_name);     // name of mapping object

   if (map_file == NULL) DR_ASSERT_MSG(false, "error accesing shared memory");

   winafl_data.afl_area = (unsigned char *) MapViewOfFile(map_file, // handle to map object
               FILE_MAP_ALL_ACCESS,  // read/write permission
               0,
               0,
               MAP_SIZE);

   if (winafl_data.afl_area == NULL) DR_ASSERT_MSG(false, "error accesing shared memory");
}

篡改目标函数循环调用的原理

此步的关键就在于进入目标函数前调用的pre_fuzz_handler函数，以及函数退出后调用的post_fuzz_handler函数。

进入pre_fuzz_handler函数时，winafl.dll会先获取以下信息

app_pc target_to_fuzz = drwrap_get_func(wrapcxt);	//获取目标函数地址
dr_mcontext_t *mc = drwrap_get_mcontext_ex(wrapcxt, DR_MC_ALL);	//获取目标函数当前内存上下文信息
drcontext = drwrap_get_drcontext(wrapcxt);	//获取DynamoRIO上下文

fuzz_target.xsp = mc->xsp;	// 保存栈指针,xsp是各平台下的通用标记变量
fuzz_target.func_pc = target_to_fuzz;	// 目标函数地址

其中 dr_mcontext_t 是机器上下文（machine context）结构，记录了各平台下的寄存器状态：

/**
 * Machine context: the register state for the architecture selected at
 * build time (AARCHXX vs. X86, each in a 32-bit and a 64-bit flavour).
 * Every variant below exposes the stack pointer as \p xsp and the program
 * counter as \p pc, so code that only touches those two names is
 * independent of the platform-specific register layout.
 */
typedef struct _dr_mcontext_t {
    /**
     * The size of this structure.  This field must be set prior to filling
     * in the fields to support forward compatibility.
     */
    size_t size;
    /**
     * The valid fields of this structure.  This field must be set prior to
     * filling in the fields.  For input requests (dr_get_mcontext()), this
     * indicates which fields should be written.  Writing the multimedia fields
     * frequently can incur a performance hit.  For output requests
     * (dr_set_mcontext() and dr_redirect_execution()), this indicates which
     * fields will be copied to the actual context.
     */
    dr_mcontext_flags_t flags;

#ifdef AARCHXX
    reg_t r0;   /**< The r0 register. */
    reg_t r1;   /**< The r1 register. */
    reg_t r2;   /**< The r2 register. */
    reg_t r3;   /**< The r3 register. */
    reg_t r4;   /**< The r4 register. */
    reg_t r5;   /**< The r5 register. */
    reg_t r6;   /**< The r6 register. */
    reg_t r7;   /**< The r7 register. */
    reg_t r8;   /**< The r8 register. */
    reg_t r9;   /**< The r9 register. */
    reg_t r10;  /**< The r10 register. */
    reg_t r11;  /**< The r11 register. */
    reg_t r12;  /**< The r12 register. */
# ifdef X64 /* 64-bit */
    reg_t r13;  /**< The r13 register. */
    reg_t r14;  /**< The r14 register. */
    reg_t r15;  /**< The r15 register. */
    reg_t r16;  /**< The r16 register. \note For 64-bit DR builds only. */
    reg_t r17;  /**< The r17 register. \note For 64-bit DR builds only. */
    reg_t r18;  /**< The r18 register. \note For 64-bit DR builds only. */
    reg_t r19;  /**< The r19 register. \note For 64-bit DR builds only. */
    reg_t r20;  /**< The r20 register. \note For 64-bit DR builds only. */
    reg_t r21;  /**< The r21 register. \note For 64-bit DR builds only. */
    reg_t r22;  /**< The r22 register. \note For 64-bit DR builds only. */
    reg_t r23;  /**< The r23 register. \note For 64-bit DR builds only. */
    reg_t r24;  /**< The r24 register. \note For 64-bit DR builds only. */
    reg_t r25;  /**< The r25 register. \note For 64-bit DR builds only. */
    reg_t r26;  /**< The r26 register. \note For 64-bit DR builds only. */
    reg_t r27;  /**< The r27 register. \note For 64-bit DR builds only. */
    reg_t r28;  /**< The r28 register. \note For 64-bit DR builds only. */
    reg_t r29;  /**< The r29 register. \note For 64-bit DR builds only. */
    union {
        reg_t r30; /**< The r30 register. \note For 64-bit DR builds only. */
        reg_t lr;  /**< The link register. */
    }; /**< The anonymous union of alternative names for r30/lr register. */
    union {
        reg_t r31; /**< The r31 register. \note For 64-bit DR builds only. */
        reg_t sp;  /**< The stack pointer register. */
        reg_t xsp; /**< The platform-independent name for the stack pointer register. */
    }; /**< The anonymous union of alternative names for r31/sp register. */
    /**
     * The program counter.
     * \note This field is not always set or read by all API routines.
     */
    byte *pc;
    union {
        uint xflags; /**< The platform-independent name for condition flags. */
        struct {
            uint nzcv; /**< Condition flags (status register). */
            uint fpcr; /**< Floating-Point Control Register. */
            uint fpsr; /**< Floating-Point Status Register. */
        }; /**< AArch64 flag registers. */
    }; /**< The anonymous union of alternative names for flag registers. */
# else /* 32-bit */
    union {
        reg_t r13; /**< The r13 register. */
        reg_t sp;  /**< The stack pointer register.*/
        reg_t xsp; /**< The platform-independent name for the stack pointer register. */
    }; /**< The anonymous union of alternative names for r13/sp register. */
    union {
        reg_t r14; /**< The r14 register. */
        reg_t lr;  /**< The link register. */
    }; /**< The anonymous union of alternative names for r14/lr register. */
    /**
     * The anonymous union of alternative names for r15/pc register.
     * \note This field is not always set or read by all API routines.
     */
    union {
        reg_t r15; /**< The r15 register. */
        byte *pc;  /**< The program counter. */
    };
    union {
        uint xflags; /**< The platform-independent name for full APSR register. */
        uint apsr; /**< The application program status registers in AArch32. */
        uint cpsr; /**< The current program status registers in AArch32. */
    }; /**< The anonymous union of alternative names for apsr/cpsr register. */
# endif /* 64/32-bit */
    /**
     * The SIMD registers.  We would probably be ok if we did not preserve the
     * callee-saved registers (q4-q7 == d8-d15) but to be safe we preserve them
     * all.  We do not need anything more than word alignment for OP_vldm/OP_vstm,
     * and dr_simd_t has no fields larger than 32 bits, so we have no padding.
     */
    dr_simd_t simd[NUM_SIMD_SLOTS];
#else /* X86 */
    union {
        reg_t xdi; /**< The platform-independent name for full rdi/edi register. */
        reg_t IF_X64_ELSE(rdi, edi); /**< The platform-dependent name for
                                          rdi/edi register. */
    }; /**< The anonymous union of alternative names for rdi/edi register. */
    union {
        reg_t xsi; /**< The platform-independent name for full rsi/esi register. */
        reg_t IF_X64_ELSE(rsi, esi); /**< The platform-dependent name for
                                          rsi/esi register. */
    }; /**< The anonymous union of alternative names for rsi/esi register. */
    union {
        reg_t xbp; /**< The platform-independent name for full rbp/ebp register. */
        reg_t IF_X64_ELSE(rbp, ebp); /**< The platform-dependent name for
                                          rbp/ebp register. */
    }; /**< The anonymous union of alternative names for rbp/ebp register. */
    union {
        reg_t xsp; /**< The platform-independent name for full rsp/esp register. */
        reg_t IF_X64_ELSE(rsp, esp); /**< The platform-dependent name for
                                          rsp/esp register. */
    }; /**< The anonymous union of alternative names for rsp/esp register. */
    union {
        reg_t xbx; /**< The platform-independent name for full rbx/ebx register. */
        reg_t IF_X64_ELSE(rbx, ebx); /**< The platform-dependent name for
                                          rbx/ebx register. */
    }; /**< The anonymous union of alternative names for rbx/ebx register. */
    union {
        reg_t xdx; /**< The platform-independent name for full rdx/edx register. */
        reg_t IF_X64_ELSE(rdx, edx); /**< The platform-dependent name for
                                          rdx/edx register. */
    }; /**< The anonymous union of alternative names for rdx/edx register. */
    union {
        reg_t xcx; /**< The platform-independent name for full rcx/ecx register. */
        reg_t IF_X64_ELSE(rcx, ecx); /**< The platform-dependent name for
                                          rcx/ecx register. */
    }; /**< The anonymous union of alternative names for rcx/ecx register. */
    union {
        reg_t xax; /**< The platform-independent name for full rax/eax register. */
        reg_t IF_X64_ELSE(rax, eax); /**< The platform-dependent name for
                                          rax/eax register. */
    }; /**< The anonymous union of alternative names for rax/eax register. */
# ifdef X64
    reg_t r8;  /**< The r8 register. \note For 64-bit DR builds only. */
    reg_t r9;  /**< The r9 register. \note For 64-bit DR builds only. */
    reg_t r10; /**< The r10 register. \note For 64-bit DR builds only. */
    reg_t r11; /**< The r11 register. \note For 64-bit DR builds only. */
    reg_t r12; /**< The r12 register. \note For 64-bit DR builds only. */
    reg_t r13; /**< The r13 register. \note For 64-bit DR builds only. */
    reg_t r14; /**< The r14 register. \note For 64-bit DR builds only. */
    reg_t r15; /**< The r15 register. \note For 64-bit DR builds only. */
# endif
    union {
        reg_t xflags; /**< The platform-independent name for
                           full rflags/eflags register. */
        reg_t IF_X64_ELSE(rflags, eflags); /**< The platform-dependent name for
                                                rflags/eflags register. */
    }; /**< The anonymous union of alternative names for rflags/eflags register. */
    /**
     * Anonymous union of alternative names for the program counter /
     * instruction pointer (eip/rip). \note This field is not always set or
     * read by all API routines.
     */
    union {
        byte *xip; /**< The platform-independent name for full rip/eip register. */
        byte *pc; /**< The platform-independent alt name for full rip/eip register. */
        byte *IF_X64_ELSE(rip, eip); /**< The platform-dependent name for
                                          rip/eip register. */
    };
    byte padding[PRE_XMM_PADDING]; /**< The padding to get ymm field 32-byte aligned. */
    /**
     * The SSE registers xmm0-xmm5 (-xmm15 on Linux) are volatile
     * (caller-saved) for 64-bit and WOW64, and are actually zeroed out on
     * Windows system calls.  These fields are ignored for 32-bit processes
     * that are not WOW64, or if the underlying processor does not support
     * SSE.  Use dr_mcontext_xmm_fields_valid() to determine whether the
     * fields are valid.
     *
     * When the fields are valid, on processors with AVX enabled (i.e.,
     * proc_has_feature(FEATURE_AVX) returns true), these fields will
     * contain the full ymm register values; otherwise, the top 128
     * bits of each slot will be undefined.
     */
    dr_ymm_t ymm[NUM_SIMD_SLOTS];
#endif /* ARM/X86 */
} dr_mcontext_t;

接下来就是获取和设置fuzzed的目标函数参数：

//save or restore arguments
if (!options.no_loop) {
    if (fuzz_target.iteration == 0) {
        for (i = 0; i < options.num_fuz_args; i++)
            options.func_args[i] = drwrap_get_arg(wrapcxt, i);	//首次运行先获取运行参数
    } else {
        for (i = 0; i < options.num_fuz_args; i++)
            drwrap_set_arg(wrapcxt, i, options.func_args[i]);	//设置运行参数
    }
}

当目标函数退出后，执行post_fuzz_handler函数，会恢复栈顶指针和pc地址，以此实现目标函数的循环调用：

/*
 * Hook run after the wrapped target function returns.
 *
 * Outside debug mode it writes the 'K' command to the pipe; in debug mode it
 * bumps a counter and logs instead. Unless options.no_loop is set, it then
 * counts the iteration, exits the process once the count equals
 * options.fuzz_iterations, and otherwise rewinds the stack pointer and
 * program counter to the values stored in fuzz_target and redirects
 * execution, so the target function runs again.
 *
 * wrapcxt:   drwrap context for the call that just returned.
 * user_data: not used in this function.
 */
static void
post_fuzz_handler(void *wrapcxt, void *user_data)
{
    dr_mcontext_t *mc;
    mc = drwrap_get_mcontext(wrapcxt);	// fetch the machine context of the wrapped call

    if(!options.debug_mode) {
		WriteCommandToPipe('K');
    } else {
        debug_data.post_handler_called++;
        dr_fprintf(winafl_data.log, "In post_fuzz_handler\n");
    }

    /* We don't need to reload context in case of network-based fuzzing. */
    if (options.no_loop)
        return;	// nothing to restore when the target is not being looped
    
	// exit the process once the iteration count reaches the configured limit
    fuzz_target.iteration++;
    if(fuzz_target.iteration == options.fuzz_iterations) {
        dr_exit_process(0);
    }

    mc->xsp = fuzz_target.xsp;			// restore the saved stack pointer
    mc->pc = fuzz_target.func_pc;		// point pc back at the target function's address
	drwrap_redirect_execution(wrapcxt);	// resume execution from the modified context
}

总结

总结下整个winafl执行流程：

afl-fuzz.exe通过创建命名管道与内存映射来实现与目标进程交互，其中管道用来发送和接收命令相互操作对方进程，内存映射主要用来记录覆盖率信息；
覆盖率记录主要通过drmgr_register_bb_instrumentation_event去设置BB执行的回调函数，通过instrument_bb_coverage或者instrument_edge_coverage来记录覆盖率情况，如果发现新的执行路径，就将样本放入队列目录中，用于后续文件变异，以提高代码覆盖率；
目标进程执行到目标函数后，会调用pre_fuzz_handler来存储上下文信息，包括寄存器和运行参数；
目标函数退出后，会调用post_fuzz_handler函数，恢复之前记录的上下文信息，以执行回原目标函数，又回到第2步；
目标函数运行次数达到指定循环调用次数时，会中断进程退出。

riusksk's blog

攀蟾折桂摄寰宇，摘星揽月御乾坤。踏云踩雾骋宵壤，驱风逐日闯天地。 ------泉哥

winafl中基于插桩的覆盖率反馈原理

覆盖率信息记录与分析原理

覆盖信息的传递原理

篡改目标函数循环调用的原理

总结