Box86 介绍

在这里插入图片描述
Box86 是 GitHub 上的一个开源项目,它是类似于 QEMU 的架构模拟器,并且专门用于模拟 x86 架构,具体网址:github链接
通过 Box86 能够在非x86 Linux(例如ARM)上运行 x86 Linux 程序(例如游戏)(主机系统必须是32位little-endian)。所以如果在 aarch64 架构机器上使用的话就需要一个32位子系统来运行和构建Box86。 Box86 目前来说在64位系统上是无法使用的。此外,除了32位子系统外还需要32位工具链来构建Box86。仅支持64位的工具链将无法编译Box86,并且会出现错误(通常在aarch64上,会显示“ -marm”无法识别)。

由于Box86使用某些“系统”库的本机版本,例如libc,libm,SDL和OpenGL,因此易于集成和使用,并且在某些情况下性能可能令人惊讶地高。

大多数x86游戏都需要OpenGL,因此在ARM平台上通常需要诸如gl4es之类的解决方案。

Box86现在为ARM平台集成了DynaRec(动态重新编译器),与仅使用解释器相比,速度提高了5到10倍。

许多游戏已经可以使用,例如:WorldOfGoo,Airline Tycoon Deluxe和FTL。许多GameMaker Linux游戏也运行良好(列表很长,其中包括UNDERTALE,A Risk of Rain和Cook, Serve, Delicious!)

如果要认真地参与Box86的开发,则应安装ccache并在cmake项目中激活其支持(例如,使用ccmake)。要启用TRACE(即,将所有执行的单个x86指令转储到stdout,并转储寄存器),您还将需要系统上可用的Zydis库。

源码导读

接下来进入 Box86 源码进行深入研读,源代码结构如下:

./
├── Box86Icon.png
├── Box86Logo.png
├── CHANGELOG.md
├── CMakeLists.txt
├── cmake_uninstall.cmake.in
├── COMPILE.md
├── LICENSE
├── README.md
├── rebuild_printer.py
├── rebuild_wrappers.py
├── runTest.cmake
├── src (源代码目录)
├── system (系统环境配置)
├── tests	(测试代码)
├── USAGE.md
└── x86lib	(X86标准库)

在源代码中,box86 可执行程序的入口函数 main() 位于 src/main.c 文件中,具体内容如下:


/*
 * box86 entry point.
 *
 * Visible flow: record the auxiliary-vector location, parse box86's own
 * command-line options, create the global context (my_context), gather the
 * environment and the preload list, resolve and load the target ELF, create
 * and set up the x86 emulator, load and relocate the needed libraries, then
 * run the emulation from the ELF entry point.
 *
 * Returns 1 when called without arguments (after printing version and help),
 * -1 when the target cannot be found, opened or loaded, and otherwise the
 * value read from the emulated EAX once Run() returns.  Several paths call
 * exit(0) directly (-v, -h, nothing to run, steam-runtime-check-requirements).
 */
int main(int argc, const char **argv, const char **env) {

    init_auxval(argc, argv, env);
    // trying to open and load 1st arg
    if(argc==1) {
        PrintBox86Version();
        PrintHelp();
        return 1;
    }

    // init random seed
    srandom(time(NULL));

    // check BOX86_LOG debug level
    LoadLogEnv();
    
    const char* prog = argv[1];
    int nextarg = 1;
    // check if some options are passed
    while(prog && prog[0]=='-') {
        if(!strcmp(prog, "-v") || !strcmp(prog, "--version")) {
            PrintBox86Version();
            exit(0);
        }
        if(!strcmp(prog, "-h") || !strcmp(prog, "--help")) {
            PrintHelp();
            exit(0);
        }
        // other options?
        if(!strcmp(prog, "--")) {
            prog = argv[++nextarg];
            break;
        }
        printf("Warning, unrecognized option '%s'\n", prog);
        prog = argv[++nextarg];
    }
    if(!prog || nextarg==argc) {
        printf("Box86: nothing to run\n");
        exit(0);
    }
    if(!box86_nobanner)
        PrintBox86Version();
    // precheck, for win-preload: does prog end with "wine-preloader"?
    // The lengths are compared first so that no pointer before the start of prog is ever formed.
    const size_t proglen = strlen(prog);
    const size_t preloaderlen = strlen("wine-preloader");
    if(proglen>=preloaderlen && !strcmp(prog+proglen-preloaderlen, "wine-preloader")) {
        // wine-preloader detected, skipping it if next arg exist and is an x86 binary
        // (argv[nextarg] is the preloader itself, so the *next* argument is argv[nextarg+1])
        int x86 = (nextarg+1<argc)?FileIsX86ELF(argv[nextarg+1]):0;
        if(x86) {
            prog = argv[++nextarg];
            printf_log(LOG_INFO, "BOX86: Wine preloader detected, loading \"%s\" directly\n", prog);
        }
    }
    // check if this is wine
    if(!strcmp(prog, "wine") || (strlen(prog)>5 && !strcmp(prog+strlen(prog)-strlen("/wine"), "/wine"))) {
        const char* prereserve = getenv("WINEPRELOADRESERVE");
        printf_log(LOG_INFO, "BOX86: Wine detected, WINEPRELOADRESERVE=\"%s\"\n", prereserve?prereserve:"");
        wine_prereserve(prereserve);
    }
    // Create a new context
    my_context = NewBox86Context(argc - nextarg);

    // check BOX86_LD_LIBRARY_PATH and load it
    LoadEnvVars(my_context);

    if(argv[0][0]=='/')
        my_context->box86path = strdup(argv[0]);
    else
        my_context->box86path = ResolveFile(argv[0], &my_context->box86_path);
    // prepare all other env. var
    my_context->envc = CountEnv(env);
    printf_log(LOG_INFO, "Counted %d Env var\n", my_context->envc);
    // allocate extra space for new environment variables such as BOX86_PATH
    my_context->envv = (char**)calloc(my_context->envc+4, sizeof(char*));
    GatherEnv(&my_context->envv, env, my_context->box86path);
    if(box86_log>=LOG_DUMP) {
        for (int i=0; i<my_context->envc; ++i)
            printf_log(LOG_DUMP, " Env[%02d]: %s\n", i, my_context->envv[i]);
    }

    path_collection_t ld_preload = {0};
    // BOX86_LD_PRELOAD takes precedence; LD_PRELOAD is only consulted when it is not set
    char* preload = getenv("BOX86_LD_PRELOAD");
    if(!preload)
        preload = getenv("LD_PRELOAD");
    if(preload) {
        ParseList(preload, &ld_preload, 0);
        if (ld_preload.size && box86_log) {
            printf_log(LOG_INFO, "BOX86 try to Preload ");
            for (int i=0; i<ld_preload.size; ++i)
                printf_log(LOG_INFO, "%s ", ld_preload.paths[i]);
            printf_log(LOG_INFO, "\n");
        }
    }
    // lets build argc/argv stuff
    printf_log(LOG_INFO, "Looking for %s\n", prog);
    if(strchr(prog, '/'))
        my_context->argv[0] = strdup(prog);
    else
        my_context->argv[0] = ResolveFile(prog, &my_context->box86_path);

    // prgname is the part of prog after the last '/', or prog itself when there is none
    const char* prgname = strrchr(prog, '/');
    if(!prgname)
        prgname = prog;
    else
        ++prgname;
    // special case for LittleInferno that use an old libvorbis
    if(strstr(prgname, "LittleInferno.bin.x86")==prgname) {
        printf_log(LOG_INFO, "LittleInferno detected, forcing emulated libvorbis\n");
        AddPath("libvorbis.so.0", &my_context->box86_emulated_libs, 0);
    }
    // special case for dontstarve that use an old SDL2
    if(strstr(prgname, "dontstarve")) {
        printf_log(LOG_INFO, "Dontstarve* detected, forcing emulated SDL2\n");
        AddPath("libSDL2-2.0.so.0", &my_context->box86_emulated_libs, 0);
    }
    // special case for steam that somehow seems to alter libudev opaque pointer (udev_monitor)
    if(strstr(prgname, "steam")==prgname) {
        printf_log(LOG_INFO, "steam detected, forcing emulated libudev\n");
        AddPath("libudev.so.0", &my_context->box86_emulated_libs, 0);
        box86_steam = 1;
    }
    // special case for steam-runtime-check-requirements to fake 64bits support
    if(strstr(prgname, "steam-runtime-check-requirements")==prgname) {
        printf_log(LOG_INFO, "steam-runtime-check-requirements detected, faking All is good!\n");
        exit(0);    // exiting, not testing anything
    }
    // special case for UnrealLinux.bin, it doesn't like "full path resolution"
    if(!strcmp(prog, "UnrealLinux.bin") && my_context->argv[0]) {
        free(my_context->argv[0]);
        my_context->argv[0] = strdup("./UnrealLinux.bin");
    }

    // copy the remaining command-line arguments for the emulated program
    for(int i=1; i<my_context->argc; ++i) {
        my_context->argv[i] = strdup(argv[i+nextarg]);
        printf_log(LOG_INFO, "argv[%i]=\"%s\"\n", i, my_context->argv[i]);
    }

    // check if file exist
    if(!my_context->argv[0] || !FileExist(my_context->argv[0], IS_FILE)) {
        printf_log(LOG_NONE, "Error: file is not found (check BOX86_PATH)\n");
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    if(!FileExist(my_context->argv[0], IS_FILE|IS_EXECUTABLE)) {
        printf_log(LOG_NONE, "Error: %s is not an executable file\n", my_context->argv[0]);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // fall back to a copy of argv[0] when realpath() cannot resolve it
    if(!(my_context->fullpath = realpath(my_context->argv[0], NULL)))
        my_context->fullpath = strdup(my_context->argv[0]);
    FILE *f = fopen64(my_context->argv[0], "rb");
    if(!f) {
        printf_log(LOG_NONE, "Error: Cannot open %s\n", my_context->argv[0]);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    elfheader_t *elf_header = LoadAndCheckElfHeader(f, my_context->argv[0], 1);
    if(!elf_header) {
        printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]);
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    AddElfHeader(my_context, elf_header);

    if(CalcLoadAddr(elf_header)) {
        printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]);
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // allocate memory
    if(AllocElfMemory(my_context, elf_header, 1)) {
        printf_log(LOG_NONE, "Error: allocating memory for elf %s\n", my_context->argv[0]);
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // Load elf into memory
    if(LoadElfMemory(f, my_context, elf_header)) {
        printf_log(LOG_NONE, "Error: loading in memory elf %s\n", my_context->argv[0]);
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // can close the file now
    fclose(f);
    // get and alloc stack size and align
    if(CalcStackSize(my_context)) {
        printf_log(LOG_NONE, "Error: allocating stack\n");
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // init x86 emu
    x86emu_t *emu = NewX86Emu(my_context, my_context->ep, (uintptr_t)my_context->stack, my_context->stacksz, 0);
    // stack setup is much more complicated then just that!
    SetupInitialStack(emu); // starting here, the argv[] don't need free anymore
    SetupX86Emu(emu);
    SetEAX(emu, my_context->argc);
    SetEBX(emu, (uint32_t)my_context->argv);

    // child fork to handle traces
    pthread_atfork(NULL, NULL, my_child_fork);

    thread_set_emu(emu);

    setupTraceInit(my_context);
    // export symbols
    AddSymbols(my_context->maplib, GetMapSymbol(my_context->maplib), GetWeakSymbol(my_context->maplib), GetLocalSymbol(my_context->maplib), elf_header);
    if(wine_preloaded) {
        // look the symbol up in the map, weak and local symbol tables, in that order
        uintptr_t wineinfo = FindSymbol(GetMapSymbol(my_context->maplib), "wine_main_preload_info");
        if(!wineinfo) wineinfo = FindSymbol(GetWeakSymbol(my_context->maplib), "wine_main_preload_info");
        if(!wineinfo) wineinfo = FindSymbol(GetLocalSymbol(my_context->maplib), "wine_main_preload_info");
        if(!wineinfo) {printf_log(LOG_NONE, "Warning, Symbol wine_main_preload_info not found\n");}
        else {
            *(void**)wineinfo = get_wine_prereserve();
            printf_log(LOG_DEBUG, "WINE wine_main_preload_info found and updated\n");
        }
    }
    // pre-load lib if needed
    if(ld_preload.size) {
        for (int i=0; i<ld_preload.size; ++i) {
            if(AddNeededLib(NULL, NULL, 0, ld_preload.paths[i], my_context, emu)) {
                printf_log(LOG_INFO, "Warning, cannot pre-load lib: \"%s\"\n", ld_preload.paths[i]);
            }            
        }
    }
    FreeCollection(&ld_preload);
    // Call librarian to load all dependant elf
    if(LoadNeededLibs(elf_header, my_context->maplib, &my_context->neededlibs, 0, my_context, emu)) {
        printf_log(LOG_NONE, "Error: loading needed libs in elf %s\n", my_context->argv[0]);
        FreeBox86Context(&my_context);
        return -1;
    }
    // reloc...
    printf_log(LOG_DEBUG, "And now export symbols / relocation for %s...\n", ElfName(elf_header));
    if(RelocateElf(my_context->maplib, NULL, elf_header)) {
        printf_log(LOG_NONE, "Error: relocating symbols in elf %s\n", my_context->argv[0]);
        FreeBox86Context(&my_context);
        return -1;
    }
    // and handle PLT
    RelocateElfPlt(my_context->maplib, NULL, elf_header);
    // defered init
    RunDeferedElfInit(emu);
    // do some special case check, _IO_2_1_stderr_ and friends, that are setup by libc, but it's already done here, so need to do a copy
    ResetSpecialCaseMainElf(elf_header);
    // init...
    setupTrace(my_context);
    // get entrypoint
    my_context->ep = GetEntryPoint(my_context->maplib, elf_header);
#ifdef RPI
    // before launching emulation, let's check if this is a mojosetup from GOG
    if (((strstr(prog, "bin/linux/x86/mojosetup") && getenv("MOJOSETUP_BASE")) || strstr(prog, ".mojosetup/mojosetup"))
       && getenv("GTK2_RC_FILES")) {
        sanitize_mojosetup_gtk_background();
    }
#endif

    atexit(endBox86);
    
    // emulate!
    printf_log(LOG_DEBUG, "Start x86emu on Main\n");
    SetEAX(emu, my_context->argc);
    SetEBX(emu, (uint32_t)my_context->argv);
    SetEIP(emu, my_context->ep);
    ResetFlags(emu);
    Run(emu, 0);
    // Get EAX
    int ret = GetEAX(emu);
    printf_log(LOG_DEBUG, "Emulation finished, EAX=%d\n", ret);

    if(trace_func)  {
        free(trace_func);
        trace_func = NULL;
    }

    return ret;
}

可以看出这是很长的一段代码,其中从读取当前环境变量、读取 X86 架构可执行程序文件,以及代码翻译到执行都是很齐全的,后面就开始一步一步分解其中用到的代码内容。

box86 源码解析(main入口开始)

init_auxval(argc, argv, env)

// Excerpt of main(): init_auxval() is the first call; the "..." lines stand for the elided remainder.
int main(int argc, const char **argv, const char **env) {
    init_auxval(argc, argv, env);
    ...
    ...
}

在入口主函数中,首先执行了 init_auxval(argc, argv, env) 函数。

/*
 * Record in auxval_start the address located one slot past the NULL entry
 * that terminates the env array (the auxval vector is after the envs).
 * argc and argv are not used.  Always returns 0.
 */
int init_auxval(int argc, const char **argv, const char **env) {
    const char **cursor = env;
    // walk to the NULL terminator of the environment array
    for(; *cursor; ++cursor)
        ;
    auxval_start = (uintptr_t*)(cursor+1);
    return 0;
}

init_auxval 这个函数的作用就是让 auxval_start 指向辅助向量(auxiliary vector,auxv)的起始地址:在 Linux 进程的初始栈上,辅助向量紧跟在环境变量指针数组末尾的 NULL 之后,因此跳过所有环境变量指针后再加 1 就能得到它的位置,方便后期以这个地址为基地址读取辅助向量中的信息。例如以下测试函数:

#include <stdio.h>
#include <stdint.h>

static uintptr_t* auxval_start = NULL;

/*
 * Demo: print every environment variable, then locate the slot that follows
 * the NULL terminator of env, the same way init_auxval() does, and print it.
 */
int main(int argc, const char **argv, const char **env)
{
    (void)argc;
    (void)argv;
    for(int i=0; env[i]!=NULL; i++)
        printf(" env[%d]:%s\n", i, env[i]);
    while(*env)
        env++;
    auxval_start = (uintptr_t*)(env+1);
    // On Linux/ELF the auxiliary vector is an array of (type, value) integer pairs,
    // not text, so print the first pair as numbers instead of using %s.
    printf("auxval_start = [%p] first entry: type=%lu value=0x%lx\n",
           (void*)auxval_start,
           (unsigned long)auxval_start[0],
           (unsigned long)auxval_start[1]);
    return 0;
}

执行后,可以看到会将当前的环境变量都打印出来,并在最后打印出 auxval_start 指向的地址:

在这里插入图片描述

srandom(time(NULL)) 与 LoadLogEnv()

// Excerpt of main(): after the no-argument check, the random generator is seeded and the
// logging-related environment variables are read; the "..." lines stand for the elided remainder.
int main(int argc, const char **argv, const char **env) {
    init_auxval(argc, argv, env);
    // with no argument at all: print version and help, then return 1
    if(argc==1) {
        PrintBox86Version();
        PrintHelp();
        return 1;
    }

    // init random seed
    srandom(time(NULL));

    // check BOX86_LOG debug level
    LoadLogEnv();
    ...
    ...
}

srandom(time(NULL)) 的作用就不用过多解释了,它为后面使用随机数做准备,即设置随机数种子;而使用 time(NULL) 作为种子的原因是使得每次执行程序所使用的随机数种子都不一样,例程如下:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Demo: seed the generator with the current time, then print three
 * pseudo-random values in the range 0..99.
 */
int main()
{
    srandom((unsigned int)time(NULL));
    // random() returns long, so the matching conversion is %ld
    printf("random() = %ld \n", random() % 100);
    printf("random() = %ld \n", random() % 100);
    printf("random() = %ld \n", random() % 100);
    return 0;
}

在这里插入图片描述
后面的 LoadLogEnv() 函数的主要作用就是读取当前环境变量中的一些设置,从而确定当前的log文件打印信息等级及打印信息输出文件目标,具体如下:

#include <stdio.h>
#include <stdlib.h>     // getenv, strtoll
#include <string.h>     // strlen, strdup
#include <strings.h>    // strcasecmp
#include <unistd.h>     // sysconf

#define LOG_NONE    0
#define LOG_INFO    1
#define LOG_DEBUG   2
#define LOG_DUMP    3

FILE* ftrace = NULL;
int box86_log = LOG_NONE;
int box86_nobanner = 0;

/*
 * Body of LoadLogEnv(), shown as a stand-alone main(): read the BOX86_*
 * environment variables and store the settings in the corresponding globals.
 *
 * Single-character settings are only accepted when the character is a digit
 * inside the allowed range; anything else leaves the global unchanged.
 * Globals other than ftrace, box86_log and box86_nobanner, as well as
 * printf_log(), openFTrace() and GatherDynarecExtensions(), are defined
 * elsewhere in box86.
 */
int main()
{
    ftrace = stdout;
    const char *p = getenv("BOX86_LOG");
    if(p) {
        if(strlen(p)==1) {
            // numeric form: '0' (NONE) .. '3' (DUMP), matching the names accepted below
            if(p[0]>='0'+LOG_NONE && p[0]<='0'+LOG_DUMP)
                box86_log = p[0]-'0';
        } else {
            if(!strcasecmp(p, "NONE"))
                box86_log = LOG_NONE;
            else if(!strcasecmp(p, "INFO"))
                box86_log = LOG_INFO;
            else if(!strcasecmp(p, "DEBUG"))
                box86_log = LOG_DEBUG;
            else if(!strcasecmp(p, "DUMP"))
                box86_log = LOG_DUMP;
        }
        printf_log(LOG_INFO, "Debug level is %d\n", box86_log);
    }
    p = getenv("BOX86_NOBANNER");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='1')
                box86_nobanner = p[0]-'0';
        }
        // box86_nobanner set means the banner is suppressed
        printf_log(LOG_INFO, "Banner is %s\n", box86_nobanner?"Off":"On");
    }
#ifdef DYNAREC
    p = getenv("BOX86_DYNAREC_DUMP");
    if(p) {
        if(strlen(p)==1) {
            // '2' is accepted because values above 1 select the "in color" dump below
            if(p[0]>='0' && p[0]<='2')
                box86_dynarec_dump = p[0]-'0';
        }
        if (box86_dynarec_dump) printf_log(LOG_INFO, "Dynarec blocks are dumped%s\n", (box86_dynarec_dump>1)?" in color":"");
    }
    p = getenv("BOX86_DYNAREC_LOG");
    if(p) {
        if(strlen(p)==1) {
            if((p[0]>='0'+LOG_NONE) && (p[0]<='0'+LOG_DUMP))
                box86_dynarec_log = p[0]-'0';
        } else {
            if(!strcasecmp(p, "NONE"))
                box86_dynarec_log = LOG_NONE;
            else if(!strcasecmp(p, "INFO"))
                box86_dynarec_log = LOG_INFO;
            else if(!strcasecmp(p, "DEBUG"))
                box86_dynarec_log = LOG_DEBUG;
            else if(!strcasecmp(p, "VERBOSE"))
                box86_dynarec_log = LOG_DUMP;
        }
        printf_log(LOG_INFO, "Dynarec log level is %d\n", box86_dynarec_log);
    }
    p = getenv("BOX86_DYNAREC");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='1')
                box86_dynarec = p[0]-'0';
        }
        printf_log(LOG_INFO, "Dynarec is %s\n", box86_dynarec?"On":"Off");
    }
    p = getenv("BOX86_DYNAREC_LINKER");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='1')
                box86_dynarec_linker = p[0]-'0';
        }
        printf_log(LOG_INFO, "Dynarec Linker is %s\n", box86_dynarec_linker?"On":"Off");
    }
    p = getenv("BOX86_DYNAREC_FORCED");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='1')
                box86_dynarec_forced = p[0]-'0';
        }
        if(box86_dynarec_forced)
            printf_log(LOG_INFO, "Dynarec is Forced on all addresses\n");
    }
#endif
#ifdef HAVE_TRACE
    p = getenv("BOX86_TRACE_XMM");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                trace_xmm = p[0]-'0';
        }
    }
    p = getenv("BOX86_TRACE_EMM");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                trace_emm = p[0]-'0';
        }
    }
    p = getenv("BOX86_TRACE_START");
    if(p) {
        // the end pointer is not needed, the whole value is taken as a decimal number
        start_cnt = strtoll(p, NULL, 10);
        printf_log(LOG_INFO, "Will start trace only after %llu instructions\n", start_cnt);
    }
#ifdef DYNAREC
    p = getenv("BOX86_DYNAREC_TRACE");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                box86_dynarec_trace = p[0]-'0';
            if(box86_dynarec_trace)
                printf_log(LOG_INFO, "Dynarec generated code will also print a trace\n");
        }
    }
#endif
#endif
    // grab BOX86_TRACE_FILE envvar, and change %pid to actual pid is present in the name
    openFTrace();
    // Other BOX86 env. var.
    p = getenv("BOX86_DLSYM_ERROR");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                dlsym_error = p[0]-'0';
        }
    }
#ifdef PANDORA
    p = getenv("BOX86_X11COLOR16");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                x11color16 = p[0]-'0';
        }
        printf_log(LOG_INFO, "Try to adjust X11 Color (32->16bits) : %s\n", x11color16?"Yes":"No");
    }
#endif
    p = getenv("BOX86_X11THREADS");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                x11threads = p[0]-'0';
        }
        if(x11threads)
            printf_log(LOG_INFO, "Try to Call XInitThreads if libX11 is loaded\n");
    }
    p = getenv("BOX86_X11GLX");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                x11glx = p[0]-'0';
        }
        if(x11glx)
            printf_log(LOG_INFO, "Hack to force libX11 GLX extension present\n");
        else
            printf_log(LOG_INFO, "Disabled Hack to force libX11 GLX extension present\n");
    }
    // BOX86_LIBGL first, SDL_VIDEO_GL_DRIVER as fallback when libGL is still unset
    p = getenv("BOX86_LIBGL");
    if(p)
        libGL = strdup(p);
    if(!libGL) {
        p = getenv("SDL_VIDEO_GL_DRIVER");
        if(p)
            libGL = strdup(p);
    }
    if(libGL) {
        // print libGL itself: p can be NULL here when libGL was already set beforehand
        printf_log(LOG_INFO, "BOX86 using \"%s\" as libGL.so.1\n", libGL);
    }
    p = getenv("BOX86_ALLOWMISSINGLIBS");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                allow_missing_libs = p[0]-'0';
        }
        if(allow_missing_libs)
            printf_log(LOG_INFO, "Allow missing needed libs\n");
    }
    p = getenv("BOX86_FIX_64BIT_INODES");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                fix_64bit_inodes = p[0]-'0';
        }
        if(fix_64bit_inodes)
            printf_log(LOG_INFO, "Fix 64bit inodes\n");
    }
    p = getenv("BOX86_JITGDB");
    if(p) {
        if(strlen(p)==1) {
            if(p[0]>='0' && p[0]<='0'+1)
                jit_gdb = p[0]-'0';
        }
        if(jit_gdb)
            printf_log(LOG_INFO, "Launch gdb on segfault\n");
    }
    // fall back to 4096 when sysconf() reports a page size of 0
    box86_pagesize = sysconf(_SC_PAGESIZE);
    if(!box86_pagesize)
        box86_pagesize = 4096;
#ifdef DYNAREC
    GatherDynarecExtensions();
#endif
    return 0;
}

读取当前执行参数并输出打印信息

接下来的内容就是分析当前 box86 使用的选项,其中就有对于使用 wine 专门的处理部分。这里简单来说主要就是分析一下当前使用的参数信息并针对需要打印的部分输出打印信息。


// Excerpt of main(): parsing of box86's own options and the wine-specific pre-checks.
// The "..." lines stand for the elided parts of the function.
int main(int argc, const char **argv, const char **env) {
	...
	...
    const char* prog = argv[1];
    int nextarg = 1;
    // check if some options are passed
    while(prog && prog[0]=='-') {
        if(!strcmp(prog, "-v") || !strcmp(prog, "--version")) {
            PrintBox86Version();
            exit(0);
        }
        if(!strcmp(prog, "-h") || !strcmp(prog, "--help")) {
            PrintHelp();
            exit(0);
        }
        // other options?
        if(!strcmp(prog, "--")) {
            prog = argv[++nextarg];
            break;
        }
        printf("Warning, unrecognized option '%s'\n", prog);
        prog = argv[++nextarg];
    }
    if(!prog || nextarg==argc) {
        printf("Box86: nothing to run\n");
        exit(0);
    }
    if(!box86_nobanner)
        PrintBox86Version();
    // precheck, for win-preload: does prog end with "wine-preloader"?
    // The lengths are compared first so that no pointer before the start of prog is ever formed.
    const size_t proglen = strlen(prog);
    const size_t preloaderlen = strlen("wine-preloader");
    if(proglen>=preloaderlen && !strcmp(prog+proglen-preloaderlen, "wine-preloader")) {
        // wine-preloader detected, skipping it if next arg exist and is an x86 binary
        // (argv[nextarg] is the preloader itself, so the *next* argument is argv[nextarg+1])
        int x86 = (nextarg+1<argc)?FileIsX86ELF(argv[nextarg+1]):0;
        if(x86) {
            prog = argv[++nextarg];
            printf_log(LOG_INFO, "BOX86: Wine preloader detected, loading \"%s\" directly\n", prog);
        }
    }
    // check if this is wine
    if(!strcmp(prog, "wine") || (strlen(prog)>5 && !strcmp(prog+strlen(prog)-strlen("/wine"), "/wine"))) {
        const char* prereserve = getenv("WINEPRELOADRESERVE");
        printf_log(LOG_INFO, "BOX86: Wine detected, WINEPRELOADRESERVE=\"%s\"\n", prereserve?prereserve:"");
        wine_prereserve(prereserve);
    }
    ...
    ...
    ...
}

NewBox86Context()

NewBox86Context() 函数负责创建并初始化 box86 的运行上下文(context),为后续读取和执行可执行文件准备环境:


/*
 * Allocate a zero-initialised box86context_t and fill in its default members.
 *
 * argc : stored in context->argc; the argv array is allocated with argc+1
 *        zeroed slots.
 *
 * Returns the new context.  With BUILD_DYNAMIC, when my_context already
 * exists its count is incremented and it is returned instead.
 *
 * NOTE(review): neither calloc() result is checked before it is used.
 */
EXPORTDYN
box86context_t *NewBox86Context(int argc)
{
#ifdef BUILD_DYNAMIC
    // reuse the existing global context and just count one more instance
    if(my_context) {
        ++my_context->count;
        return my_context;
    }
#endif
    // init and put default values
    box86context_t *context = (box86context_t*)calloc(1, sizeof(box86context_t));

#ifdef BUILD_LIB
    context->deferedInit = 0;
#else
    context->deferedInit = 1;
#endif
    context->maplib = NewLibrarian(context, 1);
    context->local_maplib = NewLibrarian(context, 1);
    context->system = NewBridge();
    // create vsyscall
    context->vsyscall = AddBridge(context->system, vFv, x86Syscall, 0);
#ifdef BUILD_LIB
    context->box86lib = RTLD_DEFAULT;   // not ideal
#else
    // handle on the running program itself (dlopen with a NULL file name)
    context->box86lib = dlopen(NULL, RTLD_NOW|RTLD_GLOBAL);
#endif
    context->dlprivate = NewDLPrivate();

    context->callbacks = NewCallbackList();

    context->argc = argc;
    // one extra zeroed slot is allocated after the argc entries
    context->argv = (char**)calloc(context->argc+1, sizeof(char*));

    pthread_mutex_init(&context->mutex_once, NULL);
    pthread_mutex_init(&context->mutex_once2, NULL);
    pthread_mutex_init(&context->mutex_trace, NULL);
#ifndef DYNAREC
    pthread_mutex_init(&context->mutex_lock, NULL);
#endif
    pthread_mutex_init(&context->mutex_tls, NULL);
    pthread_mutex_init(&context->mutex_thread, NULL);
#ifdef DYNAREC
    pthread_mutex_init(&context->mutex_dyndump, NULL);
#endif
    // free_tlsdatasize is registered as the destructor of the TLS key
    pthread_key_create(&context->tlskey, free_tlsdatasize);

#ifdef DYNAREC
    pthread_mutex_init(&context->mutex_blocks, NULL);
    pthread_mutex_init(&context->mutex_mmap, NULL);
    context->dynablocks = NewDynablockList(0, 0, 0, 0, 0);
#endif
    InitFTSMap(context);

    // fill the 4 canary bytes from getrand(), then force one of them to 0
    // (presumably getrand(n) yields a value in [0, n) - TODO confirm)
    for (int i=0; i<4; ++i) context->canary[i] = 1 +  getrand(255);
    context->canary[getrand(4)] = 0;
    printf_log(LOG_DEBUG, "Setting up canary (for Stack protector) at GS:0x14, value:%08X\n", *(uint32_t*)context->canary);

    initAllHelpers(context);

    return context;
}

其中主要就是用来设置 context 结构体中的内容,结构体具体内容如下:

/*
 * Global state of one box86 instance: search paths, program arguments and
 * environment, loaded ELF headers, library/symbol handling, shortcuts to
 * wrapped libraries, TLS, cleanup/atfork lists and signal bookkeeping.
 * Members guarded by DYNAREC, NOALIGN or BUILD_DYNAMIC only exist in the
 * corresponding build configurations.
 */
typedef struct box86context_s {
    path_collection_t   box86_path;     // PATH env. variable
    path_collection_t   box86_ld_lib;   // LD_LIBRARY_PATH env. variable

    path_collection_t   box86_emulated_libs;    // Collection of libs that should not be wrapped

    int                 x86trace;
    int                 trace_tid;
#ifdef DYNAREC
    int                 trace_dynarec;
    pthread_mutex_t     mutex_dyndump;
#endif
    zydis_t             *zydis;         // dlopen the zydis disassembler
    void*               box86lib;       // dlopen on box86 itself

    int                 argc;
    char**              argv;

    int                 envc;
    char**              envv;

    char*               fullpath;
    char*               box86path;      // path of current box86 executable

    uint32_t            stacksz;
    int                 stackalign;
    void*               stack;          // allocated stack

    elfheader_t         **elfs;         // elf headers and memory
    int                 elfcap;
    int                 elfsize;        // number of elf loaded

    needed_libs_t       neededlibs;     // needed libs for main elf

    uintptr_t           ep;             // entry point

    lib_t               *maplib;        // lib and symbols handling
    lib_t               *local_maplib;  // libs and symbols opened as local (only collection of libs, no symbols)

    kh_threadstack_t    *stacksizes;    // stack sizes attributes for thread (temporary)
    kh_cancelthread_t   *cancelthread;  // thread cancel mechanism is a bit complex, create a map to ease it
    bridge_t            *threads;       // threads
    bridge_t            *system;        // other bridges
    uintptr_t           vsyscall;       // vsyscall bridge value
    dlprivate_t         *dlprivate;     // dlopen library map
    kh_symbolmap_t      *glwrappers;    // the map of wrapper for glProcs (for GLX or SDL1/2)
    kh_symbolmap_t      *glmymap;       // link to the mysymbolmap of libGL
    procaddess_t        glxprocaddress;
    kh_symbolmap_t      *alwrappers;    // the map of wrapper for alGetProcAddress
    kh_symbolmap_t      *almymap;       // link to the mysymbolmap of libOpenAL

    callbacklist_t      *callbacks;     // all callbacks

    pthread_mutex_t     mutex_once;
    pthread_mutex_t     mutex_once2;
    pthread_mutex_t     mutex_trace;
    #ifndef DYNAREC
    pthread_mutex_t     mutex_lock;     // dynarec build will use their own mechanism
    #endif
    pthread_mutex_t     mutex_tls;
    pthread_mutex_t     mutex_thread;

    library_t           *libclib;       // shortcut to libc library (if loaded, so probably yes)
    library_t           *sdl1lib;       // shortcut to SDL1 library (if loaded)
    void*               sdl1allocrw;
    void*               sdl1freerw;
    library_t           *sdl1mixerlib;
    library_t           *sdl1imagelib;
    library_t           *sdl1ttflib;
    library_t           *sdl2lib;       // shortcut to SDL2 library (if loaded)
    void*               sdl2allocrw;
    void*               sdl2freerw;
    library_t           *sdl2mixerlib;
    library_t           *sdl2imagelib;
    library_t           *sdl2ttflib;
    library_t           *x11lib;
    library_t           *libxcb;
    library_t           *libxcbxfixes;
    library_t           *libxcbshape;
    library_t           *libxcbshm;
    library_t           *libxcbrandr;
    library_t           *libxcbimage;
    library_t           *libxcbkeysyms;
    library_t           *libxcbxtest;
    library_t           *zlib;
    library_t           *vorbisfile;
    library_t           *vorbis;
    library_t           *asound;
    library_t           *pulse;
    library_t           *d3dadapter9;

    int                 deferedInit;
    elfheader_t         **deferedInitList;
    int                 deferedInitSz;
    int                 deferedInitCap;

    pthread_key_t       tlskey;     // the tls key to have actual tlsdata
    void*               tlsdata;    // the initial global tlsdata
    int32_t             tlssize;    // wanted size of tlsdata
    base_segment_t      segtls[3];  // only handling 0/1/2 descriptors

    uintptr_t           *auxval_start;

    cleanup_t   *cleanups;          // atexit functions
    int         clean_sz;
    int         clean_cap;
#ifdef DYNAREC
    pthread_mutex_t     mutex_blocks;
    pthread_mutex_t     mutex_mmap;
    dynablocklist_t     *dynablocks;
    mmaplist_t          *mmaplist;
    int                 mmapsize;
    dynmap_t*           dynmap[DYNAMAP_SIZE];  // 4G of memory mapped by 4K block
#endif
#ifndef NOALIGN
    kh_fts_t            *ftsmap;
#endif
    zydis_dec_t         *dec;           // trace

    int                 forked;         //  how many forks... cleanup only when < 0

    atfork_fnc_t        *atforks;       // fnc for atfork...
    int                 atfork_sz;
    int                 atfork_cap;

    uint8_t             canary[4];

    uintptr_t           signals[MAX_SIGNAL];
    uintptr_t           restorer[MAX_SIGNAL];
    int                 is_sigaction[MAX_SIGNAL];
    x86emu_t            *emu_sig;       // the emu with stack used for signal handling (must be separated from main ones)
    int                 no_sigsegv;
    int                 no_sigill;
#ifdef BUILD_DYNAMIC
    int                 count;      // number of instances
#endif
} box86context_t;

读取可执行文件

// Fragment of main(): open the target program and parse its ELF header.
// Each failure path releases the context argv, the context and the preload list before returning -1.
FILE *f = fopen64(my_context->argv[0], "rb");
    if(!f) {
        printf_log(LOG_NONE, "Error: Cannot open %s\n", my_context->argv[0]);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    elfheader_t *elf_header = LoadAndCheckElfHeader(f, my_context->argv[0], 1);
    if(!elf_header) {
        printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]);
        // the file was opened successfully above, so it has to be closed on this path
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }
    // register the parsed header in the context
    AddElfHeader(my_context, elf_header);

经过对 box86context_t 变量环境的初始化后,这里就真正地去读取可执行 ELF 文件。读取 ELF 类型文件信息的方式主要是将对应信息直接按照结构体读取即可。具体操作在函数 LoadAndCheckElfHeader 中,接下来就具体分析该函数的操作:

/*
 * Parse the ELF header of an already opened file and record its resolved path.
 *
 * f    : open stream of the program file, handed to ParseElfHeader()
 * name : file name of the program (may include a path), handed to
 *        ParseElfHeader() and realpath()
 * exec : forwarded unchanged to ParseElfHeader()
 *
 * Returns the header produced by ParseElfHeader(), or NULL when parsing
 * fails.  h->path is the realpath() result; when realpath() fails it is an
 * empty heap-allocated string instead, so h->path can be released with
 * free() in both cases.
 */
elfheader_t* LoadAndCheckElfHeader(FILE* f, const char* name, int exec)
{
    elfheader_t *h = ParseElfHeader(f, name, exec);
    if(!h)
        return NULL;

    h->path = realpath(name, NULL);
    if(!h->path) {
        // calloc() yields the empty string directly, so nothing is written through
        // the pointer when the allocation itself fails
        h->path = (char*)calloc(1, 1);
    }
    return h;
}

从这段代码内可以看到,解析文件内容的具体操作在 ParseElfHeader() 函数内,具体如下:


// Read and validate the ELF header of `f`, then load the section/program header
// tables and the sections needed later (string tables, symbol tables, .dynamic).
// f    : open stream of the ELF file, positioned at its first byte
// name : file name, duplicated into the returned header
// exec : non-zero for the main executable; raises the log level of some
//        diagnostics and makes a zero entry point an error
// Returns a newly allocated elfheader_t, or NULL when the file is rejected or
// cannot be read. On failure everything allocated here is released.
elfheader_t* ParseElfHeader(FILE* f, const char* name, int exec)
{
    Elf32_Ehdr header;
    int level = (exec)?LOG_INFO:LOG_DEBUG;
    // The on-disk header has exactly the layout of Elf32_Ehdr, so it is read straight into the struct.
    if(fread(&header, sizeof(Elf32_Ehdr), 1, f)!=1) {
        printf_log(level, "Cannot read ELF Header\n");
        return NULL;
    }
    if(memcmp(header.e_ident, ELFMAG, SELFMAG)!=0) {
        printf_log(LOG_INFO, "Not an ELF file (sign=%c%c%c%c)\n", header.e_ident[0], header.e_ident[1], header.e_ident[2], header.e_ident[3]);
        return NULL;
    }
    if(header.e_ident[EI_CLASS]!=ELFCLASS32) {
        if(header.e_ident[EI_CLASS]==ELFCLASS64) {
            printf_log(LOG_INFO, "This is a 64bits ELF! box86 can only run 32bits ELF!\n");
        } else {
            printf_log(LOG_INFO, "Not a 32bits ELF (%d)\n", header.e_ident[EI_CLASS]);
        }
        return NULL;
    }
    if(header.e_ident[EI_DATA]!=ELFDATA2LSB) {
        printf_log(LOG_INFO, "Not a LittleEndian ELF (%d)\n", header.e_ident[EI_DATA]);
        return NULL;
    }
    if(header.e_ident[EI_VERSION]!=EV_CURRENT) {
        printf_log(LOG_INFO, "Incorrect ELF version (%d)\n", header.e_ident[EI_VERSION]);
        return NULL;
    }
    if(header.e_ident[EI_OSABI]!=ELFOSABI_LINUX && header.e_ident[EI_OSABI]!=ELFOSABI_NONE && header.e_ident[EI_OSABI]!=ELFOSABI_SYSV) {
        printf_log(LOG_INFO, "Not a Linux ELF (%d)\n",header.e_ident[EI_OSABI]);
        return NULL;
    }

    if(header.e_type != ET_EXEC && header.e_type != ET_DYN) {
        printf_log(LOG_INFO, "Not an Executable (%d)\n", header.e_type);
        return NULL;
    }

    if(header.e_machine != EM_386) {
        printf_log(level, "Not an i386 ELF (%d)\n", header.e_machine);
        return NULL;
    }

    if(header.e_entry == 0 && exec) {
        printf_log(LOG_INFO, "No entry point in ELF\n");
        return NULL;
    }
    // sizeof yields a size_t; cast so the argument really matches the %d conversion
    if(header.e_phentsize != sizeof(Elf32_Phdr)) {
        printf_log(LOG_INFO, "Program Header Entry size incorrect (%d != %d)\n", header.e_phentsize, (int)sizeof(Elf32_Phdr));
        return NULL;
    }
    if(header.e_shentsize != sizeof(Elf32_Shdr) && header.e_shentsize != 0) {
        printf_log(LOG_INFO, "Section Header Entry size incorrect (%d != %d)\n", header.e_shentsize, (int)sizeof(Elf32_Shdr));
        return NULL;
    }

    elfheader_t *h = calloc(1, sizeof(elfheader_t));
    if(!h) {
        printf_log(LOG_INFO, "Cannot allocate ELF header structure\n");
        return NULL;
    }
    h->name = strdup(name);
    h->entrypoint = header.e_entry;
    h->numPHEntries = header.e_phnum;
    h->numSHEntries = header.e_shnum;
    h->SHIdx = header.e_shstrndx;
    if(header.e_shentsize && header.e_shnum) {
        // special cases for nums
        // NOTE(review): this block is only entered when e_shnum != 0, so
        // numSHEntries == 0 looks unreachable here -- confirm whether the guard
        // above was meant to test e_shoff instead.
        if(h->numSHEntries == 0) {
            printf_log(LOG_DEBUG, "Read number of Sections in 1st Section\n");
            // read 1st section header and grab actual number from here
            fseeko64(f, header.e_shoff, SEEK_SET);
            Elf32_Shdr section;
            if(fread(&section, sizeof(Elf32_Shdr), 1, f)!=1) {
                // FreeElfHeader (not plain free) so that h->name is released too
                FreeElfHeader(&h);
                printf_log(LOG_INFO, "Cannot read Initial Section Header\n");
                return NULL;
            }
            h->numSHEntries = section.sh_size;
        }
        // now read all section headers
        printf_log(LOG_DEBUG, "Read %d Section header\n", h->numSHEntries);
        h->SHEntries = (Elf32_Shdr*)calloc(h->numSHEntries, sizeof(Elf32_Shdr));
        if(!h->SHEntries) {
            FreeElfHeader(&h);
            printf_log(LOG_INFO, "Cannot allocate Section Header table\n");
            return NULL;
        }
        fseeko64(f, header.e_shoff ,SEEK_SET);
        if(fread(h->SHEntries, sizeof(Elf32_Shdr), h->numSHEntries, f)!=h->numSHEntries) {
            FreeElfHeader(&h);
            printf_log(LOG_INFO, "Cannot read all Section Header\n");
            return NULL;
        }

        if(h->numPHEntries == PN_XNUM) {
            printf_log(LOG_DEBUG, "Read number of Program Header in 1st Section\n");
            // read 1st section header and grab actual number from here
            h->numPHEntries = h->SHEntries[0].sh_info;
        }
    }

    printf_log(LOG_DEBUG, "Read %d Program header\n", h->numPHEntries);
    h->PHEntries = (Elf32_Phdr*)calloc(h->numPHEntries, sizeof(Elf32_Phdr));
    // calloc(0, ...) may legitimately return NULL, so only a non-empty table is an error
    if(!h->PHEntries && h->numPHEntries) {
        FreeElfHeader(&h);
        printf_log(LOG_INFO, "Cannot allocate Program Header table\n");
        return NULL;
    }
    fseeko64(f, header.e_phoff ,SEEK_SET);
    if(fread(h->PHEntries, sizeof(Elf32_Phdr), h->numPHEntries, f)!=h->numPHEntries) {
        FreeElfHeader(&h);
        printf_log(LOG_INFO, "Cannot read all Program Header\n");
        return NULL;
    }

    if(header.e_shentsize && header.e_shnum) {
        if(h->SHIdx == SHN_XINDEX) {
            printf_log(LOG_DEBUG, "Read number of String Table in 1st Section\n");
            h->SHIdx = h->SHEntries[0].sh_link;
        }
        // valid indices are 0 .. numSHEntries-1: an index equal to the count would
        // make the LoadSH call below read one entry past the section table
        if(h->SHIdx >= h->numSHEntries) {
            printf_log(LOG_INFO, "Incoherent Section String Table Index : %d / %d\n", h->SHIdx, h->numSHEntries);
            FreeElfHeader(&h);
            return NULL;
        }
        // load Section table
        printf_log(LOG_DEBUG, "Loading Sections Table String (idx = %d)\n", h->SHIdx);
        if(LoadSH(f, h->SHEntries+h->SHIdx, (void*)&h->SHStrTab, ".shstrtab", SHT_STRTAB)) {
            FreeElfHeader(&h);
            return NULL;
        }
        if(box86_log>=LOG_DUMP) DumpMainHeader(&header, h);

        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".strtab", "SymTab Strings", SHT_STRTAB, (void**)&h->StrTab, NULL);
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".symtab", "SymTab", SHT_SYMTAB, (void**)&h->SymTab, &h->numSymTab);
        if(box86_log>=LOG_DUMP && h->SymTab) DumpSymTab(h);

        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynamic", "Dynamic", SHT_DYNAMIC, (void**)&h->Dynamic, &h->numDynamic);
        if(box86_log>=LOG_DUMP && h->Dynamic) DumpDynamicSections(h);
        // grab DT_REL & DT_RELA stuffs
        // also grab the DT_STRTAB string table
        {
            for (int i=0; i<h->numDynamic; ++i) {
                if(h->Dynamic[i].d_tag == DT_REL)
                    h->rel = h->Dynamic[i].d_un.d_ptr;
                else if(h->Dynamic[i].d_tag == DT_RELSZ)
                    h->relsz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELENT)
                    h->relent = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELA)
                    h->rela = h->Dynamic[i].d_un.d_ptr;
                else if(h->Dynamic[i].d_tag == DT_RELASZ)
                    h->relasz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELAENT)
                    h->relaent = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTGOT)
                    h->pltgot = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTREL)
                    h->pltrel = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTRELSZ)
                    h->pltsz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_JMPREL)
                    h->jmprel = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_STRTAB)
                    h->DynStrTab = (char*)(h->Dynamic[i].d_un.d_ptr);
                else if(h->Dynamic[i].d_tag == DT_STRSZ)
                    h->szDynStrTab = h->Dynamic[i].d_un.d_val;
            }
            if(h->rel) {
                if(h->relent != sizeof(Elf32_Rel)) {
                    printf_log(LOG_NONE, "Rel Table Entry size invalid (0x%x should be 0x%x)\n", h->relent, (unsigned)sizeof(Elf32_Rel));
                    FreeElfHeader(&h);
                    return NULL;
                }
                printf_log(LOG_DEBUG, "Rel Table @%p (0x%x/0x%x)\n", (void*)h->rel, h->relsz, h->relent);
            }
            if(h->rela) {
                if(h->relaent != sizeof(Elf32_Rela)) {
                    printf_log(LOG_NONE, "RelA Table Entry size invalid (0x%x should be 0x%x)\n", h->relaent, (unsigned)sizeof(Elf32_Rela));
                    FreeElfHeader(&h);
                    return NULL;
                }
                printf_log(LOG_DEBUG, "RelA Table @%p (0x%x/0x%x)\n", (void*)h->rela, h->relasz, h->relaent);
            }
            if(h->jmprel) {
                // the PLT relocation entry size follows from the relocation flavour in DT_PLTREL
                if(h->pltrel == DT_REL) {
                    h->pltent = sizeof(Elf32_Rel);
                } else if(h->pltrel == DT_RELA) {
                    h->pltent = sizeof(Elf32_Rela);
                } else {
                    printf_log(LOG_NONE, "PLT Table type is unknown (size = 0x%x, type=%d)\n", h->pltsz, h->pltrel);
                    FreeElfHeader(&h);
                    return NULL;
                }
                // the table size must be a whole number of entries
                if((h->pltsz / h->pltent)*h->pltent != h->pltsz) {
                    printf_log(LOG_NONE, "PLT Table Entry size invalid (0x%x, ent=0x%x, type=%d)\n", h->pltsz, h->pltent, h->pltrel);
                    FreeElfHeader(&h);
                    return NULL;
                }
                printf_log(LOG_DEBUG, "PLT Table @%p (type=%d 0x%x/0x%0x)\n", (void*)h->jmprel, h->pltrel, h->pltsz, h->pltent);
            }
            if(h->DynStrTab && h->szDynStrTab) {
                //DumpDynamicNeeded(h); cannot dump now, it's not loaded yet
            }
        }
        // FindSection returns 0 when the name is absent, hence the `if(ii)` guards below
        // look for PLT Offset
        int ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got.plt");
        if(ii) {
            h->gotplt = h->SHEntries[ii].sh_addr;
            printf_log(LOG_DEBUG, "The GOT.PLT Table is at address %p\n", (void*)h->gotplt);
        }
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got");
        if(ii) {
            h->got = h->SHEntries[ii].sh_addr;
            printf_log(LOG_DEBUG, "The GOT Table is at address %p\n", (void*)h->got);
        }
        // look for .init entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init");
        if(ii) {
            h->initentry = h->SHEntries[ii].sh_addr;
            printf_log(LOG_DEBUG, "The .init is at address %p\n", (void*)h->initentry);
        }
        // and .init_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init_array");
        if(ii) {
            h->initarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->initarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf_log(LOG_DEBUG, "The .init_array is at address %p, and have %d elements\n", (void*)h->initarray, h->initarray_sz);
        }
        // look for .fini entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini");
        if(ii) {
            h->finientry = h->SHEntries[ii].sh_addr;
            printf_log(LOG_DEBUG, "The .fini is at address %p\n", (void*)h->finientry);
        }
        // and .fini_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini_array");
        if(ii) {
            h->finiarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->finiarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf_log(LOG_DEBUG, "The .fini_array is at address %p, and have %d elements\n", (void*)h->finiarray, h->finiarray_sz);
        }
        // grab .text for main code
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".text");
        if(ii) {
            h->text = (uintptr_t)(h->SHEntries[ii].sh_addr);
            h->textsz = h->SHEntries[ii].sh_size;
            printf_log(LOG_DEBUG, "The .text is at address %p, and is %d big\n", (void*)h->text, h->textsz);
        }

        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynstr", "DynSym Strings", SHT_STRTAB, (void**)&h->DynStr, NULL);
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynsym", "DynSym", SHT_DYNSYM, (void**)&h->DynSym, &h->numDynSym);
        if(box86_log>=LOG_DUMP && h->DynSym) DumpDynSym(h);
    }
    
    return h;
}

我们可以简单创建一个示例程序来验证一下,具体代码如下(由于这里脱离了 box86 工程,日志输出统一改用 printf):

#include "ParseElf.h"

/*
 * Load the raw bytes of one section into a freshly allocated buffer.
 *
 * f    : open ELF file
 * s    : section header describing where the data lives (sh_offset / sh_size)
 * SH   : receives the buffer; the caller owns it and must free() it.
 *        Left untouched on a type mismatch, set to NULL on any later failure.
 * name : section name, used for diagnostics only
 * type : expected sh_type, or 0 to accept any type
 *
 * Returns 0 on success, -1 on type mismatch or allocation/seek/read failure.
 */
int LoadSH(FILE *f, Elf32_Shdr *s, void** SH, const char* name, uint32_t type)
{
    if(type && (s->sh_type != type)) {
        printf("Section Header \"%s\" (off=%d, size=%d) has incorect type (%d != %d)\n", name, s->sh_offset, s->sh_size, s->sh_type, type);
        return -1;
    }
    // A symbol table whose size is not a whole number of entries is only reported, not rejected.
    if (type==SHT_SYMTAB && s->sh_size%sizeof(Elf32_Sym)) {
        // sizeof yields a size_t; cast so the argument matches the %d conversion
        printf("Section Header \"%s\" (off=%d, size=%d) has size (not multiple of %d)\n", name, s->sh_offset, s->sh_size, (int)sizeof(Elf32_Sym));
    }
    printf("LoadSH : name = [%s], s->sh_size = [%d] \n", name, s->sh_size);

    // Work on a local pointer so *SH never ends up pointing at a half-filled buffer.
    void *buf = calloc(1, s->sh_size);
    if(!buf) {
        printf("Cannot allocate Section \"%s\" (size=%d)\n", name, s->sh_size);
        *SH = NULL;
        return -1;
    }
    if(fseeko(f, s->sh_offset ,SEEK_SET)!=0 || fread(buf, s->sh_size, 1, f)!=1) {
        printf("Cannot read Section Header \"%s\" (off=%d, size=%d)\n", name, s->sh_offset, s->sh_size);
        free(buf);
        *SH = NULL;
        return -1;
    }

    *SH = buf;
    return 0;
}

/*
 * Look up a section by name.
 *
 * s        : section header table
 * n        : number of entries in `s`
 * SHStrTab : section-name string table (sh_name is an offset into it)
 * name     : name to search for
 *
 * Returns the index of the first non-SHT_NULL section whose name equals `name`,
 * or 0 when there is none or when an argument is NULL. Index 0 doubles as
 * "not found", so a match at index 0 cannot be told apart from a miss.
 */
int FindSection(Elf32_Shdr *s, int n, char* SHStrTab, const char* name)
{
    // Validate the pointers themselves: testing SHStrTab+offset against NULL
    // cannot detect a missing table, and `name` is about to be printed with %s.
    if(!s || !SHStrTab || !name)
    {
        printf("%s : %d : name is NULL or SHStrTab is NULL \n", __func__, __LINE__);
        return 0;
    }
    printf("FindSection : n = [%d] name = [%s] \n", n, name);
    for (int i=0; i<n; ++i) {
        if(s[i].sh_type==SHT_NULL)
            continue;   // inactive entry, carries no name
        if(!strcmp(SHStrTab+s[i].sh_name, name))
            return i;
    }
    return 0;
}


/*
 * Find a section by name and load its contents.
 *
 * f         : open ELF file
 * s, size   : section header table and its entry count
 * SHStrTab  : section-name string table
 * name      : section name to look for (e.g. ".symtab")
 * clearname : human readable name, used for the trace line only
 * type      : expected sh_type, forwarded to LoadSH
 * what      : receives the loaded buffer
 * num       : optional; for symbol tables and .dynamic it receives the number
 *             of entries in the section
 *
 * When the section is absent or cannot be loaded, *num is left unchanged.
 */
void LoadNamedSection(FILE *f, Elf32_Shdr *s, int size, char* SHStrTab, const char* name, const char* clearname, uint32_t type, void** what, int* num)
{
    int n = FindSection(s, size, SHStrTab, name);
    printf("Loading %s (idx = %d)\n", clearname, n);
    if(!n)
        return;     // FindSection returns 0 for "not found": nothing to load or count
    if(LoadSH(f, s+n, what, name, type))
        return;     // never derive an entry count from a section that failed to load
    if(!*what || !num)
        return;
    if(type==SHT_SYMTAB || type==SHT_DYNSYM) {
        *num = s[n].sh_size / sizeof(Elf32_Sym);
    } else if(type==SHT_DYNAMIC) {
        *num = s[n].sh_size / sizeof(Elf32_Dyn);
    }
}

/*
 * Stand-alone demo of the ELF parsing steps: validates the ELF header of the
 * file given as argv[1], loads its section and program header tables, and
 * prints what it finds with printf.
 *
 * Returns 0 after a complete parse, -1 on any error. The file and every
 * buffer allocated along the way are released on all exit paths.
 */
int main(int argc, char **argv)
{
    int rc = -1;                // becomes 0 only once the whole file was parsed
    FILE *f = NULL;
    elfheader_t *h = NULL;
    Elf32_Ehdr header;

    printf("*************************\n");
    printf("Parse ELF Start....\n");
    printf("*************************\n");
    if(argc < 2)
    {
        printf("Please input parse exec file! \n");
        return -1;
    }
    f = fopen(argv[1], "rb");
    if(!f) {
        // without this check the fread below would be handed a NULL stream
        printf("Cannot open %s\n", argv[1]);
        return -1;
    }
    if(fread(&header, sizeof(Elf32_Ehdr), 1, f)!=1) {
        printf("Cannot read ELF Header\n");
        goto out;
    }
    // compare the first 4 bytes against the ELF magic number
    if(memcmp(header.e_ident, ELFMAG, SELFMAG)!=0) {
        printf("Not an ELF file (sign=%c%c%c%c)\n", header.e_ident[0], header.e_ident[1], header.e_ident[2], header.e_ident[3]);
        goto out;
    }
    printf("ELF file (sign=%c%c%c%c)\n", header.e_ident[0], header.e_ident[1], header.e_ident[2], header.e_ident[3]);

    // e_ident[EI_CLASS] must say "32-bit"
    if(header.e_ident[EI_CLASS]!=ELFCLASS32) {
        if(header.e_ident[EI_CLASS]==ELFCLASS64) {
            printf("This is a 64bits ELF! box86 can only run 32bits ELF!\n");
        } else {
            printf("Not a 32bits ELF (%d)\n", header.e_ident[EI_CLASS]);
        }
        goto out;
    }

    // only little-endian files are accepted
    if(header.e_ident[EI_DATA]!=ELFDATA2LSB) {
        printf("Not a LittleEndian ELF (%d)\n", header.e_ident[EI_DATA]);
        goto out;
    }

    // the ELF version must be the current one
    if(header.e_ident[EI_VERSION]!=EV_CURRENT) {
        printf("Incorrect ELF version (%d)\n", header.e_ident[EI_VERSION]);
        goto out;
    }

    // the OS ABI must be Linux, none, or System V
    if(header.e_ident[EI_OSABI]!=ELFOSABI_LINUX && header.e_ident[EI_OSABI]!=ELFOSABI_NONE && header.e_ident[EI_OSABI]!=ELFOSABI_SYSV) {
        printf("Not a Linux ELF (%d)\n",header.e_ident[EI_OSABI]);
        goto out;
    }

    // only executables (ET_EXEC) and shared objects / PIE (ET_DYN) are accepted
    if(header.e_type != ET_EXEC && header.e_type != ET_DYN) {
        printf("Not an Executable (%d)\n", header.e_type);
        goto out;
    }

    // the target machine must be i386
    if(header.e_machine != EM_386) {
        printf("Not an i386 ELF (%d)\n", header.e_machine);
        goto out;
    }

    // the demo treats every input as an executable, so the entry point must be non-zero
    if(header.e_entry == 0 ) {
        printf("No entry point in ELF\n");
        goto out;
    }

    // the program header entry size must match the struct it is read into
    if(header.e_phentsize != sizeof(Elf32_Phdr)) {
        printf("Program Header Entry size incorrect (%d != %zu)\n", header.e_phentsize, sizeof(Elf32_Phdr));
        goto out;
    }
    // same for the section header entry size (0 means "no section headers")
    if(header.e_shentsize != sizeof(Elf32_Shdr) && header.e_shentsize != 0) {
        printf("Section Header Entry size incorrect (%d != %zu)\n", header.e_shentsize, sizeof(Elf32_Shdr));
        goto out;
    }

    h = calloc(1, sizeof(elfheader_t));
    if(!h) {
        printf("Cannot allocate ELF header structure\n");
        goto out;
    }
    // copy the basic facts out of the raw header
    h->name = strdup(argv[1]);
    h->entrypoint = header.e_entry;     // virtual address of the entry point
    h->numPHEntries = header.e_phnum;   // number of program header entries
    h->numSHEntries = header.e_shnum;   // number of section header entries
    h->SHIdx = header.e_shstrndx;       // index of the section-name string table
    if(header.e_shentsize && header.e_shnum) {
        // special cases for nums
        // NOTE(review): e_shnum != 0 is required to get here, so this branch
        // looks unreachable -- kept as in the code being demonstrated.
        if(h->numSHEntries == 0) {
            // the real section count is stored in sh_size of section header 0
            printf("Read number of Sections in 1st Section\n");
            // read 1st section header and grab actual number from here
            fseeko(f, header.e_shoff, SEEK_SET);
            Elf32_Shdr section;
            if(fread(&section, sizeof(Elf32_Shdr), 1, f)!=1) {
                printf("Cannot read Initial Section Header\n");
                goto out;
            }
            printf("section.sh_size = [%u]\n", section.sh_size);
            h->numSHEntries = section.sh_size;
        }
        // now read all section headers
        printf("Read %d Section header\n", h->numSHEntries);
        h->SHEntries = (Elf32_Shdr*)calloc(h->numSHEntries, sizeof(Elf32_Shdr));
        if(!h->SHEntries) {
            printf("Cannot allocate Section Header table\n");
            goto out;
        }
        fseeko(f, header.e_shoff ,SEEK_SET);
        if(fread(h->SHEntries, sizeof(Elf32_Shdr), h->numSHEntries, f)!=h->numSHEntries) {
            printf("Cannot read all Section Header\n");
            goto out;
        }

        if(h->numPHEntries == PN_XNUM) {
            // PN_XNUM: the real program header count is stored in sh_info of section header 0
            printf("Read number of Program Header in 1st Section\n");
            // read 1st section header and grab actual number from here
            h->numPHEntries = h->SHEntries[0].sh_info;
        }
    }
    printf("Read %d Program header\n", h->numPHEntries);
    h->PHEntries = (Elf32_Phdr*)calloc(h->numPHEntries, sizeof(Elf32_Phdr));
    // calloc(0, ...) may return NULL, so only a non-empty table counts as a failure
    if(!h->PHEntries && h->numPHEntries) {
        printf("Cannot allocate Program Header table\n");
        goto out;
    }
    fseeko(f, header.e_phoff ,SEEK_SET);
    if(fread(h->PHEntries, sizeof(Elf32_Phdr), h->numPHEntries, f)!=h->numPHEntries)
    {
        printf("Cannot read all Program Header\n");
        goto out;
    }

    if(header.e_shentsize && header.e_shnum)
    {
        if(h->SHIdx == SHN_XINDEX)
        {
            printf("Read number of String Table in 1st Section\n");
            h->SHIdx = h->SHEntries[0].sh_link;
        }
        // valid indices are 0 .. numSHEntries-1; an index equal to the count is out of bounds
        if(h->SHIdx >= h->numSHEntries) {
            printf("Incoherent Section String Table Index : %d / %d\n", h->SHIdx, h->numSHEntries);
            goto out;
        }
        // load Section table
        printf("Loading Sections Table String (idx = %d)\n", h->SHIdx);
        // the table is not loaded yet, so the pointer is still NULL here; never hand NULL to %s
        printf("h->SHStrTab = [%s]\n", h->SHStrTab ? h->SHStrTab : "(null)");
        if(LoadSH(f, h->SHEntries+h->SHIdx, (void*)&h->SHStrTab, ".shstrtab", SHT_STRTAB)) {
            goto out;
        }
        printf("LoadSH after : h->SHStrTab = [%p], sizeof(h->SHStrTab) = [%zu]\n", (void*)h->SHStrTab, sizeof(h->SHStrTab));
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".strtab", "SymTab Strings", SHT_STRTAB, (void**)&h->StrTab, NULL);
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".symtab", "SymTab", SHT_SYMTAB, (void**)&h->SymTab, &h->numSymTab);
        
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynamic", "Dynamic", SHT_DYNAMIC, (void**)&h->Dynamic, &h->numDynamic);
        // grab DT_REL & DT_RELA stuffs
        // also grab the DT_STRTAB string table
        {
            for (int i=0; i<h->numDynamic; ++i) {
                if(h->Dynamic[i].d_tag == DT_REL)
                    h->rel = h->Dynamic[i].d_un.d_ptr;
                else if(h->Dynamic[i].d_tag == DT_RELSZ)
                    h->relsz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELENT)
                    h->relent = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELA)
                    h->rela = h->Dynamic[i].d_un.d_ptr;
                else if(h->Dynamic[i].d_tag == DT_RELASZ)
                    h->relasz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_RELAENT)
                    h->relaent = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTGOT)
                    h->pltgot = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTREL)
                    h->pltrel = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_PLTRELSZ)
                    h->pltsz = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_JMPREL)
                    h->jmprel = h->Dynamic[i].d_un.d_val;
                else if(h->Dynamic[i].d_tag == DT_STRTAB)
                    h->DynStrTab = (char*)(h->Dynamic[i].d_un.d_ptr);
                else if(h->Dynamic[i].d_tag == DT_STRSZ)
                    h->szDynStrTab = h->Dynamic[i].d_un.d_val;
            }
            if(h->rel) {
                if(h->relent != sizeof(Elf32_Rel)) {
                    printf("Rel Table Entry size invalid (0x%x should be 0x%zx)\n", h->relent, sizeof(Elf32_Rel));
                    goto out;
                }
                printf("Rel Table @%p (0x%x/0x%x)\n", (void*)h->rel, h->relsz, h->relent);
            }
            if(h->rela) {
                if(h->relaent != sizeof(Elf32_Rela)) {
                    printf("RelA Table Entry size invalid (0x%x should be 0x%zx)\n", h->relaent, sizeof(Elf32_Rela));
                    goto out;
                }
                printf("RelA Table @%p (0x%x/0x%x)\n", (void*)h->rela, h->relasz, h->relaent);
            }
            if(h->jmprel) {
                // the PLT relocation entry size follows from the relocation flavour in DT_PLTREL
                if(h->pltrel == DT_REL) {
                    h->pltent = sizeof(Elf32_Rel);
                } else if(h->pltrel == DT_RELA) {
                    h->pltent = sizeof(Elf32_Rela);
                } else {
                    printf("PLT Table type is unknown (size = 0x%x, type=%d)\n", h->pltsz, h->pltrel);
                    goto out;
                }
                // the table size must be a whole number of entries
                if((h->pltsz / h->pltent)*h->pltent != h->pltsz) {
                    printf("PLT Table Entry size invalid (0x%x, ent=0x%x, type=%d)\n", h->pltsz, h->pltent, h->pltrel);
                    goto out;
                }
                printf("PLT Table @%p (type=%d 0x%x/0x%0x)\n", (void*)h->jmprel, h->pltrel, h->pltsz, h->pltent);
            }
            if(h->DynStrTab && h->szDynStrTab) {
                //DumpDynamicNeeded(h); cannot dump now, it's not loaded yet
            }
        }

        // FindSection returns 0 when the name is absent, hence the `if(ii)` guards below
        // look for PLT Offset
        int ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got.plt");
        if(ii) {
            h->gotplt = h->SHEntries[ii].sh_addr;
            printf("The GOT.PLT Table is at address %p\n", (void*)h->gotplt);
        }
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got");
        if(ii) {
            h->got = h->SHEntries[ii].sh_addr;
            printf("The GOT Table is at address %p\n", (void*)h->got);
        }
        // look for .init entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init");
        if(ii) {
            h->initentry = h->SHEntries[ii].sh_addr;
            printf("The .init is at address %p\n", (void*)h->initentry);
        }
        // and .init_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init_array");
        if(ii) {
            h->initarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->initarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf("The .init_array is at address %p, and have %d elements\n", (void*)h->initarray, h->initarray_sz);
        }
        // look for .fini entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini");
        if(ii) {
            h->finientry = h->SHEntries[ii].sh_addr;
            printf("The .fini is at address %p\n", (void*)h->finientry);
        }
        // and .fini_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini_array");
        if(ii) {
            h->finiarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->finiarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf("The .fini_array is at address %p, and have %d elements\n", (void*)h->finiarray, h->finiarray_sz);
        }
        // grab .text for main code
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".text");
        if(ii) {
            h->text = (uintptr_t)(h->SHEntries[ii].sh_addr);
            h->textsz = h->SHEntries[ii].sh_size;
            printf("The .text is at address %p, and is %d big\n", (void*)h->text, h->textsz);
        }
    }
    printf("***********************\n");
    printf("***********************\n");
    printf("Parse ELF END.....\n");
    printf("***********************\n");
    printf("***********************\n");
    rc = 0;

out:
    // Single exit path: release everything this function (or LoadSH on its behalf)
    // allocated. h->DynStrTab is an address taken from the file, not an allocation.
    if(h) {
        free(h->Dynamic);
        free(h->SymTab);
        free(h->StrTab);
        free(h->SHStrTab);
        free(h->PHEntries);
        free(h->SHEntries);
        free(h->name);
        free(h);
    }
    fclose(f);
    return rc;
}

运行结果如下:

在这里插入图片描述
对比 readelf -S a.out 命令结果可以看到示例程序已经成功并且正确解析出了ELF可执行文件的头信息内容,如下:

在这里插入图片描述在这里插入图片描述

ELF文件解析相关的结构体

在示例程序解析的过程中用到的几个结构体,下面具体来了解一下:

/* Size in bytes of the e_ident identification array at the start of the header. */
#define EI_NIDENT (16)
/*
 * ELF file header for 32-bit files. The parser reads it with a single fread()
 * from the start of the file, so the member order and sizes must not change.
 */
typedef struct
{
  unsigned char	e_ident[EI_NIDENT];	/* Magic number and other info */
  Elf32_Half	e_type;			/* Object file type */
  Elf32_Half	e_machine;		/* Architecture */
  Elf32_Word	e_version;		/* Object file version */
  Elf32_Addr	e_entry;		/* Entry point virtual address */
  Elf32_Off	e_phoff;		/* Program header table file offset */
  Elf32_Off	e_shoff;		/* Section header table file offset */
  Elf32_Word	e_flags;		/* Processor-specific flags */
  Elf32_Half	e_ehsize;		/* ELF header size in bytes */
  Elf32_Half	e_phentsize;		/* Program header table entry size */
  Elf32_Half	e_phnum;		/* Program header table entry count */
  Elf32_Half	e_shentsize;		/* Section header table entry size */
  Elf32_Half	e_shnum;		/* Section header table entry count */
  Elf32_Half	e_shstrndx;		/* Section header string table index */
} Elf32_Ehdr;

其中,具体描述如下:

e_ident:ELF 的一些标识信息,前四个字节为魔数 0x7F 'E' 'L' 'F',其后是位数(32/64 位)、大小端等信息
e_machine:文件的目标体系架构,比如 ARM
e_version:0 为非法版本,1 为当前版本
e_entry:程序入口的虚拟地址
e_phoff:程序头部表偏移地址
e_shoff:节区头部表偏移地址
e_flags:保存与文件相关的、特定于处理器的标志
e_ehsize:ELF 头的大小
e_phentsize:程序头部表中每个表项的大小
e_phnum:程序头部表的表项数量
e_shentsize:节区头部表中每个表项的大小
e_shnum:节区头部表的表项数量
e_shstrndx:节区名称字符串表在节区头部表中的索引

从这里就能找到节区头部表的数量(e_shnum)以及程序头部表的数量(e_phnum),然后结合程序头部表偏移地址(e_phoff)和节区头部表偏移地址(e_shoff)就能够提取出文件对应的程序头部数据和节区头部数据。同样,也是利用结构体来读取。具体如下:

节区(section)
/*
 * One entry of the section header table for 32-bit ELF files. The parser reads
 * the whole table with fread() into an array of this struct, so the member
 * order and sizes must not change.
 */
typedef struct
{
  Elf32_Word	sh_name;		/* Section name (string tbl index) */
  Elf32_Word	sh_type;		/* Section type */
  Elf32_Word	sh_flags;		/* Section flags */
  Elf32_Addr	sh_addr;		/* Section virtual addr at execution */
  Elf32_Off	sh_offset;		/* Section file offset */
  Elf32_Word	sh_size;		/* Section size in bytes */
  Elf32_Word	sh_link;		/* Link to another section */
  Elf32_Word	sh_info;		/* Additional section information */
  Elf32_Word	sh_addralign;		/* Section alignment */
  Elf32_Word	sh_entsize;		/* Entry size if section holds table */
} Elf32_Shdr;

具体描述如下:

sh_name:节区名称(在节区名称字符串表中的偏移)
sh_type:节区类型
sh_flags:节区标志,定义了一个节区中包含的内容是否可以修改、是否可以执行等信息。如果一个标志位被设置,则该位取值为 1;未定义的各位都设置为 0。
sh_addr:节区虚拟执行地址
sh_offset:节区在文件中的偏移地址
sh_size:节区数据字节长度
程序(program)
/*
 * One entry of the program header table (a segment) for 32-bit ELF files. The
 * parser reads the whole table with fread() into an array of this struct, so
 * the member order and sizes must not change.
 */
typedef struct
{
  Elf32_Word	p_type;			/* Segment type */
  Elf32_Off	p_offset;		/* Segment file offset */
  Elf32_Addr	p_vaddr;		/* Segment virtual address */
  Elf32_Addr	p_paddr;		/* Segment physical address */
  Elf32_Word	p_filesz;		/* Segment size in file */
  Elf32_Word	p_memsz;		/* Segment size in memory */
  Elf32_Word	p_flags;		/* Segment flags */
  Elf32_Word	p_align;		/* Segment alignment */
} Elf32_Phdr;

具体描述如下:

p_type:此数组元素描述的段的类型,或者如何解释此数组元素的信息。
p_offset:此成员给出从文件头到该段第一个字节的偏移。
p_vaddr:此成员给出段的第一个字节将被放到内存中的虚拟地址。
p_paddr:此成员仅用于与物理地址相关的系统中。因为 System V 忽略所有应用程序的物理地址信息,此字段对于可执行文件和共享目标文件而言具体内容是未指定的。
p_filesz:此成员给出段在文件映像中所占的字节数。可以为 0。
p_memsz:此成员给出段在内存映像中占用的字节数。可以为 0。
p_flags:此成员给出与段相关的标志。
p_align:可加载的进程段的 p_vaddr 和 p_offset 取值必须合适,相对于对页面大小的取模而言。此成员给出段在文件中和内存中如何对齐。数值 0 和 1 表示不需要对齐。否则 p_align 应该是个正整数,并且是 2 的幂次数,p_vaddr 和 p_offset 对 p_align 取模后应该相等。

CalcLoadAddr(elf_header)

继续分析源代码。box86 解析完 ELF 文件并获取其头信息之后,接下来调用 CalcLoadAddr(elf_header) 继续处理,具体步骤如下:

    // Excerpt from box86's main(): derive the load address and memory sizes from
    // the program headers. CalcLoadAddr returns non-zero on failure.
    // NOTE(review): the message below is the same "reading elf header" text used
    // for a header parse failure, although the failing step here is CalcLoadAddr.
    if(CalcLoadAddr(elf_header)) {
        printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]);
        // release everything acquired so far before giving up
        fclose(f);
        free_contextargv();
        FreeBox86Context(&my_context);
        FreeCollection(&ld_preload);
        return -1;
    }

函数具体处理如下:

// Walk the program headers of `head` and fill in its load-related fields:
//   vaddr / paddr        lowest virtual / physical address over all PT_LOAD segments
//   memsz / align        span covered by the PT_LOAD segments (relative to vaddr) and
//                        their largest alignment
//   stacksz / stackalign 1MB / 4 by default, raised by PT_GNU_STACK when it asks for more
//   tlssize / tlsalign   taken from PT_TLS, with the size rounded up to the alignment
// Returns 0 on success, 1 when the file has no PT_LOAD segment at all.
int CalcLoadAddr(elfheader_t* head)
{
    int idx;

    head->memsz = 0;
    // all-ones marks "no PT_LOAD segment seen yet"
    head->paddr = head->vaddr = ~(uintptr_t)0;
    head->align = 1;

    // Pass 1: lowest addresses over the loadable segments.
    for (idx = 0; idx < head->numPHEntries; ++idx) {
        const Elf32_Phdr *seg = &head->PHEntries[idx];
        if(seg->p_type != PT_LOAD)
            continue;
        if(head->paddr > (uintptr_t)seg->p_paddr)
            head->paddr = (uintptr_t)seg->p_paddr;
        if(head->vaddr > (uintptr_t)seg->p_vaddr)
            head->vaddr = (uintptr_t)seg->p_vaddr;
    }

    if(head->vaddr==~(uintptr_t)0 || head->paddr==~(uintptr_t)0) {
        printf_log(LOG_NONE, "Error: v/p Addr for Elf Load not set\n");
        return 1;
    }

    head->stacksz = 1024*1024;  // 1MB unless PT_GNU_STACK requests more
    head->stackalign = 4;       // default stack alignment

    // Pass 2: sizes and alignments; needs the final vaddr computed by pass 1.
    for (idx = 0; idx < head->numPHEntries; ++idx) {
        const Elf32_Phdr *seg = &head->PHEntries[idx];
        switch(seg->p_type) {
            case PT_LOAD: {
                // end of this segment, measured from the lowest load address
                uintptr_t phend = seg->p_vaddr - head->vaddr + seg->p_memsz;
                if(phend > head->memsz)
                    head->memsz = phend;
                if(seg->p_align > head->align)
                    head->align = seg->p_align;
                break;
            }
            case PT_GNU_STACK:
                if(head->stacksz < seg->p_memsz)
                    head->stacksz = seg->p_memsz;
                if(head->stackalign < seg->p_align)
                    head->stackalign = seg->p_align;
                break;
            case PT_TLS:
                head->tlssize = seg->p_memsz;
                head->tlsalign = seg->p_align;
                // bump the size until it is a multiple of the alignment
                if(head->tlsalign>1) {
                    while(head->tlssize&(head->tlsalign-1))
                        head->tlssize++;
                }
                break;
            default:
                break;
        }
    }

    printf_log(LOG_DEBUG, "Elf Addr(v/p)=%p/%p Memsize=0x%x (align=0x%x)\n", (void*)head->vaddr, (void*)head->paddr, head->memsz, head->align);
    printf_log(LOG_DEBUG, "Elf Stack Memsize=%u (align=%u)\n", head->stacksz, head->stackalign);
    printf_log(LOG_DEBUG, "Elf TLS Memsize=%u (align=%u)\n", head->tlssize, head->tlsalign);

    return 0;
}

这里并没有真正申请内存,而是遍历程序头表(Program Header)中的各个段,计算出后续加载时所需要的信息:所有 PT_LOAD 段的最低虚拟地址/物理地址、ELF 文件执行过程中所需要的内存大小与对齐、栈大小(PT_GNU_STACK)以及 TLS 段的长度(PT_TLS)等等。

总结

目前看到的 box86 源代码中的执行流程如下,这里只是其中的一部分,后续还需要继续研究:
在这里插入图片描述

Logo

更多推荐