Something About Dynamic Linking

69
Something About Dynamic Linking Kai

Transcript of Something About Dynamic Linking

Page 1: Something About Dynamic Linking

Something About Dynamic Linking

Kai

Page 2: Something About Dynamic Linking

ELFmemory

Page 3: Something About Dynamic Linking

ELFmemoryELF header

Page 4: Something About Dynamic Linking

ELFmemoryELF header typedef struct

{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; Elf32_Off e_phoff; /* offset of Program Header table*/ Elf32_Off e_shoff; Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; /* the size of each entry */ Elf32_Half e_phnum; /* the number of entries */ Elf32_Half e_shentsize; Elf32_Half e_shnum; Elf32_Half e_shstrndx; } Elf32_Ehdr;

Page 5: Something About Dynamic Linking

ELFmemoryELF header typedef struct

{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; Elf32_Off e_phoff; /* offset of Program Header table*/ Elf32_Off e_shoff; Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; /* the size of each entry */ Elf32_Half e_phnum; /* the number of entries */ Elf32_Half e_shentsize; Elf32_Half e_shnum; Elf32_Half e_shstrndx; } Elf32_Ehdr;

ELF Program Header table

Page 6: Something About Dynamic Linking

ELFmemoryELF header typedef struct

{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; Elf32_Off e_phoff; /* offset of Program Header table*/ Elf32_Off e_shoff; Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; /* the size of each entry */ Elf32_Half e_phnum; /* the number of entries */ Elf32_Half e_shentsize; Elf32_Half e_shnum; Elf32_Half e_shstrndx; } Elf32_Ehdr;

ELF Program Header table

Page 7: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

Page 8: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

Page 9: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

Page 10: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

Page 11: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

Page 12: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

Page 13: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

segment in memory

Page 14: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

segment in memory

Page 15: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_LOAD */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

segment

segment in memory

Page 16: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_INTERP */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

/lib/ld-linux.so.2segment in memory

segment

Page 17: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_INTERP */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

/lib/ld-linux.so.2segment in memory

dynamic linker

segment

Page 18: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_INTERP */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

/lib/ld-linux.so.2segment in memory

dynamic linker

auxiliary vector

stack

segment

Page 19: Something About Dynamic Linking

ELFmemoryELF header

ELF Program Header table

typedef struct{ Elf32_Word p_type; /* PT_INTERP */ Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; } Elf32_Phdr;

/lib/ld-linux.so.2segment in memory

dynamic linker

auxiliary vector

stack

entry

segment

Page 20: Something About Dynamic Linking

Dynamic Linker• Determine and load dependencies

• Relocate the application and all dependencies

• Initialise the application and dependencies in the correct order

Page 21: Something About Dynamic Linking

Types of Relocation• Relative relocation

• Locations which are known to be within the object itself

• Not associated with a specific symbol

• Named relocation

• Based on symbols

• The reference to the symbol is generally in a different object than the definition

Page 22: Something About Dynamic Linking

Symbol Lookup• Traditional ELF Hash Table Handling

• GNU-style Hash Table Handling

Page 23: Something About Dynamic Linking

1. Determine the hash value for the relocation name
2. For each object in the lookup scope,
   A. Get the hash bucket using the hash value
   B. Get the name offset of the symbol
   C. Compare the symbol name with the relocation name
   D. If the names match, we found the definition
   E. If the names do not match, retry with the next element in the bucket
   F. If there is no other element in the bucket, try the next object.

3. If there is no other object in the lookup scope, the lookup failed

Traditional ELF Hash Table Handling

Page 24: Something About Dynamic Linking

Traditional ELF Hash Table Handling• If the scope contains more than one definition of the same

symbol, the algorithm simply picks up the first definition it finds.

• Use LD_PRELOAD to replace implementation

• LD_PRELOAD=./libfoo.so ./a.out

• The performance of each lookup depends on

• The length of the hash chains

• the number of symbols

• the choice of the hash table size

• The number of objects

Page 25: Something About Dynamic Linking

/* sum.c */#include <stdio.h>

int sum(int a, int b){ printf("sum is called\n");

return a + b;}

/* sum_v2.c */#include <stdio.h>

int base = 100;

int sum(int a, int b){ printf("sum (with base %d) is called\n", base);

return base + a + b;}

Page 26: Something About Dynamic Linking
Page 27: Something About Dynamic Linking

LD_DEBUG=symbols LD_DEBUG_OUTPUT=debug.log LD_PRELOAD=./libsumv2.so ./a.out

LD_DEBUG=symbols LD_DEBUG_OUTPUT=debug.log ./a.out

Page 28: Something About Dynamic Linking

1. Determine the hash value for the relocation name
2. For each object in the lookup scope,
   A. Determine whether an entry with the given hash value is present (2-bit Bloom filter). If the filter indicates there is no such definition, the next object is searched.
   B. Determine the hash bucket
   C. Get the entry from the chain array. Compare the value with the hash value of the symbol. Ignore bit 0.
   D. If the hash value matches, get the name offset.
   E. Compare the symbol name with the relocation name
   F. If the names match, we found the definition
   G. If the names do not match and the value loaded from the hash bucket does not have bit 0 set, retry with next element in the bucket array
   H. If bit 0 is set, there is no further entry in the hash chain, try next object.

3. If there is no other object in the lookup scope, the lookup failed

GNU-style Hash Table Handling

Page 29: Something About Dynamic Linking
Page 30: Something About Dynamic Linking

Lookup Scope• An ordered list of a subset of the loaded objects

• global lookup scope

• executable

• all its dependencies in DT_NEEDED (added in breadth-first order)

• DT_SYMBOLIC: the object with the reference is added in front of the global lookup scope

• Only the object itself is added in front, not its dependencies

• local lookup scope

• DSOs loaded dynamically using dlopen

• RTLD_GLOBAL: the loaded object and all the dependencies are added to the global scope

• RTLD_DEEPBIND: search local lookup scope before global lookup scope

Page 31: Something About Dynamic Linking

DT_SYMBOLIC

Page 32: Something About Dynamic Linking

DT_SYMBOLIC

Page 33: Something About Dynamic Linking

LD_DEBUG=symbols LD_PRELOAD=./libfoo_v2.so ./test_foo_sym

LD_DEBUG=symbols LD_PRELOAD=./libfoo_v2.so ./test_foo

Page 34: Something About Dynamic Linking

DT_SYMBOLIC

Page 35: Something About Dynamic Linking

/* foo.c */void foo(void){ printf("foo in libfoo.so\n");}

void libfoo(void){ printf("libfoo calls "); foo();}

/* foo_dyn.c */void foo(void){ printf("foo in libfoo_dyn.so\n");}

void libfoo(void){ printf("libfoo, dynamically loaded, calls "); foo();}

Page 36: Something About Dynamic Linking

#include <stdio.h>#include <dlfcn.h>

typedef void (*ptr)(void);

int main(){ void *handle; ptr foo_ptr;

libfoo();

handle = dlopen("libfoo_dyn.so", RTLD_LAZY);

foo_ptr = dlsym(handle, "libfoo"); foo_ptr();

dlclose(handle);

return 0;}

Page 37: Something About Dynamic Linking

No RTLD_DEEPBIND

Page 38: Something About Dynamic Linking

#include <stdio.h>#include <dlfcn.h>

typedef void (*ptr)(void);

int main(){ void *handle; ptr foo_ptr;

libfoo();

handle = dlopen("libfoo_dyn.so", RTLD_LAZY | RTLD_DEEPBIND);

foo_ptr = dlsym(handle, "libfoo"); foo_ptr();

dlclose(handle);

return 0;}

Page 39: Something About Dynamic Linking

/* foo_dyn.c */void foo(void){ printf("foo in libfoo\n");}

void bar(void){ printf("bar in libfoo\n");}

void libfoo(void){ printf("libfoo, dynamically loaded, calls "); foo();}

Page 40: Something About Dynamic Linking

/* bar_dyn.c */void bar(void){ printf("bar in libbar\n");}

void libbar(void){ printf("libbar, dynamically loaded, calls "); bar();}

Page 41: Something About Dynamic Linking

RTLD_GLOBAL

dlopen(“libfoo_dyn.so”, RTLD_LAZY | RTLD_GLOBAL)

Page 42: Something About Dynamic Linking

GOT & PLT

movl $foo, %edicall barmovl $0, %eax

movq foo@GOTPCREL(%rip), %raxmovq %rax, %rdicall bar@PLT

typedef void (*ptr)(void);

extern void foo(void);extern void bar(ptr fn);

int libbar(void){ bar(foo); return 0;}

PIC

non-PIC

Page 43: Something About Dynamic Linking

GOT & PLT

Disassembly of section .plt:

<_init+0x20>:pushq 0x199a(%rip)jmpq *0x199c(%rip)nopnopnopnop

. . .

<bar@plt>:jmpq *0x198a(%rip)pushq $0x2jmpq 650 <_init+0x20>

call bar@PLT # dl-trampoline.S

<_dl_runtime_resolve>:. . .

<bar>:. . .

0

GOT

Page 44: Something About Dynamic Linking

GOT & PLT

Disassembly of section .plt:

<_init+0x20>:pushq 0x199a(%rip)jmpq *0x199c(%rip)nopnopnopnop

. . .

<bar@plt>:jmpq *0x198a(%rip)pushq $0x2jmpq 650 <_init+0x20>

call bar@PLT # dl-trampoline.S

<_dl_runtime_resolve>:. . .

<bar>:. . .

0

GOT

Page 45: Something About Dynamic Linking

GOT & PLT

Disassembly of section .plt:

<_init+0x20>:pushq 0x199a(%rip)jmpq *0x199c(%rip)nopnopnopnop

. . .

<bar@plt>:jmpq *0x198a(%rip)pushq $0x2jmpq 650 <_init+0x20>

call bar@PLT # dl-trampoline.S

<_dl_runtime_resolve>:. . .

<bar>:. . . GOT

bar

Page 46: Something About Dynamic Linking

GOT & PLT

Disassembly of section .plt:

<_init+0x20>:pushq 0x199a(%rip)jmpq *0x199c(%rip)nopnopnopnop

. . .

<bar@plt>:jmpq *0x198a(%rip)pushq $0x2jmpq 650 <_init+0x20>

call bar@PLT # dl-trampoline.S

<_dl_runtime_resolve>:. . .

<bar>:. . . GOT

Page 47: Something About Dynamic Linking

GOT & PLT

Disassembly of section .plt:

<_init+0x20>:pushq 0x199a(%rip)jmpq *0x199c(%rip)nopnopnopnop

. . .

<bar@plt>:jmpq *0x198a(%rip)pushq $0x2jmpq 650 <_init+0x20>

call bar@PLT # dl-trampoline.S

<_dl_runtime_resolve>:. . .

<bar>:. . . GOT

bar

Page 48: Something About Dynamic Linking

Data Definitions• Common

• There can be more than one definition and they all get unified into one location.

• Uninitialised

• It allows the linker to find multiple definitions and flag them as errors.

• Variables initialised with zero

• __attribute__ ((nocommon))

• -fno-common

• Initialised

• The initialisation value is stored in the file.

• It is always preferable to add variables as uninitialised or initialised with zero as opposed to as initialised with a value other than zero.

• save disk space and eventually improve startup time.

Page 49: Something About Dynamic Linking

Visibility• default

• The symbol is exported and can be interposed.

• hidden

• while static restricts the visibility of a symbol to the file it is defined in, the hidden attribute limits the visibility to the DSO the definition ends up in.

• the linker will not add hidden symbols to the dynamic symbol table.

• internal

• internal visibility is like hidden visibility, but with additional processor specific semantics.

• protected

• references to symbols defined in the same object are always satisfied locally, but the symbols are still available outside the DSO.

Page 50: Something About Dynamic Linking

Export Control• Use static

• Define global visibility

• -fvisibility=hidden

• Define per-symbol visibility

• __attribute__ ((visibility ("hidden")))

• #pragma GCC visibility push(hidden)

• Export Map

• -Wl,--version-script=symbol.map

• The linker runs only after the compiler has already done its work, and the code, once generated, cannot be optimised significantly.

Page 51: Something About Dynamic Linking

int last;

int next(void){ return ++last;}

int foo(int scale){ return next() << scale; }

Use Static

static

static int last;

static int next(void){ return ++last;}

int foo(int scale){ return next() << scale; }

Page 52: Something About Dynamic Linking

Use Static

static

static

Page 53: Something About Dynamic Linking

Define Visibility

gcc -fPIC -fvisibility=hidden -S test.c

int last;

int next(void){ return ++last;}

int __attribute__ ((visibility (“default”)))foo(int scale){ return next() << scale; }

Page 54: Something About Dynamic Linking

Export Map

{ global: foo; local: *;};

Page 55: Something About Dynamic Linking

Export Map

Page 56: Something About Dynamic Linking

Export Map

Page 57: Something About Dynamic Linking

Export Map

Page 58: Something About Dynamic Linking

Avoid Using Exported Symbols• In some situations a symbol has to remain exported, but at the same time all local references should use the local definition.

• Wrapper functions

• Using aliases

• __attribute__ ((alias (“symbol”), visibility (“hidden”)))

• Aliases may only be created for non-static functions and variables.

• DT_SYMBOLIC

• all interfaces are affected

• the compiler does not get told about the use of local symbols

• lookup scope is changed

Page 59: Something About Dynamic Linking

Wrapper Functions

static int last;static int next_int (void) { return ++last;}

int next (void) { // wrapper function return next_int ();}

int index (int scale) { return next_int () << scale;}

Page 60: Something About Dynamic Linking

Alias

int last;extern __typeof (last) last_int // used in internal __attribute ((alias (“last"), visibility (“hidden")));

int next (void) { return ++last_int;}

extern __typeof (next) next_int // used in internal __attribute ((alias (“next"), visibility (“hidden")));

int index (int scale) { return next_int () << scale;}

Page 61: Something About Dynamic Linking
Page 62: Something About Dynamic Linking

Pointers vs. Arrays

// the use of a variable is unnecessary.char *str = “some string”;

// Here “str” is a name for a sequence of bytes.// save one pointer variable in the non-sharable data segment// save one relative relocationchar str[] = “some string”;

// compiler is able to move the string in read-only memoryconst char str[] = “some string”;

Page 63: Something About Dynamic Linking

Pointers vs. Arrays

const char const *str = “some string”;

const char []str = “some string”;

Page 64: Something About Dynamic Linking

Arrays of Data Pointers// The total cost for this code is three words of data// in writable memory and three relocations modifying// this data in addition to the memory for the strings// themselves.static const char *msgs[] = { [ERR1] = "message for err1", [ERR2] = "message for err2", [ERR3] = "message for err3"};const char *errstr (int nr) { return msgs[nr];}

// If the strings have different lengths it would mean// wasting quite a bit of memory. static const char msgs[][17] = { [ERR1] = "message for err1", [ERR2] = "message for err2", [ERR3] = "message for err3"};

Page 65: Something About Dynamic Linking

Arrays of Data Pointers// The cost of this code include three size_t words in// read-only memory in addition to the memory for the strings.static const char msgstr[] = "message for err1\0" "message for err2\0" "message for err3”;

static const size_t msgidx[] = { 0, sizeof ("message for err1"), sizeof ("message for err1") + sizeof ("message for err2")};

const char *errstr (int nr) { return msgstr + msgidx[nr];}

Page 66: Something About Dynamic Linking

Security• A changed GOT value might redirect a call to a function to an

arbitrary other place.

• -z relro linker option

• The linker is instructed to move the sections onto a separate memory page and emit a new program header entry PT_GNU_RELRO.

• At runtime the dynamic linker can remove the write access to those pages after it is done.

• -z now linker option

• Disable all lazy relocation at the expense of increased startup costs.

Page 67: Something About Dynamic Linking

Inter-Object File Relations• By default the dynamic linker only looks into a few

directories to find DSOs.

• /lib

• /usr/lib

• Directories in /etc/ld.so.conf

• LD_LIBRARY_PATH environment variable

• rpath settings

Page 68: Something About Dynamic Linking

Run Path• Programmers could decide the path directly.

• The dynamic linker will use the value of the run path string when searching for dependencies of the object.

• DT_RPATH (deprecated)

• Used before LD_LIBRARY_PATH

• It does not allow the user to overwrite the value.

• -rpath or -R linker option

• DT_RUNPATH

• Used after LD_LIBRARY_PATH

• --enable-new-dtags

• Empty path represents the current working directory.

• Dynamic string token

• $ORIGIN, $LIB, $PLATFORM

Page 69: Something About Dynamic Linking

Reference• How to Write Shared Libraries - Ulrich Drepper

• https://software.intel.com/sites/default/files/m/a/1/e/dsohowto.pdf

• ELF Symbol Versioning

• https://www.akkadia.org/drepper/symbol-versioning