Wednesday, May 1, 2013

how to write position independent code

writing position independent code -- x86 Windows.
AMD64 is much easier -- most code is naturally position independent
ARM32 is also viable, but not shown here.

I'm not an expert, but this weekend I finally wrote
some position independent code.
Accesses to global data by absolute address incur relocations.
Direct jmp and call do not, because they encode a displacement relative to the instruction itself.


So wrap every access to data -- including imports -- in a function that computes the address at run time.
Possibly collect all your data together in one struct.
struct globals_t { ... } globals;
globals_t* GetGlobals();


Here is a working example.
Notice the lack of a .reloc section in the output of link /dump.


We need a custom import library.
With the stock msvcrt.lib, when the linker pulls in __imp__printf,
it also finds the library's own _printf and complains that it duplicates the _printf defined in 4asm.asm.
By claiming in msvcrt.def that printf is data, the custom import .lib only has __imp__printf and no _printf.


  link /lib /def:msvcrt.def /machine:x86 
  ml -c 4asm.asm 
  cl  4.c -link 4asm.obj -entry:Entry -subsystem:console -incremental:no -nod .\msvcrt.lib 
  link /dump 4.exe
  .\4.exe


4.c:

// printf is declared by hand; the _printf symbol it resolves to is the thunk
// generated by "IMPORT printf" in 4asm.asm.
int __cdecl printf(const char*, ...);

// Implemented in 4asm.asm: returns the address at which this image is loaded.
void * GetImageBase(void);

// Format string handed to printf. It is only ever reached through p_string(),
// never by naming `string` directly in an expression.
extern const char string[] = "%p\n";

// Implemented in 4asm.asm ("DATA string"): returns the run-time address of
// `string`. The object is const, so the accessor returns a pointer to const.
const char *p_string(void);

// Program entry point (selected with -entry:Entry on the link line).
// Prints the image base with "%p\n" and returns 0.
// Both the format string and the import are reached through the wrapper
// functions above rather than by direct reference.
int Entry(void)
{
    printf(p_string(), GetImageBase());
    return 0;
}



4asm.asm:

; Flat memory model.
.model flat
; Accept instructions up to the Pentium Pro (686) level.
.686


; Everything up to "_text ends" is assembled into the _text segment.
_text segment


; _GetImageBase: returns in eax the address at which this image is loaded.
; Takes no arguments. Writes only eax (and the flags, via sub); the stack is
; balanced because the value pushed by the call is popped again.
;
; How it works: the call pushes the run-time address of the instruction that
; follows it, the pop retrieves that address, and subtracting the
; image-relative offset of the same instruction leaves the image base.
public _GetImageBase
_GetImageBase proc
  call @F                   ; push the address of the next instruction and fall into it
@@:
  pop eax                   ; eax = run-time address of this pop instruction
  sub eax, imagerel $ - 1   ; $ is this sub; $ - 1 is the pop above (relies on pop eax being one byte)
  ret
_GetImageBase endp



; IMPORT name
; Defines the procedure _name as a thunk for an imported function.
; The thunk computes the run-time address of the import slot __imp__name as
; image base + image-relative offset of the slot, then jumps through the slot.
; The caller's stack and arguments are left untouched, so the imported function
; returns directly to the original caller. eax is overwritten before the jump.
IMPORT macro name
externdef __imp__&name:dword, _&name:proc
_&name proc
  call _GetImageBase              ; eax = image base
  add eax, imagerel __imp__&name  ; eax = run-time address of the import slot
  jmp dword ptr [eax]             ; tail-jump to the address stored in the slot
_&name endp
  endm


; DATA name
; Defines the procedure _p_name, an accessor that returns in eax the run-time
; address of the data symbol _name. The address is computed as
; image base + image-relative offset of _name.
DATA macro name
externdef _&name:byte, _p_&name:proc
_p_&name proc
  call _GetImageBase        ; eax = image base
  add eax, imagerel _&name  ; eax = run-time address of the data symbol
  ret
_p_&name endp
  endm


; Instantiate the wrappers used by 4.c:
;   _printf   - thunk that jumps through __imp__printf
;   _p_string - accessor that returns the address of _string
IMPORT printf
DATA string


_text ends
end



msvcrt.def:
; Declare printf as DATA so that the import library built from this file
; provides only __imp__printf and no _printf; 4asm.asm defines _printf itself.
EXPORTS
printf DATA


PE hacking -- build import data yourself with the C compiler.

// Producing PE imports by building the data yourself.
// This works.


#include <stddef.h> // offsetof


// Place the initialized data defined below (the `imports` object) in the
// section named ".idata$2".
// NOTE(review): presumably the linker treats this section name as the import
// directory; the author only calls it a "special value" -- confirm.
#pragma data_seg(".idata$2") // special value


// BASE is the image-relative address at which the `imports` object is expected
// to land. It is hard-coded from the section layout described here, so it must
// be updated by hand whenever that layout changes.
// page of code at 0x1000
// idata at 0x2000
// page of relocs at 0x3000 for now
// If you use /Zi, change to 0x2000 to account for .rdata
// NOTE(review): BASE is already 0x2000, so the line above presumably means a
// different value (0x3000?) once .rdata is present -- confirm.
#define BASE 0x2000


// One entry of the import directory: five consecutive ints.
// The `imports` initializer fills the first entry for msvcrt.dll and leaves
// the second all zero.
typedef struct {
  int names;      // set to OFFSET(msvcrt.names): the table of name references
  int timestamp;  // initialized to 0
  int forwarder;  // initialized to 0
  int name;       // set to OFFSET(strings.msvcrt): the DLL name string
  int pointers;   // set to OFFSET(msvcrt.pointers): the table Entry calls through
} import_t;


// Per-DLL table for msvcrt.dll: one slot for printf followed by a zero entry.
// The same layout is used twice in imports_t, once as `names` and once as
// `pointers`.
typedef struct {
  union {
    int offset;                          // how the slot is initialized: OFFSET(strings.printf)
    int (__cdecl* p)(const char*, ...);  // how Entry uses the slot: as a pointer to printf
  } printf;
  int end;                               // initialized to 0
} msvcrt_t;
// String data that the import structures refer to by OFFSET().
typedef struct {
  // DLL name, NUL-terminated. The extra byte is zero padding that makes this
  // member an even number of bytes, so the hint/name entry that follows starts
  // on an even offset. The PE format pads hint/name entries to an even
  // boundary; the shorter initializer "msvcrt.dll" zero-fills the pad byte.
  char msvcrt[sizeof("msvcrt.dll") + 1];
  char printf[sizeof("\0\0printf")]; // first two bytes are "hint"
} strings_t;


// Complete hand-built import data, laid out as one contiguous object so that
// every internal reference can be computed with offsetof() (see OFFSET).
typedef struct {
  import_t imsvcrt;      // directory entry for msvcrt.dll
  import_t inull;        // second directory entry, initialized to all zero
  struct {
    msvcrt_t names;      // table of name references for msvcrt.dll
    msvcrt_t pointers;   // table Entry calls printf through
  } msvcrt;
  strings_t strings;     // DLL name and hint/name strings
} imports_t;


// Image-relative address of member x of `imports`: BASE plus the member's
// offset inside imports_t.
#define OFFSET(x) (BASE + offsetof(imports_t, x))


// The import data itself. Initializers are positional, in the member order of
// imports_t and of the nested structs.
imports_t imports = {
  // imsvcrt
  {
    OFFSET(msvcrt.names),    // names
    0,                       // timestamp
    0,                       // forwarder
    OFFSET(strings.msvcrt),  // name
    OFFSET(msvcrt.pointers)  // pointers
  },
  // inull
  { 0 },
  // msvcrt names and pointers: both start out referring to the printf string
  {
    { OFFSET(strings.printf), 0 },
    { OFFSET(strings.printf), 0 }
  },
  // strings
  { "msvcrt.dll", "\0\0printf" }
};


// Program entry point (selected with -entry:Entry on the link line).
// Calls printf through the hand-built `pointers` slot and prints "hello\n".
// Returns 0 explicitly: with a void entry point, whatever printf left in the
// return register would be handed back to the caller of the entry point
// (presumably ending up as the process exit code).
int Entry(void)
{
  imports.msvcrt.pointers.printf.p("hello\n");
  return 0;
}


// cl -Ox 1.c -link -nod  -entry:Entry -subsystem:console -opt:ref