add linux_emul base, reorganize docs
[openbsd_emul.git] / linux_emul_base / exec.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ureg.h>
4 #include <tos.h>
5 #include "dat.h"
6 #include "fns.h"
7 #include "linux.h"
8
/* short type names for the structures defined below */
typedef struct Elfhdr Elfhdr;
typedef struct Proghdr Proghdr;
typedef struct ElfEx ElfEx;
12
/*
 * ELF file header.  loadelf() reads this structure directly from the
 * start of the file, so the field order and sizes must not be changed.
 */
struct Elfhdr {
	uchar	ident[16];	/* first 4 bytes are compared against "\x7fELF" */
	ushort	type;		/* file type, one of ElfT* */
	ushort	machine;	/* architecture, one of ElfM* (not checked by loadelf) */
	ulong	version;
	ulong	entry;		/* entry point address */
	ulong	phoff;		/* file offset of the program header table */
	ulong	shoff;
	ulong	flags;
	ushort	ehsize;
	ushort	phentsize;	/* size of one program header entry */
	ushort	phnum;		/* number of program header entries */
	ushort	shentsize;
	ushort	shnum;
	ushort	shstrndx;
};
29
/*
 * One entry of the ELF program header table.  loadelf() reads the table
 * straight from the file into an array of these, so the layout must not
 * be changed.
 */
struct Proghdr {
	ulong	type;		/* segment type, one of ElfP* */
	ulong	offset;		/* file offset of the segment data */
	ulong	vaddr;		/* virtual address the segment is mapped at */
	ulong	paddr;
	ulong	filesz;		/* number of bytes backed by the file */
	ulong	memsz;		/* number of bytes occupied in memory */
	ulong	flags;		/* ElfPFR, ElfPFW, ElfPFX permission bits */
	ulong	align;
};
40
/*
 * Result of loadelf(): what setupstack() and kexecve() need to know
 * about the loaded image.
 */
struct ElfEx
{
	ulong	ientry;		/* interpreter entry point; equals entry when there is no interpreter */
	ulong	ibase;		/* interpreter load address; 0 when there is no interpreter */

	ulong	entry;		/* entry point of the image itself */
	ulong	base;		/* address returned by the first mmap of a loadable segment */

	ulong	phdr;		/* base + file offset of the program header table */
	ulong	phnum;		/* number of program header entries */
	ulong	phent;		/* size of one program header entry */
};
53
54 static void
55 padzero(ulong addr)
56 {
57 ulong n;
58
59 if(n = (pagealign(addr) - addr))
60 memset((void*)addr, 0, n);
61 }
62
/* Elfhdr.type: file types */
enum {
	ElfTNone	= 0,
	ElfTReloc	= 1,
	ElfTExec	= 2,
	ElfTShared	= 3,
	ElfTCore	= 4,
	ElfTMax		= 5,
};

/* Elfhdr.machine: machine architectures */
enum {
	ElfMNone	= 0,
	ElfM32		= 1,
	ElfMSparc	= 2,
	ElfM386		= 3,
	ElfM68		= 4,
	ElfM88		= 5,
	ElfM860		= 7,
	ElfMMips	= 8,
	ElfMMax		= 9,
};

/* Proghdr.type: program segment types */
enum {
	ElfPNull	= 0,
	ElfPLoad	= 1,
	ElfPDynamic	= 2,
	ElfPInterp	= 3,
	ElfPNote	= 4,
	ElfPShlib	= 5,
	ElfPPhdr	= 6,
	ElfPMax		= 7,
};

/* Proghdr.flags: program segment permission bits */
enum {
	ElfPFX		= 1,
	ElfPFW		= 2,
	ElfPFR		= 4,
};
98
99 static int
100 loadelf(char *file, ElfEx *ex, int depth)
101 {
102 int fd;
103 int i, l;
104 int mapprot;
105 int mapflags;
106 ulong mapbase;
107 ulong loadaddr;
108 ulong bss;
109
110 Elfhdr hdr;
111 Proghdr *phdr;
112 char *interpreter;
113
114 interpreter = nil;
115 phdr = nil;
116
117 if((fd = sys_open(file, O_RDONLY, 0)) < 0){
118 werrstr("cant open %s", file);
119 goto errout;
120 }
121
122 if(sys_read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)){
123 werrstr("cant read elf header");
124 goto errout;
125 }
126
127 if(memcmp(hdr.ident, "\x7fELF", 4)!=0){
128 werrstr("no elf magic");
129 goto errout;
130 }
131
132 l = hdr.phnum * hdr.phentsize;
133 phdr = kmalloc(l);
134 sys_lseek(fd, hdr.phoff, 0);
135 if(sys_read(fd, phdr, l) != l){
136 werrstr("cant read program headers");
137 goto errout;
138 }
139
140 loadaddr = 0;
141 mapbase = 0;
142 mapflags = MAP_PRIVATE;
143 if(hdr.type != ElfTShared)
144 mapflags |= MAP_FIXED;
145
146 trace("loadelf(): phnum=%d", hdr.phnum);
147
148 bss = 0;
149 for(i=0; i<hdr.phnum; i++){
150 Proghdr *p;
151
152 p = &phdr[i];
153 if(p->type == ElfPInterp){
154 if(interpreter){
155 werrstr("multiple interpeter sections");
156 goto errout;
157 }
158 l = p->filesz;
159
160 interpreter = kmalloc(l+1);
161 sys_lseek(fd, p->offset, 0);
162 if(sys_read(fd, interpreter, l)!=l){
163 werrstr("cant read interpreter section");
164 goto errout;
165 }
166 interpreter[l] = '\0';
167 }
168
169 if(p->type == ElfPLoad){
170 ulong a;
171 int diff;
172
173 trace("loadelf(): phdr %d: vaddr=%lux memsz=%lux filesz=%lux offset=%lux flags=%lux",
174 i,
175 p->vaddr,
176 p->memsz,
177 p->filesz,
178 p->offset,
179 p->flags);
180
181 mapprot = 0;
182 if(p->flags & ElfPFR)
183 mapprot |= PROT_READ;
184 if(p->flags & ElfPFW)
185 mapprot |= PROT_WRITE;
186 if(p->flags & ElfPFX)
187 mapprot |= PROT_EXEC;
188
189 if(hdr.entry >= p->vaddr && hdr.entry < p->vaddr + p->memsz)
190 mapprot |= PROT_EXEC;
191
192 diff = p->vaddr - (p->vaddr & ~(PAGESIZE-1));
193
194 /* have to call mapdata() before we do the first mmap */
195 if(loadaddr == 0 && depth == 0){
196 if(hdr.type == ElfTShared){
197 mapbase = pagealign((ulong)end + 0x4000000);
198 mapflags |= MAP_FIXED;
199 }
200 mapdata((mapbase + p->vaddr) - diff);
201 }
202
203 a = sys_mmap(
204 (mapbase + p->vaddr) - diff,
205 p->filesz + diff,
206 mapprot,
207 mapflags,
208 fd,
209 (p->offset - diff)/PAGESIZE);
210
211 if(((int)a < 0) && ((int)a > -EMAX)){
212 werrstr("mmap failed: %E", (int)a);
213 goto errout;
214 }
215 if(loadaddr == 0)
216 loadaddr = a;
217 if(hdr.type == ElfTShared && mapbase == 0){
218 mapbase = a + diff;
219 mapflags |= MAP_FIXED;
220 }
221 if(mapprot & PROT_WRITE)
222 padzero(mapbase + p->vaddr + p->filesz);
223 if(depth == 0)
224 if(mapbase + p->vaddr + p->memsz > bss)
225 bss = mapbase + p->vaddr + p->memsz;
226 } else {
227 trace("loadelf(): phdr %d: type=%lux", i, p->type);
228 }
229 }
230
231 ex->base = loadaddr;
232 ex->entry = hdr.entry + ((hdr.type == ElfTShared) ? loadaddr : 0);
233
234 ex->phdr = loadaddr + hdr.phoff;
235 ex->phent = hdr.phentsize;
236 ex->phnum = hdr.phnum;
237
238 if(depth == 0){
239 sys_brk(pagealign(bss));
240
241 current->codestart = loadaddr;
242 current->codeend = bss;
243 }
244
245 if(interpreter){
246 ElfEx interpex;
247
248 if(loadelf(interpreter, &interpex, depth+1) < 0){
249 werrstr("cant load interpreter: %r");
250 goto errout;
251 }
252 free(interpreter);
253
254 ex->ientry = interpex.entry;
255 ex->ibase = interpex.base;
256 } else {
257 ex->ientry = ex->entry;
258 ex->ibase = 0; /* no interpreter */
259 }
260
261 sys_close(fd);
262 free(phdr);
263 return 0;
264
265 errout:
266 if(fd >= 0)
267 sys_close(fd);
268 free(interpreter);
269 free(phdr);
270 return -1;
271 }
272
273
/* keys of the auxiliary vector entries written by setupstack() */
enum {
	AT_NULL		= 0,
	AT_IGNORE	= 1,
	AT_EXECFD	= 2,
	AT_PHDR		= 3,
	AT_PHENT	= 4,
	AT_PHNUM	= 5,
	AT_PAGESZ	= 6,
	AT_BASE		= 7,
	AT_FLAGS	= 8,
	AT_ENTRY	= 9,
	AT_NOTELF	= 10,
	AT_UID		= 11,
	AT_EUID		= 12,
	AT_GID		= 13,
	AT_EGID		= 14,
	AT_PLATFORM	= 15,
	AT_HWCAP	= 16,
	AT_CLKTCK	= 17,
	AT_SECURE	= 23,

	AT_SYSINFO	= 32,
	AT_SYSINFO_EHDR	= 33,
};
298
299 static void*
300 setupstack(ElfEx *ex, char *argv[], char *envp[])
301 {
302 int envc;
303 int argc;
304
305 char **dargv;
306 char **denv;
307
308 ulong *stack;
309 ulong *p;
310 char *x;
311 int i, n;
312
313 /*
314 * calculate the size we need on stack
315 */
316 argc=0;
317 while(argv && argv[argc]) argc++;
318
319 envc=0;
320 while(envp && envp[envc]) envc++;
321
322 n = 0;
323 n += sizeof(ulong); // argc
324 n += (argc+1)*sizeof(char*); // argv + nil
325 n += (envc+1)*sizeof(char*); // envp + nil
326 n += 16*(2*sizeof(ulong)); // aux
327
328 for(i=0; i<argc; i++)
329 n += (strlen(argv[i])+1);
330 for(i=0; i<envc; i++)
331 n += (strlen(envp[i])+1);
332
333 if(USTACK - n < PAGESIZE){
334 werrstr("too many arguments passed on stack");
335 return nil;
336 }
337
338 stack = mapstack(USTACK);
339
340 if(((int)stack < 0) && ((int)stack > -EMAX)){
341 werrstr("mapstack failed: %E", (int)stack);
342 return nil;
343 }
344 stack = (ulong*)(((ulong)stack - n) & ~7);
345
346 current->stackstart = (ulong)stack;
347
348 p = stack;
349
350 *p++ = argc;
351
352 dargv = (char**)p;
353 p += (argc + 1);
354
355 denv = (char**)p;
356 p += (envc + 1);
357
358 #define AUXENT(k, v) {p[0]=k; p[1]=v; p+=2;}
359 AUXENT(AT_PAGESZ, PAGESIZE);
360 AUXENT(AT_CLKTCK, HZ);
361 AUXENT(AT_PHDR, ex->phdr);
362 AUXENT(AT_PHENT, ex->phent);
363 AUXENT(AT_PHNUM, ex->phnum);
364 AUXENT(AT_BASE, ex->ibase);
365 AUXENT(AT_FLAGS, 0);
366 AUXENT(AT_ENTRY, ex->entry);
367 AUXENT(AT_UID, current->uid);
368 AUXENT(AT_EUID, current->uid);
369 AUXENT(AT_GID, current->gid);
370 AUXENT(AT_EGID, current->gid);
371 AUXENT(AT_NULL, 0);
372 AUXENT(AT_NULL, 0);
373 AUXENT(AT_NULL, 0);
374 AUXENT(AT_NULL, 0);
375 #undef AUXENT
376
377 x = (char*)p;
378
379 for(i=0; i<argc; i++)
380 x += (strlen(dargv[i] = strcpy(x, argv[i])) + 1);
381 dargv[argc] = 0;
382 for(i=0; i<envc; i++)
383 x += (strlen(denv[i] = strcpy(x, envp[i])) + 1);
384 denv[envc] = 0;
385
386 return stack;
387 }
388
389 static char**
390 copystrings(char *a[])
391 {
392 char **r;
393 char *p;
394 int i, n;
395
396 if(a == nil)
397 return nil;
398 i = 0;
399 n = sizeof(a[0]);
400 while(a[i]){
401 n += sizeof(a[0]) + (strlen(a[i]) + 1);
402 i++;
403 }
404 r = kmalloc(n);
405 n = i;
406 p = (char*)&r[n+1];
407 for(i=0; i<n; i++)
408 p += strlen(r[i] = strcpy(p, a[i]))+1;
409 r[n] = 0;
410 return r;
411 }
412
413 static void
414 setcomm(char *exe, char *name, char *argv[])
415 {
416 char *buf, *p;
417 int i, n;
418
419 n = strlen(exe) + strlen(name) +2;
420 for(i=0; argv[i]; i++)
421 n += strlen(argv[i])+1;
422
423 buf = kmalloc(n);
424
425 p = buf;
426 p += strlen(strcpy(p, name));
427 for(i=0; argv[i]; i++){
428 p += strlen(strcpy(p, " "));
429 p += strlen(strcpy(p, argv[i]));
430 }
431 setprocname(buf);
432
433 /* comm contains the full exe name + argv */
434 p = buf;
435 p += strlen(strcpy(p, exe));
436 *p++ = 0;
437 for(i=0; argv[i]; i++){
438 p += strlen(strcpy(p, argv[i]));
439 *p++ = 0;
440 }
441 *p++ = 0;
442
443 free(current->comm);
444 current->comm = buf;
445 current->ncomm = p - buf;
446 }
447
/*
 * Leave note context while keeping the user registers intact.
 * kexecve() calls this when current->innote is set, before the user
 * stack is destroyed.
 *
 * pc, sp and ax are saved from ureg, notejmp() is called with a jmp_buf
 * set up in this function, and once control is back past the setjmp()
 * the saved values are written to ureg again.
 * NOTE(review): presumably notejmp() rewrites ureg and resumes at the
 * setjmp() with a non-zero value -- confirm against notify(2).
 */
static void
clinote(struct Ureg *ureg)
{
	jmp_buf jmp;
	ulong pc;
	ulong sp;
	ulong ax;

	pc = ureg->pc;
	sp = ureg->sp;
	ax = ureg->ax;

	if(!setjmp(jmp))
		notejmp(ureg, jmp, 1);

	/* put back the registers saved above */
	ureg->pc = pc;
	ureg->sp = sp;
	ureg->ax = ax;
}
467
/* arguments of sys_execve(), bundled so they fit kexecve()'s single pointer parameter */
struct kexecveargs
{
	char *name;	/* path of the program to execute */
	char **argv;	/* argument vector, may be nil */
	char **envp;	/* environment vector, may be nil */
};
474
475 #pragma profile off
476
477 static int
478 kexecve(void *arg)
479 {
480 struct kexecveargs *args;
481 Ufile *f;
482 ElfEx ex;
483 Ureg u;
484 int r, n;
485 char *b, *p, *e, *x, **a;
486 void *stack;
487 char *name, *exe;
488 char **argv;
489 char **envp;
490 int phase;
491
492 args = arg;
493 name = args->name;
494 argv = args->argv;
495 envp = args->envp;
496
497 phase = 0;
498 n = 8192;
499 b = kmalloc(n);
500 p = b;
501 e = b + n;
502 again:
503 if(r = sys_access(name, 05)){
504 if(r > 0)
505 r = -EACCES;
506 goto errout;
507 }
508 if((r = sys_open(name, O_RDONLY, 0)) < 0)
509 goto errout;
510 exe = "/dev/null";
511 if(f = fdgetfile(r)){
512 if(f->path != nil){
513 strncpy(p, f->path, e-p);
514 p += strlen(exe = p)+1;
515 }
516 putfile(f);
517 }
518 n = sys_read(r, p, (e-p)-1);
519 sys_close(r);
520
521 r = -ENOEXEC;
522 if(n < 4)
523 goto errout;
524
525 if(memcmp(p, "#!", 2) == 0){
526 p[n] = 0;
527
528 r = -ENAMETOOLONG;
529 if((x = strchr(p, '\n')) == nil)
530 goto errout;
531 *x = 0;
532
533 a = (char**)&x[1];
534 n = (e - (char*)a) / sizeof(a[0]);
535 if(n < 2)
536 goto errout;
537 n = getfields(&p[2], a, n, 1, "\t\r\n ");
538 if(n < 1)
539 goto errout;
540 r = -E2BIG;
541 if(&a[n+1] >= (char**)e)
542 goto errout;
543 a[n++] = name;
544 if(argv != nil){
545 argv++;
546 while(*argv){
547 if(&a[n+1] >= (char**)e)
548 goto errout;
549 a[n++] = *argv++;
550 }
551 }
552 a[n++] = 0;
553 p = (char*)&a[n];
554 if(e - p < 4)
555 goto errout;
556 argv = a;
557 name = argv[0];
558
559 goto again;
560 }
561
562 if(memcmp(p, "\x7fELF", 4)!=0)
563 goto errout;
564
565 /*
566 * the contents on envp[] or argv[] maybe stored in b[], stack or bss of the calling linux
567 * process that is destroyed on free(b) and exitmem()... so we need to temporary
568 * copy them.
569 */
570 r = -ENOMEM;
571 name = kstrdup(name);
572 phase++;
573 if(argv)
574 argv = copystrings(argv);
575 phase++;
576 if(envp)
577 envp = copystrings(envp);
578 phase++;
579
580 /* get out of the note before we destroy user stack */
581 if(current->innote){
582 clinote(current->ureg);
583 current->innote = 0;
584 }
585
586 /* this is the point of no return! */
587 qlock(&proctab);
588 zapthreads();
589 exitmem();
590 exitsignal();
591
592 initmem();
593 initsignal();
594 inittls();
595 qunlock(&proctab);
596
597 closexfds();
598
599 setcomm(exe, name, argv);
600
601 if(loadelf(name, &ex, 0) < 0){
602 trace("kexecve(): loadelf failed: %r");
603 goto errout;
604 }
605
606 if((stack = setupstack(&ex, argv, envp)) == nil){
607 trace("kexecve(): setupstack failed: %r");
608 goto errout;
609 }
610
611 memset(&u, 0, sizeof(u));
612 u.sp = (ulong)stack;
613 u.pc = (ulong)ex.ientry;
614 current->ureg = &u;
615 current->syscall = nil;
616 phase++;
617
618 trace("kexecve(): startup pc=%lux sp=%lux", current->ureg->pc, current->ureg->sp);
619
620 errout:
621 switch(phase){
622 default: free(envp);
623 case 2: free(argv);
624 case 1: free(name);
625 case 0: free(b);
626 }
627 switch(phase){
628 case 4: retuser();
629 case 3: exitproc(current, SIGKILL, 1);
630 }
631 return r;
632 }
633
634 int sys_execve(char *name, char *argv[], char *envp[])
635 {
636 struct kexecveargs args;
637
638 trace("sys_execve(%s, %p, %p)", name, argv, envp);
639
640 args.name = name;
641 args.argv = argv;
642 args.envp = envp;
643
644 return onstack(kstack, kexecve, &args);
645 }
646
647 #pragma profile on