add linux_emul base, reorganize docs
[openbsd_emul.git] / linux_emul_base / mem.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ureg.h>
4 #include "dat.h"
5 #include "fns.h"
6 #include "linux.h"
7
/*
 * Short names for the structures defined below, declared up front
 * so that the structures can point at each other.
 */
typedef struct Range Range;
typedef struct Area Area;
typedef struct Filemap Filemap;
typedef struct Futex Futex;
typedef struct Seg Seg;
typedef struct Space Space;
14
/*
 * Segment types. The value is used as index into Space.seg[] and
 * segname[]. Keep in order, lowest base address first.
 */
enum {
	SEGDATA,
	SEGPRIVATE,
	SEGSHARED,
	SEGSTACK,
	SEGMAX,		/* number of segment types, not a segment itself */
};
23
/* printable segment names for trace and panic messages, indexed by segment type */
static char *segname[SEGMAX] = { "data", "private", "shared", "stack" };
25
/*
 * An address range. The lookups in this file treat it as half open:
 * base is inside the range, top is the first address past it.
 */
struct Range
{
	ulong base;	/* first address of the range */
	ulong top;	/* first address after the range */
};
31
/*
 * Description of a file mapping created by sys_mmap(). It is
 * reference counted because duparea() lets several areas share
 * one Filemap.
 */
struct Filemap
{
	Range addr;	/* address range the file was mapped at */

	Filemap *next;	/* link on the segment's freefilemap list */

	char *path;	/* copy of the path of the mapped file */
	ulong offset;	/* file offset that corresponds to addr.base */
	int mode;	/* open mode of the mapped file */
	Ufile *file;	/* file to write back to; nil when it was opened O_RDONLY */

	Ref;
};
45
/*
 * A futex that has (or had) waiters. Futexes hang off the Area that
 * contains their address; the reference count is taken by each
 * FUTEX_WAIT caller in sys_futex().
 */
struct Futex
{
	ulong *addr;	/* user address the futex is identified by */

	Futex *next;	/* next futex of the same area, or next on the free list */
	Futex **link;	/* the pointer that points at us; nil once unlinked from the area */

	Ref;
	Uwaitq;		/* the Futex itself is handed to sleepq()/wakeq()/requeue() */
};
56
/*
 * One mapped address range inside a segment. The areas of a segment
 * form a doubly linked list sorted by address.
 */
struct Area
{
	Range addr;	/* address range covered by this area */

	Area *next;	/* next higher area */
	Area *prev;	/* previous lower area */
	Seg *seg;	/* segment we belong to */

	int prot;	/* PROT_* bits of the mapping */

	Filemap *filemap;	/* file backing this area, nil for anonymous memory */
	Futex *futex;		/* futexes whose address lies in this area */
};
70
/*
 * Bookkeeping for one segment. The anonymous Ref and QLock members
 * allow a Seg* to be passed to incref()/decref() and qlock()/qunlock().
 */
struct Seg
{
	Ref;
	QLock;

	Range addr;	/* current extent of the segment */
	ulong limit;	/* maximum address this segment can grow */

	Area *areas;	/* ordered by address */

	int type;	/* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */

	/* recycled structures, reused before allocating new ones */
	Area *freearea;
	Filemap *freefilemap;
	Futex *freefutex;
};
87
/*
 * The address space of a process (current->mem): the program break
 * plus one optional segment per segment type. Reference counted and
 * lockable through the anonymous Ref and QLock members.
 */
struct Space
{
	Ref;
	QLock;

	ulong brk;		/* current program break as set by sys_brk() */
	Seg *seg[SEGMAX];	/* indexed by segment type, nil if not present */
};
96
97
98 void*
99 kmalloc(int size)
100 {
101 void *p;
102
103 p = malloc(size);
104 if(p == nil)
105 panic("kmalloc: out of memory");
106 setmalloctag(p, getcallerpc(&size));
107 return p;
108 }
109 void*
110 krealloc(void *ptr, int size)
111 {
112 void *p;
113
114 p = realloc(ptr, size);
115 if(size > 0){
116 if(p == nil)
117 panic("krealloc: out of memory");
118 setmalloctag(p, getcallerpc(&ptr));
119 }
120 return p;
121 }
122
123 void*
124 kmallocz(int size, int zero)
125 {
126 void *p;
127
128 p = mallocz(size, zero);
129 if(p == nil)
130 panic("kmallocz: out of memory");
131 setmalloctag(p, getcallerpc(&size));
132 return p;
133 }
134
/*
 * kstrdup returns a freshly allocated copy of the string s, tagged
 * with the pc of our caller. Allocation goes through kmalloc().
 */
char*
kstrdup(char *s)
{
	int len;
	char *copy;

	len = strlen(s);
	copy = kmalloc(len+1);
	memmove(copy, s, len);
	copy[len] = 0;
	setmalloctag(copy, getcallerpc(&s));
	return copy;
}
148
149 char*
150 ksmprint(char *fmt, ...)
151 {
152 va_list args;
153 char *p;
154 int n;
155
156 n = 4096;
157 p = kmalloc(n);
158 va_start(args, fmt);
159 n = vsnprint(p, n, fmt, args);
160 va_end(args);
161 if((p = realloc(p, n+1)) == nil)
162 panic("ksmprint: out of memory");
163 setmalloctag(p, getcallerpc(&fmt));
164 return p;
165 }
166
167 ulong
168 pagealign(ulong addr)
169 {
170 ulong m;
171
172 m = PAGESIZE-1;
173 return (addr + m) & ~m;
174 }
175
176 static void
177 syncarea(Area *a, Range r)
178 {
179 if(a->filemap == nil)
180 return;
181 if(a->filemap->file == nil)
182 return;
183 if((a->prot & PROT_WRITE) == 0)
184 return;
185
186 if(r.base < a->addr.base)
187 r.base = a->addr.base;
188 if(r.top > a->addr.top)
189 r.top = a->addr.top;
190 if(r.base < a->filemap->addr.base)
191 r.base = a->filemap->addr.base;
192 if(r.top > a->filemap->addr.top)
193 r.top = a->filemap->addr.top;
194 pwritefile(a->filemap->file, (void*)r.base, r.top - r.base,
195 (r.base - a->filemap->addr.base) + a->filemap->offset);
196 }
197
198 static void
199 linkarea(Seg *seg, Area *a)
200 {
201 Area *p;
202
203 a->next = nil;
204 a->prev = nil;
205 a->seg = seg;
206
207 for(p = seg->areas; p && p->next; p=p->next)
208 if(p->addr.base > a->addr.base)
209 break;
210 if(p != nil){
211 if(p->addr.base > a->addr.base){
212 a->next = p;
213 if(a->prev = p->prev)
214 a->prev->next = a;
215 p->prev = a;
216 } else {
217 a->prev = p;
218 p->next = a;
219 }
220 }
221 if(a->prev == nil)
222 seg->areas = a;
223 }
224
225 static Area *
226 duparea(Area *a)
227 {
228 Area *r;
229
230 if(r = a->seg->freearea){
231 a->seg->freearea = r->next;
232 } else {
233 r = kmalloc(sizeof(Area));
234 }
235 r->addr = a->addr;
236 r->next = nil;
237 r->prev = nil;
238 r->seg = nil;
239 r->prot = a->prot;
240 if(r->filemap = a->filemap)
241 incref(r->filemap);
242 r->futex = nil;
243 return r;
244 }
245
246 static void
247 freearea(Area *a)
248 {
249 Filemap *f;
250 Futex *x;
251 Seg *seg;
252
253 seg = a->seg;
254 if(f = a->filemap){
255 syncarea(a, a->addr);
256 a->filemap = nil;
257 if(!decref(f)){
258 free(f->path);
259 putfile(f->file);
260 f->next = seg->freefilemap;
261 seg->freefilemap = f;
262 }
263 }
264 while(x = a->futex){
265 if(a->futex = x->next)
266 x->next->link = &a->futex;
267 x->link = nil;
268 x->next = nil;
269 wakeq(x, MAXPROC);
270 }
271 if(a->prev == nil){
272 if(seg->areas = a->next)
273 a->next->prev = nil;
274 } else {
275 if(a->prev->next = a->next)
276 a->next->prev = a->prev;
277 }
278
279 a->next = seg->freearea;
280 seg->freearea = a;
281 }
282
283 static Seg *
284 allocseg(int type, Range addr, ulong limit, int attr, char *class)
285 {
286 Seg *seg;
287
288 if(class){
289 trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top);
290 if(segattach(attr, class, (void*)addr.base, addr.top - addr.base) != (void*)addr.base)
291 panic("allocseg: segattach %s segment: %r", segname[type]);
292 }
293
294 seg = kmallocz(sizeof(Seg), 1);
295 seg->addr = addr;
296 seg->limit = limit;
297 seg->type = type;
298 seg->ref = 1;
299
300 return seg;
301 }
302
303 static Seg *
304 dupseg(Seg *old, int copy)
305 {
306 Seg *new;
307 Area *a, *p, *x;
308
309 if(old == nil)
310 return nil;
311 if(!copy){
312 incref(old);
313 return old;
314 }
315 new = allocseg(old->type, old->addr, old->limit, 0, nil);
316 p = nil;
317 for(a=old->areas; a; a=a->next){
318 x = duparea(a);
319 x->seg = new;
320 if(x->prev = p){
321 p->next = x;
322 } else {
323 new->areas = x;
324 }
325 p = x;
326 }
327
328 return new;
329 }
330
331 static Space *
332 getspace(Space *old, int copy)
333 {
334 Space *new;
335 Seg *seg;
336 int t;
337
338 if(!copy){
339 incref(old);
340 return old;
341 }
342
343 new = kmallocz(sizeof(Space), 1);
344 new->ref = 1;
345
346 qlock(old);
347 for(t=0; t<SEGMAX; t++){
348 if(seg = old->seg[t]){
349 qlock(seg);
350 new->seg[t] = dupseg(seg, t != SEGSHARED);
351 qunlock(seg);
352 }
353 }
354 new->brk = old->brk;
355 qunlock(old);
356
357 return new;
358 }
359
360 static void
361 putspace(Space *space)
362 {
363 Seg *seg;
364 int t;
365 Area *a;
366 Filemap *f;
367 Futex *x;
368 void *addr;
369
370 if(decref(space))
371 return;
372 for(t=0; t<SEGMAX; t++){
373 if(seg = space->seg[t]){
374 addr = (void*)seg->addr.base;
375 if(!decref(seg)){
376 qlock(seg);
377 /* mark all areas as free */
378 while(a = seg->areas)
379 freearea(a);
380
381 /* clear the free lists */
382 while(a = seg->freearea){
383 seg->freearea = a->next;
384 free(a);
385 }
386 while(f = seg->freefilemap){
387 seg->freefilemap = f->next;
388 free(f);
389 }
390 while(x = seg->freefutex){
391 seg->freefutex = x->next;
392 free(x);
393 }
394 free(seg);
395 }
396 if(segdetach(addr) < 0)
397 panic("putspace: segdetach %s segment: %r", segname[t]);
398 }
399 }
400 free(space);
401 }
402
403 static int
404 canmerge(Area *a, Area *b)
405 {
406 return a->filemap==nil &&
407 a->futex==nil &&
408 b->filemap==nil &&
409 b->futex==nil &&
410 a->prot == b->prot;
411 }
412
413 static void
414 mergearea(Area *a)
415 {
416 if(a->prev && a->prev->addr.top == a->addr.base && canmerge(a->prev, a)){
417 a->addr.base = a->prev->addr.base;
418 freearea(a->prev);
419 }
420 if(a->next && a->next->addr.base == a->addr.top && canmerge(a->next, a)){
421 a->addr.top = a->next->addr.top;
422 freearea(a->next);
423 }
424 }
425
426 static int
427 findhole(Seg *seg, Range *r, int fixed)
428 {
429 Range h;
430 Area *a;
431 ulong m;
432 ulong z;
433 ulong hz;
434
435 z = r->top - r->base;
436 m = ~0;
437 h.base = seg->addr.base;
438 a = seg->areas;
439 for(;;) {
440 if((h.top = a ? a->addr.base : seg->addr.top) > h.base) {
441 if(fixed){
442 if(h.base > r->base)
443 break;
444 if((r->base >= h.base) && (r->top <= h.top))
445 goto found;
446 } else {
447 hz = h.top - h.base;
448 if((hz >= z) && (hz < m)) {
449 r->base = h.top - z;
450 r->top = h.top;
451 if((m = hz) == z)
452 goto found;
453 }
454 }
455 }
456 if(a == nil)
457 break;
458 h.base = a->addr.top;
459 a = a->next;
460 }
461 if(!fixed && (m != ~0))
462 goto found;
463 return 0;
464
465 found:
466 return 1;
467 }
468
469 /* wake up all futexes in range and unlink from area */
470 static void
471 wakefutexarea(Area *a, Range addr)
472 {
473 Futex *fu, *x;
474
475 for(fu = a->futex; fu; fu = x){
476 x = fu->next;
477 if((ulong)fu->addr >= addr.base && (ulong)fu->addr < addr.top){
478 if(*fu->link = x)
479 x->link = fu->link;
480 fu->link = nil;
481 fu->next = nil;
482
483 trace("wakefutexarea: fu=%p addr=%p", fu, fu->addr);
484 wakeq(fu, MAXPROC);
485 }
486 }
487 }
488
489 static void
490 makehole(Seg *seg, Range r)
491 {
492 Area *a, *b, *x;
493 Range f;
494
495 for(a = seg->areas; a; a = x){
496 x = a->next;
497
498 if(a->addr.top <= r.base)
499 continue;
500 if(a->addr.base >= r.top)
501 break;
502
503 f = r;
504 if(f.base < a->addr.base)
505 f.base = a->addr.base;
506 if(f.top > a->addr.top)
507 f.top = a->addr.top;
508
509 wakefutexarea(a, f);
510 if(f.base == a->addr.base){
511 if(f.top == a->addr.top){
512 freearea(a);
513 } else {
514 a->addr.base = f.top;
515 }
516 } else if(f.top == a->addr.top){
517 a->addr.top = f.base;
518 } else {
519 b = duparea(a);
520 b->addr.base = f.top;
521
522 a->addr.top = f.base;
523 linkarea(seg, b);
524 }
525
526 if(segfree((void*)f.base, f.top - f.base) < 0)
527 panic("makehole: segfree %s segment: %r", segname[seg->type]);
528 }
529 }
530
531 static Seg*
532 addr2seg(Space *space, ulong addr)
533 {
534 Seg *seg;
535 int t;
536
537 for(t=0; t<SEGMAX; t++){
538 if((seg = space->seg[t]) == nil)
539 continue;
540 qlock(seg);
541 if((addr >= seg->addr.base) && (addr < seg->addr.top))
542 return seg;
543 qunlock(seg);
544 }
545
546 return nil;
547 }
548
549 static Area*
550 addr2area(Seg *seg, ulong addr)
551 {
552 Area *a;
553
554 for(a=seg->areas; a; a=a->next)
555 if((addr >= a->addr.base) && (addr < a->addr.top))
556 return a;
557 return nil;
558 }
559
560 int
561 okaddr(void *ptr, int len, int write)
562 {
563 ulong addr;
564 Space *space;
565 Seg *seg;
566 Area *a;
567 int ok;
568
569 ok = 0;
570 addr = (ulong)ptr;
571 if(addr < PAGESIZE)
572 goto out;
573 if(space = current->mem){
574 qlock(space);
575 if(seg = addr2seg(space, addr)){
576 while(a = addr2area(seg, addr)){
577 if(write){
578 if((a->prot & PROT_WRITE) == 0)
579 break;
580 } else {
581 if((a->prot & PROT_READ) == 0)
582 break;
583 }
584 if((ulong)ptr + len <= a->addr.top){
585 ok = 1;
586 break;
587 }
588 addr = a->addr.top;
589 }
590 qunlock(seg);
591 }
592 qunlock(space);
593 }
594 out:
595 trace("okaddr(%lux-%lux, %d) -> %d", addr, addr+len, write, ok);
596 return ok;
597 }
598
599 static void
600 unmapspace(Space *space, Range r)
601 {
602 Seg *seg;
603 int t;
604
605 for(t=0; t<SEGMAX; t++){
606 if((seg = space->seg[t]) == nil)
607 continue;
608 qlock(seg);
609 if(seg->addr.base >= r.top){
610 qunlock(seg);
611 break;
612 }
613 if(seg->addr.top > r.base)
614 makehole(seg, r);
615 qunlock(seg);
616 }
617 }
618
/*
 * mapspace reserves address space for a new mapping and returns
 * the Area describing it.
 *
 * r      requested range; only its size matters unless the address
 *        turns out to be usable or MAP_FIXED is set
 * flags  MAP_* flags; MAP_PRIVATE and MAP_FIXED are looked at here
 * prot   PROT_* bits stored in the new area
 * perr   if not nil, receives -ENOMEM on failure
 *
 * On success the segment of the returned area is left LOCKED, the
 * caller has to qunlock(a->seg). On failure nil is returned and no
 * segment lock is held.
 */
static Area*
mapspace(Space *space, Range r, int flags, int prot, int *perr)
{
	Seg *seg;
	Area *a;
	Range f;
	int t;

	/*
	 * choose the segment: private mappings go by address into the
	 * stack, data or private segment, everything else is shared.
	 */
	if(flags & MAP_PRIVATE){
		if(r.base >= space->seg[SEGSTACK]->addr.base){
			t = SEGSTACK;
		} else if(r.base >= space->seg[SEGDATA]->addr.base &&
			r.base < space->seg[SEGDATA]->limit){
			t = SEGDATA;
		} else {
			t = SEGPRIVATE;
		}
	} else {
		t = SEGSHARED;
	}

	if((seg = space->seg[t]) == nil)
		goto nomem;

	qlock(seg);
	/* try the requested address first if it can be inside the segment */
	if((r.base >= seg->addr.base) && (r.top <= seg->limit)){
		/* completely above the current top: only needs growing */
		if(r.base >= seg->addr.top)
			goto addrok;

		/* f is the part of r inside the current extent of the segment */
		f = r;
		if(f.top > seg->addr.top)
			f.top = seg->addr.top;
		if(findhole(seg, &f, 1))
			goto addrok;
		if(flags & MAP_FIXED){
			/* fixed mappings replace what is in the way, except in shared segments */
			if(seg->type == SEGSHARED){
				trace("mapspace(): cant make hole %lux-%lux in shared segment",
					f.base, f.top);
				goto nomem;
			}
			makehole(seg, f);
			goto addrok;
		}
	}

	if(flags & MAP_FIXED){
		trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment",
			r.base, r.top, segname[seg->type]);
		goto nomem;
	}

	/* address is only a hint: take the best fitting hole */
	if(findhole(seg, &r, 0))
		goto addrok;

	/* no hole: move r, keeping its size, to the current top of the segment */
	r.top -= r.base;
	r.base = seg->addr.top;
	r.top += r.base;

addrok:
	trace("mapspace(): addr %lux-%lux", r.base, r.top);

	/* grow the segment if r reaches beyond its current top */
	if(r.top > seg->addr.top){
		if(r.top > seg->limit){
			trace("mapspace(): area top %lux over %s segment limit %lux",
				r.top, segname[seg->type], seg->limit);
			goto nomem;
		}
		trace("mapspace(): segbrk %s segment %lux-%lux -> %lux",
			segname[seg->type], seg->addr.base, seg->addr.top, r.top);
		if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){
			trace("mapspace(): segbrk failed: %r");
			goto nomem;
		}
		seg->addr.top = r.top;
	}

	/* reuse an Area from the free list if there is one */
	if(a = seg->freearea){
		seg->freearea = a->next;
	} else {
		a = kmalloc(sizeof(Area));
	}
	a->addr = r;
	a->prot = prot;
	a->filemap = nil;
	a->futex = nil;

	linkarea(seg, a);

	/* keep seg locked */
	return a;

nomem:
	/* seg is nil here only if the segment does not exist */
	if(seg != nil)
		qunlock(seg);
	if(perr) *perr = -ENOMEM;
	return nil;
}
716
/*
 * brkspace moves the program break of space to bk and returns the
 * break that is in effect afterwards. On any failure the break is
 * left where it was, so the caller sees the old value.
 *
 * The seg variable doubles as "which segment lock do we hold":
 * it is set to nil whenever the data segment gets unlocked, and
 * the out label unlocks whatever is still held.
 */
static ulong
brkspace(Space *space, ulong bk)
{
	Seg *seg;
	Area *a;
	ulong old, new;
	Range r;

	if((seg = space->seg[SEGDATA]) == nil)
		goto out;

	qlock(seg);
	/* a break below the data segment is replaced by the segment top */
	if(space->brk < seg->addr.base)
		space->brk = seg->addr.top;

	if(bk < seg->addr.base)
		goto out;

	old = pagealign(space->brk);
	new = pagealign(bk);

	/* pages only have to be mapped or unmapped if a page boundary is crossed */
	if(old != new){
		if(bk < space->brk){
			/* shrinking: unmap the pages above the new break */
			r.base = new;
			r.top = old;
			/* unmapspace() locks the segments itself */
			qunlock(seg);
			seg = nil;

			unmapspace(space, r);
		} else {
			/* growing: map the pages between old and new break */
			r.base = old;
			r.top = new;

			trace("brkspace(): new mapping %lux-%lux", r.base, r.top);
			/* refuse if any area overlaps or comes within one page above the new range */
			for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){
				if(a->addr.top <= r.base)
					continue;
				if(a->addr.base > r.top + PAGESIZE)
					break;

				trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top);
				goto out;
			}
			/* mapspace() locks the segment itself */
			qunlock(seg);
			seg = nil;

			a = mapspace(space, r,
				MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED,
				PROT_READ|PROT_WRITE|PROT_EXEC, nil);

			if(a == nil)
				goto out;

			/* mapspace() returned with the segment locked */
			seg = a->seg;
			mergearea(a);
		}
	}

	if(space->brk != bk){
		trace("brkspace: set new brk %lux", bk);
		space->brk = bk;
	}

out:
	if(seg != nil)
		qunlock(seg);

	return space->brk;
}
786
/*
 * remapspace implements mremap: it changes the size of the mapping
 * at addr from oldlen to newlen. Shrinking unmaps the tail. Growing
 * only works in place, that is when the mapping ends its area and
 * the space behind it is free; moving a mapping is not implemented
 * and fails with -ENOMEM.
 *
 * Returns addr on success or a negative error number converted
 * to ulong.
 */
static ulong
remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags)
{
	Area *a;
	Seg *seg;
	int move;
	Range r;

	if(pagealign(addr) != addr)
		return -EINVAL;

	oldlen = pagealign(oldlen);
	newlen = pagealign(newlen);

	/* reject ranges that wrap around and a new length of zero */
	if((addr + oldlen) < addr)
		return -EINVAL;
	if((addr + newlen) <= addr)
		return -EINVAL;

	move = 0;
	if(flags & MREMAP_FIXED){
		if(pagealign(newaddr) != newaddr)
			return -EINVAL;
		if((flags & MREMAP_MAYMOVE) == 0)
			return -EINVAL;
		/* old and new range must not overlap */
		if((newaddr <= addr) && ((newaddr+newlen) > addr))
			return -EINVAL;
		if((addr <= newaddr) && ((addr+oldlen) > newaddr))
			return -EINVAL;
		move = (newaddr != addr);
	}

	/* shrinking: drop the tail, afterwards old and new length agree */
	if(newlen < oldlen){
		r.base = addr + newlen;
		r.top = addr + oldlen;

		unmapspace(space, r);

		oldlen = newlen;
	}

	if((newlen == oldlen) && !move)
		return addr;

	/* addr2seg() returns the segment locked; every path below unlocks it */
	if((seg = addr2seg(space, addr)) == nil)
		return -EFAULT;

	/* the old range has to lie inside a single area */
	if((a = addr2area(seg, addr)) == nil)
		goto fault;
	if(a->addr.top < (addr + oldlen))
		goto fault;

	/* growing in place is impossible in any of these cases */
	if(move)
		goto domove;
	if((addr + oldlen) != a->addr.top)
		goto domove;
	if((addr + newlen) > seg->limit)
		goto domove;
	if(a->next != nil)
		if((addr + newlen) > a->next->addr.base)
			goto domove;

	/* grow the segment first if the new end is above its top */
	if((addr + newlen) > seg->addr.top){
		trace("remapspace(): segbrk %s segment %lux-%lux -> %lux",
			segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen));
		if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){
			trace("remapspace(): segbrk: %r");
			goto domove;
		}

		seg->addr.top = (addr + newlen);
	}
	a->addr.top = (addr + newlen);
	mergearea(a);
	qunlock(seg);

	return addr;

domove:
	trace("remapspace(): domove not implemented");
	if(seg != nil)
		qunlock(seg);
	return -ENOMEM;

fault:
	if(seg != nil)
		qunlock(seg);
	return -EFAULT;
}
876
877 static void
878 syncspace(Space *space, Range r)
879 {
880 Seg *seg;
881 Area *a;
882
883 if(seg = addr2seg(space, r.base)){
884 for(a = addr2area(seg, r.base); a; a=a->next){
885 if(r.base >= a->addr.top)
886 break;
887 syncarea(a, r);
888 }
889 qunlock(seg);
890 }
891 }
892
893 void*
894 mapstack(int size)
895 {
896 Space *space;
897 ulong a;
898
899 space = current->mem;
900 a = space->seg[SEGSTACK]->addr.top;
901 size = pagealign(size);
902 a = sys_mmap(a - size, size,
903 PROT_READ|PROT_WRITE,
904 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
905 if(a == 0)
906 return nil;
907
908 return (void*)(a + size);
909 }
910
911 void
912 mapdata(ulong base)
913 {
914 Space *space;
915 Range r;
916 ulong top;
917 int t;
918
919 space = current->mem;
920 base = pagealign(base);
921 top = space->seg[SEGSTACK]->addr.base - PAGESIZE;
922
923 for(t=0; t<SEGMAX; t++){
924 if(space->seg[t] == nil){
925 switch(t){
926 case SEGDATA:
927 r.base = base;
928 break;
929 case SEGPRIVATE:
930 r.base = base + 0x10000000;
931 break;
932 case SEGSHARED:
933 r.base = top - 0x10000000;
934 break;
935 }
936 r.top = r.base + PAGESIZE;
937 space->seg[t] = allocseg(t, r, r.top, 0, (t == SEGSHARED) ? "shared" : "memory");
938 }
939 if(t > 0 && space->seg[t-1])
940 space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE;
941 }
942 }
943
944 /*
945 * unmapuserspace is called from kprocfork to get rid of
946 * the linux memory segments used by the calling process
947 * before current is set to zero. we just segdetach() all that
948 * segments but keep the data structures valid for the calling
949 * (linux) process.
950 */
951 void
952 unmapuserspace(void)
953 {
954 Space *space;
955 Seg *seg;
956 int t;
957
958 space = current->mem;
959 qlock(space);
960 for(t=0; t<SEGMAX; t++){
961 if((seg = space->seg[t]) == nil)
962 continue;
963 if(segdetach((void*)seg->addr.base) < 0)
964 panic("unmapuserspace: segdetach %s segment: %r", segname[seg->type]);
965 }
966 qunlock(space);
967 }
968
/* hack:
 * we write the segment out into a file, detach it, attach a new
 * segment of the given class at the same address and read the
 * contents back. i'm surprised that this even works seamlessly
 * with the Plan9 Bss! :-)
 *
 * r is the address range of the segment; attr and class are handed
 * to segattach(). Between segdetach() and the end of the read loop
 * the old contents of the range are not available, so the code in
 * between sticks to the stack and to system calls.
 */
static void
convertseg(Range r, ulong attr, char *class)
{
	char name[64];
	ulong p;
	int n;
	int fd;
	ulong len;

	/* temporary file holding the segment contents; ORCLOSE removes it on close */
	snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid());
	fd = create(name, ORDWR|ORCLOSE, 0600);
	if(fd < 0)
		panic("convertseg: cant create %s: %r", name);

	len = r.top - r.base;

	if(len > 0){
		n = write(fd, (void*)r.base, len);
		if(n != len)
			panic("convertseg: write: %r");
	}

	/* copy string to stack because its memory gets detached :-) */
	strncpy(name, class, sizeof(name));

	trace("detaching %lux-%lux", r.base, r.top);

	/* point of no return */
	if(segdetach((void*)r.base) < 0)
		panic("convertseg: segdetach: %r");
	/*
	 * from here on failures crash with a nil store instead of
	 * panic() -- presumably because panic() needs data that may
	 * be in the detached range.
	 */
	if(segattach(attr, name, (void*)r.base, len) != (void*)r.base)
		*((int*)0) = 0;

	p = 0;
	while(p < len) {
		/*
		 * we use pread directly to avoid hitting profiling code until
		 * data segment is read back again. pread is unprofiled syscall
		 * stub.
		 */
		n = pread(fd, (void*)(r.base + p), len - p, (vlong)p);
		if(n <= 0)
			*((int*)0) = 0;
		p += n;
	}

	/* anything normal again */
	trace("segment %lux-%lux reattached as %s", r.base, r.top, class);

	close(fd);
}
1025
1026 void initmem(void)
1027 {
1028 Space *space;
1029 Range r, x;
1030 char buf[80];
1031 int fd;
1032 int n;
1033
1034 static int firsttime = 1;
1035
1036 space = kmallocz(sizeof(Space), 1);
1037 space->ref = 1;
1038
1039 snprint(buf, sizeof(buf), "/proc/%d/segment", getpid());
1040 if((fd = open(buf, OREAD)) < 0)
1041 panic("initspace: cant open %s: %r", buf);
1042
1043 n = 10 + 9 + 9 + 4 + 1;
1044 x.base = x.top = 0;
1045 while(readn(fd, buf, n)==n){
1046 char *name;
1047
1048 buf[8] = 0;
1049 buf[18] = 0;
1050 buf[28] = 0;
1051 buf[33] = 0;
1052
1053 name = &buf[0];
1054 r.base = strtoul(&buf[9], nil, 16);
1055 r.top = strtoul(&buf[19], nil, 16);
1056
1057 trace("initspace(): %s %lux-%lux", name, r.base, r.top);
1058
1059 if(firsttime){
1060 /*
1061 * convert Plan9 data+bss segments into shared segments so
1062 * that the memory of emulator data structures gets shared across
1063 * all processes. This only happens if initspace() is called the first time.
1064 */
1065 if(strstr(name, "Data")==name)
1066 convertseg(r, 0, "shared");
1067 if(strstr(name, "Bss")==name)
1068 convertseg(r, 0, "shared");
1069 }
1070
1071 if(strstr(name, "Stack")==name){
1072 x.top = r.base - PAGESIZE;
1073 x.base = x.top - pagealign((MAXPROC / 4) * USTACK);
1074
1075 if(!firsttime)
1076 break;
1077 }
1078 }
1079 close(fd);
1080 firsttime = 0;
1081
1082 /* allocate the linux stack */
1083 space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory");
1084
1085 current->mem = space;
1086 }
1087
1088 void exitmem(void)
1089 {
1090 Space *space;
1091
1092 if(space = current->mem){
1093 current->mem = nil;
1094 putspace(space);
1095 }
1096 }
1097
1098 void clonemem(Uproc *new, int copy)
1099 {
1100 Space *space;
1101
1102 if((space = current->mem) == nil){
1103 new->mem = nil;
1104 return;
1105 }
1106 new->mem = getspace(space, copy);
1107 }
1108
1109 ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe)
1110 {
1111 Space *space;
1112 ulong size, z;
1113 int i;
1114
1115 if(pdat) *pdat = 0;
1116 if(plib) *plib = 0;
1117 if(pshr) *pshr = 0;
1118 if(pstk) *pstk = 0;
1119 if(pexe) *pexe = 0;
1120
1121 if((space = proc->mem) == nil)
1122 return 0;
1123
1124 size = 0;
1125 qlock(space);
1126 for(i=0; i<SEGMAX; i++){
1127 Area *a;
1128 Seg *seg;
1129 if((seg = space->seg[i]) == nil)
1130 continue;
1131 qlock(seg);
1132 for(a = seg->areas; a; a = a->next){
1133 z = a->addr.top - a->addr.base;
1134 switch(i){
1135 case SEGDATA:
1136 if(pdat)
1137 *pdat += z;
1138 case SEGPRIVATE:
1139 if(plib)
1140 *plib += z;
1141 break;
1142 case SEGSHARED:
1143 if(pshr)
1144 *pshr += z;
1145 break;
1146 case SEGSTACK:
1147 if(pstk)
1148 *pstk += z;
1149 break;
1150 }
1151 if(pexe && (a->prot & PROT_EXEC))
1152 *pexe += z;
1153 size += z;
1154 }
1155 qunlock(seg);
1156 }
1157 qunlock(space);
1158
1159 return size;
1160 }
1161
/*
 * Argument block of the mmap variant that takes a single pointer
 * instead of six arguments, see sys_linux_mmap().
 */
struct linux_mmap_args {
	ulong addr;	/* requested address */
	int len;	/* length of the mapping */
	int prot;	/* PROT_* bits */
	int flags;	/* MAP_* flags */
	int fd;		/* file to map */
	ulong offset;	/* file offset in bytes, has to be page aligned */
};
1170
1171 ulong
1172 sys_linux_mmap(void *a)
1173 {
1174 struct linux_mmap_args *p = a;
1175
1176 if(pagealign(p->offset) != p->offset)
1177 return -EINVAL;
1178
1179 return sys_mmap(
1180 p->addr,
1181 p->len,
1182 p->prot,
1183 p->flags,
1184 p->fd,
1185 p->offset / PAGESIZE);
1186 }
1187
1188 ulong
1189 sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff)
1190 {
1191 Space *space;
1192 Seg *seg;
1193 Range r;
1194 ulong o;
1195 int e, n;
1196 Area *a;
1197 Filemap *f;
1198 Ufile *file;
1199
1200 trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff);
1201
1202 if(pagealign(addr) != addr)
1203 return (ulong)-EINVAL;
1204
1205 r.base = addr;
1206 r.top = addr + pagealign(len);
1207 if(r.top <= r.base)
1208 return (ulong)-EINVAL;
1209
1210 file = nil;
1211 if((flags & MAP_ANONYMOUS)==0)
1212 if((file = fdgetfile(fd))==nil)
1213 return (ulong)-EBADF;
1214
1215 space = current->mem;
1216 qlock(space);
1217 if((a = mapspace(space, r, flags, prot, &e)) == nil){
1218 qunlock(space);
1219 putfile(file);
1220 return (ulong)e;
1221 }
1222
1223 seg = a->seg;
1224 r = a->addr;
1225
1226 if(flags & MAP_ANONYMOUS){
1227 mergearea(a);
1228 qunlock(seg);
1229 qunlock(space);
1230
1231 return r.base;
1232 }
1233
1234 o = pgoff * PAGESIZE;
1235
1236 if(f = seg->freefilemap)
1237 seg->freefilemap = f->next;
1238 if(f == nil)
1239 f = kmalloc(sizeof(Filemap));
1240 f->ref = 1;
1241 f->addr = r;
1242 f->next = nil;
1243 f->path = kstrdup(file->path);
1244 f->offset = o;
1245 if((f->mode = file->mode) != O_RDONLY){
1246 f->file = getfile(file);
1247 } else {
1248 f->file = nil;
1249 }
1250 a->filemap = f;
1251 qunlock(seg);
1252 qunlock(space);
1253
1254 trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top);
1255
1256 addr = r.base;
1257 while(addr < r.top){
1258 n = preadfile(file, (void*)addr, r.top - addr, o);
1259 if(n == 0)
1260 break;
1261 if(n < 0){
1262 trace("read failed at offset %lux for address %lux failed: %r", o, addr);
1263 break;
1264 }
1265 addr += n;
1266 o += n;
1267 }
1268
1269 putfile(file);
1270
1271 return r.base;
1272 }
1273
1274 int sys_munmap(ulong addr, ulong len)
1275 {
1276 Space *space;
1277 Range r;
1278
1279 trace("sys_munmap(%lux, %lux)", addr, len);
1280
1281 if(pagealign(addr) != addr)
1282 return -EINVAL;
1283 r.base = addr;
1284 r.top = addr + pagealign(len);
1285 if(r.top <= r.base)
1286 return -EINVAL;
1287
1288 space = current->mem;
1289 qlock(space);
1290 unmapspace(current->mem, r);
1291 qunlock(space);
1292
1293 return 0;
1294 }
1295
1296 ulong
1297 sys_brk(ulong bk)
1298 {
1299 Space *space;
1300 ulong a;
1301
1302 trace("sys_brk(%lux)", bk);
1303
1304 space = current->mem;
1305 qlock(space);
1306 a = brkspace(space, bk);
1307 qunlock(space);
1308
1309 return a;
1310 }
1311
1312 int sys_mprotect(ulong addr, ulong len, int prot)
1313 {
1314 Space *space;
1315 Seg *seg;
1316 Area *a, *b;
1317 int err;
1318
1319 trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot);
1320
1321 len = pagealign(len);
1322 if(pagealign(addr) != addr)
1323 return -EINVAL;
1324 if(len == 0)
1325 return -EINVAL;
1326
1327 err = -ENOMEM;
1328 space = current->mem;
1329 qlock(space);
1330 if(seg = addr2seg(space, addr)){
1331 for(a = addr2area(seg, addr); a!=nil; a=a->next){
1332 if(addr + len <= a->addr.base)
1333 break;
1334 err = 0;
1335 if(a->prot == prot)
1336 continue;
1337 wakefutexarea(a, a->addr);
1338 if(a->addr.base < addr){
1339 b = duparea(a);
1340 a->addr.base = addr;
1341 b->addr.top = addr;
1342 linkarea(seg, b);
1343 }
1344 if(a->addr.top > addr + len){
1345 b = duparea(a);
1346 a->addr.top = addr + len;
1347 b->addr.base = addr + len;
1348 linkarea(seg, b);
1349 }
1350 trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot);
1351 a->prot = prot;
1352 }
1353 qunlock(seg);
1354 }
1355 qunlock(space);
1356
1357 return err;
1358 }
1359
1360 int sys_msync(ulong addr, ulong len, int flags)
1361 {
1362 Space *space;
1363 Range r;
1364
1365 trace("sys_msync(%lux, %lux, %x)", addr, len, flags);
1366
1367 if(pagealign(addr) != addr)
1368 return -EINVAL;
1369 r.base = addr;
1370 r.top = addr + pagealign(len);
1371 if(r.top <= r.base)
1372 return -EINVAL;
1373
1374 space = current->mem;
1375 qlock(space);
1376 syncspace(space, r);
1377 qunlock(space);
1378
1379 return 0;
1380 }
1381
1382 ulong
1383 sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr)
1384 {
1385 Space *space;
1386 int r;
1387
1388 trace("sys_mremap(%lux, %lux, %lux, %x, %lux)",
1389 addr, oldlen, newlen, flags, newaddr);
1390
1391 space = current->mem;
1392 qlock(space);
1393 r = remapspace(space, addr, oldlen, newlen, newaddr, flags);
1394 qunlock(space);
1395
1396 return r;
1397 }
1398
/*
 * Operation codes for the op argument of sys_futex(), numbered
 * from 0 in this order.
 */
enum {
	FUTEX_WAIT,
	FUTEX_WAKE,
	FUTEX_FD,		/* not handled by sys_futex(), yields -ENOSYS */
	FUTEX_REQUEUE,
	FUTEX_CMP_REQUEUE,
};
1406
/*
 * sys_futex implements the futex operations FUTEX_WAIT, FUTEX_WAKE,
 * FUTEX_REQUEUE and FUTEX_CMP_REQUEUE on the futex at addr.
 *
 * addr   user address identifying the futex
 * op     FUTEX_* operation
 * val    WAIT: value *addr is expected to have
 *        WAKE/REQUEUE: number of waiters to wake
 * ptime  WAIT: optional relative timeout (struct linux_timespec)
 *        REQUEUE: an integer handed to requeue(), passed in the
 *        pointer argument
 * addr2  REQUEUE: futex the waiters are moved to
 * val3   CMP_REQUEUE: value *addr is expected to have
 *
 * Returns the result of sleepq()/wakeq() (plus requeue()) or a
 * negative error number; -EFAULT if addr is not mapped, -ENOSYS
 * for any other op.
 *
 * The segment containing addr stays locked from addr2seg() until
 * the out label; the lock is also handed to sleepq().
 */
int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3)
{
	Space *space;
	Seg *seg;
	Area *a;
	Futex *fu, *fu2;
	int err, val2;
	vlong timeout;

	trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3);

	seg = nil;
	err = -EFAULT;
	if((space = current->mem) == 0)
		goto out;

	/* the space lock is only needed to look up the segment */
	qlock(space);
	if((seg = addr2seg(space, (ulong)addr)) == nil){
		qunlock(space);
		goto out;
	}
	qunlock(space);
	if((a = addr2area(seg, (ulong)addr)) == nil)
		goto out;
	/* fu is the existing futex for addr, or nil if there is none */
	for(fu = a->futex; fu; fu = fu->next)
		if(fu->addr == addr)
			break;

	switch(op){
	case FUTEX_WAIT:
		trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr);

		/* first waiter creates the futex and links it at the head of the area's list */
		if(fu == nil){
			if(fu = seg->freefutex){
				seg->freefutex = fu->next;
			} else {
				fu = kmallocz(sizeof(Futex), 1);
			}
			fu->ref = 1;
			fu->addr = addr;
			if(fu->next = a->futex)
				fu->next->link = &fu->next;
			fu->link = &a->futex;
			a->futex = fu;
		} else {
			incref(fu);
		}

		err = 0;
		timeout = 0;
		if(ptime != nil){
			struct linux_timespec *ts = ptime;
			vlong now;

			wakeme(1);
			now = nsec();
			/* a restarted call keeps the deadline of the first attempt */
			if(current->restart->syscall){
				timeout = current->restart->futex.timeout;
			} else {
				timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec;
			}
			if(now < timeout){
				current->timeout = timeout;
				setalarm(timeout);
			} else {
				err = -ETIMEDOUT;
			}
		}
		if(err == 0){
			/* only sleep while the futex word still has the expected value */
			if(*addr != val){
				err = -EWOULDBLOCK;
			} else {
				err = sleepq(fu, seg, 1);
			}
		}
		if(ptime != nil){
			current->timeout = 0;
			wakeme(0);
		}
		/* remember the deadline for the restarted call */
		if(err == -ERESTART)
			current->restart->futex.timeout = timeout;

		/* last waiter unlinks the futex (if still linked) and recycles it */
		if(!decref(fu)){
			if(fu->link){
				if(*fu->link = fu->next)
					fu->next->link = fu->link;
				fu->link = nil;
				fu->next = nil;
			}
			fu->next = seg->freefutex;
			seg->freefutex = fu;
		}
		break;

	case FUTEX_WAKE:
		trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr);
		/* without a futex nobody is waiting */
		err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
		break;

	case FUTEX_CMP_REQUEUE:
		trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr);
		/*
		 * beware: the FUTEX_REQUEUE label sits INSIDE this if
		 * body. CMP_REQUEUE leaves with -EAGAIN on a mismatch and
		 * otherwise skips the body; REQUEUE enters at its label.
		 * Both continue with the shared code after the brace.
		 */
		if(*addr != val3){
			err = -EAGAIN;
			break;
	case FUTEX_REQUEUE:
			trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr);
		}
		err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
		/* requeueing is only attempted if somebody was woken */
		if(err > 0){
			val2 = (int)ptime;

			/* BUG: fu2 has to be in the same segment as fu */
			if(a = addr2area(seg, (ulong)addr2)){
				/* only an already existing futex at addr2 can take over waiters */
				for(fu2 = a->futex; fu2; fu2 = fu2->next){
					if(fu2->addr == addr2){
						err += requeue(fu, fu2, val2);
						break;
					}
				}
			}
		}
		break;

	default:
		err = -ENOSYS;
	}

out:
	if(seg)
		qunlock(seg);
	return err;
}