#include #include #include #include "dat.h" #include "fns.h" #include "linux.h" typedef struct Range Range; typedef struct Area Area; typedef struct Filemap Filemap; typedef struct Futex Futex; typedef struct Seg Seg; typedef struct Space Space; /* keep in order, lowest base address first */ enum { SEGDATA, SEGPRIVATE, SEGSHARED, SEGSTACK, SEGMAX, }; static char *segname[SEGMAX] = { "data", "private", "shared", "stack" }; struct Range { ulong base; ulong top; }; struct Filemap { Range addr; Filemap *next; char *path; ulong offset; int mode; Ufile *file; Ref; }; struct Futex { ulong *addr; Futex *next; Futex **link; Ref; Uwaitq; }; struct Area { Range addr; Area *next; /* next higher area */ Area *prev; /* previous lower area */ Seg *seg; /* segment we belong to */ int prot; Filemap *filemap; Futex *futex; }; struct Seg { Ref; QLock; Range addr; ulong limit; /* maximum address this segment can grow */ Area *areas; /* orderd by address */ int type; /* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */ Area *freearea; Filemap *freefilemap; Futex *freefutex; }; struct Space { Ref; QLock; ulong brk; Seg *seg[SEGMAX]; }; void* kmalloc(int size) { void *p; p = malloc(size); if(p == nil) panic("kmalloc: out of memory"); setmalloctag(p, getcallerpc(&size)); return p; } void* krealloc(void *ptr, int size) { void *p; p = realloc(ptr, size); if(size > 0){ if(p == nil) panic("krealloc: out of memory"); setmalloctag(p, getcallerpc(&ptr)); } return p; } void* kmallocz(int size, int zero) { void *p; p = mallocz(size, zero); if(p == nil) panic("kmallocz: out of memory"); setmalloctag(p, getcallerpc(&size)); return p; } char* kstrdup(char *s) { char *p; int n; n = strlen(s); p = kmalloc(n+1); memmove(p, s, n); p[n] = 0; setmalloctag(p, getcallerpc(&s)); return p; } char* ksmprint(char *fmt, ...) { va_list args; char *p; int n; n = 4096; p = kmalloc(n); va_start(args, fmt); n = vsnprint(p, n, fmt, args); va_end(args); if((p = realloc(p, n+1)) == nil) panic("ksmprint: out of memory"); setmalloctag(p, getcallerpc(&fmt)); return p; } ulong pagealign(ulong addr) { ulong m; m = PAGESIZE-1; return (addr + m) & ~m; } static void syncarea(Area *a, Range r) { if(a->filemap == nil) return; if(a->filemap->file == nil) return; if((a->prot & PROT_WRITE) == 0) return; if(r.base < a->addr.base) r.base = a->addr.base; if(r.top > a->addr.top) r.top = a->addr.top; if(r.base < a->filemap->addr.base) r.base = a->filemap->addr.base; if(r.top > a->filemap->addr.top) r.top = a->filemap->addr.top; pwritefile(a->filemap->file, (void*)r.base, r.top - r.base, (r.base - a->filemap->addr.base) + a->filemap->offset); } static void linkarea(Seg *seg, Area *a) { Area *p; a->next = nil; a->prev = nil; a->seg = seg; for(p = seg->areas; p && p->next; p=p->next) if(p->addr.base > a->addr.base) break; if(p != nil){ if(p->addr.base > a->addr.base){ a->next = p; if(a->prev = p->prev) a->prev->next = a; p->prev = a; } else { a->prev = p; p->next = a; } } if(a->prev == nil) seg->areas = a; } static Area * duparea(Area *a) { Area *r; if(r = a->seg->freearea){ a->seg->freearea = r->next; } else { r = kmalloc(sizeof(Area)); } r->addr = a->addr; r->next = nil; r->prev = nil; r->seg = nil; r->prot = a->prot; if(r->filemap = a->filemap) incref(r->filemap); r->futex = nil; return r; } static void freearea(Area *a) { Filemap *f; Futex *x; Seg *seg; seg = a->seg; if(f = a->filemap){ syncarea(a, a->addr); a->filemap = nil; if(!decref(f)){ free(f->path); putfile(f->file); f->next = seg->freefilemap; seg->freefilemap = f; } } while(x = a->futex){ if(a->futex = x->next) x->next->link = &a->futex; x->link = nil; x->next = nil; wakeq(x, MAXPROC); } if(a->prev == nil){ if(seg->areas = a->next) a->next->prev = nil; } else { if(a->prev->next = a->next) a->next->prev = a->prev; } a->next = seg->freearea; seg->freearea = a; } static Seg * allocseg(int type, Range addr, ulong limit, int attr, char *class) { Seg *seg; if(class){ trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top); if(segattach(attr, class, (void*)addr.base, addr.top - addr.base) != (void*)addr.base) panic("allocseg: segattach %s segment: %r", segname[type]); } seg = kmallocz(sizeof(Seg), 1); seg->addr = addr; seg->limit = limit; seg->type = type; seg->ref = 1; return seg; } static Seg * dupseg(Seg *old, int copy) { Seg *new; Area *a, *p, *x; if(old == nil) return nil; if(!copy){ incref(old); return old; } new = allocseg(old->type, old->addr, old->limit, 0, nil); p = nil; for(a=old->areas; a; a=a->next){ x = duparea(a); x->seg = new; if(x->prev = p){ p->next = x; } else { new->areas = x; } p = x; } return new; } static Space * getspace(Space *old, int copy) { Space *new; Seg *seg; int t; if(!copy){ incref(old); return old; } new = kmallocz(sizeof(Space), 1); new->ref = 1; qlock(old); for(t=0; tseg[t]){ qlock(seg); new->seg[t] = dupseg(seg, t != SEGSHARED); qunlock(seg); } } new->brk = old->brk; qunlock(old); return new; } static void putspace(Space *space) { Seg *seg; int t; Area *a; Filemap *f; Futex *x; void *addr; if(decref(space)) return; for(t=0; tseg[t]){ addr = (void*)seg->addr.base; if(!decref(seg)){ qlock(seg); /* mark all areas as free */ while(a = seg->areas) freearea(a); /* clear the free lists */ while(a = seg->freearea){ seg->freearea = a->next; free(a); } while(f = seg->freefilemap){ seg->freefilemap = f->next; free(f); } while(x = seg->freefutex){ seg->freefutex = x->next; free(x); } free(seg); } if(segdetach(addr) < 0) panic("putspace: segdetach %s segment: %r", segname[t]); } } free(space); } static int canmerge(Area *a, Area *b) { return a->filemap==nil && a->futex==nil && b->filemap==nil && b->futex==nil && a->prot == b->prot; } static void mergearea(Area *a) { if(a->prev && a->prev->addr.top == a->addr.base && canmerge(a->prev, a)){ a->addr.base = a->prev->addr.base; freearea(a->prev); } if(a->next && a->next->addr.base == a->addr.top && canmerge(a->next, a)){ a->addr.top = a->next->addr.top; freearea(a->next); } } static int findhole(Seg *seg, Range *r, int fixed) { Range h; Area *a; ulong m; ulong z; ulong hz; z = r->top - r->base; m = ~0; h.base = seg->addr.base; a = seg->areas; for(;;) { if((h.top = a ? a->addr.base : seg->addr.top) > h.base) { if(fixed){ if(h.base > r->base) break; if((r->base >= h.base) && (r->top <= h.top)) goto found; } else { hz = h.top - h.base; if((hz >= z) && (hz < m)) { r->base = h.top - z; r->top = h.top; if((m = hz) == z) goto found; } } } if(a == nil) break; h.base = a->addr.top; a = a->next; } if(!fixed && (m != ~0)) goto found; return 0; found: return 1; } /* wake up all futexes in range and unlink from area */ static void wakefutexarea(Area *a, Range addr) { Futex *fu, *x; for(fu = a->futex; fu; fu = x){ x = fu->next; if((ulong)fu->addr >= addr.base && (ulong)fu->addr < addr.top){ if(*fu->link = x) x->link = fu->link; fu->link = nil; fu->next = nil; trace("wakefutexarea: fu=%p addr=%p", fu, fu->addr); wakeq(fu, MAXPROC); } } } static void makehole(Seg *seg, Range r) { Area *a, *b, *x; Range f; for(a = seg->areas; a; a = x){ x = a->next; if(a->addr.top <= r.base) continue; if(a->addr.base >= r.top) break; f = r; if(f.base < a->addr.base) f.base = a->addr.base; if(f.top > a->addr.top) f.top = a->addr.top; wakefutexarea(a, f); if(f.base == a->addr.base){ if(f.top == a->addr.top){ freearea(a); } else { a->addr.base = f.top; } } else if(f.top == a->addr.top){ a->addr.top = f.base; } else { b = duparea(a); b->addr.base = f.top; a->addr.top = f.base; linkarea(seg, b); } if(segfree((void*)f.base, f.top - f.base) < 0) panic("makehole: segfree %s segment: %r", segname[seg->type]); } } static Seg* addr2seg(Space *space, ulong addr) { Seg *seg; int t; for(t=0; tseg[t]) == nil) continue; qlock(seg); if((addr >= seg->addr.base) && (addr < seg->addr.top)) return seg; qunlock(seg); } return nil; } static Area* addr2area(Seg *seg, ulong addr) { Area *a; for(a=seg->areas; a; a=a->next) if((addr >= a->addr.base) && (addr < a->addr.top)) return a; return nil; } int okaddr(void *ptr, int len, int write) { ulong addr; Space *space; Seg *seg; Area *a; int ok; ok = 0; addr = (ulong)ptr; if(addr < PAGESIZE) goto out; if(space = current->mem){ qlock(space); if(seg = addr2seg(space, addr)){ while(a = addr2area(seg, addr)){ if(write){ if((a->prot & PROT_WRITE) == 0) break; } else { if((a->prot & PROT_READ) == 0) break; } if((ulong)ptr + len <= a->addr.top){ ok = 1; break; } addr = a->addr.top; } qunlock(seg); } qunlock(space); } out: trace("okaddr(%lux-%lux, %d) -> %d", addr, addr+len, write, ok); return ok; } static void unmapspace(Space *space, Range r) { Seg *seg; int t; for(t=0; tseg[t]) == nil) continue; qlock(seg); if(seg->addr.base >= r.top){ qunlock(seg); break; } if(seg->addr.top > r.base) makehole(seg, r); qunlock(seg); } } static Area* mapspace(Space *space, Range r, int flags, int prot, int *perr) { Seg *seg; Area *a; Range f; int t; if(flags & MAP_PRIVATE){ if(r.base >= space->seg[SEGSTACK]->addr.base){ t = SEGSTACK; } else if(r.base >= space->seg[SEGDATA]->addr.base && r.base < space->seg[SEGDATA]->limit){ t = SEGDATA; } else { t = SEGPRIVATE; } } else { t = SEGSHARED; } if((seg = space->seg[t]) == nil) goto nomem; qlock(seg); if((r.base >= seg->addr.base) && (r.top <= seg->limit)){ if(r.base >= seg->addr.top) goto addrok; f = r; if(f.top > seg->addr.top) f.top = seg->addr.top; if(findhole(seg, &f, 1)) goto addrok; if(flags & MAP_FIXED){ if(seg->type == SEGSHARED){ trace("mapspace(): cant make hole %lux-%lux in shared segment", f.base, f.top); goto nomem; } makehole(seg, f); goto addrok; } } if(flags & MAP_FIXED){ trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment", r.base, r.top, segname[seg->type]); goto nomem; } if(findhole(seg, &r, 0)) goto addrok; r.top -= r.base; r.base = seg->addr.top; r.top += r.base; addrok: trace("mapspace(): addr %lux-%lux", r.base, r.top); if(r.top > seg->addr.top){ if(r.top > seg->limit){ trace("mapspace(): area top %lux over %s segment limit %lux", r.top, segname[seg->type], seg->limit); goto nomem; } trace("mapspace(): segbrk %s segment %lux-%lux -> %lux", segname[seg->type], seg->addr.base, seg->addr.top, r.top); if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){ trace("mapspace(): segbrk failed: %r"); goto nomem; } seg->addr.top = r.top; } if(a = seg->freearea){ seg->freearea = a->next; } else { a = kmalloc(sizeof(Area)); } a->addr = r; a->prot = prot; a->filemap = nil; a->futex = nil; linkarea(seg, a); /* keep seg locked */ return a; nomem: if(seg != nil) qunlock(seg); if(perr) *perr = -ENOMEM; return nil; } static ulong brkspace(Space *space, ulong bk) { Seg *seg; Area *a; ulong old, new; Range r; if((seg = space->seg[SEGDATA]) == nil) goto out; qlock(seg); if(space->brk < seg->addr.base) space->brk = seg->addr.top; if(bk < seg->addr.base) goto out; old = pagealign(space->brk); new = pagealign(bk); if(old != new){ if(bk < space->brk){ r.base = new; r.top = old; qunlock(seg); seg = nil; unmapspace(space, r); } else { r.base = old; r.top = new; trace("brkspace(): new mapping %lux-%lux", r.base, r.top); for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){ if(a->addr.top <= r.base) continue; if(a->addr.base > r.top + PAGESIZE) break; trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top); goto out; } qunlock(seg); seg = nil; a = mapspace(space, r, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, PROT_READ|PROT_WRITE|PROT_EXEC, nil); if(a == nil) goto out; seg = a->seg; mergearea(a); } } if(space->brk != bk){ trace("brkspace: set new brk %lux", bk); space->brk = bk; } out: if(seg != nil) qunlock(seg); return space->brk; } static ulong remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags) { Area *a; Seg *seg; int move; Range r; if(pagealign(addr) != addr) return -EINVAL; oldlen = pagealign(oldlen); newlen = pagealign(newlen); if((addr + oldlen) < addr) return -EINVAL; if((addr + newlen) <= addr) return -EINVAL; move = 0; if(flags & MREMAP_FIXED){ if(pagealign(newaddr) != newaddr) return -EINVAL; if((flags & MREMAP_MAYMOVE) == 0) return -EINVAL; if((newaddr <= addr) && ((newaddr+newlen) > addr)) return -EINVAL; if((addr <= newaddr) && ((addr+oldlen) > newaddr)) return -EINVAL; move = (newaddr != addr); } if(newlen < oldlen){ r.base = addr + newlen; r.top = addr + oldlen; unmapspace(space, r); oldlen = newlen; } if((newlen == oldlen) && !move) return addr; if((seg = addr2seg(space, addr)) == nil) return -EFAULT; if((a = addr2area(seg, addr)) == nil) goto fault; if(a->addr.top < (addr + oldlen)) goto fault; if(move) goto domove; if((addr + oldlen) != a->addr.top) goto domove; if((addr + newlen) > seg->limit) goto domove; if(a->next != nil) if((addr + newlen) > a->next->addr.base) goto domove; if((addr + newlen) > seg->addr.top){ trace("remapspace(): segbrk %s segment %lux-%lux -> %lux", segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen)); if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){ trace("remapspace(): segbrk: %r"); goto domove; } seg->addr.top = (addr + newlen); } a->addr.top = (addr + newlen); mergearea(a); qunlock(seg); return addr; domove: trace("remapspace(): domove not implemented"); if(seg != nil) qunlock(seg); return -ENOMEM; fault: if(seg != nil) qunlock(seg); return -EFAULT; } static void syncspace(Space *space, Range r) { Seg *seg; Area *a; if(seg = addr2seg(space, r.base)){ for(a = addr2area(seg, r.base); a; a=a->next){ if(r.base >= a->addr.top) break; syncarea(a, r); } qunlock(seg); } } void* mapstack(int size) { Space *space; ulong a; space = current->mem; a = space->seg[SEGSTACK]->addr.top; size = pagealign(size); a = sys_mmap(a - size, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); if(a == 0) return nil; return (void*)(a + size); } void mapdata(ulong base) { Space *space; Range r; ulong top; int t; space = current->mem; base = pagealign(base); top = space->seg[SEGSTACK]->addr.base - PAGESIZE; for(t=0; tseg[t] == nil){ switch(t){ case SEGDATA: r.base = base; break; case SEGPRIVATE: r.base = base + 0x10000000; break; case SEGSHARED: r.base = top - 0x10000000; break; } r.top = r.base + PAGESIZE; space->seg[t] = allocseg(t, r, r.top, 0, (t == SEGSHARED) ? "shared" : "memory"); } if(t > 0 && space->seg[t-1]) space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE; } } /* * unmapuserspace is called from kprocfork to get rid of * the linux memory segments used by the calling process * before current is set to zero. we just segdetach() all that * segments but keep the data structures valid for the calling * (linux) process. */ void unmapuserspace(void) { Space *space; Seg *seg; int t; space = current->mem; qlock(space); for(t=0; tseg[t]) == nil) continue; if(segdetach((void*)seg->addr.base) < 0) panic("unmapuserspace: segdetach %s segment: %r", segname[seg->type]); } qunlock(space); } /* hack: * we write segment out into a file, detach it and reattach * a new one and reading contents back. i'm surprised that * this even works seamless with the Plan9 Bss! :-) */ static void convertseg(Range r, ulong attr, char *class) { char name[64]; ulong p; int n; int fd; ulong len; snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid()); fd = create(name, ORDWR|ORCLOSE, 0600); if(fd < 0) panic("convertseg: cant create %s: %r", name); len = r.top - r.base; if(len > 0){ n = write(fd, (void*)r.base, len); if(n != len) panic("convertseg: write: %r"); } /* copy string to stack because its memory gets detached :-) */ strncpy(name, class, sizeof(name)); trace("detaching %lux-%lux", r.base, r.top); /* point of no return */ if(segdetach((void*)r.base) < 0) panic("convertseg: segdetach: %r"); if(segattach(attr, name, (void*)r.base, len) != (void*)r.base) *((int*)0) = 0; p = 0; while(p < len) { /* * we use pread directly to avoid hitting profiling code until * data segment is read back again. pread is unprofiled syscall * stub. */ n = pread(fd, (void*)(r.base + p), len - p, (vlong)p); if(n <= 0) *((int*)0) = 0; p += n; } /* anything normal again */ trace("segment %lux-%lux reattached as %s", r.base, r.top, class); close(fd); } void initmem(void) { Space *space; Range r, x; char buf[80]; int fd; int n; static int firsttime = 1; space = kmallocz(sizeof(Space), 1); space->ref = 1; snprint(buf, sizeof(buf), "/proc/%d/segment", getpid()); if((fd = open(buf, OREAD)) < 0) panic("initspace: cant open %s: %r", buf); n = 10 + 9 + 9 + 4 + 1; x.base = x.top = 0; while(readn(fd, buf, n)==n){ char *name; buf[8] = 0; buf[18] = 0; buf[28] = 0; buf[33] = 0; name = &buf[0]; r.base = strtoul(&buf[9], nil, 16); r.top = strtoul(&buf[19], nil, 16); trace("initspace(): %s %lux-%lux", name, r.base, r.top); if(firsttime){ /* * convert Plan9 data+bss segments into shared segments so * that the memory of emulator data structures gets shared across * all processes. This only happens if initspace() is called the first time. */ if(strstr(name, "Data")==name) convertseg(r, 0, "shared"); if(strstr(name, "Bss")==name) convertseg(r, 0, "shared"); } if(strstr(name, "Stack")==name){ x.top = r.base - PAGESIZE; x.base = x.top - pagealign((MAXPROC / 4) * USTACK); if(!firsttime) break; } } close(fd); firsttime = 0; /* allocate the linux stack */ space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory"); current->mem = space; } void exitmem(void) { Space *space; if(space = current->mem){ current->mem = nil; putspace(space); } } void clonemem(Uproc *new, int copy) { Space *space; if((space = current->mem) == nil){ new->mem = nil; return; } new->mem = getspace(space, copy); } ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe) { Space *space; ulong size, z; int i; if(pdat) *pdat = 0; if(plib) *plib = 0; if(pshr) *pshr = 0; if(pstk) *pstk = 0; if(pexe) *pexe = 0; if((space = proc->mem) == nil) return 0; size = 0; qlock(space); for(i=0; iseg[i]) == nil) continue; qlock(seg); for(a = seg->areas; a; a = a->next){ z = a->addr.top - a->addr.base; switch(i){ case SEGDATA: if(pdat) *pdat += z; case SEGPRIVATE: if(plib) *plib += z; break; case SEGSHARED: if(pshr) *pshr += z; break; case SEGSTACK: if(pstk) *pstk += z; break; } if(pexe && (a->prot & PROT_EXEC)) *pexe += z; size += z; } qunlock(seg); } qunlock(space); return size; } struct linux_mmap_args { ulong addr; int len; int prot; int flags; int fd; ulong offset; }; ulong sys_linux_mmap(void *a) { struct linux_mmap_args *p = a; if(pagealign(p->offset) != p->offset) return -EINVAL; return sys_mmap( p->addr, p->len, p->prot, p->flags, p->fd, p->offset / PAGESIZE); } ulong sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff) { Space *space; Seg *seg; Range r; ulong o; int e, n; Area *a; Filemap *f; Ufile *file; trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff); if(pagealign(addr) != addr) return (ulong)-EINVAL; r.base = addr; r.top = addr + pagealign(len); if(r.top <= r.base) return (ulong)-EINVAL; file = nil; if((flags & MAP_ANONYMOUS)==0) if((file = fdgetfile(fd))==nil) return (ulong)-EBADF; space = current->mem; qlock(space); if((a = mapspace(space, r, flags, prot, &e)) == nil){ qunlock(space); putfile(file); return (ulong)e; } seg = a->seg; r = a->addr; if(flags & MAP_ANONYMOUS){ mergearea(a); qunlock(seg); qunlock(space); return r.base; } o = pgoff * PAGESIZE; if(f = seg->freefilemap) seg->freefilemap = f->next; if(f == nil) f = kmalloc(sizeof(Filemap)); f->ref = 1; f->addr = r; f->next = nil; f->path = kstrdup(file->path); f->offset = o; if((f->mode = file->mode) != O_RDONLY){ f->file = getfile(file); } else { f->file = nil; } a->filemap = f; qunlock(seg); qunlock(space); trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top); addr = r.base; while(addr < r.top){ n = preadfile(file, (void*)addr, r.top - addr, o); if(n == 0) break; if(n < 0){ trace("read failed at offset %lux for address %lux failed: %r", o, addr); break; } addr += n; o += n; } putfile(file); return r.base; } int sys_munmap(ulong addr, ulong len) { Space *space; Range r; trace("sys_munmap(%lux, %lux)", addr, len); if(pagealign(addr) != addr) return -EINVAL; r.base = addr; r.top = addr + pagealign(len); if(r.top <= r.base) return -EINVAL; space = current->mem; qlock(space); unmapspace(current->mem, r); qunlock(space); return 0; } ulong sys_brk(ulong bk) { Space *space; ulong a; trace("sys_brk(%lux)", bk); space = current->mem; qlock(space); a = brkspace(space, bk); qunlock(space); return a; } int sys_mprotect(ulong addr, ulong len, int prot) { Space *space; Seg *seg; Area *a, *b; int err; trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot); len = pagealign(len); if(pagealign(addr) != addr) return -EINVAL; if(len == 0) return -EINVAL; err = -ENOMEM; space = current->mem; qlock(space); if(seg = addr2seg(space, addr)){ for(a = addr2area(seg, addr); a!=nil; a=a->next){ if(addr + len <= a->addr.base) break; err = 0; if(a->prot == prot) continue; wakefutexarea(a, a->addr); if(a->addr.base < addr){ b = duparea(a); a->addr.base = addr; b->addr.top = addr; linkarea(seg, b); } if(a->addr.top > addr + len){ b = duparea(a); a->addr.top = addr + len; b->addr.base = addr + len; linkarea(seg, b); } trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot); a->prot = prot; } qunlock(seg); } qunlock(space); return err; } int sys_msync(ulong addr, ulong len, int flags) { Space *space; Range r; trace("sys_msync(%lux, %lux, %x)", addr, len, flags); if(pagealign(addr) != addr) return -EINVAL; r.base = addr; r.top = addr + pagealign(len); if(r.top <= r.base) return -EINVAL; space = current->mem; qlock(space); syncspace(space, r); qunlock(space); return 0; } ulong sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr) { Space *space; int r; trace("sys_mremap(%lux, %lux, %lux, %x, %lux)", addr, oldlen, newlen, flags, newaddr); space = current->mem; qlock(space); r = remapspace(space, addr, oldlen, newlen, newaddr, flags); qunlock(space); return r; } enum { FUTEX_WAIT, FUTEX_WAKE, FUTEX_FD, FUTEX_REQUEUE, FUTEX_CMP_REQUEUE, }; int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3) { Space *space; Seg *seg; Area *a; Futex *fu, *fu2; int err, val2; vlong timeout; trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3); seg = nil; err = -EFAULT; if((space = current->mem) == 0) goto out; qlock(space); if((seg = addr2seg(space, (ulong)addr)) == nil){ qunlock(space); goto out; } qunlock(space); if((a = addr2area(seg, (ulong)addr)) == nil) goto out; for(fu = a->futex; fu; fu = fu->next) if(fu->addr == addr) break; switch(op){ case FUTEX_WAIT: trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr); if(fu == nil){ if(fu = seg->freefutex){ seg->freefutex = fu->next; } else { fu = kmallocz(sizeof(Futex), 1); } fu->ref = 1; fu->addr = addr; if(fu->next = a->futex) fu->next->link = &fu->next; fu->link = &a->futex; a->futex = fu; } else { incref(fu); } err = 0; timeout = 0; if(ptime != nil){ struct linux_timespec *ts = ptime; vlong now; wakeme(1); now = nsec(); if(current->restart->syscall){ timeout = current->restart->futex.timeout; } else { timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec; } if(now < timeout){ current->timeout = timeout; setalarm(timeout); } else { err = -ETIMEDOUT; } } if(err == 0){ if(*addr != val){ err = -EWOULDBLOCK; } else { err = sleepq(fu, seg, 1); } } if(ptime != nil){ current->timeout = 0; wakeme(0); } if(err == -ERESTART) current->restart->futex.timeout = timeout; if(!decref(fu)){ if(fu->link){ if(*fu->link = fu->next) fu->next->link = fu->link; fu->link = nil; fu->next = nil; } fu->next = seg->freefutex; seg->freefutex = fu; } break; case FUTEX_WAKE: trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr); err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0; break; case FUTEX_CMP_REQUEUE: trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr); if(*addr != val3){ err = -EAGAIN; break; case FUTEX_REQUEUE: trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr); } err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0; if(err > 0){ val2 = (int)ptime; /* BUG: fu2 has to be in the same segment as fu */ if(a = addr2area(seg, (ulong)addr2)){ for(fu2 = a->futex; fu2; fu2 = fu2->next){ if(fu2->addr == addr2){ err += requeue(fu, fu2, val2); break; } } } } break; default: err = -ENOSYS; } out: if(seg) qunlock(seg); return err; }