| 1 | #include <u.h> |
| 2 | #include <libc.h> |
| 3 | #include <ureg.h> |
| 4 | #include "dat.h" |
| 5 | #include "fns.h" |
| 6 | #include "linux.h" |
| 7 | |
/* short names for the memory management structures defined below */
typedef struct Area Area;
typedef struct Filemap Filemap;
typedef struct Futex Futex;
typedef struct Range Range;
typedef struct Seg Seg;
typedef struct Space Space;
| 14 | |
/*
 * Segment types, used as index into Space.seg[] and segname[].
 * Keep in order, lowest base address first.
 */
enum {
	SEGDATA = 0,
	SEGPRIVATE,
	SEGSHARED,
	SEGSTACK,
	SEGMAX,		/* number of segment types */
};
| 23 | |
| 24 | static char *segname[SEGMAX] = { "data", "private", "shared", "stack" }; |
| 25 | |
| 26 | struct Range |
| 27 | { |
| 28 | ulong base; |
| 29 | ulong top; |
| 30 | }; |
| 31 | |
| 32 | struct Filemap |
| 33 | { |
| 34 | Range addr; |
| 35 | |
| 36 | Filemap *next; |
| 37 | |
| 38 | char *path; |
| 39 | ulong offset; |
| 40 | int mode; |
| 41 | Ufile *file; |
| 42 | |
| 43 | Ref; |
| 44 | }; |
| 45 | |
| 46 | struct Futex |
| 47 | { |
| 48 | ulong *addr; |
| 49 | |
| 50 | Futex *next; |
| 51 | Futex **link; |
| 52 | |
| 53 | Ref; |
| 54 | Uwaitq; |
| 55 | }; |
| 56 | |
| 57 | struct Area |
| 58 | { |
| 59 | Range addr; |
| 60 | |
| 61 | Area *next; /* next higher area */ |
| 62 | Area *prev; /* previous lower area */ |
| 63 | Seg *seg; /* segment we belong to */ |
| 64 | |
| 65 | int prot; |
| 66 | |
| 67 | Filemap *filemap; |
| 68 | Futex *futex; |
| 69 | }; |
| 70 | |
| 71 | struct Seg |
| 72 | { |
| 73 | Ref; |
| 74 | QLock; |
| 75 | |
| 76 | Range addr; |
| 77 | ulong limit; /* maximum address this segment can grow */ |
| 78 | |
| 79 | Area *areas; /* orderd by address */ |
| 80 | |
| 81 | int type; /* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */ |
| 82 | |
| 83 | Area *freearea; |
| 84 | Filemap *freefilemap; |
| 85 | Futex *freefutex; |
| 86 | }; |
| 87 | |
| 88 | struct Space |
| 89 | { |
| 90 | Ref; |
| 91 | QLock; |
| 92 | |
| 93 | ulong brk; |
| 94 | Seg *seg[SEGMAX]; |
| 95 | }; |
| 96 | |
| 97 | |
| 98 | void* |
| 99 | kmalloc(int size) |
| 100 | { |
| 101 | void *p; |
| 102 | |
| 103 | p = malloc(size); |
| 104 | if(p == nil) |
| 105 | panic("kmalloc: out of memory"); |
| 106 | setmalloctag(p, getcallerpc(&size)); |
| 107 | return p; |
| 108 | } |
| 109 | void* |
| 110 | krealloc(void *ptr, int size) |
| 111 | { |
| 112 | void *p; |
| 113 | |
| 114 | p = realloc(ptr, size); |
| 115 | if(size > 0){ |
| 116 | if(p == nil) |
| 117 | panic("krealloc: out of memory"); |
| 118 | setmalloctag(p, getcallerpc(&ptr)); |
| 119 | } |
| 120 | return p; |
| 121 | } |
| 122 | |
| 123 | void* |
| 124 | kmallocz(int size, int zero) |
| 125 | { |
| 126 | void *p; |
| 127 | |
| 128 | p = mallocz(size, zero); |
| 129 | if(p == nil) |
| 130 | panic("kmallocz: out of memory"); |
| 131 | setmalloctag(p, getcallerpc(&size)); |
| 132 | return p; |
| 133 | } |
| 134 | |
/*
 * Return a kmalloc()ed copy of the string s, tagged with the
 * pc of our caller.  Panics (in kmalloc) when out of memory.
 */
char*
kstrdup(char *s)
{
	char *copy;
	int len;

	len = strlen(s);
	copy = kmalloc(len+1);
	/* s[len] is the terminator, so it is copied along */
	memmove(copy, s, len+1);
	setmalloctag(copy, getcallerpc(&s));
	return copy;
}
| 148 | |
| 149 | char* |
| 150 | ksmprint(char *fmt, ...) |
| 151 | { |
| 152 | va_list args; |
| 153 | char *p; |
| 154 | int n; |
| 155 | |
| 156 | n = 4096; |
| 157 | p = kmalloc(n); |
| 158 | va_start(args, fmt); |
| 159 | n = vsnprint(p, n, fmt, args); |
| 160 | va_end(args); |
| 161 | if((p = realloc(p, n+1)) == nil) |
| 162 | panic("ksmprint: out of memory"); |
| 163 | setmalloctag(p, getcallerpc(&fmt)); |
| 164 | return p; |
| 165 | } |
| 166 | |
| 167 | ulong |
| 168 | pagealign(ulong addr) |
| 169 | { |
| 170 | ulong m; |
| 171 | |
| 172 | m = PAGESIZE-1; |
| 173 | return (addr + m) & ~m; |
| 174 | } |
| 175 | |
| 176 | static void |
| 177 | syncarea(Area *a, Range r) |
| 178 | { |
| 179 | if(a->filemap == nil) |
| 180 | return; |
| 181 | if(a->filemap->file == nil) |
| 182 | return; |
| 183 | if((a->prot & PROT_WRITE) == 0) |
| 184 | return; |
| 185 | |
| 186 | if(r.base < a->addr.base) |
| 187 | r.base = a->addr.base; |
| 188 | if(r.top > a->addr.top) |
| 189 | r.top = a->addr.top; |
| 190 | if(r.base < a->filemap->addr.base) |
| 191 | r.base = a->filemap->addr.base; |
| 192 | if(r.top > a->filemap->addr.top) |
| 193 | r.top = a->filemap->addr.top; |
| 194 | pwritefile(a->filemap->file, (void*)r.base, r.top - r.base, |
| 195 | (r.base - a->filemap->addr.base) + a->filemap->offset); |
| 196 | } |
| 197 | |
| 198 | static void |
| 199 | linkarea(Seg *seg, Area *a) |
| 200 | { |
| 201 | Area *p; |
| 202 | |
| 203 | a->next = nil; |
| 204 | a->prev = nil; |
| 205 | a->seg = seg; |
| 206 | |
| 207 | for(p = seg->areas; p && p->next; p=p->next) |
| 208 | if(p->addr.base > a->addr.base) |
| 209 | break; |
| 210 | if(p != nil){ |
| 211 | if(p->addr.base > a->addr.base){ |
| 212 | a->next = p; |
| 213 | if(a->prev = p->prev) |
| 214 | a->prev->next = a; |
| 215 | p->prev = a; |
| 216 | } else { |
| 217 | a->prev = p; |
| 218 | p->next = a; |
| 219 | } |
| 220 | } |
| 221 | if(a->prev == nil) |
| 222 | seg->areas = a; |
| 223 | } |
| 224 | |
| 225 | static Area * |
| 226 | duparea(Area *a) |
| 227 | { |
| 228 | Area *r; |
| 229 | |
| 230 | if(r = a->seg->freearea){ |
| 231 | a->seg->freearea = r->next; |
| 232 | } else { |
| 233 | r = kmalloc(sizeof(Area)); |
| 234 | } |
| 235 | r->addr = a->addr; |
| 236 | r->next = nil; |
| 237 | r->prev = nil; |
| 238 | r->seg = nil; |
| 239 | r->prot = a->prot; |
| 240 | if(r->filemap = a->filemap) |
| 241 | incref(r->filemap); |
| 242 | r->futex = nil; |
| 243 | return r; |
| 244 | } |
| 245 | |
| 246 | static void |
| 247 | freearea(Area *a) |
| 248 | { |
| 249 | Filemap *f; |
| 250 | Futex *x; |
| 251 | Seg *seg; |
| 252 | |
| 253 | seg = a->seg; |
| 254 | if(f = a->filemap){ |
| 255 | syncarea(a, a->addr); |
| 256 | a->filemap = nil; |
| 257 | if(!decref(f)){ |
| 258 | free(f->path); |
| 259 | putfile(f->file); |
| 260 | f->next = seg->freefilemap; |
| 261 | seg->freefilemap = f; |
| 262 | } |
| 263 | } |
| 264 | while(x = a->futex){ |
| 265 | if(a->futex = x->next) |
| 266 | x->next->link = &a->futex; |
| 267 | x->link = nil; |
| 268 | x->next = nil; |
| 269 | wakeq(x, MAXPROC); |
| 270 | } |
| 271 | if(a->prev == nil){ |
| 272 | if(seg->areas = a->next) |
| 273 | a->next->prev = nil; |
| 274 | } else { |
| 275 | if(a->prev->next = a->next) |
| 276 | a->next->prev = a->prev; |
| 277 | } |
| 278 | |
| 279 | a->next = seg->freearea; |
| 280 | seg->freearea = a; |
| 281 | } |
| 282 | |
| 283 | static Seg * |
| 284 | allocseg(int type, Range addr, ulong limit, int attr, char *class) |
| 285 | { |
| 286 | Seg *seg; |
| 287 | |
| 288 | if(class){ |
| 289 | trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top); |
| 290 | if(segattach(attr, class, (void*)addr.base, addr.top - addr.base) != (void*)addr.base) |
| 291 | panic("allocseg: segattach %s segment: %r", segname[type]); |
| 292 | } |
| 293 | |
| 294 | seg = kmallocz(sizeof(Seg), 1); |
| 295 | seg->addr = addr; |
| 296 | seg->limit = limit; |
| 297 | seg->type = type; |
| 298 | seg->ref = 1; |
| 299 | |
| 300 | return seg; |
| 301 | } |
| 302 | |
| 303 | static Seg * |
| 304 | dupseg(Seg *old, int copy) |
| 305 | { |
| 306 | Seg *new; |
| 307 | Area *a, *p, *x; |
| 308 | |
| 309 | if(old == nil) |
| 310 | return nil; |
| 311 | if(!copy){ |
| 312 | incref(old); |
| 313 | return old; |
| 314 | } |
| 315 | new = allocseg(old->type, old->addr, old->limit, 0, nil); |
| 316 | p = nil; |
| 317 | for(a=old->areas; a; a=a->next){ |
| 318 | x = duparea(a); |
| 319 | x->seg = new; |
| 320 | if(x->prev = p){ |
| 321 | p->next = x; |
| 322 | } else { |
| 323 | new->areas = x; |
| 324 | } |
| 325 | p = x; |
| 326 | } |
| 327 | |
| 328 | return new; |
| 329 | } |
| 330 | |
| 331 | static Space * |
| 332 | getspace(Space *old, int copy) |
| 333 | { |
| 334 | Space *new; |
| 335 | Seg *seg; |
| 336 | int t; |
| 337 | |
| 338 | if(!copy){ |
| 339 | incref(old); |
| 340 | return old; |
| 341 | } |
| 342 | |
| 343 | new = kmallocz(sizeof(Space), 1); |
| 344 | new->ref = 1; |
| 345 | |
| 346 | qlock(old); |
| 347 | for(t=0; t<SEGMAX; t++){ |
| 348 | if(seg = old->seg[t]){ |
| 349 | qlock(seg); |
| 350 | new->seg[t] = dupseg(seg, t != SEGSHARED); |
| 351 | qunlock(seg); |
| 352 | } |
| 353 | } |
| 354 | new->brk = old->brk; |
| 355 | qunlock(old); |
| 356 | |
| 357 | return new; |
| 358 | } |
| 359 | |
| 360 | static void |
| 361 | putspace(Space *space) |
| 362 | { |
| 363 | Seg *seg; |
| 364 | int t; |
| 365 | Area *a; |
| 366 | Filemap *f; |
| 367 | Futex *x; |
| 368 | void *addr; |
| 369 | |
| 370 | if(decref(space)) |
| 371 | return; |
| 372 | for(t=0; t<SEGMAX; t++){ |
| 373 | if(seg = space->seg[t]){ |
| 374 | addr = (void*)seg->addr.base; |
| 375 | if(!decref(seg)){ |
| 376 | qlock(seg); |
| 377 | /* mark all areas as free */ |
| 378 | while(a = seg->areas) |
| 379 | freearea(a); |
| 380 | |
| 381 | /* clear the free lists */ |
| 382 | while(a = seg->freearea){ |
| 383 | seg->freearea = a->next; |
| 384 | free(a); |
| 385 | } |
| 386 | while(f = seg->freefilemap){ |
| 387 | seg->freefilemap = f->next; |
| 388 | free(f); |
| 389 | } |
| 390 | while(x = seg->freefutex){ |
| 391 | seg->freefutex = x->next; |
| 392 | free(x); |
| 393 | } |
| 394 | free(seg); |
| 395 | } |
| 396 | if(segdetach(addr) < 0) |
| 397 | panic("putspace: segdetach %s segment: %r", segname[t]); |
| 398 | } |
| 399 | } |
| 400 | free(space); |
| 401 | } |
| 402 | |
| 403 | static int |
| 404 | canmerge(Area *a, Area *b) |
| 405 | { |
| 406 | return a->filemap==nil && |
| 407 | a->futex==nil && |
| 408 | b->filemap==nil && |
| 409 | b->futex==nil && |
| 410 | a->prot == b->prot; |
| 411 | } |
| 412 | |
| 413 | static void |
| 414 | mergearea(Area *a) |
| 415 | { |
| 416 | if(a->prev && a->prev->addr.top == a->addr.base && canmerge(a->prev, a)){ |
| 417 | a->addr.base = a->prev->addr.base; |
| 418 | freearea(a->prev); |
| 419 | } |
| 420 | if(a->next && a->next->addr.base == a->addr.top && canmerge(a->next, a)){ |
| 421 | a->addr.top = a->next->addr.top; |
| 422 | freearea(a->next); |
| 423 | } |
| 424 | } |
| 425 | |
| 426 | static int |
| 427 | findhole(Seg *seg, Range *r, int fixed) |
| 428 | { |
| 429 | Range h; |
| 430 | Area *a; |
| 431 | ulong m; |
| 432 | ulong z; |
| 433 | ulong hz; |
| 434 | |
| 435 | z = r->top - r->base; |
| 436 | m = ~0; |
| 437 | h.base = seg->addr.base; |
| 438 | a = seg->areas; |
| 439 | for(;;) { |
| 440 | if((h.top = a ? a->addr.base : seg->addr.top) > h.base) { |
| 441 | if(fixed){ |
| 442 | if(h.base > r->base) |
| 443 | break; |
| 444 | if((r->base >= h.base) && (r->top <= h.top)) |
| 445 | goto found; |
| 446 | } else { |
| 447 | hz = h.top - h.base; |
| 448 | if((hz >= z) && (hz < m)) { |
| 449 | r->base = h.top - z; |
| 450 | r->top = h.top; |
| 451 | if((m = hz) == z) |
| 452 | goto found; |
| 453 | } |
| 454 | } |
| 455 | } |
| 456 | if(a == nil) |
| 457 | break; |
| 458 | h.base = a->addr.top; |
| 459 | a = a->next; |
| 460 | } |
| 461 | if(!fixed && (m != ~0)) |
| 462 | goto found; |
| 463 | return 0; |
| 464 | |
| 465 | found: |
| 466 | return 1; |
| 467 | } |
| 468 | |
| 469 | /* wake up all futexes in range and unlink from area */ |
| 470 | static void |
| 471 | wakefutexarea(Area *a, Range addr) |
| 472 | { |
| 473 | Futex *fu, *x; |
| 474 | |
| 475 | for(fu = a->futex; fu; fu = x){ |
| 476 | x = fu->next; |
| 477 | if((ulong)fu->addr >= addr.base && (ulong)fu->addr < addr.top){ |
| 478 | if(*fu->link = x) |
| 479 | x->link = fu->link; |
| 480 | fu->link = nil; |
| 481 | fu->next = nil; |
| 482 | |
| 483 | trace("wakefutexarea: fu=%p addr=%p", fu, fu->addr); |
| 484 | wakeq(fu, MAXPROC); |
| 485 | } |
| 486 | } |
| 487 | } |
| 488 | |
| 489 | static void |
| 490 | makehole(Seg *seg, Range r) |
| 491 | { |
| 492 | Area *a, *b, *x; |
| 493 | Range f; |
| 494 | |
| 495 | for(a = seg->areas; a; a = x){ |
| 496 | x = a->next; |
| 497 | |
| 498 | if(a->addr.top <= r.base) |
| 499 | continue; |
| 500 | if(a->addr.base >= r.top) |
| 501 | break; |
| 502 | |
| 503 | f = r; |
| 504 | if(f.base < a->addr.base) |
| 505 | f.base = a->addr.base; |
| 506 | if(f.top > a->addr.top) |
| 507 | f.top = a->addr.top; |
| 508 | |
| 509 | wakefutexarea(a, f); |
| 510 | if(f.base == a->addr.base){ |
| 511 | if(f.top == a->addr.top){ |
| 512 | freearea(a); |
| 513 | } else { |
| 514 | a->addr.base = f.top; |
| 515 | } |
| 516 | } else if(f.top == a->addr.top){ |
| 517 | a->addr.top = f.base; |
| 518 | } else { |
| 519 | b = duparea(a); |
| 520 | b->addr.base = f.top; |
| 521 | |
| 522 | a->addr.top = f.base; |
| 523 | linkarea(seg, b); |
| 524 | } |
| 525 | |
| 526 | if(segfree((void*)f.base, f.top - f.base) < 0) |
| 527 | panic("makehole: segfree %s segment: %r", segname[seg->type]); |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | static Seg* |
| 532 | addr2seg(Space *space, ulong addr) |
| 533 | { |
| 534 | Seg *seg; |
| 535 | int t; |
| 536 | |
| 537 | for(t=0; t<SEGMAX; t++){ |
| 538 | if((seg = space->seg[t]) == nil) |
| 539 | continue; |
| 540 | qlock(seg); |
| 541 | if((addr >= seg->addr.base) && (addr < seg->addr.top)) |
| 542 | return seg; |
| 543 | qunlock(seg); |
| 544 | } |
| 545 | |
| 546 | return nil; |
| 547 | } |
| 548 | |
| 549 | static Area* |
| 550 | addr2area(Seg *seg, ulong addr) |
| 551 | { |
| 552 | Area *a; |
| 553 | |
| 554 | for(a=seg->areas; a; a=a->next) |
| 555 | if((addr >= a->addr.base) && (addr < a->addr.top)) |
| 556 | return a; |
| 557 | return nil; |
| 558 | } |
| 559 | |
| 560 | int |
| 561 | okaddr(void *ptr, int len, int write) |
| 562 | { |
| 563 | ulong addr; |
| 564 | Space *space; |
| 565 | Seg *seg; |
| 566 | Area *a; |
| 567 | int ok; |
| 568 | |
| 569 | ok = 0; |
| 570 | addr = (ulong)ptr; |
| 571 | if(addr < PAGESIZE) |
| 572 | goto out; |
| 573 | if(space = current->mem){ |
| 574 | qlock(space); |
| 575 | if(seg = addr2seg(space, addr)){ |
| 576 | while(a = addr2area(seg, addr)){ |
| 577 | if(write){ |
| 578 | if((a->prot & PROT_WRITE) == 0) |
| 579 | break; |
| 580 | } else { |
| 581 | if((a->prot & PROT_READ) == 0) |
| 582 | break; |
| 583 | } |
| 584 | if((ulong)ptr + len <= a->addr.top){ |
| 585 | ok = 1; |
| 586 | break; |
| 587 | } |
| 588 | addr = a->addr.top; |
| 589 | } |
| 590 | qunlock(seg); |
| 591 | } |
| 592 | qunlock(space); |
| 593 | } |
| 594 | out: |
| 595 | trace("okaddr(%lux-%lux, %d) -> %d", addr, addr+len, write, ok); |
| 596 | return ok; |
| 597 | } |
| 598 | |
| 599 | static void |
| 600 | unmapspace(Space *space, Range r) |
| 601 | { |
| 602 | Seg *seg; |
| 603 | int t; |
| 604 | |
| 605 | for(t=0; t<SEGMAX; t++){ |
| 606 | if((seg = space->seg[t]) == nil) |
| 607 | continue; |
| 608 | qlock(seg); |
| 609 | if(seg->addr.base >= r.top){ |
| 610 | qunlock(seg); |
| 611 | break; |
| 612 | } |
| 613 | if(seg->addr.top > r.base) |
| 614 | makehole(seg, r); |
| 615 | qunlock(seg); |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | static Area* |
| 620 | mapspace(Space *space, Range r, int flags, int prot, int *perr) |
| 621 | { |
| 622 | Seg *seg; |
| 623 | Area *a; |
| 624 | Range f; |
| 625 | int t; |
| 626 | |
| 627 | if(flags & MAP_PRIVATE){ |
| 628 | if(r.base >= space->seg[SEGSTACK]->addr.base){ |
| 629 | t = SEGSTACK; |
| 630 | } else if(r.base >= space->seg[SEGDATA]->addr.base && |
| 631 | r.base < space->seg[SEGDATA]->limit){ |
| 632 | t = SEGDATA; |
| 633 | } else { |
| 634 | t = SEGPRIVATE; |
| 635 | } |
| 636 | } else { |
| 637 | t = SEGSHARED; |
| 638 | } |
| 639 | |
| 640 | if((seg = space->seg[t]) == nil) |
| 641 | goto nomem; |
| 642 | |
| 643 | qlock(seg); |
| 644 | if((r.base >= seg->addr.base) && (r.top <= seg->limit)){ |
| 645 | if(r.base >= seg->addr.top) |
| 646 | goto addrok; |
| 647 | |
| 648 | f = r; |
| 649 | if(f.top > seg->addr.top) |
| 650 | f.top = seg->addr.top; |
| 651 | if(findhole(seg, &f, 1)) |
| 652 | goto addrok; |
| 653 | if(flags & MAP_FIXED){ |
| 654 | if(seg->type == SEGSHARED){ |
| 655 | trace("mapspace(): cant make hole %lux-%lux in shared segment", |
| 656 | f.base, f.top); |
| 657 | goto nomem; |
| 658 | } |
| 659 | makehole(seg, f); |
| 660 | goto addrok; |
| 661 | } |
| 662 | } |
| 663 | |
| 664 | if(flags & MAP_FIXED){ |
| 665 | trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment", |
| 666 | r.base, r.top, segname[seg->type]); |
| 667 | goto nomem; |
| 668 | } |
| 669 | |
| 670 | if(findhole(seg, &r, 0)) |
| 671 | goto addrok; |
| 672 | |
| 673 | r.top -= r.base; |
| 674 | r.base = seg->addr.top; |
| 675 | r.top += r.base; |
| 676 | |
| 677 | addrok: |
| 678 | trace("mapspace(): addr %lux-%lux", r.base, r.top); |
| 679 | |
| 680 | if(r.top > seg->addr.top){ |
| 681 | if(r.top > seg->limit){ |
| 682 | trace("mapspace(): area top %lux over %s segment limit %lux", |
| 683 | r.top, segname[seg->type], seg->limit); |
| 684 | goto nomem; |
| 685 | } |
| 686 | trace("mapspace(): segbrk %s segment %lux-%lux -> %lux", |
| 687 | segname[seg->type], seg->addr.base, seg->addr.top, r.top); |
| 688 | if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){ |
| 689 | trace("mapspace(): segbrk failed: %r"); |
| 690 | goto nomem; |
| 691 | } |
| 692 | seg->addr.top = r.top; |
| 693 | } |
| 694 | |
| 695 | if(a = seg->freearea){ |
| 696 | seg->freearea = a->next; |
| 697 | } else { |
| 698 | a = kmalloc(sizeof(Area)); |
| 699 | } |
| 700 | a->addr = r; |
| 701 | a->prot = prot; |
| 702 | a->filemap = nil; |
| 703 | a->futex = nil; |
| 704 | |
| 705 | linkarea(seg, a); |
| 706 | |
| 707 | /* keep seg locked */ |
| 708 | return a; |
| 709 | |
| 710 | nomem: |
| 711 | if(seg != nil) |
| 712 | qunlock(seg); |
| 713 | if(perr) *perr = -ENOMEM; |
| 714 | return nil; |
| 715 | } |
| 716 | |
| 717 | static ulong |
| 718 | brkspace(Space *space, ulong bk) |
| 719 | { |
| 720 | Seg *seg; |
| 721 | Area *a; |
| 722 | ulong old, new; |
| 723 | Range r; |
| 724 | |
| 725 | if((seg = space->seg[SEGDATA]) == nil) |
| 726 | goto out; |
| 727 | |
| 728 | qlock(seg); |
| 729 | if(space->brk < seg->addr.base) |
| 730 | space->brk = seg->addr.top; |
| 731 | |
| 732 | if(bk < seg->addr.base) |
| 733 | goto out; |
| 734 | |
| 735 | old = pagealign(space->brk); |
| 736 | new = pagealign(bk); |
| 737 | |
| 738 | if(old != new){ |
| 739 | if(bk < space->brk){ |
| 740 | r.base = new; |
| 741 | r.top = old; |
| 742 | qunlock(seg); |
| 743 | seg = nil; |
| 744 | |
| 745 | unmapspace(space, r); |
| 746 | } else { |
| 747 | r.base = old; |
| 748 | r.top = new; |
| 749 | |
| 750 | trace("brkspace(): new mapping %lux-%lux", r.base, r.top); |
| 751 | for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){ |
| 752 | if(a->addr.top <= r.base) |
| 753 | continue; |
| 754 | if(a->addr.base > r.top + PAGESIZE) |
| 755 | break; |
| 756 | |
| 757 | trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top); |
| 758 | goto out; |
| 759 | } |
| 760 | qunlock(seg); |
| 761 | seg = nil; |
| 762 | |
| 763 | a = mapspace(space, r, |
| 764 | MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, |
| 765 | PROT_READ|PROT_WRITE|PROT_EXEC, nil); |
| 766 | |
| 767 | if(a == nil) |
| 768 | goto out; |
| 769 | |
| 770 | seg = a->seg; |
| 771 | mergearea(a); |
| 772 | } |
| 773 | } |
| 774 | |
| 775 | if(space->brk != bk){ |
| 776 | trace("brkspace: set new brk %lux", bk); |
| 777 | space->brk = bk; |
| 778 | } |
| 779 | |
| 780 | out: |
| 781 | if(seg != nil) |
| 782 | qunlock(seg); |
| 783 | |
| 784 | return space->brk; |
| 785 | } |
| 786 | |
| 787 | static ulong |
| 788 | remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags) |
| 789 | { |
| 790 | Area *a; |
| 791 | Seg *seg; |
| 792 | int move; |
| 793 | Range r; |
| 794 | |
| 795 | if(pagealign(addr) != addr) |
| 796 | return -EINVAL; |
| 797 | |
| 798 | oldlen = pagealign(oldlen); |
| 799 | newlen = pagealign(newlen); |
| 800 | |
| 801 | if((addr + oldlen) < addr) |
| 802 | return -EINVAL; |
| 803 | if((addr + newlen) <= addr) |
| 804 | return -EINVAL; |
| 805 | |
| 806 | move = 0; |
| 807 | if(flags & MREMAP_FIXED){ |
| 808 | if(pagealign(newaddr) != newaddr) |
| 809 | return -EINVAL; |
| 810 | if((flags & MREMAP_MAYMOVE) == 0) |
| 811 | return -EINVAL; |
| 812 | if((newaddr <= addr) && ((newaddr+newlen) > addr)) |
| 813 | return -EINVAL; |
| 814 | if((addr <= newaddr) && ((addr+oldlen) > newaddr)) |
| 815 | return -EINVAL; |
| 816 | move = (newaddr != addr); |
| 817 | } |
| 818 | |
| 819 | if(newlen < oldlen){ |
| 820 | r.base = addr + newlen; |
| 821 | r.top = addr + oldlen; |
| 822 | |
| 823 | unmapspace(space, r); |
| 824 | |
| 825 | oldlen = newlen; |
| 826 | } |
| 827 | |
| 828 | if((newlen == oldlen) && !move) |
| 829 | return addr; |
| 830 | |
| 831 | if((seg = addr2seg(space, addr)) == nil) |
| 832 | return -EFAULT; |
| 833 | |
| 834 | if((a = addr2area(seg, addr)) == nil) |
| 835 | goto fault; |
| 836 | if(a->addr.top < (addr + oldlen)) |
| 837 | goto fault; |
| 838 | |
| 839 | if(move) |
| 840 | goto domove; |
| 841 | if((addr + oldlen) != a->addr.top) |
| 842 | goto domove; |
| 843 | if((addr + newlen) > seg->limit) |
| 844 | goto domove; |
| 845 | if(a->next != nil) |
| 846 | if((addr + newlen) > a->next->addr.base) |
| 847 | goto domove; |
| 848 | |
| 849 | if((addr + newlen) > seg->addr.top){ |
| 850 | trace("remapspace(): segbrk %s segment %lux-%lux -> %lux", |
| 851 | segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen)); |
| 852 | if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){ |
| 853 | trace("remapspace(): segbrk: %r"); |
| 854 | goto domove; |
| 855 | } |
| 856 | |
| 857 | seg->addr.top = (addr + newlen); |
| 858 | } |
| 859 | a->addr.top = (addr + newlen); |
| 860 | mergearea(a); |
| 861 | qunlock(seg); |
| 862 | |
| 863 | return addr; |
| 864 | |
| 865 | domove: |
| 866 | trace("remapspace(): domove not implemented"); |
| 867 | if(seg != nil) |
| 868 | qunlock(seg); |
| 869 | return -ENOMEM; |
| 870 | |
| 871 | fault: |
| 872 | if(seg != nil) |
| 873 | qunlock(seg); |
| 874 | return -EFAULT; |
| 875 | } |
| 876 | |
| 877 | static void |
| 878 | syncspace(Space *space, Range r) |
| 879 | { |
| 880 | Seg *seg; |
| 881 | Area *a; |
| 882 | |
| 883 | if(seg = addr2seg(space, r.base)){ |
| 884 | for(a = addr2area(seg, r.base); a; a=a->next){ |
| 885 | if(r.base >= a->addr.top) |
| 886 | break; |
| 887 | syncarea(a, r); |
| 888 | } |
| 889 | qunlock(seg); |
| 890 | } |
| 891 | } |
| 892 | |
| 893 | void* |
| 894 | mapstack(int size) |
| 895 | { |
| 896 | Space *space; |
| 897 | ulong a; |
| 898 | |
| 899 | space = current->mem; |
| 900 | a = space->seg[SEGSTACK]->addr.top; |
| 901 | size = pagealign(size); |
| 902 | a = sys_mmap(a - size, size, |
| 903 | PROT_READ|PROT_WRITE, |
| 904 | MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); |
| 905 | if(a == 0) |
| 906 | return nil; |
| 907 | |
| 908 | return (void*)(a + size); |
| 909 | } |
| 910 | |
| 911 | void |
| 912 | mapdata(ulong base) |
| 913 | { |
| 914 | Space *space; |
| 915 | Range r; |
| 916 | ulong top; |
| 917 | int t; |
| 918 | |
| 919 | space = current->mem; |
| 920 | base = pagealign(base); |
| 921 | top = space->seg[SEGSTACK]->addr.base - PAGESIZE; |
| 922 | |
| 923 | for(t=0; t<SEGMAX; t++){ |
| 924 | if(space->seg[t] == nil){ |
| 925 | switch(t){ |
| 926 | case SEGDATA: |
| 927 | r.base = base; |
| 928 | break; |
| 929 | case SEGPRIVATE: |
| 930 | r.base = base + 0x10000000; |
| 931 | break; |
| 932 | case SEGSHARED: |
| 933 | r.base = top - 0x10000000; |
| 934 | break; |
| 935 | } |
| 936 | r.top = r.base + PAGESIZE; |
| 937 | space->seg[t] = allocseg(t, r, r.top, 0, (t == SEGSHARED) ? "shared" : "memory"); |
| 938 | } |
| 939 | if(t > 0 && space->seg[t-1]) |
| 940 | space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE; |
| 941 | } |
| 942 | } |
| 943 | |
| 944 | /* |
| 945 | * unmapuserspace is called from kprocfork to get rid of |
| 946 | * the linux memory segments used by the calling process |
| 947 | * before current is set to zero. we just segdetach() all that |
| 948 | * segments but keep the data structures valid for the calling |
| 949 | * (linux) process. |
| 950 | */ |
| 951 | void |
| 952 | unmapuserspace(void) |
| 953 | { |
| 954 | Space *space; |
| 955 | Seg *seg; |
| 956 | int t; |
| 957 | |
| 958 | space = current->mem; |
| 959 | qlock(space); |
| 960 | for(t=0; t<SEGMAX; t++){ |
| 961 | if((seg = space->seg[t]) == nil) |
| 962 | continue; |
| 963 | if(segdetach((void*)seg->addr.base) < 0) |
| 964 | panic("unmapuserspace: segdetach %s segment: %r", segname[seg->type]); |
| 965 | } |
| 966 | qunlock(space); |
| 967 | } |
| 968 | |
| 969 | /* hack: |
| 970 | * we write segment out into a file, detach it and reattach |
| 971 | * a new one and reading contents back. i'm surprised that |
| 972 | * this even works seamless with the Plan9 Bss! :-) |
| 973 | */ |
| 974 | static void |
| 975 | convertseg(Range r, ulong attr, char *class) |
| 976 | { |
| 977 | char name[64]; |
| 978 | ulong p; |
| 979 | int n; |
| 980 | int fd; |
| 981 | ulong len; |
| 982 | |
| 983 | snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid()); |
| 984 | fd = create(name, ORDWR|ORCLOSE, 0600); |
| 985 | if(fd < 0) |
| 986 | panic("convertseg: cant create %s: %r", name); |
| 987 | |
| 988 | len = r.top - r.base; |
| 989 | |
| 990 | if(len > 0){ |
| 991 | n = write(fd, (void*)r.base, len); |
| 992 | if(n != len) |
| 993 | panic("convertseg: write: %r"); |
| 994 | } |
| 995 | |
| 996 | /* copy string to stack because its memory gets detached :-) */ |
| 997 | strncpy(name, class, sizeof(name)); |
| 998 | |
| 999 | trace("detaching %lux-%lux", r.base, r.top); |
| 1000 | |
| 1001 | /* point of no return */ |
| 1002 | if(segdetach((void*)r.base) < 0) |
| 1003 | panic("convertseg: segdetach: %r"); |
| 1004 | if(segattach(attr, name, (void*)r.base, len) != (void*)r.base) |
| 1005 | *((int*)0) = 0; |
| 1006 | |
| 1007 | p = 0; |
| 1008 | while(p < len) { |
| 1009 | /* |
| 1010 | * we use pread directly to avoid hitting profiling code until |
| 1011 | * data segment is read back again. pread is unprofiled syscall |
| 1012 | * stub. |
| 1013 | */ |
| 1014 | n = pread(fd, (void*)(r.base + p), len - p, (vlong)p); |
| 1015 | if(n <= 0) |
| 1016 | *((int*)0) = 0; |
| 1017 | p += n; |
| 1018 | } |
| 1019 | |
| 1020 | /* anything normal again */ |
| 1021 | trace("segment %lux-%lux reattached as %s", r.base, r.top, class); |
| 1022 | |
| 1023 | close(fd); |
| 1024 | } |
| 1025 | |
| 1026 | void initmem(void) |
| 1027 | { |
| 1028 | Space *space; |
| 1029 | Range r, x; |
| 1030 | char buf[80]; |
| 1031 | int fd; |
| 1032 | int n; |
| 1033 | |
| 1034 | static int firsttime = 1; |
| 1035 | |
| 1036 | space = kmallocz(sizeof(Space), 1); |
| 1037 | space->ref = 1; |
| 1038 | |
| 1039 | snprint(buf, sizeof(buf), "/proc/%d/segment", getpid()); |
| 1040 | if((fd = open(buf, OREAD)) < 0) |
| 1041 | panic("initspace: cant open %s: %r", buf); |
| 1042 | |
| 1043 | n = 10 + 9 + 9 + 4 + 1; |
| 1044 | x.base = x.top = 0; |
| 1045 | while(readn(fd, buf, n)==n){ |
| 1046 | char *name; |
| 1047 | |
| 1048 | buf[8] = 0; |
| 1049 | buf[18] = 0; |
| 1050 | buf[28] = 0; |
| 1051 | buf[33] = 0; |
| 1052 | |
| 1053 | name = &buf[0]; |
| 1054 | r.base = strtoul(&buf[9], nil, 16); |
| 1055 | r.top = strtoul(&buf[19], nil, 16); |
| 1056 | |
| 1057 | trace("initspace(): %s %lux-%lux", name, r.base, r.top); |
| 1058 | |
| 1059 | if(firsttime){ |
| 1060 | /* |
| 1061 | * convert Plan9 data+bss segments into shared segments so |
| 1062 | * that the memory of emulator data structures gets shared across |
| 1063 | * all processes. This only happens if initspace() is called the first time. |
| 1064 | */ |
| 1065 | if(strstr(name, "Data")==name) |
| 1066 | convertseg(r, 0, "shared"); |
| 1067 | if(strstr(name, "Bss")==name) |
| 1068 | convertseg(r, 0, "shared"); |
| 1069 | } |
| 1070 | |
| 1071 | if(strstr(name, "Stack")==name){ |
| 1072 | x.top = r.base - PAGESIZE; |
| 1073 | x.base = x.top - pagealign((MAXPROC / 4) * USTACK); |
| 1074 | |
| 1075 | if(!firsttime) |
| 1076 | break; |
| 1077 | } |
| 1078 | } |
| 1079 | close(fd); |
| 1080 | firsttime = 0; |
| 1081 | |
| 1082 | /* allocate the linux stack */ |
| 1083 | space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory"); |
| 1084 | |
| 1085 | current->mem = space; |
| 1086 | } |
| 1087 | |
| 1088 | void exitmem(void) |
| 1089 | { |
| 1090 | Space *space; |
| 1091 | |
| 1092 | if(space = current->mem){ |
| 1093 | current->mem = nil; |
| 1094 | putspace(space); |
| 1095 | } |
| 1096 | } |
| 1097 | |
| 1098 | void clonemem(Uproc *new, int copy) |
| 1099 | { |
| 1100 | Space *space; |
| 1101 | |
| 1102 | if((space = current->mem) == nil){ |
| 1103 | new->mem = nil; |
| 1104 | return; |
| 1105 | } |
| 1106 | new->mem = getspace(space, copy); |
| 1107 | } |
| 1108 | |
| 1109 | ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe) |
| 1110 | { |
| 1111 | Space *space; |
| 1112 | ulong size, z; |
| 1113 | int i; |
| 1114 | |
| 1115 | if(pdat) *pdat = 0; |
| 1116 | if(plib) *plib = 0; |
| 1117 | if(pshr) *pshr = 0; |
| 1118 | if(pstk) *pstk = 0; |
| 1119 | if(pexe) *pexe = 0; |
| 1120 | |
| 1121 | if((space = proc->mem) == nil) |
| 1122 | return 0; |
| 1123 | |
| 1124 | size = 0; |
| 1125 | qlock(space); |
| 1126 | for(i=0; i<SEGMAX; i++){ |
| 1127 | Area *a; |
| 1128 | Seg *seg; |
| 1129 | if((seg = space->seg[i]) == nil) |
| 1130 | continue; |
| 1131 | qlock(seg); |
| 1132 | for(a = seg->areas; a; a = a->next){ |
| 1133 | z = a->addr.top - a->addr.base; |
| 1134 | switch(i){ |
| 1135 | case SEGDATA: |
| 1136 | if(pdat) |
| 1137 | *pdat += z; |
| 1138 | case SEGPRIVATE: |
| 1139 | if(plib) |
| 1140 | *plib += z; |
| 1141 | break; |
| 1142 | case SEGSHARED: |
| 1143 | if(pshr) |
| 1144 | *pshr += z; |
| 1145 | break; |
| 1146 | case SEGSTACK: |
| 1147 | if(pstk) |
| 1148 | *pstk += z; |
| 1149 | break; |
| 1150 | } |
| 1151 | if(pexe && (a->prot & PROT_EXEC)) |
| 1152 | *pexe += z; |
| 1153 | size += z; |
| 1154 | } |
| 1155 | qunlock(seg); |
| 1156 | } |
| 1157 | qunlock(space); |
| 1158 | |
| 1159 | return size; |
| 1160 | } |
| 1161 | |
| 1162 | struct linux_mmap_args { |
| 1163 | ulong addr; |
| 1164 | int len; |
| 1165 | int prot; |
| 1166 | int flags; |
| 1167 | int fd; |
| 1168 | ulong offset; |
| 1169 | }; |
| 1170 | |
| 1171 | ulong |
| 1172 | sys_linux_mmap(void *a) |
| 1173 | { |
| 1174 | struct linux_mmap_args *p = a; |
| 1175 | |
| 1176 | if(pagealign(p->offset) != p->offset) |
| 1177 | return -EINVAL; |
| 1178 | |
| 1179 | return sys_mmap( |
| 1180 | p->addr, |
| 1181 | p->len, |
| 1182 | p->prot, |
| 1183 | p->flags, |
| 1184 | p->fd, |
| 1185 | p->offset / PAGESIZE); |
| 1186 | } |
| 1187 | |
| 1188 | ulong |
| 1189 | sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff) |
| 1190 | { |
| 1191 | Space *space; |
| 1192 | Seg *seg; |
| 1193 | Range r; |
| 1194 | ulong o; |
| 1195 | int e, n; |
| 1196 | Area *a; |
| 1197 | Filemap *f; |
| 1198 | Ufile *file; |
| 1199 | |
| 1200 | trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff); |
| 1201 | |
| 1202 | if(pagealign(addr) != addr) |
| 1203 | return (ulong)-EINVAL; |
| 1204 | |
| 1205 | r.base = addr; |
| 1206 | r.top = addr + pagealign(len); |
| 1207 | if(r.top <= r.base) |
| 1208 | return (ulong)-EINVAL; |
| 1209 | |
| 1210 | file = nil; |
| 1211 | if((flags & MAP_ANONYMOUS)==0) |
| 1212 | if((file = fdgetfile(fd))==nil) |
| 1213 | return (ulong)-EBADF; |
| 1214 | |
| 1215 | space = current->mem; |
| 1216 | qlock(space); |
| 1217 | if((a = mapspace(space, r, flags, prot, &e)) == nil){ |
| 1218 | qunlock(space); |
| 1219 | putfile(file); |
| 1220 | return (ulong)e; |
| 1221 | } |
| 1222 | |
| 1223 | seg = a->seg; |
| 1224 | r = a->addr; |
| 1225 | |
| 1226 | if(flags & MAP_ANONYMOUS){ |
| 1227 | mergearea(a); |
| 1228 | qunlock(seg); |
| 1229 | qunlock(space); |
| 1230 | |
| 1231 | return r.base; |
| 1232 | } |
| 1233 | |
| 1234 | o = pgoff * PAGESIZE; |
| 1235 | |
| 1236 | if(f = seg->freefilemap) |
| 1237 | seg->freefilemap = f->next; |
| 1238 | if(f == nil) |
| 1239 | f = kmalloc(sizeof(Filemap)); |
| 1240 | f->ref = 1; |
| 1241 | f->addr = r; |
| 1242 | f->next = nil; |
| 1243 | f->path = kstrdup(file->path); |
| 1244 | f->offset = o; |
| 1245 | if((f->mode = file->mode) != O_RDONLY){ |
| 1246 | f->file = getfile(file); |
| 1247 | } else { |
| 1248 | f->file = nil; |
| 1249 | } |
| 1250 | a->filemap = f; |
| 1251 | qunlock(seg); |
| 1252 | qunlock(space); |
| 1253 | |
| 1254 | trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top); |
| 1255 | |
| 1256 | addr = r.base; |
| 1257 | while(addr < r.top){ |
| 1258 | n = preadfile(file, (void*)addr, r.top - addr, o); |
| 1259 | if(n == 0) |
| 1260 | break; |
| 1261 | if(n < 0){ |
| 1262 | trace("read failed at offset %lux for address %lux failed: %r", o, addr); |
| 1263 | break; |
| 1264 | } |
| 1265 | addr += n; |
| 1266 | o += n; |
| 1267 | } |
| 1268 | |
| 1269 | putfile(file); |
| 1270 | |
| 1271 | return r.base; |
| 1272 | } |
| 1273 | |
| 1274 | int sys_munmap(ulong addr, ulong len) |
| 1275 | { |
| 1276 | Space *space; |
| 1277 | Range r; |
| 1278 | |
| 1279 | trace("sys_munmap(%lux, %lux)", addr, len); |
| 1280 | |
| 1281 | if(pagealign(addr) != addr) |
| 1282 | return -EINVAL; |
| 1283 | r.base = addr; |
| 1284 | r.top = addr + pagealign(len); |
| 1285 | if(r.top <= r.base) |
| 1286 | return -EINVAL; |
| 1287 | |
| 1288 | space = current->mem; |
| 1289 | qlock(space); |
| 1290 | unmapspace(current->mem, r); |
| 1291 | qunlock(space); |
| 1292 | |
| 1293 | return 0; |
| 1294 | } |
| 1295 | |
| 1296 | ulong |
| 1297 | sys_brk(ulong bk) |
| 1298 | { |
| 1299 | Space *space; |
| 1300 | ulong a; |
| 1301 | |
| 1302 | trace("sys_brk(%lux)", bk); |
| 1303 | |
| 1304 | space = current->mem; |
| 1305 | qlock(space); |
| 1306 | a = brkspace(space, bk); |
| 1307 | qunlock(space); |
| 1308 | |
| 1309 | return a; |
| 1310 | } |
| 1311 | |
| 1312 | int sys_mprotect(ulong addr, ulong len, int prot) |
| 1313 | { |
| 1314 | Space *space; |
| 1315 | Seg *seg; |
| 1316 | Area *a, *b; |
| 1317 | int err; |
| 1318 | |
| 1319 | trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot); |
| 1320 | |
| 1321 | len = pagealign(len); |
| 1322 | if(pagealign(addr) != addr) |
| 1323 | return -EINVAL; |
| 1324 | if(len == 0) |
| 1325 | return -EINVAL; |
| 1326 | |
| 1327 | err = -ENOMEM; |
| 1328 | space = current->mem; |
| 1329 | qlock(space); |
| 1330 | if(seg = addr2seg(space, addr)){ |
| 1331 | for(a = addr2area(seg, addr); a!=nil; a=a->next){ |
| 1332 | if(addr + len <= a->addr.base) |
| 1333 | break; |
| 1334 | err = 0; |
| 1335 | if(a->prot == prot) |
| 1336 | continue; |
| 1337 | wakefutexarea(a, a->addr); |
| 1338 | if(a->addr.base < addr){ |
| 1339 | b = duparea(a); |
| 1340 | a->addr.base = addr; |
| 1341 | b->addr.top = addr; |
| 1342 | linkarea(seg, b); |
| 1343 | } |
| 1344 | if(a->addr.top > addr + len){ |
| 1345 | b = duparea(a); |
| 1346 | a->addr.top = addr + len; |
| 1347 | b->addr.base = addr + len; |
| 1348 | linkarea(seg, b); |
| 1349 | } |
| 1350 | trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot); |
| 1351 | a->prot = prot; |
| 1352 | } |
| 1353 | qunlock(seg); |
| 1354 | } |
| 1355 | qunlock(space); |
| 1356 | |
| 1357 | return err; |
| 1358 | } |
| 1359 | |
| 1360 | int sys_msync(ulong addr, ulong len, int flags) |
| 1361 | { |
| 1362 | Space *space; |
| 1363 | Range r; |
| 1364 | |
| 1365 | trace("sys_msync(%lux, %lux, %x)", addr, len, flags); |
| 1366 | |
| 1367 | if(pagealign(addr) != addr) |
| 1368 | return -EINVAL; |
| 1369 | r.base = addr; |
| 1370 | r.top = addr + pagealign(len); |
| 1371 | if(r.top <= r.base) |
| 1372 | return -EINVAL; |
| 1373 | |
| 1374 | space = current->mem; |
| 1375 | qlock(space); |
| 1376 | syncspace(space, r); |
| 1377 | qunlock(space); |
| 1378 | |
| 1379 | return 0; |
| 1380 | } |
| 1381 | |
| 1382 | ulong |
| 1383 | sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr) |
| 1384 | { |
| 1385 | Space *space; |
| 1386 | int r; |
| 1387 | |
| 1388 | trace("sys_mremap(%lux, %lux, %lux, %x, %lux)", |
| 1389 | addr, oldlen, newlen, flags, newaddr); |
| 1390 | |
| 1391 | space = current->mem; |
| 1392 | qlock(space); |
| 1393 | r = remapspace(space, addr, oldlen, newlen, newaddr, flags); |
| 1394 | qunlock(space); |
| 1395 | |
| 1396 | return r; |
| 1397 | } |
| 1398 | |
/* futex operation codes, as switched on by sys_futex() */
enum {
	FUTEX_WAIT		= 0,	/* sleep while *addr still equals val */
	FUTEX_WAKE		= 1,	/* wake waiters on addr */
	FUTEX_FD		= 2,	/* no case in sys_futex(): yields -ENOSYS */
	FUTEX_REQUEUE		= 3,	/* wake some waiters, requeue others to addr2 */
	FUTEX_CMP_REQUEUE	= 4,	/* like FUTEX_REQUEUE, but only if *addr equals val3 */
};
| 1406 | |
| 1407 | int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3) |
| 1408 | { |
| 1409 | Space *space; |
| 1410 | Seg *seg; |
| 1411 | Area *a; |
| 1412 | Futex *fu, *fu2; |
| 1413 | int err, val2; |
| 1414 | vlong timeout; |
| 1415 | |
| 1416 | trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3); |
| 1417 | |
| 1418 | seg = nil; |
| 1419 | err = -EFAULT; |
| 1420 | if((space = current->mem) == 0) |
| 1421 | goto out; |
| 1422 | |
| 1423 | qlock(space); |
| 1424 | if((seg = addr2seg(space, (ulong)addr)) == nil){ |
| 1425 | qunlock(space); |
| 1426 | goto out; |
| 1427 | } |
| 1428 | qunlock(space); |
| 1429 | if((a = addr2area(seg, (ulong)addr)) == nil) |
| 1430 | goto out; |
| 1431 | for(fu = a->futex; fu; fu = fu->next) |
| 1432 | if(fu->addr == addr) |
| 1433 | break; |
| 1434 | |
| 1435 | switch(op){ |
| 1436 | case FUTEX_WAIT: |
| 1437 | trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr); |
| 1438 | |
| 1439 | if(fu == nil){ |
| 1440 | if(fu = seg->freefutex){ |
| 1441 | seg->freefutex = fu->next; |
| 1442 | } else { |
| 1443 | fu = kmallocz(sizeof(Futex), 1); |
| 1444 | } |
| 1445 | fu->ref = 1; |
| 1446 | fu->addr = addr; |
| 1447 | if(fu->next = a->futex) |
| 1448 | fu->next->link = &fu->next; |
| 1449 | fu->link = &a->futex; |
| 1450 | a->futex = fu; |
| 1451 | } else { |
| 1452 | incref(fu); |
| 1453 | } |
| 1454 | |
| 1455 | err = 0; |
| 1456 | timeout = 0; |
| 1457 | if(ptime != nil){ |
| 1458 | struct linux_timespec *ts = ptime; |
| 1459 | vlong now; |
| 1460 | |
| 1461 | wakeme(1); |
| 1462 | now = nsec(); |
| 1463 | if(current->restart->syscall){ |
| 1464 | timeout = current->restart->futex.timeout; |
| 1465 | } else { |
| 1466 | timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec; |
| 1467 | } |
| 1468 | if(now < timeout){ |
| 1469 | current->timeout = timeout; |
| 1470 | setalarm(timeout); |
| 1471 | } else { |
| 1472 | err = -ETIMEDOUT; |
| 1473 | } |
| 1474 | } |
| 1475 | if(err == 0){ |
| 1476 | if(*addr != val){ |
| 1477 | err = -EWOULDBLOCK; |
| 1478 | } else { |
| 1479 | err = sleepq(fu, seg, 1); |
| 1480 | } |
| 1481 | } |
| 1482 | if(ptime != nil){ |
| 1483 | current->timeout = 0; |
| 1484 | wakeme(0); |
| 1485 | } |
| 1486 | if(err == -ERESTART) |
| 1487 | current->restart->futex.timeout = timeout; |
| 1488 | |
| 1489 | if(!decref(fu)){ |
| 1490 | if(fu->link){ |
| 1491 | if(*fu->link = fu->next) |
| 1492 | fu->next->link = fu->link; |
| 1493 | fu->link = nil; |
| 1494 | fu->next = nil; |
| 1495 | } |
| 1496 | fu->next = seg->freefutex; |
| 1497 | seg->freefutex = fu; |
| 1498 | } |
| 1499 | break; |
| 1500 | |
| 1501 | case FUTEX_WAKE: |
| 1502 | trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr); |
| 1503 | err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0; |
| 1504 | break; |
| 1505 | |
| 1506 | case FUTEX_CMP_REQUEUE: |
| 1507 | trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr); |
| 1508 | if(*addr != val3){ |
| 1509 | err = -EAGAIN; |
| 1510 | break; |
| 1511 | case FUTEX_REQUEUE: |
| 1512 | trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr); |
| 1513 | } |
| 1514 | err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0; |
| 1515 | if(err > 0){ |
| 1516 | val2 = (int)ptime; |
| 1517 | |
| 1518 | /* BUG: fu2 has to be in the same segment as fu */ |
| 1519 | if(a = addr2area(seg, (ulong)addr2)){ |
| 1520 | for(fu2 = a->futex; fu2; fu2 = fu2->next){ |
| 1521 | if(fu2->addr == addr2){ |
| 1522 | err += requeue(fu, fu2, val2); |
| 1523 | break; |
| 1524 | } |
| 1525 | } |
| 1526 | } |
| 1527 | } |
| 1528 | break; |
| 1529 | |
| 1530 | default: |
| 1531 | err = -ENOSYS; |
| 1532 | } |
| 1533 | |
| 1534 | out: |
| 1535 | if(seg) |
| 1536 | qunlock(seg); |
| 1537 | return err; |
| 1538 | } |