add linux_emul base, reorganize docs master
authorian <ian@kremlin.cc>
Wed, 18 Feb 2015 21:43:27 +0000 (16:43 -0500)
committerian <ian@kremlin.cc>
Wed, 18 Feb 2015 21:43:27 +0000 (16:43 -0500)
39 files changed:
linux_emul_base/CHANGES [new file with mode: 0644]
linux_emul_base/README [new file with mode: 0644]
linux_emul_base/bits.s [new file with mode: 0644]
linux_emul_base/bootstrap/tar [new file with mode: 0755]
linux_emul_base/bufproc.c [new file with mode: 0644]
linux_emul_base/consdev.c [new file with mode: 0644]
linux_emul_base/dat.h [new file with mode: 0644]
linux_emul_base/doc/ioctl_list.txt [new file with mode: 0644]
linux_emul_base/doc/linuxemu.txt [new file with mode: 0644]
linux_emul_base/doc/todo.txt [new file with mode: 0644]
linux_emul_base/dspdev.c [new file with mode: 0644]
linux_emul_base/error.c [new file with mode: 0644]
linux_emul_base/exec.c [new file with mode: 0644]
linux_emul_base/file.c [new file with mode: 0644]
linux_emul_base/fns.h [new file with mode: 0644]
linux_emul_base/fs.c [new file with mode: 0644]
linux_emul_base/linux [new file with mode: 0755]
linux_emul_base/linux.h [new file with mode: 0644]
linux_emul_base/linuxcall.c [new file with mode: 0644]
linux_emul_base/linuxcalltab [new file with mode: 0644]
linux_emul_base/linuxcalltab.awk [new file with mode: 0755]
linux_emul_base/main.c [new file with mode: 0644]
linux_emul_base/mem.c [new file with mode: 0644]
linux_emul_base/miscdev.c [new file with mode: 0644]
linux_emul_base/mkfile [new file with mode: 0644]
linux_emul_base/pipedev.c [new file with mode: 0644]
linux_emul_base/poll.c [new file with mode: 0644]
linux_emul_base/proc.c [new file with mode: 0644]
linux_emul_base/procdev.c [new file with mode: 0644]
linux_emul_base/ptydev.c [new file with mode: 0644]
linux_emul_base/rootdev.c [new file with mode: 0644]
linux_emul_base/signal.c [new file with mode: 0644]
linux_emul_base/sockdev.c [new file with mode: 0644]
linux_emul_base/stat.c [new file with mode: 0644]
linux_emul_base/time.c [new file with mode: 0644]
linux_emul_base/tls.c [new file with mode: 0644]
linux_emul_base/trace.c [new file with mode: 0644]
linux_emul_base/trap.c [new file with mode: 0644]
ref/bsd_man2_all [moved from bsd_man2_all with 100% similarity]

diff --git a/linux_emul_base/CHANGES b/linux_emul_base/CHANGES
new file mode 100644 (file)
index 0000000..138a094
--- /dev/null
@@ -0,0 +1,623 @@
+2008-08-16
+Creation of a CHANGES file
+
+An entry starts with the date followed by a newline and then
+the content follows. Usually, the first line after the date
+is a short description and then a longer one follows.
+To terminate the entry, insert two newlines at the end.
+
+So this entry serves as an example. Hope this is simple enough :-)
+
+
+2008-08-16
+New debugging implemented
+
+Debug code removed from mem.c and trap.c and rewritten in
+acid. See the DEBUGGING section in the HOWTO file for further
+information.
+
+
+2008-08-17
+Fontconfig crash fixed, Debug code fixes
+
+libfontconfig mapped some config files with len == 0, this
+was not handled correctly so it crashed.
+
+the umem() acid function didn't check for zero segment
+pointers so it showed invalid data for the mostly unused
+SEGSHARED segment.
+
+
+2008-08-18
+Some minor fixes
+
+more checking in memory manager
+sys_[gs]etpgrp implemented for pid != current->pid
+renamed emu.c to main.c
+set UID/GID/EUID/EGID in AUXVEC on exec()
+
+
+
+2008-08-21
+Making prof(1) work (at least dont let it crash)
+
+mem.c: convertseg():
+
+Replaced read() calls with pread() to prevent a profiling related crash.
+The problem was that convertseg() detaches the DATA segment, reattaches
+a new one and used read() to get the contents back.  read() was a
+profiled function and the profiler finds its structures cleared to
+zero and crashes.  We now use pread(), which is an unprofiled assembly
+syscall stub.
+
+Still, child processes and kprocs are not currently profiled.
+
+
+2008-08-22
+AF_UNIX client sockets implemented
+
+Its a little bit of a hack. We do the AF_UNIX handling like APE
+does so we can interact with the ported Xservers Xbr and equis.
+
+
+2008-08-23
+Work arround for mozilla GPFAULT bug
+
+If a process is notified that he has pending signals with the
+"sig" message and if the note interrupts the execution of a
+INT 0x80 instruction, a syscall in the handler causes mystic
+crashes i dont understand.
+
+The work arround detects the condition and delays the
+handling of the signals returning back to userspace.
+
+I could reproduce the condition and this hack seems
+to work. I should write some testcase to analyze this
+condition further. Maybe its some kernel bug.
+
+
+2008-08-23
+Minor file related fixes/cleanups
+
+- Implemented in miscdev.c for writable /dev/zero
+- sys_umask() now returns the previous umask
+- default umask set to 022
+- sys_umask() sys_cwd() moved from proc.c to file.c
+
+
+2008-08-24
+linuxemu.rc script updated
+
+- resolve relative rootpath
+- generate /etc files for hostname and resolv.conf if not readable
+- removed enviroment user->USER home->HOME conversion in main.c
+
+
+2008-09-10
+Better work arround for mozilla GPFAULT bug
+
+The problem arises because of the handling of notes in Plan9 kernel.
+
+It happens if linux code does a syscall while here are usernotes (like
+the "sig" ones) queued in the process note[] array.  Then the trap()
+function in the kernel will enqueue the trap in the queue and the user
+note gets handled first.
+
+Now, after we have done a syscall in the note handler of the user
+signal, notify() gets called in the kernel that detects the next note
+to be a trap and thinks the note handler itself caused it and kills
+the process.
+
+I suggested a patch that makes sure traps get enqued on the head of
+the note[] array so that they get handled before a user note but its
+not decided to be applied yet so here is the work arround.
+
+The workaround avoids posting notes to a process that could possibly
+cause a trap in the future before the user note gets handled.  (This
+excludes all linux code because it can issue a syscall at any time.)  The
+only time it is safe to post notes is when we are in the linuxemu syscall
+handler.
+
+This is mostly the case anyway when notes are used to interrupt
+blocking syscalls (waking a process sleeping in a sigsuspend for
+example)
+
+Linuxcode that would spin in a endless loop will not be interrupted by
+notes/signals anymore.
+
+
+2008-09-20
+Minor stuff
+
+Masking more fp-errors in trap.c. (To get some dos game running in
+bochs)
+
+prboom and zsnes were bumpy sometimes because our select() didn't set
+the struct timeval *tvp to the time the select call did not sleep.
+
+The stat function pointer was removed from the Ufile struct and all the *dev.c
+files. I have no need to return custom stats from the devices right now
+(except for /dev/zero (mode & 0222)) so I moved the only case where it's
+needed into the hackish dir2statmode function in p9dev.c. There is also
+an fd field added to the generic Ufile struct because most devices
+use plan9 file descriptors at some point and we use it in fstat(). Saves some
+casting and fstat can use the fd (when it's available) to make a dirstat() and
+convert to linux format.
+
+For the postnote stuff mentiond in the last changes entry, here is a global
+flag in main.c (notehack = 1) to enable/disable the work arround.
+
+
+2008-09-21
+atime/mtime, AT_CLKTCK
+
+new syscalls
+- sys_utime
+- sys_utimes
+- sys_times
+
+the AT_CLKTCK entry in exec is set to _tos->cyclefreq
+now.
+
+
+2008-10-05
+faster death proc detection
+
+Using linuxemu from terminal caused huge network load
+because we did a lookup of /proc/# on every timer round.
+
+now we keep open filedescriptors for /proc/#/args and
+/proc/#/note in the proc structure and only check for
+dead procs every second.
+
+
+2008-10-05
+sound (/dev/dsp)
+
+OSS sound implemented, its only tested with some SDL
+apps (zsnes, prboom).
+
+The output frequency is hardcoded at 44100 Hz and 2
+channels stereo at the moment.
+
+
+2008-10-20
+dsp, stat, mkfile, -d, dns, fs
+
+dspdev detects output frequency now and implement more
+ioctls.
+
+here is an universal Ustat and Udirent structs to separate
+the linux formats from driver code.
+
+big change in all devices, added indirection layer fs.c that
+has some kind of mapping from path -> device and forwards
+filesystem calls to device drivers. now drivers can return
+correct stat information.
+
+this is needed for implementing /dev/pts later.
+
+removed the /etc/(hosts resolv.conf) generation code from
+linuxemu.rc because it doesn't work in some cases and
+causes maintenance nightmares.
+
+use the -d switch for turning on trace to stderr. so we dont
+need to always change the if(0) from trace.c and recompile.
+
+force compiler flags in mkfile, the -T from new mkone breaks
+build on current distribution.
+
+
+2008-10-27
+lots of fixes
+
+- off by one error basepath()
+- chmod used access which resolves always to link target
+- runlink
+- the note hack broke sigprocmask, and sometimes failed
+   to interrupt a sleepproc(). this is now handled in
+   interruptproc(). (this makes drawterm work on linuxemu)
+- write() to pty was not synchronized/not blocking that
+   caused some apps to spin in write() or others to drop
+   characters (curses).
+
+
+2008-10-28
+grow filedescriptor table in dup2
+
+- if the new fd supplied is out if range, dont return error but
+   grow the fdtab so it fits in. (this make configure work)
+
+2008-11-15
+lots of changes
+
+- Updated HOWTO
+
+- included <ureg.h> in some modules. this fixed the type
+   signature errors of 8l when compiling with -T flag.
+
+- fixed bug in exec()
+   exec needs to run outside the note context, because it
+   is deleting memory segments and that can cause the removal
+   of the stack segment as well. we have to set current->syscall
+   to nil to avoid getting a note posted that could interrupt us in
+   the middle of some malloc() or something.
+
+- sys_pipe() FD_CLOEXEC
+   sys_pipe() create the filedescriptors with FD_CLOEXEC bit
+   set as open() does. this is wrong and caused gimp plugins
+   to fail.
+
+- new signal handling code
+   signal.c has changed a bit. now CLONE_THREAD procs
+   share a signal queue and proc() uses wantsignal() to
+   figure out what process to interrupt.
+
+- restartable syscalls
+   we process the SA_RESTART flag now and are able to restart syscalls
+   that got interrupted by such signals. sleepproc() returns -ERESTART
+   by default now. sys_poll() and sys_sleep() will return -EINTR in
+   any case.
+
+- fs reorganized, [sg]etxattr added, p9cwd added as an optimization
+   to avoid walks().
+
+- ptydev emits SIGINTR to process group, added tty to Uproc.
+
+- added fddev (/dev/fd)
+
+- exitproc() now emits SIGCHLD.
+
+- mem.c: addrok() now takes a prot flags parameter so we can test
+   for expected memory protection too.
+
+- profme(): profiling support for child processes added
+
+- initialization completly moved in main()
+
+- fchmod, fchown, ftruncate moved to file.c
+
+
+2008-10-19
+just code dressing...
+
+renamed some files, added typedef for Ureg, abstracted syscall specific
+code in linuxcall.c, more tracing...
+
+
+2009-02-06
+Fixed the opera fork() no more threads bug
+
+There was a problem of dns resolver zombie processes created by
+opera that seemed to use up all the process table due to incomplete
+implementation of clone().  Linux specified a exit signal in the lower
+byte of the clone- flags parameter to tell if the process should hang
+arround as zombie so that the parent can wait for it.  If no signal is
+specified here, then the process should exit without becoming a
+zombie.  Here is also the case that the parent ignores the SIGCHLD
+signal or has SA_NOCHLDWAIT flags on the SIGCHLD set.  In that case
+the child should also purge itself.  I implemented reparenting,
+because i suspected the bug there but this was not the case.
+
+Here is a new uprocs() acid function that dumps the proctab.
+
+
+2009-02-18
+Minor changes
+
+Added anonymous area merging to reduce the area count and removed
+redundant clearmem calls. Changed Uwaitq lock from QLock to normal
+spinlock. Removed some trace() calls.
+
+The biggest change is that linuxemu.rc now is able to start equis. This
+simplifies writing wrapper scripts to start a browser or other X11 apps.
+
+
+2009-03-25
+Simplified area merging in memory manager
+
+Areas are doubly linked now so its easier to get the previous area for
+mergearea().
+
+
+2009-03-30
+Fixed man-bug. (Restarting syscalls)
+
+Restarting syscalls failed if the signal that was sent to the interrupted
+process was blocked. This is fixed now.
+
+
+2009-04-01
+fixed awd-bug (use builtin cd), make errors more verbose in exec.c
+
+bla
+
+2009-05-11
+fix pipeseek, added pread64, pwrite64 syscalls.
+
+forgot to return -ESPIPE on seekpipe.
+implemented pread64/pwrite64 (needed by git).
+
+
+2009-07-25
+random stuff
+
+- increased bufproc read and queuesize to 4K/64K (fixes links2 -g hang)
+- reuse buffers in bufproc
+- cleanup timer stuff, introduce 5ms min sleep time, avoid interrupt note
+- fix sys_select() to always modify tv
+- fix format mismatch in nextsignal
+- dont combine in sys_readv/sys_writev
+- fix open in devdsp.c
+- s/memcpy/memmove/g
+- dont reset segment registers for signal handlers
+- possibly more that i forgot... use history(1)
+
+
+2009-07-27
+fixed audio delay
+
+keeping track of how many samples (time) have been submitted
+to /dev/audio already and wait when the buffer grows over some
+threshold. this removes the audio delay from games :)
+
+
+2009-07-29
+doc
+
+put documentation in doc subdirectory.
+
+
+2009-07-31
+mremap, segbrk shrinking, pagesize, doc
+
+rewrote mremap implementation to correctly clear area for shrinking.
+handles more error cases and checks for overlap. this fixes the gimp
+invalid pointer bug.
+
+removed segment shrinking with segbrk as this feature may be removed
+in newer kernel versions as it introduced a bug where it is possible to
+unmap pages while the kernel touches them and cause a panic.
+
+removed the ROUNDPAGE() macro from dat.h. heres a pagealign()
+function in mem.c now and the global variable pagesize that is initialized
+in main.
+
+some documentation cleanups.
+
+
+2009-08-24
+dspdev improvements.
+
+do some linear interpolation in audio resampling to get better sound quality.
+avoid copy when no resampling is required.  reflect queue full in
+GETOSPACE ioctl. cleanup code to avoid vlong calculations.
+
+
+2009-08-26
+bugs
+
+fixed uninitialized values in stat which caused -EOVERFLOW on linux
+kernel build.  removed wakeableproc() (changes in signal.c, ptydev.c,
+bufproc.c, poll.c).  fixed sigsuspend race.  simpler waitq code (uses
+lesser memory too).  fixed waitpid race.  added /dev/dsp0 to dspdev
+(makes mikmod work).  fixed rfork/notify crash.
+
+
+2009-08-30
+mplayer, bb, audacity play cursor, bugs
+
+refactored timers, alarm and deadproccheck into one timerproc and
+removed timer.c. every Uproc has a timeout field now that is the
+time in nanoseconds when the timeout expires. on expiration, the
+timerproc sets the value to zero and does a wakeup on the
+timed out process. a process sets/resets its timeout with
+settimeout(delta). the remaining time in ms can be queried with
+timeoutremain().
+
+fixed missing protection flags in setupstack.
+
+more accurate GETOSPACE (mplayer, bb) and new GETOPTR
+ioctl (needed for audacity play cursor) in dspdev.c.
+
+handle kill note as SIGKILL in trap.c.
+
+handle illegal instruction as SIGILL as pass/restore sigcontext
+(needed for mplayer runtime SSE check).
+
+sys_sigreturn now uses current->ureg->sp to find the restore
+information.
+
+preallocate all Uprocs.
+
+
+2009-09-06
+cleanup
+
+removed dev argument from fdgetfile()
+
+return correct -ENOSOCK in socketcall()
+
+fixed pread/pwrite, dev->read/dev-write now take a offset
+argument.
+
+fixed time diff overflow in dspdev
+
+
+2009-09-08
+fuckup, O_TRUNC, restarting syscalls, rc, getdents
+
+fucked up:
+- seek didnt work for whence == 1 as the plan9 seek pointer was
+   never moved in read due to change to pread. this caused cp to
+   corrupt the output file when it skipped null blocks.
+- basepath in fs.c was broken
+- readv/writev didnt increment the file offset
+
+what we have now is that file.c does all the offset tracking, and
+devices provide a size() function that returns the actual file size.
+
+added O_TRUNC for open.
+
+signal restarting sometimes resulted in returning -ERESTART to
+userspace. this could happen when another thread had stolen
+our signal. we now restart the syscall in nextsignal() even if
+here was no signal pending for us.
+
+removed the exitsig function from linuxemu.rc as we can use
+the -terminate option of the xserver to get it shutdown.
+
+read the whole directory, then calculate file offset for directory
+entries. 
+
+
+2009-09-12
+signal handling changes, acid, rc
+
+
+2009-09-20
+sockets, basepath, alarm
+
+fixed error in basepath (*ps vs ps) and implemented nonblocking connect,
+server sockets, socketpair, sys_alarm
+
+
+2009-10-13
+lots of changes
+
+simplified locking by making process wakeup non blocking.
+to not miss wakeups, the to be suspended proc should call wakeme(1)
+before it goes to sleep.
+
+timers for alarm/itimer have been moved to the per "process" signal
+data. current->timeout is still local to the current "thread".
+
+sys_kill() now makes sure we only send one signal per "process".
+
+syscall restarting now can use the Urestart (current->restart)
+structure to remember state.  (implemented for nanosleep, poll and
+select)
+
+changed default to non tracing.
+
+pty now handles winsize changes. fixing current tty changing. (ssh bug,
+rxvt bug)
+
+added /dev/random and /dev/urandom to miscdev.
+
+more ioctls for dspdev.
+
+enforce non reentrancy for traps.
+
+
+2009-10-15
+fixed sys_brk()
+
+we now use a separate segment for the BSS and dont intermix mmap and brk.
+thanks jibanes for reporting!
+
+
+2010-02-27
+futex, TLS, mprotect
+
+implemented sys_futex() finally
+
+changed tls to use the new /dev/gdt interface to change its
+process segment descriptors
+
+fixed mprotect
+
+
+2010-04-30
+linuxemu.rc gone, documentation
+
+removed linuxemu.rc and replaced it with linux.
+
+usage: linux [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...]
+
+linuxroot is now an optional parameter (-r). it will default to /sys/lib/linux.
+
+dont hide /lib/tls anymore and bind devarch. if you dont want to
+patch your kernel with the segdescr patch and use mroot[-linuxemu].tbz
+you can rename /lib/tls to /lib/_tls_disabled_.
+
+
+2010-05-02
+exit_group, exec, futex, waitpid, quoted arguments
+
+properly implement exit_group and zap all threads. notify
+all parent threads.
+
+zap threads in exec.
+
+implement FUTEX_REQUEUE and FUTEX_CMP_REQUEUE.
+
+handle WALL, WCLONE and WNOHANG in waitpid.
+
+preserve quoted arguments to linux.
+
+
+2010-05-11
+select/poll and EBADF, execve malloc, set_thread_area, initproc, SIGSTOP/SIGCONT, 
+tty, getsid, getpeeraddr, /proc
+
+select and poll never return -EBADF but ignore the offending
+filedescriptor. this is wrong in the manpage! (this was needed
+to survive the python configure script)
+
+handle malloc errors in execve and dont panic when elf
+loading fails but kill the process.
+
+detect empty descriptors in set_thread_area so descriptors
+can be freed.
+
+move some of the initialization from main to initproc.
+
+SIGSTOP/SIGCONT handling now works for thread groups. for this
+we now have stopproc() and contproc() that are called from the
+signal code when SIGSTOP or SIGCONT signal is received. each Uproc
+now has a traceproc callback that is called when we enter or
+exit the kernel. zapthreads() and stopproc() use this to get all threads
+in the wanted state. for stopped procs, waiting happens in
+the signal code so calling handlesignals() of a stopped proc will
+block until it gets killed or continued.
+
+new fields in Uproc:
+traceproc, tracearg - called when entering or exiting the kernel
+wstate - current wait state of this process. WEXITED, WSTOPPED, WCONTINUED.
+wevent - like wstate, but reset by waitpid
+comm - double null terminated string array. first entry is the full exe name
+followed by the execve arguments.
+
+heres a new format %S for signal numbers.
+
+the per thread tty is gone. the tty is now in the per process signal queue.
+gettty() and settty() can be used to modify it. ptydev now allows opening
+the slave tty multiple times. (fixes midnight commander error)
+
+implemented sys_getsid().
+
+fix AF_INET padding and byte order for getpeername socketcall.
+
+implemented /proc (procdev). fddev is gone. /dev/tty handled by
+ptydev now. this makes pkill, ps, top and inkscape work!
+
+
+2010-05-28
+fixed pipe filedescriptor leak in AF_UNIX
+
+we leaked the sock->other descriptor when failing to connect
+a AF_UNIX socket. thanks yarikos for reporting!
+
+
+2011-08-05
+rename to existing symlink target bug, profine -> profile
+
+renaming a symlink to an existing symlink would cause the
+file to be renamed to .udir.L.udir.L....
+
+fix profine/profile typo
+
+2014-11-20
+change uname release to 3.2.1 to make debian 7.0 not complain
+(thanks henesy)
diff --git a/linux_emul_base/README b/linux_emul_base/README
new file mode 100644 (file)
index 0000000..677085d
--- /dev/null
@@ -0,0 +1,138 @@
+INTRO
+
+Linuxemu is a program that can execute Linux/i386 ELF binaries on
+Plan9.  It was started by Russ Cox and development was continued by
+me.  Its opensource, I dont care what you are doing with it, but maybe
+Russ does, i don't know :-)
+
+If you found some bugs or have some other improvements/ideas send a
+email to:
+
+cinap_lenrek AT gmx DOT de
+
+
+SOURCE
+
+linuxemu is available on sources. On Plan9 do:
+
+% 9fs sources
+% cp /n/sources/contrib/cinap_lenrek/linuxemu3.tgz .
+
+Another source is my server on the web:
+
+% hget http://9hal.ath.cx/usr/cinap_lenrek/linuxemu3.tgz >linuxemu3.tgz
+
+
+DOCUMENTATION
+
+documentation is provided in the doc directory:
+
+doc/linuxemu.txt
+doc/todo.txt
+
+
+COMPILE
+
+% tar xzf linuxemu3.tgz
+% cd linuxemu3
+% mk
+
+
+INSTALL
+
+% mk install
+
+
+BOOTSTRAP
+
+You need a linux rootfilesystem packed in a tarball. Go!
+get some linux rootfs:
+
+http://9hal.ath.cx/usr/cinap_lenrek/mroot.tbz
+http://9hal.ath.cx/usr/cinap_lenrek/mroot-linuxemu.tbz
+
+the -linuxemu version contains no symlinks and can be extracted with
+plain plan9 tools bunzip/tar so you can skip the BOOTSTRAP section.
+:-)
+
+You can create your own with debootstrap on debian linux...  or help
+me write a installer that unpacks and installs slackware on plan9...
+In any case, linuxemu is not hardwired to any linux distribution!
+
+Extract your linux rootfilesystem with the static linked gnutar from
+the bootstrap directory.  (This will create all the fake symlinks for
+you)
+
+% 8.out bootstrap/tar xf /tmp/mroot.tar 
+
+
+RUNNING
+
+Then you can use the linux script to "chroot" into your linux
+rootfs. the linux script is necessary because for linux programs
+to run shared libraries from your linux root have to appear at /lib
+and /usr/lib and configuration files are expected to be in /etc.
+the script will build a private namespace and bind the linuxroot
+over the plan9 root. the original plan9 namespace is mounted to /9.
+
+% linux -r ./mroot /bin/bash -i
+
+if you omit the -r option, the linuxroot defaults to /sys/lib/linux. you
+may put your linux root there or add a bind to your $home/lib/profile.
+
+You should change /etc/resolv.conf to match your network nameserver
+setup.  Also, you may want to edit /etc/apt/sources.list to change the
+debian mirror.
+
+
+DEBUGGING
+
+If linuxemu crashes, use acid to figure out whats going on:
+
+% mk acid
+% acid -l linuxemu.acid <pid>
+
+then you can issue the following commands:
+
+ustk()                         dump a (userspace) stacktrace for the current thread
+umem(Current())                dump the memory mappings
+ufds(Current())                        dump the filedescriptor table
+utrace(Current())              dump the internal tracebuffer (enabled by -d option)
+
+use xasm()/xcasm() for disassembly for linux code.
+
+You can also enable full trace logging:
+
+% linux -r ./mroot -dd /bin/bash -i >[2]/tmp/linuxemu.log
+
+This slows linuxemu down.  In case of race conditions, it often
+happens that the bug disappears when doing full trace logging!
+
+
+NPTL/thread-local storage
+
+If you get one of these errors:
+
+"cannot set up thread-local storage: cannot set up LDT for thread-local storage"
+
+this is glibc/libpthread complaining!  the problem is the following:
+glibc on i386 decided at some point to use the extra segment registers
+GS and FS as an indirection pointer for thread local storage.  the
+operating system kernel therefore must have a mechanism to let userspace
+change descriptor table entries and swap them in/out on context
+switch.
+
+to make it work here are several options:
+
+1) recompile and link the program with a pre NPTL version of glibc.
+
+2) on some distributions, a non-tls version of libc/libpthread is available.
+in my debian mroot, the NPTL version is in /lib/tls, the older version
+is in /lib. by renaming /lib/tls to /lib/_tls_disabled_ the loader will
+use the non-tls version.
+
+3) i made a kernel patch that adds support for per process descriptors to
+plan9:
+/n/sources/contrib/cinap_lenrek/segdescpatch
+http://9hal.ath.cx/usr/cinap_lenrek/segdescpatch.tgz
+it will add the files gdt and ldt to devarch (#P).
diff --git a/linux_emul_base/bits.s b/linux_emul_base/bits.s
new file mode 100644 (file)
index 0000000..5a6c86e
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * incref(l): atomically increment the 32-bit counter that l points to
+ * (LOCK-prefixed INCL).  No result is loaded into AX; on return it
+ * still holds the pointer l.
+ */
+TEXT   incref(SB),$0
+       MOVL    l+0(FP),AX
+       LOCK
+       INCL    0(AX)
+       RET
+
+/*
+ * decref(l): atomically decrement the 32-bit counter that l points to
+ * (LOCK-prefixed DECL).  Returns 0 in AX when the counter reached
+ * zero, 1 otherwise.
+ */
+TEXT   decref(SB),$0
+       MOVL    l+0(FP),AX
+       LOCK
+       DECL    0(AX)
+       /* DECL sets ZF when the new value is zero */
+       JZ      iszero
+       MOVL    $1, AX
+       RET
+iszero:
+       MOVL    $0, AX
+       RET
+
+/*
+ * jumpureg(ureg): load the general registers saved in *ureg and
+ * continue execution at the saved PC.  Control does not come back to
+ * the caller.
+ *
+ * Offsets read from ureg: 0 DI, 4 SI, 8 BP, 16 BX, 20 DX, 24 CX,
+ * 28 AX, 56 PC, 68 SP (presumably the 386 Ureg layout from <ureg.h>
+ * -- confirm).  Segment registers and the flags word are not reloaded
+ * here, and the final ADDL modifies the flags.
+ *
+ * Because AX is needed as the base pointer until the very end, the
+ * saved AX and PC are first parked just below the target stack
+ * pointer; AX is reloaded from there and RET pops the parked PC,
+ * which leaves SP at the saved value.
+ *
+ * NOTE(review): the 1 in the TEXT line is presumably the assembler's
+ * "do not profile" flag -- confirm against the assembler manual.
+ */
+TEXT jumpureg(SB), 1, $0
+       MOVL ureg+0(FP), AX     /* ureg in AX */
+       MOVL 68(AX), SP         /* restore SP */
+       SUBL $12, SP
+       MOVL 28(AX), BX         /* put AX on 4(SP) */
+       MOVL BX, 4(SP)
+       MOVL 56(AX), BX         /* put PC on 8(SP) */
+       MOVL BX, 8(SP)
+       MOVL 0(AX), DI          /* restore registers */
+       MOVL 4(AX), SI
+       MOVL 8(AX), BP
+       MOVL 16(AX), BX
+       MOVL 20(AX), DX
+       MOVL 24(AX), CX
+       MOVL 4(SP), AX          /* restore AX */
+       /* SP now points at the parked PC; RET pops it and jumps there */
+       ADDL $8, SP
+       RET
+
+/*
+ * linux_sigreturn: issue Linux system call number 119 (sys_sigreturn)
+ * through INT 0x80.  Presumably installed as the return address of
+ * non-RT signal handlers -- confirm in signal.c.
+ */
+TEXT linux_sigreturn(SB), 1, $0
+       MOVL $119, AX           /* sys_sigreturn */
+       INT $0x80
+       RET
+
+/*
+ * linux_rtsigreturn: issue Linux system call number 173
+ * (sys_rt_sigreturn) through INT 0x80.  Presumably the RT-signal
+ * counterpart of linux_sigreturn -- confirm in signal.c.
+ */
+TEXT linux_rtsigreturn(SB), 1, $0
+       MOVL $173, AX           /* sys_rt_sigreturn */
+       INT $0x80
+       RET
+
+/* get_ds: return the current DS segment selector in AX (push/pop copy). */
+TEXT get_ds(SB), 1, $0
+       PUSHL DS
+       POPL AX
+       RET
+/* get_cs: return the current CS segment selector in AX (push/pop copy). */
+TEXT get_cs(SB), 1, $0
+       PUSHL CS
+       POPL AX
+       RET
diff --git a/linux_emul_base/bootstrap/tar b/linux_emul_base/bootstrap/tar
new file mode 100755 (executable)
index 0000000..a403a72
Binary files /dev/null and b/linux_emul_base/bootstrap/tar differ
diff --git a/linux_emul_base/bufproc.c b/linux_emul_base/bufproc.c
new file mode 100644 (file)
index 0000000..ceae26b
--- /dev/null
@@ -0,0 +1,263 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Bufproc Bufproc;
+typedef struct Bufq Bufq;
+
+/*
+ * One buffer of data read by bufproc().  Buffers are chained through
+ * next, both on a Bufproc's queue of pending data (qh/qt) and on its
+ * list of spare buffers (qf).
+ */
+struct Bufq
+{
+       Bufq            *next;
+
+       /* the unread bytes are [start, end); both point into data[] */
+       uchar   *start;
+       uchar   *end;
+
+       /* storage filled by a single read() in bufproc() */
+       uchar   data[8*1024];
+};
+
+/*
+ * State shared between a reader proc (bufproc) and the code that
+ * consumes what it reads.  The embedded Ref counts the holders
+ * (newbufproc starts it at 2); the embedded QLock is taken by the
+ * functions in this file while they examine or change the fields below.
+ */
+struct Bufproc
+{
+       Ref;
+       QLock;
+
+       /* fd: descriptor bufproc() reads from; set to -1 to make it stop */
+       int             fd;
+       /* error: error code stored by bufproc() when read() fails */
+       int             error;
+       /* notefd: the reader proc's /proc/<pid>/note file, opened in newbufproc() */
+       int             notefd;
+
+       /* qf: list of spare buffers available for reuse */
+       Bufq            *qf;
+       /* qh: head of the queue of buffers holding unread data */
+       Bufq            *qh;
+       /* qt: link where the next buffer is appended (&qh while the queue is empty) */
+       Bufq            **qt;
+
+       /* wr: set by bufproc() before it parks in rendezvous(&wr) on a full queue */
+       int             wr;
+       /* wq: wait queue readers sleep on; woken by bufproc() */
+       Uwaitq  wq;
+};
+
+/*
+ * Sum the number of unread bytes (end - start) over the buffer chain
+ * starting at q.  A nil chain yields 0.
+ */
+static int
+queuesize(Bufq *q)
+{
+       int total;
+
+       for(total = 0; q != nil; q = q->next)
+               total += (q->end - q->start);
+       return total;
+}
+
+/*
+ * Drop one reference to the Bufproc bp (nil is ignored).
+ *
+ * While other references remain, the reader proc is only told to stop:
+ * fd is set to -1, which bufproc() checks on every round, and the
+ * proc is then either released from its rendezvous (when it parked
+ * itself with wr set) or sent an "interrupt" note through notefd,
+ * presumably to break it out of a blocking read().
+ *
+ * The holder that drops the last reference frees all buffers, closes
+ * notefd and frees the structure itself.
+ */
+void
+freebufproc(void *bp)
+{
+       Bufproc *b = bp;
+       Bufq *q;
+
+       if(b == nil)
+               return;
+       qlock(b);
+       b->fd = -1;
+       /* decref() returns nonzero while references are left */
+       if(decref(b)){
+               if(b->wr){
+                       b->wr = 0;
+                       /* retry while rendezvous() reports ~0 (interrupted, see rendezvous(2)) */
+                       while(rendezvous(&b->wr, 0) == (void*)~0)
+                               ;
+               } else {
+                       write(b->notefd, "interrupt", 9);
+               }
+               qunlock(b);
+               return;
+       }
+       qunlock(b);
+
+       /* last reference: hang the spare list behind the data queue and free both in one walk */
+       *b->qt = b->qf;
+       while(q = b->qh){
+               b->qh = q->next;
+               free(q);
+       }
+       close(b->notefd);
+       free(b);
+}
+
+/*
+ * Body of the reader proc started by newbufproc(); aux is the Bufproc.
+ *
+ * Loops reading from b->fd into Bufq buffers and appends each filled
+ * buffer to the queue (qh/qt), waking sleepers on b->wq.  The loop ends
+ * when b->fd has become negative (set by freebufproc()) or when read()
+ * fails with anything other than -EINTR/-ERESTART; in the latter case
+ * the error is recorded in b->error.  On exit the proc drops its own
+ * reference with freebufproc().
+ *
+ * NOTE(review): a read() that returns 0 queues an empty buffer and the
+ * loop reads again; readbufproc() stops at an empty buffer without
+ * dequeuing it.  If fd keeps returning 0 (sticky end of file) this loop
+ * looks like it would keep queueing empty buffers -- confirm.
+ */
+static void
+bufproc(void *aux)
+{
+       Bufproc *b = aux;
+       Bufq *q;
+       int ret;
+       int fd;
+
+       setprocname("bufproc()");
+
+       q = nil;
+       qlock(b);
+       for(;;){
+               /* queue full (64K unread): park until a reader or freebufproc() meets us */
+               while((b->fd >= 0) && (queuesize(b->qh) >= 64*1024)){
+                       b->wr = 1;
+                       qunlock(b);
+                       while(rendezvous(&b->wr, 0) == (void*)~0)
+                               ;
+                       qlock(b);
+               }
+               if((fd = b->fd) < 0)
+                       break;
+               /* reuse a spare buffer unless one is still held from an interrupted read */
+               if((q == nil) && (q = b->qf))
+                       b->qf = q->next;
+               qunlock(b);
+
+               if(q == nil)
+                       q = kmalloc(sizeof(*q));
+               q->next = nil;
+               q->end = q->start = &q->data[0];
+               /* the read happens without the lock held */
+               ret = read(fd, q->start, sizeof(q->data));
+
+               qlock(b);
+               if(ret < 0){
+                       ret = mkerror();
+                       /* interrupted: go around again, keeping q for the next read */
+                       if(ret == -EINTR || ret == -ERESTART)
+                               continue;
+                       b->error = ret;
+                       b->fd = -1;
+                       break;
+               }
+               /* append the filled buffer at the tail and wake the readers */
+               q->end = q->start + ret;
+               *b->qt = q;
+               b->qt = &q->next;
+               q = nil;
+               wakeq(&b->wq, MAXPROC);
+       }
+       /* a buffer that was never queued goes to the spare list so it is freed with the rest */
+       if(q){
+               q->next = b->qf;
+               b->qf = q;
+       }
+       wakeq(&b->wq, MAXPROC);
+       qunlock(b);
+       freebufproc(b);
+}
+
+/*
+ * Create a Bufproc for fd and start the reader proc that fills it.
+ *
+ * The structure starts with two references: one for the reader proc
+ * and one for the caller, who releases it with freebufproc().  The
+ * value returned by procfork() is used as the pid to open the reader's
+ * /proc/<pid>/note file, which freebufproc() writes to.
+ *
+ * notefd is set to -1 before the proc is started.  The structure is
+ * allocated zeroed, so without this a reader that terminates before
+ * the note file has been opened would make freebufproc() write its
+ * "interrupt" string to descriptor 0.  If the open() fails, notefd
+ * also stays -1 and the write in freebufproc() simply fails.
+ *
+ * Panics when the proc cannot be created.
+ */
+void*
+newbufproc(int fd)
+{
+       char buf[80];
+       Bufproc *b;
+       int pid;
+
+       b = kmallocz(sizeof(*b), 1);
+       b->ref = 2;
+       b->fd = fd;
+       b->notefd = -1;
+       b->qt = &b->qh;
+       if((pid = procfork(bufproc, b, 0)) < 0)
+               panic("unable to fork bufproc: %r");
+       snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+       b->notefd = open(buf, OWRITE);
+
+       return b;
+}
+
+/*
+ * Copy up to len queued bytes from the Bufproc bp into data.
+ *
+ * While the queue is empty:
+ *   - noblock set: return -EAGAIN;
+ *   - peek set: return 0;
+ *   - reader stopped (fd < 0): return its recorded error, or -EIO
+ *     when none was recorded;
+ *   - otherwise sleep on the wait queue; a negative sleepq() result
+ *     is returned as is.
+ *
+ * With peek set the data is copied but left on the queue.  Otherwise
+ * fully consumed buffers are moved to the spare list, a partially
+ * consumed buffer has its start advanced, and a reader proc parked on
+ * a full queue (wr set) is released.  Copying stops at an empty buffer,
+ * which is left on the queue.
+ *
+ * Returns the number of bytes copied, or a negative error code.
+ */
+int readbufproc(void *bp, void *data, int len, int peek, int noblock)
+{
+       Bufproc *b = bp;
+       Bufq *cur, *nxt;
+       uchar *dst;
+       int total, chunk;
+
+       qlock(b);
+       for(;;){
+               cur = b->qh;
+               if(cur != nil)
+                       break;
+               if(noblock){
+                       total = -EAGAIN;
+                       goto out;
+               }
+               if(peek){
+                       total = 0;
+                       goto out;
+               }
+               if(b->fd < 0){
+                       total = b->error;
+                       if(total == 0)
+                               total = -EIO;
+                       goto out;
+               }
+               total = sleepq(&b->wq, b, 1);
+               if(total < 0)
+                       goto out;
+       }
+
+       dst = data;
+       total = 0;
+       for(; cur != nil; cur = nxt){
+               chunk = cur->end - cur->start;
+               if(chunk == 0)
+                       break;
+               if(chunk > len - total)
+                       chunk = len - total;
+               memmove(dst, cur->start, chunk);
+               dst += chunk;
+               total += chunk;
+
+               /* buffer only partly consumed: remember the position and stop */
+               if(cur->start + chunk < cur->end){
+                       if(!peek)
+                               cur->start += chunk;
+                       break;
+               }
+
+               nxt = cur->next;
+               if(peek)
+                       continue;
+
+               /* buffer drained: unlink it from the queue, put it on the spare list */
+               b->qh = nxt;
+               if(nxt == nil)
+                       b->qt = &b->qh;
+               cur->next = b->qf;
+               b->qf = cur;
+       }
+
+       /* room was made: release a reader proc parked on a full queue */
+       if(b->wr && !peek){
+               b->wr = 0;
+               while(rendezvous(&b->wr, 0) == (void*)~0)
+                       ;
+       }
+out:
+       qunlock(b);
+
+       return total;
+}
+
+/*
+ * Poll helper: register file on the Bufproc's wait queue through
+ * pollwait() and report the current state as a POLL* mask:
+ * POLLOUT while the reader's fd is still valid, POLLERR when it is
+ * not and an error was recorded, POLLIN when buffers are queued.
+ */
+int pollbufproc(void *bp, Ufile *file, void *tab)
+{
+       Bufproc *b;
+       int events;
+
+       b = bp;
+       qlock(b);
+       pollwait(file, &b->wq, tab);
+       if(b->fd < 0)
+               events = (b->error < 0) ? POLLERR : 0;
+       else
+               events = POLLOUT;
+       if(b->qh != nil)
+               events |= POLLIN;
+       qunlock(b);
+
+       return events;
+}
+
+/*
+ * Return the number of unread bytes currently queued in the
+ * Bufproc bp, measured under its lock.
+ */
+int nreadablebufproc(void *bp)
+{
+       Bufproc *b;
+       int nbytes;
+
+       b = bp;
+       qlock(b);
+       nbytes = queuesize(b->qh);
+       qunlock(b);
+       return nbytes;
+}
diff --git a/linux_emul_base/consdev.c b/linux_emul_base/consdev.c
new file mode 100644 (file)
index 0000000..ace58d0
--- /dev/null
@@ -0,0 +1,157 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Cons Cons;
+
+struct Cons     /* console file: a Ufile plus an optional bufproc handle */
+{
+       Ufile;  /* unnamed member; the device hooks cast Ufile* to Cons* */
+       void    *bufproc;       /* nil until bufproccons() calls newbufproc(0) */
+};
+
+/*
+ * Close hook: passes the console's bufproc pointer to freebufproc
+ * and always returns 0.
+ * NOTE(review): bufproc may still be nil here (it is created lazily);
+ * confirm freebufproc accepts nil.
+ */
+static int
+closecons(Ufile *file)
+{
+       freebufproc(((Cons*)file)->bufproc);
+       return 0;
+}
+
+/*
+ * Returns the console's bufproc, creating it with newbufproc(0) and
+ * caching it in cons->bufproc the first time it is needed.
+ */
+static void*
+bufproccons(Cons *cons)
+{
+       void *bp;
+
+       bp = cons->bufproc;
+       if(bp == nil){
+               bp = newbufproc(0);
+               cons->bufproc = bp;
+       }
+       return bp;
+}
+
+/*
+ * Poll hook: forwards to pollbufproc on the console's bufproc,
+ * creating the bufproc first if it does not exist yet.
+ */
+static int
+pollcons(Ufile *file, void *tab)
+{
+       Cons *cons;
+       void *bp;
+
+       cons = (Cons*)file;
+       bp = bufproccons(cons);
+       return pollbufproc(bp, cons, tab);
+}
+
+/*
+ * Read hook.  If O_NONBLOCK is set in the file mode, or a bufproc
+ * already exists, the read goes through readbufproc (peek argument 0,
+ * last argument the O_NONBLOCK bit).  Otherwise it reads fd 0 directly,
+ * bracketed by notifyme(1)/notifyme(0):
+ *   - a nonzero notifyme(1) yields -ERESTART;
+ *   - a negative read result is replaced by mkerror().
+ * The offset argument is unused.
+ */
+static int
+readcons(Ufile *file, void *buf, int len, vlong)
+{
+       Cons *cons;
+       int nonblock, n;
+
+       cons = (Cons*)file;
+       nonblock = cons->mode & O_NONBLOCK;
+       if(nonblock || cons->bufproc != nil)
+               return readbufproc(bufproccons(cons), buf, len, 0, nonblock);
+
+       if(notifyme(1))
+               return -ERESTART;
+       n = read(0, buf, len);
+       notifyme(0);
+       if(n < 0)
+               n = mkerror();
+       return n;
+}
+
+/*
+ * Write hook: writes len bytes from buf to fd 1 between
+ * notifyme(1) and notifyme(0).  Returns -ERESTART when notifyme(1)
+ * is nonzero, mkerror() when write fails, else the write count.
+ * The file and offset arguments are unused.
+ */
+static int
+writecons(Ufile *, void *buf, int len, vlong)
+{
+       int n;
+
+       if(notifyme(1))
+               return -ERESTART;
+
+       n = write(1, buf, len);
+       notifyme(0);
+       return n < 0 ? mkerror() : n;
+}
+
+/*
+ * Ioctl hook.  Only command 0x541B is handled (listed as
+ * FIONREAD/TIOCINQ, argument int *, in doc/ioctl_list.txt); any other
+ * command returns -ENOTTY.
+ *
+ * For 0x541B: a nil arg returns -EINVAL.  Otherwise the value of
+ * nreadablebufproc is stored through arg and 0 is returned; if that
+ * value is negative, 0 is stored instead and the negative value is
+ * returned.
+ */
+static int
+ioctlcons(Ufile *file, int cmd, void *arg)
+{
+       int r;
+
+       if(cmd != 0x541B)
+               return -ENOTTY;
+       if(arg == nil)
+               return -EINVAL;
+
+       r = nreadablebufproc(bufproccons((Cons*)file));
+       if(r < 0){
+               *((int*)arg) = 0;
+               return r;
+       }
+       *((int*)arg) = r;
+       return 0;
+}
+
+/*
+ * Open hook for "/dev/cons".
+ *
+ * Any other path returns -ENOENT.  On success a zeroed Cons is
+ * allocated, initialised with ref 1, the caller's mode, device id
+ * CONSDEV, fd 0 and a kstrdup'ed copy of path, and stored in *pf;
+ * the function then returns 0.  The permission argument is unused.
+ *
+ * Returns -ENOMEM (leaving *pf untouched) if mallocz fails, instead
+ * of dereferencing nil.
+ */
+static int
+opencons(char *path, int mode, int, Ufile **pf)
+{
+       Cons *file;
+
+       if(strcmp(path, "/dev/cons")!=0)
+               return -ENOENT;
+
+       file = mallocz(sizeof(Cons), 1);
+       if(file == nil)
+               return -ENOMEM;
+       file->ref = 1;
+       file->mode = mode;
+       file->dev = CONSDEV;
+       file->fd = 0;
+       file->path = kstrdup(path);
+       *pf = file;
+
+       return 0;
+}
+
+/*
+ * Stat hook for "/dev/cons"; any other path returns -ENOENT.
+ * Fills mode (0666 | S_IFCHR), uid and gid (from the current proc),
+ * size 0, ino (hashpath of the path), dev 0 and rdev 0, then
+ * returns 0.  atime, mtime and ctime are left untouched.
+ * The link argument is unused.
+ */
+static int
+statcons(char *path, int, Ustat *s)
+{
+       if(strcmp(path, "/dev/cons") == 0){
+               s->dev = 0;
+               s->rdev = 0;
+               s->ino = hashpath(path);
+               s->size = 0;
+               s->uid = current->uid;
+               s->gid = current->gid;
+               s->mode = S_IFCHR | 0666;
+               return 0;
+       }
+       return -ENOENT;
+}
+
+/*
+ * Fstat hook: stats the open file by its stored path, calling
+ * fsstat with link argument 0, and returns fsstat's result.
+ */
+static int
+fstatcons(Ufile *f, Ustat *s)
+{
+       return fsstat(f->path, 0, s);
+}
+
+static Udev consdev = /* console hook table; Udev members not named here stay zero */
+{
+       .open = opencons,
+       .read = readcons,
+       .write = writecons,
+       .poll = pollcons,
+       .close = closecons,
+       .ioctl = ioctlcons,
+       .fstat = fstatcons,
+       .stat = statcons,
+};
+
+/*
+ * Registers the console device: stores &consdev in devtab[CONSDEV]
+ * and then passes it to fsmount for the path "/dev/cons".
+ */
+void consdevinit(void)
+{
+       Udev *dev;
+
+       dev = &consdev;
+       devtab[CONSDEV] = dev;
+       fsmount(dev, "/dev/cons");
+}
diff --git a/linux_emul_base/dat.h b/linux_emul_base/dat.h
new file mode 100644 (file)
index 0000000..f3cf4e0
--- /dev/null
@@ -0,0 +1,281 @@
+typedef struct Ref Ref; /* forward typedefs: the structs below refer to each other by these names */
+typedef struct Urestart Urestart;
+typedef struct Uproc Uproc;
+typedef struct Uproctab Uproctab;
+typedef struct Uwaitq Uwaitq;
+typedef struct Uwait Uwait;
+
+typedef struct Udev Udev;
+typedef struct Ufile Ufile;
+typedef struct Ustat Ustat;
+typedef struct Udirent Udirent;
+
+typedef struct Ureg Ureg;
+typedef struct Usiginfo Usiginfo;
+
+enum {  /* global limits and sizes */
+       HZ = 100,       /* presumably clock ticks per second -- confirm against users */
+       PAGESIZE = 0x1000,      /* 4096 */
+
+       MAXPROC = 128,  /* number of slots in Uproctab.proc[] */
+       MAXFD   = 256,
+
+       USTACK  = 8*1024*1024,  /* 8 MiB */
+       KSTACK  = 8*1024,       /* 8 KiB */
+};
+
+struct Ref      /* a single long counter; Ufile embeds it unnamed, so files have ->ref */
+{
+       long    ref;
+};
+
+struct Urestart /* linkable record: a syscall name plus one per-syscall timeout slot */
+{
+       Urestart                *link;  /* next record; Uproc.restart points at one of these */
+       char                    *syscall;
+
+       union { /* unnamed union: every variant holds only a vlong timeout */
+               struct {
+                       vlong   timeout;
+               }                       nanosleep;
+               struct {
+                       vlong   timeout;
+               }                       poll;
+               struct {
+                       vlong   timeout;
+               }                       select;
+               struct {
+                       vlong   timeout;
+               }                       futex;
+       };
+};
+
+struct Uproc    /* per-process record; Uproctab holds MAXPROC of them, `current' names one */
+{
+       QLock;  /* unnamed member: the Uproc itself can be passed to qlock/qunlock */
+
+       int             tid;
+       int             pid;
+       int             ppid;
+       int             pgid;
+       int             psid;
+       int             uid;    /* copied into Ustat.uid by statcons */
+       int             gid;    /* copied into Ustat.gid by statcons */
+       int             umask;
+       int             tlsmask;
+
+       int             kpid;
+       int             notefd;
+       int             argsfd;
+
+       int             wstate;
+       int             wevent;
+       int             exitcode;
+       int             exitsignal;
+
+       int             *cleartidptr;
+
+       vlong   timeout;
+
+       vlong   alarm;
+       Uproc   *alarmq;
+
+       char    *state;
+       char    *xstate;
+       int             innote;
+       int             notified;
+       Ureg            *ureg;
+       char            *syscall;
+       void            (*sysret)(int errno);
+       Urestart        *restart;
+       Urestart        restart0;       /* one Urestart stored inline in the Uproc */
+       Uwait   *freewait;
+
+       void            (*traceproc)(void *arg);
+       void            *tracearg;
+
+       int             linkloop;
+       char            *root;
+       char            *cwd;
+       char            *kcwd;
+
+       void            *fdtab; /* opaque here; the concrete types are not visible in this header */
+       void            *mem;
+       void            *trace;
+       void            *signal;
+
+       char            *comm;
+       int             ncomm;
+       ulong   codestart;
+       ulong   codeend;
+       ulong   stackstart;
+       vlong   starttime;
+};
+
+struct Uproctab /* fixed-size table of Uproc slots with an embedded QLock */
+{
+       QLock;
+       int             nextpid;
+       int             alloc;
+       Uproc   proc[MAXPROC];  /* MAXPROC is 128 */
+};
+
+struct Uwaitq   /* wait queue: an embedded QLock plus a pointer to Uwait entries */
+{
+       QLock;
+       Uwait   *w;
+};
+
+struct Uwait    /* wait entry tying a Uproc and a Ufile to a Uwaitq; carries two link fields */
+{
+       Uwait   *next;
+       Uwaitq  *q;
+       Uwait   *nextq; /* presumably the link within q's list -- confirm against pollwait */
+       Uproc   *proc;
+       Ufile   *file;
+};
+
+enum {  /* device ids; used as indices into devtab[] and stored in Ufile.dev */
+       ROOTDEV,
+       SOCKDEV,
+       PIPEDEV,
+       CONSDEV,
+       MISCDEV,
+       DSPDEV,
+       PTYDEV,
+       PROCDEV,
+       MAXDEV, /* count of ids above; the length of devtab[] */
+};
+
+/* device */
+struct Udev     /* table of per-device hooks; a device may leave hooks unset (consdev sets only eight) */
+{
+       int             (*open)(char *path, int mode, int perm, Ufile **pf);    /* path-based hooks */
+       int             (*access)(char *path, int perm);
+       int             (*stat)(char *path, int link, Ustat *ps);
+
+       int             (*link)(char *old, char *new, int sym);
+       int             (*unlink)(char *path, int rmdir);
+       int             (*readlink)(char *path, char *buf, int len);
+       int             (*rename)(char *old, char *new);
+       int             (*mkdir)(char *path, int mode);
+       int             (*utime)(char *path, long atime, long mtime);
+       int             (*chmod)(char *path, int mode);
+       int             (*chown)(char *path, int uid, int gid, int link);
+       int             (*truncate)(char *path, vlong size);
+
+       int             (*read)(Ufile *file, void *buf, int len, vlong off);    /* hooks on an open Ufile */
+       int             (*write)(Ufile *file, void *buf, int len, vlong off);
+
+       vlong   (*size)(Ufile *file);
+       int             (*poll)(Ufile *file, void *tab);
+       int             (*ioctl)(Ufile *file, int cmd, void *arg);
+       int             (*close)(Ufile *file);
+
+       int             (*fstat)(Ufile *file, Ustat *ps);
+       int             (*readdir)(Ufile *file, Udirent **pd);
+       
+       int             (*fchmod)(Ufile *file, int mode);
+       int             (*fchown)(Ufile *file, int uid, int gid);
+       int             (*ftruncate)(Ufile *file, vlong size);
+};
+
+struct Ufile    /* common header of an open file; devices embed it first (see struct Cons) */
+{
+       Ref;    /* unnamed member providing ->ref; opencons starts it at 1 */
+
+       int             mode;   /* open mode; readcons tests O_NONBLOCK in it */
+       int             dev;    /* device id such as CONSDEV */
+       char            *path;
+       int             fd;
+       vlong   off;
+
+       Udirent *rdaux; /* aux pointer to hold Udirent* chains */
+};
+
+struct Ustat    /* stat result filled by the stat/fstat hooks */
+{
+       int             mode;   /* permission bits or-ed with a type flag, e.g. 0666 | S_IFCHR */
+       int             uid;
+       int             gid;
+       int             dev;
+       int             rdev;
+       vlong   size;
+       ulong   atime;  /* statcons does not set atime, mtime or ctime */
+       ulong   mtime;
+       ulong   ctime;
+       uvlong  ino;    /* statcons derives it with hashpath(path) */
+};
+
+struct Udirent  /* directory entry, chained through next */
+{
+       Udirent *next;
+
+       uvlong  ino;
+       int             mode;
+       char    name[]; /* flexible array member: the name is stored after the struct */
+};
+
+struct Usiginfo /* signal description: signo/errno/code, a per-signal union, then topid and group */
+{
+       int             signo;
+       int             errno;
+       int             code;
+
+       union { /* unnamed union; the comments below say which signals use which variant */
+               /* kill() */
+               struct {
+                       int     pid;            /* sender's pid */
+                       int     uid;            /* sender's uid */
+               } kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       int     tid;                    /* timer id */
+                       int     overrun;                /* overrun count */
+                       int     val;                    /* same as below */
+                       int     sys_private;    /* not to be passed to user */
+               } timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       int     pid;                    /* sender's pid */
+                       int     uid;                    /* sender's uid */
+                       int     val;
+               } rt;
+
+               /* SIGCHLD */
+               struct {
+                       int     pid;                    /* which child */
+                       int     uid;                    /* sender's uid */
+                       int     status;                 /* exit code */
+                       long    utime;
+                       long    stime;
+               } chld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       void    *addr;          /* faulting insn/memory ref. */
+                       int     trapno;         /* TRAP # which caused the signal */
+               } fault;
+
+               /* SIGPOLL */
+               struct {
+                       long    band;           /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int     fd;
+               } poll;
+       };
+
+       int             topid;  /* not part of the union; presumably the delivery target -- confirm */
+       int             group;
+};
+
+int debug;      /* NOTE(review): these are definitions, not extern declarations, in a header -- confirm the toolchain merges them */
+long *kstack;
+long *exitjmp;
+Uproc **pcurrent;       /* the `current' macro below dereferences this once */
+#define current (*pcurrent)
+vlong boottime;
+
+Udev *devtab[MAXDEV];   /* indexed by device id, e.g. devtab[CONSDEV] = &consdev */
+Uproctab proctab;
diff --git a/linux_emul_base/doc/ioctl_list.txt b/linux_emul_base/doc/ioctl_list.txt
new file mode 100644 (file)
index 0000000..5caa948
--- /dev/null
@@ -0,0 +1,612 @@
+Ubuntu Manpage Repository
+
+Provided by: manpages-de-dev_0.5-2ubuntu1_all
+
+BEZEICHNUNG
+
+        ioctl_list - Liste der ioctl-Aufrufe im Linux/i386-Kernel
+
+BESCHREIBUNG
+
+        Dies  ist  die  Ioctl-Liste  1.3.27,  eine  Liste von ioctl-Aufrufen im
+        Linux/i386-Kernel    1.3.27.     Sie    enthält    421    ioctls    aus
+        /usr/include/{asm,linux}/*.h.   Für  jeden  ioctl  wird  der numerische
+        Wert, der Name und der Argumenttyp aufgelistet.
+
+        Ein Argumenttyp const struct foo * bedeutet, dass das Argument  Eingabe
+        für  den  Kernel ist.  struct foo * bedeutet, der Kernel gibt das Argu‐
+        ment aus.  Wenn der Kernel das Argument für Ein- und  Ausgabe  benutzt,
+        wird dies durch // I-O markiert.
+
+        Einige ioctls benötigen mehr Argumente oder geben mehr Werte zurück als
+        eine einzige Struktur.  Diese werden durch // MORE markiert und  weiter
+        dokumentiert in einem separaten Abschnitt.
+
+        Diese Liste ist nicht vollständig.  Sie enthält nicht:
+
+                 Ioctls, die intern im Kernel definiert sind (scsi_ioctl.h).
+
+                 Ioctls,  die  in  Modulen definiert sind, die separat vom Kernel
+               verbreitet werden.
+
+        Und natürlich hat die Liste Fehler und Auslassungen.
+
+        Bitte  wenden  Sie   sich   wegen   Änderungen   und   Kommentaren   an
+        <mec@duracef.shout.net>.   Ich  bin  besonders interessiert an Modulen,
+        die ihre eigenen ioctls definieren.  Wenn Sie solch ein  Modul  kennen,
+        teilen  Sie  es  mir bitte mit, damit ich es mir per ftp besorgen kann,
+        und ich berücksichtige seine ioctls  in  der  nächsten  Ausgabe  dieser
+        Liste.
+
+        Bitte  wenden  Sie  sich  wegen  der Übersetzung ins  Deutsche nicht an
+        <krd@burn.rhein-ruhr.de>. ;-)
+
+Haupttabelle
+
+    // <include/asm-i386/socket.h>
+        0x00008901  FIOSETOWN                   const int *
+        0x00008902  SIOCSPGRP                   const int *
+        0x00008903  FIOGETOWN                   int *
+        0x00008904  SIOCGPGRP                   int *
+        0x00008905  SIOCATMARK                  int *
+        0x00008906  SIOCGSTAMP                  timeval *
+
+    // <include/asm-i386/termios.h>
+        0x00005401  TCGETS                      struct termios *
+        0x00005402  TCSETS                      const struct termios *
+        0x00005403  TCSETSW                     const struct termios *
+        0x00005404  TCSETSF                     const struct termios *
+        0x00005405  TCGETA                      struct termio *
+        0x00005406  TCSETA                      const struct termio *
+        0x00005407  TCSETAW                     const struct termio *
+        0x00005408  TCSETAF                     const struct termio *
+        0x00005409  TCSBRK                      int
+        0x0000540A  TCXONC                      int
+        0x0000540B  TCFLSH                      int
+        0x0000540C  TIOCEXCL                    void
+        0x0000540D  TIOCNXCL                    void
+        0x0000540E  TIOCSCTTY                   int
+        0x0000540F  TIOCGPGRP                   pid_t *
+        0x00005410  TIOCSPGRP                   const pid_t *
+        0x00005411  TIOCOUTQ                    int *
+        0x00005412  TIOCSTI                     const char *
+        0x00005413  TIOCGWINSZ                  struct winsize *
+        0x00005414  TIOCSWINSZ                  const struct winsize *
+        0x00005415  TIOCMGET                    int *
+        0x00005416  TIOCMBIS                    const int *
+        0x00005417  TIOCMBIC                    const int *
+        0x00005418  TIOCMSET                    const int *
+        0x00005419  TIOCGSOFTCAR                int *
+        0x0000541A  TIOCSSOFTCAR                const int *
+        0x0000541B  FIONREAD                    int *
+        0x0000541B  TIOCINQ                     int *
+        0x0000541C  TIOCLINUX                   const char *                    // MORE
+        0x0000541D  TIOCCONS                    void
+        0x0000541E  TIOCGSERIAL                 struct serial_struct *
+        0x0000541F  TIOCSSERIAL                 const struct serial_struct *
+        0x00005420  TIOCPKT                     const int *
+        0x00005421  FIONBIO                     const int *
+        0x00005422  TIOCNOTTY                   void
+        0x00005423  TIOCSETD                    const int *
+        0x00005424  TIOCGETD                    int *
+        0x00005425  TCSBRKP                     int
+        0x00005426  TIOCTTYGSTRUCT              struct tty_struct *
+        0x00005450  FIONCLEX                    void
+        0x00005451  FIOCLEX                     void
+        0x00005452  FIOASYNC                    const int *
+        0x00005453  TIOCSERCONFIG               void
+        0x00005454  TIOCSERGWILD                int *
+        0x00005455  TIOCSERSWILD                const int *
+        0x00005456  TIOCGLCKTRMIOS              struct termios *
+        0x00005457  TIOCSLCKTRMIOS              const struct termios *
+        0x00005458  TIOCSERGSTRUCT              struct async_struct *
+        0x00005459  TIOCSERGETLSR               int *
+        0x0000545A  TIOCSERGETMULTI             struct serial_multiport_struct *
+        0x0000545B  TIOCSERSETMULTI             const struct serial_multiport_struct *
+
+    // <include/linux/ax25.h>
+        0x000089E0  SIOCAX25GETUID              const struct sockaddr_ax25 *
+        0x000089E1  SIOCAX25ADDUID              const struct sockaddr_ax25 *
+        0x000089E2  SIOCAX25DELUID              const struct sockaddr_ax25 *
+        0x000089E3  SIOCAX25NOUID               const int *
+        0x000089E4  SIOCAX25DIGCTL              const int *
+        0x000089E5  SIOCAX25GETPARMS            struct ax25_parms_struct *      // I-O
+        0x000089E6  SIOCAX25SETPARMS            const struct ax25_parms_struct *
+
+    // <include/linux/cdk.h>
+        0x00007314  STL_BINTR                   void
+        0x00007315  STL_BSTART                  void
+        0x00007316  STL_BSTOP                   void
+        0x00007317  STL_BRESET                  void
+
+    // <include/linux/cdrom.h>
+        0x00005301  CDROMPAUSE                  void
+        0x00005302  CDROMRESUME                 void
+        0x00005303  CDROMPLAYMSF                const struct cdrom_msf *
+        0x00005304  CDROMPLAYTRKIND             const struct cdrom_ti *
+        0x00005305  CDROMREADTOCHDR             struct cdrom_tochdr *
+        0x00005306  CDROMREADTOCENTRY           struct cdrom_tocentry *         // I-O
+        0x00005307  CDROMSTOP                   void
+        0x00005308  CDROMSTART                  void
+        0x00005309  CDROMEJECT                  void
+        0x0000530A  CDROMVOLCTRL                const struct cdrom_volctrl *
+        0x0000530B  CDROMSUBCHNL                struct cdrom_subchnl *          // I-O
+        0x0000530C  CDROMREADMODE2              const struct cdrom_msf *        // MORE
+        0x0000530D  CDROMREADMODE1              const struct cdrom_msf *        // MORE
+        0x0000530E  CDROMREADAUDIO              const struct cdrom_read_audio * // MORE
+        0x0000530F  CDROMEJECT_SW               int
+        0x00005310  CDROMMULTISESSION           struct cdrom_multisession *     // I-O
+        0x00005311  CDROM_GET_UPC               struct { char [8]; } *
+        0x00005312  CDROMRESET                  void
+        0x00005313  CDROMVOLREAD                struct cdrom_volctrl *
+        0x00005314  CDROMREADRAW                const struct cdrom_msf *        // MORE
+        0x00005315  CDROMREADCOOKED             const struct cdrom_msf *        // MORE
+        0x00005316  CDROMSEEK                   const struct cdrom_msf *
+
+    // <include/linux/cm206.h>
+        0x00002000  CM206CTL_GET_STAT           int
+        0x00002001  CM206CTL_GET_LAST_STAT      int
+
+    // <include/linux/cyclades.h>
+        0x00435901  CYGETMON                    struct cyclades_monitor *
+        0x00435902  CYGETTHRESH                 int *
+        0x00435903  CYSETTHRESH                 int
+        0x00435904  CYGETDEFTHRESH              int *
+        0x00435905  CYSETDEFTHRESH              int
+        0x00435906  CYGETTIMEOUT                int *
+        0x00435907  CYSETTIMEOUT                int
+        0x00435908  CYGETDEFTIMEOUT             int *
+        0x00435909  CYSETDEFTIMEOUT             int
+
+    // <include/linux/ext2_fs.h>
+        0x80046601  EXT2_IOC_GETFLAGS           int *
+        0x40046602  EXT2_IOC_SETFLAGS           const int *
+        0x80047601  EXT2_IOC_GETVERSION         int *
+        0x40047602  EXT2_IOC_SETVERSION         const int *
+
+    // <include/linux/fd.h>
+        0x00000000  FDCLRPRM                    void
+        0x00000001  FDSETPRM                    const struct floppy_struct *
+        0x00000002  FDDEFPRM                    const struct floppy_struct *
+        0x00000003  FDGETPRM                    struct floppy_struct *
+        0x00000004  FDMSGON                     void
+        0x00000005  FDMSGOFF                    void
+        0x00000006  FDFMTBEG                    void
+        0x00000007  FDFMTTRK                    const struct format_descr *
+        0x00000008  FDFMTEND                    void
+        0x0000000A  FDSETEMSGTRESH              int
+        0x0000000B  FDFLUSH                     void
+        0x0000000C  FDSETMAXERRS                const struct floppy_max_errors *
+        0x0000000E  FDGETMAXERRS                struct floppy_max_errors *
+        0x00000010  FDGETDRVTYP                 struct { char [16]; } *
+        0x00000014  FDSETDRVPRM                 const struct floppy_drive_params *
+        0x00000015  FDGETDRVPRM                 struct floppy_drive_params *
+        0x00000016  FDGETDRVSTAT                struct floppy_drive_struct *
+        0x00000017  FDPOLLDRVSTAT               struct floppy_drive_struct *
+        0x00000018  FDRESET                     int
+        0x00000019  FDGETFDCSTAT                struct floppy_fdc_state *
+        0x0000001B  FDWERRORCLR                 void
+        0x0000001C  FDWERRORGET                 struct floppy_write_errors *
+        0x0000001E  FDRAWCMD                    struct floppy_raw_cmd * // MORE // I-O
+        0x00000028  FDTWADDLE                   void
+
+    // <include/linux/fs.h>
+        0x0000125D  BLKROSET                    const int *
+        0x0000125E  BLKROGET                    int *
+        0x0000125F  BLKRRPART                   void
+        0x00001260  BLKGETSIZE                  int *
+        0x00001261  BLKFLSBUF                   void
+        0x00001262  BLKRASET                    int
+        0x00001263  BLKRAGET                    int *
+        0x00000001  FIBMAP                      int *                           // I-O
+        0x00000002  FIGETBSZ                    int *
+
+    // <include/linux/hdreg.h>
+        0x00000301  HDIO_GETGEO                 struct hd_geometry *
+        0x00000302  HDIO_GET_UNMASKINTR         int *
+        0x00000304  HDIO_GET_MULTCOUNT          int *
+        0x00000307  HDIO_GET_IDENTITY           struct hd_driveid *
+        0x00000308  HDIO_GET_KEEPSETTINGS       int *
+        0x00000309  HDIO_GET_CHIPSET            int *
+        0x0000030A  HDIO_GET_NOWERR             int *
+        0x0000030B  HDIO_GET_DMA                int *
+        0x0000031F  HDIO_DRIVE_CMD              int *                           // I-O
+        0x00000321  HDIO_SET_MULTCOUNT          int
+        0x00000322  HDIO_SET_UNMASKINTR         int
+        0x00000323  HDIO_SET_KEEPSETTINGS       int
+        0x00000324  HDIO_SET_CHIPSET            int
+        0x00000325  HDIO_SET_NOWERR             int
+        0x00000326  HDIO_SET_DMA                int
+
+    // <include/linux/if_eql.h>
+        0x000089F0  EQL_ENSLAVE                 struct ifreq *          // MORE // I-O
+        0x000089F1  EQL_EMANCIPATE              struct ifreq *          // MORE // I-O
+        0x000089F2  EQL_GETSLAVECFG             struct ifreq *          // MORE // I-O
+        0x000089F3  EQL_SETSLAVECFG             struct ifreq *          // MORE // I-O
+        0x000089F4  EQL_GETMASTRCFG             struct ifreq *          // MORE // I-O
+        0x000089F5  EQL_SETMASTRCFG             struct ifreq *          // MORE // I-O
+
+    // <include/linux/if_plip.h>
+        0x000089F0  SIOCDEVPLIP                 struct ifreq *                  // I-O
+
+    // <include/linux/if_ppp.h>
+        0x00005490  PPPIOCGFLAGS                int *
+        0x00005491  PPPIOCSFLAGS                const int *
+        0x00005492  PPPIOCGASYNCMAP             int *
+        0x00005493  PPPIOCSASYNCMAP             const int *
+        0x00005494  PPPIOCGUNIT                 int *
+        0x00005495  PPPIOCSINPSIG               const int *
+        0x00005497  PPPIOCSDEBUG                const int *
+        0x00005498  PPPIOCGDEBUG                int *
+        0x00005499  PPPIOCGSTAT                 struct ppp_stats *
+        0x0000549A  PPPIOCGTIME                 struct ppp_ddinfo *
+        0x0000549B  PPPIOCGXASYNCMAP            struct { int [8]; } *
+        0x0000549C  PPPIOCSXASYNCMAP            const struct { int [8]; } *
+        0x0000549D  PPPIOCSMRU                  const int *
+        0x0000549E  PPPIOCRASYNCMAP             const int *
+        0x0000549F  PPPIOCSMAXCID               const int *
+
+    // <include/linux/ipx.h>
+        0x000089E0  SIOCAIPXITFCRT              const char *
+        0x000089E1  SIOCAIPXPRISLT              const char *
+        0x000089E2  SIOCIPXCFGDATA              struct ipx_config_data *
+
+    // <include/linux/kd.h>
+        0x00004B60  GIO_FONT                    struct { char [8192]; } *
+        0x00004B61  PIO_FONT                    const struct { char [8192]; } *
+        0x00004B6B  GIO_FONTX                   struct console_font_desc *  // MORE I-O
+        0x00004B6C  PIO_FONTX                   const struct console_font_desc * //MORE
+        0x00004B70  GIO_CMAP                    struct { char [48]; } *
+        0x00004B71  PIO_CMAP                    const struct { char [48]; } *
+        0x00004B2F  KIOCSOUND                   int
+        0x00004B30  KDMKTONE                    int
+        0x00004B31  KDGETLED                    char *
+        0x00004B32  KDSETLED                    int
+        0x00004B33  KDGKBTYPE                   char *
+        0x00004B34  KDADDIO                     int                             // MORE
+        0x00004B35  KDDELIO                     int                             // MORE
+        0x00004B36  KDENABIO                    void                            // MORE
+        0x00004B37  KDDISABIO                   void                            // MORE
+        0x00004B3A  KDSETMODE                   int
+        0x00004B3B  KDGETMODE                   int *
+        0x00004B3C  KDMAPDISP                   void                            // MORE
+        0x00004B3D  KDUNMAPDISP                 void                            // MORE
+        0x00004B40  GIO_SCRNMAP                 struct { char [E_TABSZ]; } *
+        0x00004B41  PIO_SCRNMAP                 const struct { char [E_TABSZ]; } *
+        0x00004B69  GIO_UNISCRNMAP              struct { short [E_TABSZ]; } *
+        0x00004B6A  PIO_UNISCRNMAP              const struct { short [E_TABSZ]; } *
+        0x00004B66  GIO_UNIMAP                  struct unimapdesc *     // MORE // I-O
+        0x00004B67  PIO_UNIMAP                  const struct unimapdesc *       // MORE
+        0x00004B68  PIO_UNIMAPCLR               const struct unimapinit *
+        0x00004B44  KDGKBMODE                   int *
+        0x00004B45  KDSKBMODE                   int
+        0x00004B62  KDGKBMETA                   int *
+        0x00004B63  KDSKBMETA                   int
+        0x00004B64  KDGKBLED                    int *
+        0x00004B65  KDSKBLED                    int
+        0x00004B46  KDGKBENT                    struct kbentry *                // I-O
+        0x00004B47  KDSKBENT                    const struct kbentry *
+        0x00004B48  KDGKBSENT                   struct kbsentry *               // I-O
+        0x00004B49  KDSKBSENT                   const struct kbsentry *
+        0x00004B4A  KDGKBDIACR                  struct kbdiacrs *
+        0x00004B4B  KDSKBDIACR                  const struct kbdiacrs *
+        0x00004B4C  KDGETKEYCODE                struct kbkeycode *              // I-O
+        0x00004B4D  KDSETKEYCODE                const struct kbkeycode *
+        0x00004B4E  KDSIGACCEPT                 int
+
+    // <include/linux/lp.h>
+        0x00000601  LPCHAR                      int
+        0x00000602  LPTIME                      int
+        0x00000604  LPABORT                     int
+        0x00000605  LPSETIRQ                    int
+        0x00000606  LPGETIRQ                    int *
+        0x00000608  LPWAIT                      int
+        0x00000609  LPCAREFUL                   int
+        0x0000060A  LPABORTOPEN                 int
+        0x0000060B  LPGETSTATUS                 int *
+        0x0000060C  LPRESET                     void
+        0x0000060D  LPGETSTATS                  struct lp_stats *
+
+    // <include/linux/mroute.h>
+        0x000089E0  SIOCGETVIFCNT               struct sioc_vif_req *           // I-O
+        0x000089E1  SIOCGETSGCNT                struct sioc_sg_req *            // I-O
+
+    // <include/linux/mtio.h>
+        0x40086D01  MTIOCTOP                    const struct mtop *
+        0x801C6D02  MTIOCGET                    struct mtget *
+        0x80046D03  MTIOCPOS                    struct mtpos *
+        0x80206D04  MTIOCGETCONFIG              struct mtconfiginfo *
+        0x40206D05  MTIOCSETCONFIG              const struct mtconfiginfo *
+
+    // <include/linux/netrom.h>
+        0x000089E0  SIOCNRGETPARMS              struct nr_parms_struct *        // I-O
+        0x000089E1  SIOCNRSETPARMS              const struct nr_parms_struct *
+        0x000089E2  SIOCNRDECOBS                void
+        0x000089E3  SIOCNRRTCTL                 const int *
+
+    // <include/linux/sbpcd.h>
+        0x00009000  DDIOCSDBG                   const int *
+        0x00005382  CDROMAUDIOBUFSIZ            int
+
+    // <include/linux/scc.h>
+        0x00005470  TIOCSCCINI                  void
+        0x00005471  TIOCCHANINI                 const struct scc_modem *
+        0x00005472  TIOCGKISS                   struct ioctl_command *          // I-O
+        0x00005473  TIOCSKISS                   const struct ioctl_command *
+        0x00005474  TIOCSCCSTAT                 struct scc_stat *
+
+    // <include/linux/scsi.h>
+        0x00005382  SCSI_IOCTL_GET_IDLUN        struct { int [2]; } *
+        0x00005383  SCSI_IOCTL_TAGGED_ENABLE    void
+        0x00005384  SCSI_IOCTL_TAGGED_DISABLE   void
+        0x00005385  SCSI_IOCTL_PROBE_HOST       const int *                     // MORE
+
+    // <include/linux/smb_fs.h>
+        0x80027501  SMB_IOC_GETMOUNTUID         uid_t *
+
+    // <include/linux/sockios.h>
+        0x0000890B  SIOCADDRT                   const struct rtentry *          // MORE
+        0x0000890C  SIOCDELRT                   const struct rtentry *          // MORE
+        0x00008910  SIOCGIFNAME                 char []
+        0x00008911  SIOCSIFLINK                 void
+        0x00008912  SIOCGIFCONF                 struct ifconf *         // MORE // I-O
+        0x00008913  SIOCGIFFLAGS                struct ifreq *                  // I-O
+        0x00008914  SIOCSIFFLAGS                const struct ifreq *
+        0x00008915  SIOCGIFADDR                 struct ifreq *                  // I-O
+        0x00008916  SIOCSIFADDR                 const struct ifreq *
+        0x00008917  SIOCGIFDSTADDR              struct ifreq *                  // I-O
+        0x00008918  SIOCSIFDSTADDR              const struct ifreq *
+        0x00008919  SIOCGIFBRDADDR              struct ifreq *                  // I-O
+        0x0000891A  SIOCSIFBRDADDR              const struct ifreq *
+        0x0000891B  SIOCGIFNETMASK              struct ifreq *                  // I-O
+        0x0000891C  SIOCSIFNETMASK              const struct ifreq *
+        0x0000891D  SIOCGIFMETRIC               struct ifreq *                  // I-O
+        0x0000891E  SIOCSIFMETRIC               const struct ifreq *
+        0x0000891F  SIOCGIFMEM                  struct ifreq *                  // I-O
+        0x00008920  SIOCSIFMEM                  const struct ifreq *
+        0x00008921  SIOCGIFMTU                  struct ifreq *                  // I-O
+        0x00008922  SIOCSIFMTU                  const struct ifreq *
+        0x00008923  OLD_SIOCGIFHWADDR           struct ifreq *                  // I-O
+        0x00008924  SIOCSIFHWADDR               const struct ifreq *            // MORE
+        0x00008925  SIOCGIFENCAP                int *
+        0x00008926  SIOCSIFENCAP                const int *
+        0x00008927  SIOCGIFHWADDR               struct ifreq *                  // I-O
+        0x00008929  SIOCGIFSLAVE                void
+        0x00008930  SIOCSIFSLAVE                void
+        0x00008931  SIOCADDMULTI                const struct ifreq *
+        0x00008932  SIOCDELMULTI                const struct ifreq *
+        0x00008940  SIOCADDRTOLD                void
+        0x00008941  SIOCDELRTOLD                void
+        0x00008950  SIOCDARP                    const struct arpreq *
+        0x00008951  SIOCGARP                    struct arpreq *                 // I-O
+        0x00008952  SIOCSARP                    const struct arpreq *
+        0x00008960  SIOCDRARP                   const struct arpreq *
+        0x00008961  SIOCGRARP                   struct arpreq *                 // I-O
+        0x00008962  SIOCSRARP                   const struct arpreq *
+        0x00008970  SIOCGIFMAP                  struct ifreq *                  // I-O
+        0x00008971  SIOCSIFMAP                  const struct ifreq *
+
+    // <include/linux/soundcard.h>
+        0x00005100  SNDCTL_SEQ_RESET            void
+        0x00005101  SNDCTL_SEQ_SYNC             void
+        0xC08C5102  SNDCTL_SYNTH_INFO           struct synth_info *             // I-O
+        0xC0045103  SNDCTL_SEQ_CTRLRATE         int *                           // I-O
+        0x80045104  SNDCTL_SEQ_GETOUTCOUNT      int *
+        0x80045105  SNDCTL_SEQ_GETINCOUNT       int *
+        0x40045106  SNDCTL_SEQ_PERCMODE         void
+        0x40285107  SNDCTL_FM_LOAD_INSTR        const struct sbi_instrument *
+        0x40045108  SNDCTL_SEQ_TESTMIDI         const int *
+        0x40045109  SNDCTL_SEQ_RESETSAMPLES     const int *
+        0x8004510A  SNDCTL_SEQ_NRSYNTHS         int *
+        0x8004510B  SNDCTL_SEQ_NRMIDIS          int *
+        0xC074510C  SNDCTL_MIDI_INFO            struct midi_info *              // I-O
+        0x4004510D  SNDCTL_SEQ_THRESHOLD        const int *
+        0xC004510E  SNDCTL_SYNTH_MEMAVL         int *                           // I-O
+        0x4004510F  SNDCTL_FM_4OP_ENABLE        const int *
+        0xCFB85110  SNDCTL_PMGR_ACCESS          struct patmgr_info *            // I-O
+        0x00005111  SNDCTL_SEQ_PANIC            void
+        0x40085112  SNDCTL_SEQ_OUTOFBAND        const struct seq_event_rec *
+        0xC0045401  SNDCTL_TMR_TIMEBASE         int *                           // I-O
+        0x00005402  SNDCTL_TMR_START            void
+        0x00005403  SNDCTL_TMR_STOP             void
+        0x00005404  SNDCTL_TMR_CONTINUE         void
+        0xC0045405  SNDCTL_TMR_TEMPO            int *                           // I-O
+        0xC0045406  SNDCTL_TMR_SOURCE           int *                           // I-O
+        0x40045407  SNDCTL_TMR_METRONOME        const int *
+        0x40045408  SNDCTL_TMR_SELECT           int *                           // I-O
+        0xCFB85001  SNDCTL_PMGR_IFACE           struct patmgr_info *            // I-O
+        0xC0046D00  SNDCTL_MIDI_PRETIME         int *                           // I-O
+        0xC0046D01  SNDCTL_MIDI_MPUMODE         const int *
+        0xC0216D02  SNDCTL_MIDI_MPUCMD          struct mpu_command_rec *        // I-O
+        0x00005000  SNDCTL_DSP_RESET            void
+        0x00005001  SNDCTL_DSP_SYNC             void
+        0xC0045002  SNDCTL_DSP_SPEED            int *                           // I-O
+        0xC0045003  SNDCTL_DSP_STEREO           int *                           // I-O
+        0xC0045004  SNDCTL_DSP_GETBLKSIZE       int *                           // I-O
+        0xC0045006  SOUND_PCM_WRITE_CHANNELS    int *                           // I-O
+        0xC0045007  SOUND_PCM_WRITE_FILTER      int *                           // I-O
+        0x00005008  SNDCTL_DSP_POST             void
+        0xC0045009  SNDCTL_DSP_SUBDIVIDE        int *                           // I-O
+        0xC004500A  SNDCTL_DSP_SETFRAGMENT      int *                           // I-O
+        0x8004500B  SNDCTL_DSP_GETFMTS          int *
+        0xC0045005  SNDCTL_DSP_SETFMT           int *                           // I-O
+        0x800C500C  SNDCTL_DSP_GETOSPACE        struct audio_buf_info *
+        0x800C500D  SNDCTL_DSP_GETISPACE        struct audio_buf_info *
+        0x0000500E  SNDCTL_DSP_NONBLOCK         void
+        0x80045002  SOUND_PCM_READ_RATE         int *
+        0x80045006  SOUND_PCM_READ_CHANNELS     int *
+        0x80045005  SOUND_PCM_READ_BITS         int *
+        0x80045007  SOUND_PCM_READ_FILTER       int *
+        0x00004300  SNDCTL_COPR_RESET           void
+        0xCFB04301  SNDCTL_COPR_LOAD            const struct copr_buffer *
+        0xC0144302  SNDCTL_COPR_RDATA           struct copr_debug_buf *         // I-O
+        0xC0144303  SNDCTL_COPR_RCODE           struct copr_debug_buf *         // I-O
+        0x40144304  SNDCTL_COPR_WDATA           const struct copr_debug_buf *
+        0x40144305  SNDCTL_COPR_WCODE           const struct copr_debug_buf *
+        0xC0144306  SNDCTL_COPR_RUN             struct copr_debug_buf *         // I-O
+        0xC0144307  SNDCTL_COPR_HALT            struct copr_debug_buf *         // I-O
+        0x4FA44308  SNDCTL_COPR_SENDMSG         const struct copr_msg *
+        0x8FA44309  SNDCTL_COPR_RCVMSG          struct copr_msg *
+        0x80044D00  SOUND_MIXER_READ_VOLUME     int *
+        0x80044D01  SOUND_MIXER_READ_BASS       int *
+        0x80044D02  SOUND_MIXER_READ_TREBLE     int *
+        0x80044D03  SOUND_MIXER_READ_SYNTH      int *
+        0x80044D04  SOUND_MIXER_READ_PCM        int *
+        0x80044D05  SOUND_MIXER_READ_SPEAKER    int *
+        0x80044D06  SOUND_MIXER_READ_LINE       int *
+        0x80044D07  SOUND_MIXER_READ_MIC        int *
+        0x80044D08  SOUND_MIXER_READ_CD         int *
+        0x80044D09  SOUND_MIXER_READ_IMIX       int *
+        0x80044D0A  SOUND_MIXER_READ_ALTPCM     int *
+        0x80044D0B  SOUND_MIXER_READ_RECLEV     int *
+        0x80044D0C  SOUND_MIXER_READ_IGAIN      int *
+        0x80044D0D  SOUND_MIXER_READ_OGAIN      int *
+        0x80044D0E  SOUND_MIXER_READ_LINE1      int *
+        0x80044D0F  SOUND_MIXER_READ_LINE2      int *
+        0x80044D10  SOUND_MIXER_READ_LINE3      int *
+        0x80044D1C  SOUND_MIXER_READ_MUTE       int *
+        0x80044D1D  SOUND_MIXER_READ_ENHANCE    int *
+        0x80044D1E  SOUND_MIXER_READ_LOUD       int *
+        0x80044DFF  SOUND_MIXER_READ_RECSRC     int *
+        0x80044DFE  SOUND_MIXER_READ_DEVMASK    int *
+        0x80044DFD  SOUND_MIXER_READ_RECMASK    int *
+        0x80044DFB  SOUND_MIXER_READ_STEREODEVS int *
+        0x80044DFC  SOUND_MIXER_READ_CAPS       int *
+        0xC0044D00  SOUND_MIXER_WRITE_VOLUME    int *                           // I-O
+        0xC0044D01  SOUND_MIXER_WRITE_BASS      int *                           // I-O
+        0xC0044D02  SOUND_MIXER_WRITE_TREBLE    int *                           // I-O
+        0xC0044D03  SOUND_MIXER_WRITE_SYNTH     int *                           // I-O
+        0xC0044D04  SOUND_MIXER_WRITE_PCM       int *                           // I-O
+        0xC0044D05  SOUND_MIXER_WRITE_SPEAKER   int *                           // I-O
+        0xC0044D06  SOUND_MIXER_WRITE_LINE      int *                           // I-O
+        0xC0044D07  SOUND_MIXER_WRITE_MIC       int *                           // I-O
+        0xC0044D08  SOUND_MIXER_WRITE_CD        int *                           // I-O
+        0xC0044D09  SOUND_MIXER_WRITE_IMIX      int *                           // I-O
+        0xC0044D0A  SOUND_MIXER_WRITE_ALTPCM    int *                           // I-O
+        0xC0044D0B  SOUND_MIXER_WRITE_RECLEV    int *                           // I-O
+        0xC0044D0C  SOUND_MIXER_WRITE_IGAIN     int *                           // I-O
+        0xC0044D0D  SOUND_MIXER_WRITE_OGAIN     int *                           // I-O
+        0xC0044D0E  SOUND_MIXER_WRITE_LINE1     int *                           // I-O
+        0xC0044D0F  SOUND_MIXER_WRITE_LINE2     int *                           // I-O
+        0xC0044D10  SOUND_MIXER_WRITE_LINE3     int *                           // I-O
+        0xC0044D1C  SOUND_MIXER_WRITE_MUTE      int *                           // I-O
+        0xC0044D1D  SOUND_MIXER_WRITE_ENHANCE   int *                           // I-O
+        0xC0044D1E  SOUND_MIXER_WRITE_LOUD      int *                           // I-O
+        0xC0044DFF  SOUND_MIXER_WRITE_RECSRC    int *                           // I-O
+
+    // <include/linux/umsdos_fs.h>
+        0x000004D2  UMSDOS_READDIR_DOS          struct umsdos_ioctl *           // I-O
+        0x000004D3  UMSDOS_UNLINK_DOS           const struct umsdos_ioctl *
+        0x000004D4  UMSDOS_RMDIR_DOS            const struct umsdos_ioctl *
+        0x000004D5  UMSDOS_STAT_DOS             struct umsdos_ioctl *           // I-O
+        0x000004D6  UMSDOS_CREAT_EMD            const struct umsdos_ioctl *
+        0x000004D7  UMSDOS_UNLINK_EMD           const struct umsdos_ioctl *
+        0x000004D8  UMSDOS_READDIR_EMD          struct umsdos_ioctl *           // I-O
+        0x000004D9  UMSDOS_GETVERSION           struct umsdos_ioctl *
+        0x000004DA  UMSDOS_INIT_EMD             void
+        0x000004DB  UMSDOS_DOS_SETUP            const struct umsdos_ioctl *
+        0x000004DC  UMSDOS_RENAME_DOS           const struct umsdos_ioctl *
+
+    // <include/linux/vt.h>
+        0x00005600  VT_OPENQRY                  int *
+        0x00005601  VT_GETMODE                  struct vt_mode *
+        0x00005602  VT_SETMODE                  const struct vt_mode *
+        0x00005603  VT_GETSTATE                 struct vt_stat *
+        0x00005604  VT_SENDSIG                  void
+        0x00005605  VT_RELDISP                  int
+        0x00005606  VT_ACTIVATE                 int
+        0x00005607  VT_WAITACTIVE               int
+        0x00005608  VT_DISALLOCATE              int
+        0x00005609  VT_RESIZE                   const struct vt_sizes *
+        0x0000560A  VT_RESIZEX                  const struct vt_consize *
+        Einige  ioctls  benötigen  einen  Pointer  auf   eine   Struktur,   die
+        zusätzliche Pointer enthält.  Diese sind hier in alphabetischer Reihen‐
+        folge dokumentiert.
+
+        CDROMREADAUDIO  benötigt   einen   Eingabe-Pointer   const   struct
+        cdrom_read_audio  *.   Das  Feld  buf zeigt auf einen Ausgabepuffer der
+        Länge nframes * CD_FRAMESIZE_RAW.
+
+        CDROMREADCOOKED,  CDROMREADMODE1,   CDROMREADMODE2   und   CDROMREADRAW
+        benötigen einen Eingabe-Pointer const struct cdrom_msf *.  Sie benutzen
+        denselben Pointer als Ausgabe-Pointer auf char [].   Die  Länge  ändert
+        sich  durch  Anforderung.   Bei  CDROMREADMODE1  benutzen  die  meisten
+        Treiber  CD_FRAMESIZE,  jedoch  benutzt  der   Optics   Storage-Treiber
+        stattdessen OPT_BLOCKSIZE (beide haben den numerischen Wert 2048).
+            CDROMREADCOOKED             char [CD_FRAMESIZE]
+            CDROMREADMODE1              char [CD_FRAMESIZE oder OPT_BLOCKSIZE]
+            CDROMREADMODE2              char [CD_FRAMESIZE_RAW0]
+            CDROMREADRAW                char [CD_FRAMESIZE_RAW]
+        EQL_ENSLAVE, EQL_EMANCIPATE, EQL_GETSLAVECFG, EQL_SETSLAVECFG,
+        EQL_GETMASTERCFG und EQL_SETMASTERCFG benötigen eine struct ifreq *.
+        Das Feld ifr_data ist ein Pointer auf eine weitere Struktur wie folgt:
+            EQL_ENSLAVE                 const struct slaving_request *
+            EQL_EMANCIPATE              const struct slaving_request *
+            EQL_GETSLAVECFG             struct slave_config *           // I-O
+            EQL_SETSLAVECFG             const struct slave_config *
+            EQL_GETMASTERCFG            struct master_config *
+            EQL_SETMASTERCFG            const struct master_config *
+        FDRAWCMD  benötigt  eine  struct  floppy_raw_cmd  *.   Wenn  flags   &
+        FD_RAW_WRITE nicht Null ist, dann zeigt data  auf  einen  Eingabepuffer
+        der  Länge length.  Wenn flags & FD_RAW_READ nicht Null ist, dann zeigt
+        data auf einen Ausgabepuffer der Länge length.
+
+        GIO_FONTX und  PIO_FONTX  benötigen  eine  struct  console_font_desc  *
+        beziehungsweise  eine const struct console_font_desc *.  chardata zeigt
+        auf einen Puffer von char [charcount].  Dies ist ein Ausgabepuffer  für
+        GIO_FONTX und ein Eingabepuffer für PIO_FONTX.
+
+        GIO_UNIMAP   und   PIO_UNIMAP   benötigen   eine  struct  unimapdesc  *
+        beziehungsweise eine const struct  unimapdesc  *.   entries  zeigt  auf
+        einen Puffer von struct unipair [entry_ct].  Dies ist ein Ausgabepuffer
+        für GIO_UNIMAP und ein Eingabepuffer für PIO_UNIMAP.
+
+        KDADDIO, KDDELIO, KDDISABIO und KDENABIO geben Zugriff frei oder  sper‐
+        ren Zugriff auf I/O-Ports.  Sie sind nötige Alternativen zu ioperm.
+
+        KDMAPDISP  und KDUNMAPDISP geben frei oder sperren Memory-Mappings oder
+        Zugriff auf I/O-Ports.  Sie sind nicht im Kernel implementiert.
+
+        SCSI_IOCTL_PROBE_HOST benötigt einen Eingabe-Pointer const int  *,  der
+        eine  Länge ist.  Es benutzt den selben Pointer als Ausgabe-Pointer auf
+        einen Puffer char [] dieser Länge.
+
+        SIOCADDRT und SIOCDELRT benötigen einen Eingabe-Pointer, dessen Typ vom
+        Protokoll abhängt:
+            Die meisten Protokolle      const struct rtentry *
+            AX.25                       const struct ax25_route *
+            NET/ROM                     const struct nr_route_struct *
+        SIOCGIFCONF  benötigt eine struct ifconf *.  Das Feld ifc_buf zeigt auf
+        einen Puffer der Länge ifc_len Byte, wohinein der Kernel eine Liste des
+        Typs struct ifreq [] schreibt.
+
+        SIOCSIFHWADDR  benötigt einen Eingabe-Pointer, dessen Typ vom Protokoll
+        abhängt:
+            Die meisten Protokolle      const struct ifreq *
+            AX.25                       const char [AX25_ADDR_LEN]
+        TIOCLINUX benötigt eine const char *.  Es  benutzt  dies,  um  zwischen
+        diversen  unabhängigen  Fällen  zu unterscheiden.  In der Tabelle unten
+        bedeutet »N + foo« so viel wie »foo« nach einem  N-byte-Block.   struct
+        selection ist definiert in drivers/char/selection.c.
+            TIOCLINUX-2                 1 + const struct selection *
+            TIOCLINUX-3                 void
+            TIOCLINUX-4                 void
+            TIOCLINUX-5                 4 + const struct { long [8]; } *
+            TIOCLINUX-6                 char *
+            TIOCLINUX-7                 char *
+            TIOCLINUX-10                1 + const char *
+
+    Doppelte ioctls
+        Diese  Liste  enthält  keine  ioctls  der  Gruppen  SIOCDEVPRIVATE  und
+        SIOCPROTOPRIVATE.
+        0x00000001  FDSETPRM                    FIBMAP
+        0x00000002  FDDEFPRM                    FIGETBSZ
+        0x00005382  CDROMAUDIOBUFSIZ            SCSI_IOCTL_GET_IDLUN
+        0x00005402  SNDCTL_TMR_START            TCSETS
+        0x00005403  SNDCTL_TMR_STOP             TCSETSW
+        0x00005404  SNDCTL_TMR_CONTINUE         TCSETSF
+
+=======
+
+Powered by the Ubuntu Manpage Repository generator
+Maintained by Dustin Kirkland
diff --git a/linux_emul_base/doc/linuxemu.txt b/linux_emul_base/doc/linuxemu.txt
new file mode 100644 (file)
index 0000000..0c587ee
--- /dev/null
@@ -0,0 +1,117 @@
+SYSCALLS
+
+on linux/i386, the machine code puts the arguments of a syscall in the
+registers AX, BX, CX, DX, DI, SI and makes a soft interrupt 0x80.
+
+as the plan9 kernel doesn't care about the interrupt vector 0x80, it
+sends a note to the process that trapped and, if not handled, kills it.
+in a note handler, it is possible to access the machine state of the
+process when the trap/interrupt happened from the ureg argument.
+
+in linuxemu, we install a note handler that checks if the trap was a
+linux syscall and calls our handler function from our systab.
+
+after our syscall handler returned, we move the program counter
+in the machine state structure after the int 0x80 instruction and
+continue execution by accepting the note as handled with a call to
+noted(NCONT).
+
+to do automatic conversion to a plan9 function call, the number of
+arguments and the function name of the handler must be known.  this
+information is provided by the linuxcalltab input file that is fed through
+linuxcalltab.awk to build the necessary tables.
+
+the linux specific syscall handling and argument conversion is done in
+linuxcall.c only.  the idea is to later add support for other syscall
+personalities like bsd without having to change the handler code.
+
+
+MEMORY
+
+unlike shared libraries which are position independent, binaries have to be
+loaded to a fixed address location. (elf supports position independent
+programs that can be loaded everywhere, but it's not used on i386)
+
+the emulator doesn't need to load and relocate shared libraries itself. this is
+done by the runtime linker (/lib/ld-linux.so). it just needs to load
+the binary and the runtime linker to their preferred location and jump into
+the entry point. then the runtime linker will parse the elf sections of the
+binary and call mmap to load further shared libraries.
+
+the first thing we need is an implementation of mmap that allows us
+to copy files to fixed addresses into memory. to do that on plan9,
+segments are used.
+
+it is not possible to create a segment for every memory mapping
+because plan9 limits the number of segments per process to a small
+number.  instead we create a fixed number of segments and
+expand/shrink them on demand.  the linux stack area is fixed size and
+uses the fact that plan9 doesn't allocate physical memory until pages
+are touched.
+
+there are 3 segments created for a linux process:
+
+"private" is used for all MAP_PRIVATE mappings and can be shared if
+processes run in the same address space. code, data and files are mapped there.
+
+"shared" for shared memory mappings.
+
+"stack" is like "private", but lives just below the plan9 stack segment.
+this is needed because glibc expands the stack down by mmap() pages
+below the current stack area. we cannot use the plan9 stack segment
+because that segment is copied on rfork and is never shared between
+processes.
+
+the data structures of the emulator itself ("kernel memory") need to
+be shared for all processes even if the linux process runs in its own
+private address space, so the plan9 Bss and Data segments are made
+shared on startup by copying the contents of the original segment into a
+temporary file, segdetach()ing it, segattach()ing a new shared segment
+at the same place and copying the data back in from the file.
+
+with this memory layout, it is possible for the linux process to damage
+data structures in the emulator. but we seem to be lucky for now :)
+
+
+USER PROCESSES (UPROCS)
+
+linuxemu does not switch and schedule linux processes itself. every user
+process has its own plan9 process. memory sharing semantics are translated
+to rfork flags on fork/clone.
+
+we have a global process table of Uproc structures to track states and
+resources for all user processes:
+
+fs: filesystem mount table
+fdtab: the filedescriptor table
+mem: memory mappings
+signal: signal handler and queue
+trace: debug trace buffer
+
+resources that can be shared are reference counted and get freed when
+the last process referencing them exits.
+
+
+KERNEL PROCESSES (KPROCS)
+
+if linuxemu needs to defer work or do asynchronous i/o, it can spawn a
+kernel process with kprocfork. kernel processes don't have a Uproc
+structure associated and have the userspace memory segments detached,
+and therefore can't access userspace memory.
+
+bufprocs and timers are implemented with kernel processes.
+
+
+DEVICES
+
+earlier versions mapped linux files directly to plan9 files.  this made
+the implementation of ioctls, symlinks, remove on close, and
+select/poll hard and also had problems with implementing fork sharing
+semantics.
+
+current linuxemu does it all by itself.  there is a global device table
+of Udev structures.  devices can implement all i/o related syscalls by
+providing a function pointer in their Udev.  when a device has to deal
+with asynchronous io on real plan9 files it uses bufprocs.
+
+
diff --git a/linux_emul_base/doc/todo.txt b/linux_emul_base/doc/todo.txt
new file mode 100644 (file)
index 0000000..a24cb3b
--- /dev/null
@@ -0,0 +1,14 @@
+- AF_INET6
+       i don't need that yet either
+
+- VDSO
+       we could gain quite a performance boost when we can avoid
+       the trapping overhead and let linux-code directly jump
+       into the linuxemu handler.
+
+- dsp / mixer
+       implement mixer ioctls in devdsp
+       mmap and trigger caps for quake
+
+- ptrace
+       implement ptrace support so we can use native debugger
diff --git a/linux_emul_base/dspdev.c b/linux_emul_base/dspdev.c
new file mode 100644 (file)
index 0000000..822ce51
--- /dev/null
@@ -0,0 +1,377 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* fixed parameters of the emulated dsp device */
+enum {
+       FREQUENCY = 44100,      /* sample rate assumed when /dev/volume yields none */
+       CHANNELS = 2,           /* channel count of the stream written to /dev/audio */
+       DELAY = 100,            /* milliseconds of audio writedsp lets the guest run ahead */
+       FRAGSIZE = 4096,        /* fragment size in bytes reported to the guest */
+};
+
+/* short names for the structures defined below */
+typedef struct Chan Chan;
+typedef struct DSP DSP;
+
+/* resampler state of one output channel, carried across resample() calls */
+struct Chan
+{
+       ulong   phase;          /* 16.16 fixed point read position for the next input buffer */
+       int             last;           /* input sample most recently decoded by resample() */
+};
+
+/*
+ * state of one open dsp file. the unnamed Ufile member comes first so
+ * that the device hooks can cast the Ufile* they receive to DSP*.
+ */
+struct DSP
+{
+       Ufile;
+
+       int             channels;               /* number of channels (2 for stereo) */
+       int             freq;                   /* frequency of sound stream */
+
+       int             rfreq;          /* frequency of /dev/audio */
+
+       uchar   *buf;                   /* resampling */
+       ulong   nbuf;                   /* allocated size of buf in bytes */
+       Chan    chan[CHANNELS];         /* resampler state, one entry per output channel */
+
+       vlong   time;                   /* time point of the last sample in device buffer */
+
+       ulong   written;                /* number of bytes written to dsp */
+       ulong   written2;               /* same as written, will be reset on every GETOPTR ioctl */
+};
+
+/*
+ * close hook of the dsp device: frees the resampling buffer and
+ * closes the descriptor kept in the file. always returns 0.
+ */
+static int
+closedsp(Ufile *file)
+{
+       DSP *d;
+
+       d = (DSP*)file;
+       trace("dsp: closedsp");
+       free(d->buf);
+       close(d->fd);
+       return 0;
+}
+
+/* poll hook: reports POLLOUT unconditionally, both arguments are ignored */
+static int
+polldsp(Ufile *, void *)
+{
+       return POLLOUT;
+}
+
+/* read hook: recording is not supported, every read returns 0 bytes */
+static int
+readdsp(Ufile *, void *, int, vlong)
+{
+       return 0;               /* not implemented */
+}
+
+/*
+ * resample one channel of 16 bit little endian samples by linear
+ * interpolation.
+ *
+ *     c       per channel state (phase, last sample) carried between calls
+ *     src     first input sample of the channel, samples are sstep bytes apart
+ *     dst     first output sample of the channel, samples are dstep bytes apart
+ *     delta   input samples consumed per output sample, 16.16 fixed point
+ *     count   number of input samples available at src
+ *
+ * returns the number of output samples stored at dst.
+ */
+static int
+resample(Chan *c, uchar *src, uchar *dst, int sstep, int dstep, ulong delta, ulong count)
+{
+       int last, val, out;
+       ulong phase, pos;
+       uchar *dp, *sp;
+
+       dp = dst;
+       last = val = c->last;
+       phase = c->phase;
+       /* upper 16 bits of phase are the input sample index */
+       pos = phase >> 16;
+       while(pos < count){
+               sp = src + sstep*pos;
+               /* assemble the little endian sample and sign extend it from 16 bits */
+               val = sp[0] | (sp[1] << 8);
+               val = (val & 0x7FFF) - (val & 0x8000);
+               /*
+                * the preceding sample is taken from this buffer; at pos 0
+                * last keeps the value saved by the previous call.
+                */
+               if(pos){
+                       sp -= sstep;
+                       last = sp[0] | (sp[1] << 8);
+                       last = (last & 0x7FFF) - (last & 0x8000);
+               }
+               /*
+                * interpolate from last towards val by the fractional part of
+                * phase. only the low 16 bits of out are stored below.
+                */
+               out = last + (((val - last) * (phase & 0xFFFF)) >> 16);
+               dp[0] = out;
+               dp[1] = out >> 8;
+               dp += dstep;
+               phase += delta;
+               pos = phase >> 16;
+       }
+       c->last = val;
+       /* save the position relative to the start of the next input buffer */
+       if(delta < 0x10000){
+               c->phase = phase & 0xFFFF;
+       } else {
+               c->phase = phase - (count << 16);
+       }
+       return (dp - dst) / dstep;
+}
+
+/*
+ * convert len bytes of guest samples at buf into the rate and channel
+ * layout written to /dev/audio. *out is set to the data to write and
+ * the number of bytes at *out is returned.
+ *
+ * when the guest format already matches, buf itself is handed back.
+ * otherwise the data is resampled into dsp->buf, which is grown on demand.
+ *
+ * NOTE(review): step and nsamp << 16 are ulong; if ulong is 32 bit
+ * here, more than 65535 samples per call would overflow - confirm.
+ */
+static int
+convertout(DSP *dsp, uchar *buf, int len, uchar **out)
+{
+       int ch, n;
+       ulong nsamp, step;
+
+       if(dsp->freq != dsp->rfreq || dsp->channels != CHANNELS){
+               /*
+                * step is the number of input samples consumed per
+                * output sample as 16.16 fixed point.
+                */
+               step = ((ulong)dsp->freq << 16) / dsp->rfreq;
+               nsamp = len / (2 * dsp->channels);
+
+               /*
+                * upper bound of the output size in bytes. the real
+                * number of output samples depends on the saved phase.
+                */
+               n = (((nsamp << 16) + step-1) / step) * 2*CHANNELS;
+               if(n > dsp->nbuf){
+                       free(dsp->buf);
+                       dsp->buf = kmalloc(n);
+                       dsp->nbuf = n;
+               }
+
+               /* input channels are reused round robin to fill all output channels */
+               for(ch = 0; ch < CHANNELS; ch++){
+                       n = resample(&dsp->chan[ch],
+                               buf + 2*(ch % dsp->channels),
+                               dsp->buf + 2*ch,
+                               2*dsp->channels,
+                               2*CHANNELS,
+                               step,
+                               nsamp);
+               }
+
+               *out = dsp->buf;
+               return n * 2*CHANNELS;
+       }
+
+       /* no conversion required */
+       *out = buf;
+       return len;
+}
+
+/*
+ * write hook of the dsp device. converts the guest samples, writes
+ * them to the audio descriptor and sleeps when more than DELAY
+ * milliseconds of audio are already queued, so the guest is paced
+ * to playback speed.
+ *
+ * returns len on success, the convertout() result when that is <= 0,
+ * or the mkerror() value when the write fails.
+ */
+static int
+writedsp(Ufile *file, void *buf, int len, vlong)
+{
+       DSP *d;
+       uchar *pcm;
+       vlong t;
+       int n, ahead;
+
+       d = (DSP*)file;
+
+       n = convertout(d, buf, len, &pcm);
+       if(n <= 0)
+               return n;
+
+       n = write(d->fd, pcm, n);
+       if(n < 0)
+               return mkerror();
+
+       t = nsec();
+       if(d->time >= t){
+               /* milliseconds of audio still queued ahead of now */
+               ahead = (d->time - t) / 1000000;
+               if(ahead > DELAY)
+                       sleep(ahead - DELAY);
+       } else {
+               /* playback ran dry: restart the clock and the byte counters */
+               d->time = t;
+               d->written = 0;
+               d->written2 = 0;
+       }
+
+       /* advance the end of buffer time by the duration of the bytes written */
+       d->time += (1000000000LL * n) / (d->rfreq * 2*CHANNELS);
+       d->written += len;
+       d->written2 += len;
+
+       return len;
+}
+
+/* sample format value reported by the GETFMTS and SETFMT ioctls */
+enum
+{
+       AFMT_S16_LE = 0x10,
+};
+
+/*
+ * ioctl hook of the dsp device. handles the requests matched by the
+ * switch below; anything else is traced and answered with -ENOTTY.
+ *
+ * cmd is the linux ioctl number and arg points to the argument of the
+ * request. returns 0 on success or a negative errno value.
+ *
+ * channel count and sample rate are rejected with -EINVAL unless they
+ * are positive, because convertout() divides by values derived from
+ * both and resample() cannot advance with a zero step.
+ */
+static int
+ioctldsp(Ufile *file, int cmd, void *arg)
+{
+       DSP *dsp = (DSP*)file;
+       int ret, i, v;
+       vlong now;
+
+       ret = 0;
+       switch(cmd){
+       default:
+               trace("dsp: unknown ioctl %lux %p", (ulong)cmd, arg);
+               ret = -ENOTTY;
+               break;
+
+       case 0xC004500A:
+               /* accepted but ignored, the fragment size stays FRAGSIZE */
+               trace("dsp: SNDCTL_DSP_SETFRAGMENT(%lux)", *(ulong*)arg);
+               break;
+
+       case 0xC0045004:
+               trace("dsp: SNDCTL_DSP_GETBLKSIZE");
+               *((int*)arg) = FRAGSIZE;
+               break;
+
+       case 0x800c5011:
+               trace("dsp: SNDCTL_DSP_GETIPTR");
+               ret = -EPERM;
+               break;
+
+       case 0x800c5012:
+               trace("dsp: SNDCTL_DSP_GETOPTR");
+               ((int*)arg)[0] = dsp->written;                          // Total # of bytes processed
+               ((int*)arg)[1] = dsp->written2 / FRAGSIZE;      // # of fragment transitions since last time
+               dsp->written2 = 0;
+               ((int*)arg)[2] = 0;                                             // Current DMA pointer value
+               break;
+
+       case 0x8010500D:
+               trace("dsp: SNDCTL_DSP_GETISPACE");
+               ret = -EPERM;
+               break;
+
+       case 0x8010500C:
+               trace("dsp: SNDCTL_DSP_GETOSPACE");
+               /* total buffer: DELAY milliseconds of audio in the guest format */
+               i = (2 * dsp->channels) * ((dsp->freq*DELAY)/1000);
+               ((int*)arg)[1] = i / FRAGSIZE;                          // fragstot
+               ((int*)arg)[2] = FRAGSIZE;                                      // fragsize
+               now = nsec();
+               if(now < dsp->time){
+                       /* subtract what is still queued for playback */
+                       i -= ((2 * dsp->channels) * (((dsp->time - now) * (vlong)dsp->freq) / 1000000000));
+                       if(i < 0)
+                               i = 0;
+               }
+               ((int*)arg)[0] = i / FRAGSIZE;                          // available fragment count
+               ((int*)arg)[3] = i;                                             // available space in bytes
+               break;
+
+       case 0x8004500B:
+               trace("dsp: SNDCTL_DSP_GETFMTS(%d)", *(int*)arg);
+               *(int*)arg = AFMT_S16_LE;
+               break;
+
+       case 0x8004500F:
+               trace("dsp: SNDCTL_DSP_GETCAPS");
+               *(int*)arg = 0x400;
+               break;
+
+       case 0xC0045005:
+               /* whatever was requested, AFMT_S16_LE is reported back */
+               trace("dsp: SNDCTL_DSP_SETFMT(%d)", *(int*)arg);
+               *(int*)arg = AFMT_S16_LE;
+               break;
+
+       case 0xC0045006:
+               v = *(int*)arg;
+               trace("dsp: SOUND_PCM_WRITE_CHANNELS(%d)", v);
+               /* convertout() divides by the channel count */
+               if(v < 1){
+                       ret = -EINVAL;
+                       break;
+               }
+               dsp->channels = v;
+               break;
+
+       case 0xC0045003:
+               /* stereo is selected regardless of the requested value */
+               trace("dsp: SNDCTL_DSP_STEREO(%d)", *(int*)arg);
+               dsp->channels = 2;
+               *(int*)arg = 1;
+               break;
+
+       case 0xC0045002:
+               v = *(int*)arg;
+               trace("dsp: SNDCTL_DSP_SPEED(%d)", v);
+               /* a rate of zero gives a zero resampling step */
+               if(v < 1){
+                       ret = -EINVAL;
+                       break;
+               }
+               dsp->freq = v;
+               /* restart the resampler for the new rate */
+               for(i=0; i<CHANNELS; i++){
+                       dsp->chan[i].phase = 0;
+                       dsp->chan[i].last = 0;
+               }
+               break;
+
+       case 0x00005000:
+               trace("dsp: SNDCTL_DSP_RESET");
+               break;
+
+       case 0x00005001:
+               trace("dsp: SNDCTL_DSP_SYNC");
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * determine the output sample rate of the audio device by looking
+ * for a "speed out " entry in /dev/volume.
+ *
+ * returns the rate found there, or FREQUENCY when the file cannot be
+ * opened or read, has no such entry, or the entry is not a positive
+ * number. the result is used as a divisor by the callers, so zero
+ * must never be returned.
+ */
+static int
+getaudiofreq(void)
+{
+       int ret, n, fd, v;
+       char buf[1024];
+       char *p;
+
+       ret = FREQUENCY;
+       if((fd = open("/dev/volume", OREAD)) < 0)
+               return ret;
+       if((n = read(fd, buf, sizeof(buf)-1)) > 0){
+               buf[n] = 0;
+               p = strstr(buf, "speed out ");
+               if(p){
+                       /* 10 is the length of "speed out " */
+                       v = atoi(p + 10);
+                       if(v > 0)
+                               ret = v;
+               }
+       }
+       close(fd);
+       return ret;
+}
+
+/*
+ * open hook of the dsp device. for /dev/dsp and /dev/dsp0 it opens
+ * /dev/audio for writing and stores a new DSP file in *pf.
+ *
+ * returns 0 on success, -ENOENT for any other path, the mkerror()
+ * value when /dev/audio cannot be opened, or -ENOMEM when the DSP
+ * structure cannot be allocated. the third argument is unused.
+ */
+int opendsp(char *path, int mode, int, Ufile **pf)
+{
+       DSP *dsp;
+       int freq;
+       int fd;
+
+       if(strcmp(path, "/dev/dsp")!=0 && strcmp(path, "/dev/dsp0")!=0)
+               return -ENOENT;
+
+       if((fd = open("/dev/audio", OWRITE)) < 0)
+               return mkerror();
+
+       dsp = mallocz(sizeof(DSP), 1);
+       if(dsp == nil){
+               /* do not leak the audio descriptor on allocation failure */
+               close(fd);
+               return -ENOMEM;
+       }
+
+       freq = getaudiofreq();
+       dsp->ref = 1;
+       dsp->mode = mode;
+       dsp->dev = DSPDEV;
+       dsp->fd = fd;
+       dsp->path = kstrdup(path);
+       /* the guest rate starts out equal to the device rate */
+       dsp->rfreq = freq;
+       dsp->freq = freq;
+       dsp->channels = CHANNELS;
+
+       *pf = dsp;
+       return 0;
+}
+
+/*
+ * fstat hook: describes the open dsp file as a character device
+ * with mode 0666 owned by the current process. always returns 0.
+ */
+static int
+fstatdsp(Ufile *f, Ustat *s)
+{
+       s->mode = 0666 | S_IFCHR;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->ino = hashpath(f->path);
+       s->size = 0;
+       return 0;
+}
+
+/*
+ * stat hook: fills s for the two paths served by this device.
+ * any other path yields -ENOENT. the int argument is unused.
+ */
+static int
+statdsp(char *path, int, Ustat *s)
+{
+       if(strcmp(path, "/dev/dsp")!=0 && strcmp(path, "/dev/dsp0")!=0)
+               return -ENOENT;
+
+       s->size = 0;
+       s->ino = hashpath(path);
+       s->gid = current->gid;
+       s->uid = current->uid;
+       s->mode = 0666 | S_IFCHR;
+       return 0;
+}
+
+/* hook table of the dsp device, registered by dspdevinit() */
+static Udev dspdev = 
+{
+       .open = opendsp,
+       .read = readdsp,
+       .write = writedsp,
+       .poll = polldsp,
+       .close = closedsp,
+       .ioctl = ioctldsp,
+       .stat = statdsp,
+       .fstat = fstatdsp,
+};
+
+/*
+ * enter the dsp device into devtab and call fsmount for both
+ * paths it serves.
+ */
+void
+dspdevinit(void)
+{
+       devtab[DSPDEV] = &dspdev;
+       fsmount(&dspdev, "/dev/dsp");
+       fsmount(&dspdev, "/dev/dsp0");
+}
diff --git a/linux_emul_base/error.c b/linux_emul_base/error.c
new file mode 100644 (file)
index 0000000..fe4cb22
--- /dev/null
@@ -0,0 +1,266 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Format routine for Linux error codes (used with the %E verb in this
+ * file's trace() calls; the verb registration is not in this file).
+ *
+ * Takes one int argument.  A negative value whose magnitude indexes a
+ * named slot of the table is printed as "-N [ENAME]".  Everything else
+ * is printed as a plain decimal number: non-negative values, values
+ * beyond the table, and numbers that have no name in the table.
+ */
+int
+Efmt(Fmt *f)
+{
+       /* sparse table indexed by errno; slots without an initializer are nil */
+       static char *t[] = {
+       [EPERM]                         "EPERM",
+       [ENOENT]                        "ENOENT",
+       [ESRCH]                         "ESRCH",
+       [EINTR]                         "EINTR",
+       [EIO]                           "EIO",
+       [ENXIO]                         "ENXIO",
+       [E2BIG]                         "E2BIG",
+       [ENOEXEC]                       "ENOEXEC",
+       [EBADF]                         "EBADF",
+       [ECHILD]                        "ECHILD",
+       [EAGAIN]                        "EAGAIN",
+       [ENOMEM]                        "ENOMEM",
+       [EACCES]                        "EACCES",
+       [EFAULT]                        "EFAULT",
+       [ENOTBLK]                       "ENOTBLK",
+       [EBUSY]                         "EBUSY",
+       [EEXIST]                        "EEXIST",
+       [EXDEV]                         "EXDEV",
+       [ENODEV]                        "ENODEV",
+       [ENOTDIR]                       "ENOTDIR",
+       [EISDIR]                        "EISDIR",
+       [EINVAL]                        "EINVAL",
+       [ENFILE]                        "ENFILE",
+       [EMFILE]                        "EMFILE",
+       [ENOTTY]                        "ENOTTY",
+       [ETXTBSY]                       "ETXTBSY",
+       [EFBIG]                         "EFBIG",
+       [ENOSPC]                        "ENOSPC",
+       [ESPIPE]                        "ESPIPE",
+       [EROFS]                         "EROFS",
+       [EMLINK]                        "EMLINK",
+       [EPIPE]                         "EPIPE",
+       [EDOM]                          "EDOM",
+       [ERANGE]                        "ERANGE",
+       [EDEADLK]                       "EDEADLK",
+       [ENAMETOOLONG]          "ENAMETOOLONG",
+       [ENOLCK]                        "ENOLCK",
+       [ENOSYS]                        "ENOSYS",
+       [ENOTEMPTY]                     "ENOTEMPTY",
+       [ELOOP]                         "ELOOP",
+       [ENOMSG]                        "ENOMSG",
+       [EIDRM]                         "EIDRM",
+       [ECHRNG]                        "ECHRNG",
+       [EL2NSYNC]                      "EL2NSYNC",
+       [EL3HLT]                        "EL3HLT",
+       [EL3RST]                        "EL3RST",
+       [ELNRNG]                        "ELNRNG",
+       [EUNATCH]                       "EUNATCH",
+       [ENOCSI]                        "ENOCSI",
+       [EL2HLT]                        "EL2HLT",
+       [EBADE]                         "EBADE",
+       [EBADR]                         "EBADR",
+       [EXFULL]                        "EXFULL",
+       [ENOANO]                        "ENOANO",
+       [EBADRQC]                       "EBADRQC",
+       [EBADSLT]                       "EBADSLT",
+       [EBFONT]                        "EBFONT",
+       [ENOSTR]                        "ENOSTR",
+       [ENODATA]                       "ENODATA",
+       [ETIME]                         "ETIME",
+       [ENOSR]                         "ENOSR",
+       [ENONET]                        "ENONET",
+       [ENOPKG]                        "ENOPKG",
+       [EREMOTE]                       "EREMOTE",
+       [ENOLINK]                       "ENOLINK",
+       [EADV]                          "EADV",
+       [ESRMNT]                        "ESRMNT",
+       [ECOMM]                         "ECOMM",
+       [EPROTO]                        "EPROTO",
+       [EMULTIHOP]                     "EMULTIHOP",
+       [EDOTDOT]                       "EDOTDOT",
+       [EBADMSG]                       "EBADMSG",
+       [EOVERFLOW]                     "EOVERFLOW",
+       [ENOTUNIQ]                      "ENOTUNIQ",
+       [EBADFD]                        "EBADFD",
+       [EREMCHG]                       "EREMCHG",
+       [ELIBACC]                       "ELIBACC",
+       [ELIBBAD]                       "ELIBBAD",
+       [ELIBSCN]                       "ELIBSCN",
+       [ELIBMAX]                       "ELIBMAX",
+       [ELIBEXEC]                      "ELIBEXEC",
+       [EILSEQ]                        "EILSEQ",
+       [ERESTART]                      "ERESTART",
+       [ESTRPIPE]                      "ESTRPIPE",
+       [EUSERS]                        "EUSERS",
+       [ENOTSOCK]                      "ENOTSOCK",
+       [EDESTADDRREQ]          "EDESTADDRREQ",
+       [EMSGSIZE]                      "EMSGSIZE",
+       [EPROTOTYPE]            "EPROTOTYPE",
+       [ENOPROTOOPT]           "ENOPROTOOPT",
+       [EPROTONOSUPPORT]       "EPROTONOSUPPORT",
+       [ESOCKTNOSUPPORT]       "ESOCKTNOSUPPORT",
+       [EOPNOTSUPP]            "EOPNOTSUPP",
+       [EPFNOSUPPORT]          "EPFNOSUPPORT",
+       [EAFNOSUPPORT]          "EAFNOSUPPORT",
+       [EADDRINUSE]            "EADDRINUSE",
+       [EADDRNOTAVAIL]         "EADDRNOTAVAIL",
+       [ENETDOWN]                      "ENETDOWN",
+       [ENETUNREACH]           "ENETUNREACH",
+       [ENETRESET]                     "ENETRESET",
+       [ECONNABORTED]          "ECONNABORTED",
+       [ECONNRESET]            "ECONNRESET",
+       [ENOBUFS]                       "ENOBUFS",
+       [EISCONN]                       "EISCONN",
+       [ENOTCONN]                      "ENOTCONN",
+       [ESHUTDOWN]                     "ESHUTDOWN",
+       [ETOOMANYREFS]          "ETOOMANYREFS",
+       [ETIMEDOUT]                     "ETIMEDOUT",
+       [ECONNREFUSED]          "ECONNREFUSED",
+       [EHOSTDOWN]                     "EHOSTDOWN",
+       [EHOSTUNREACH]          "EHOSTUNREACH",
+       [EALREADY]                      "EALREADY",
+       [EINPROGRESS]           "EINPROGRESS",
+       [ESTALE]                        "ESTALE",
+       [EUCLEAN]                       "EUCLEAN",
+       [ENOTNAM]                       "ENOTNAM",
+       [ENAVAIL]                       "ENAVAIL",
+       [EISNAM]                        "EISNAM",
+       [EREMOTEIO]                     "EREMOTEIO",
+       [EDQUOT]                        "EDQUOT",
+       [ENOMEDIUM]                     "ENOMEDIUM",
+       [EMEDIUMTYPE]           "EMEDIUMTYPE",
+       };
+
+       int e;
+
+       e = va_arg(f->args, int);
+       /* unnamed slots are nil; print those as a bare number, not via %s */
+       if(e >= 0 || -e >= nelem(t) || t[-e] == nil)
+               return fmtprint(f, "%d", e);
+       return fmtprint(f, "%d [%s]", e, t[-e]);
+}
+
+/*
+ * Translate the current Plan 9 error string into a negative Linux errno.
+ *
+ * The error string is fetched with rerrstr() and searched for each of the
+ * known message fragments below.  The longest matching fragment wins, so
+ * a specific message such as "access permission denied" (EACCES) is not
+ * shadowed by a shorter one it contains ("permission denied", EPERM).
+ * Among fragments of equal length the earliest table entry wins.  If no
+ * fragment matches, -EIO is returned.
+ */
+int
+mkerror(void)
+{
+       static struct {
+               int             num;
+               char    *msg;
+       } t[] = {
+       /* from /sys/src/9/port/errstr.h */
+       {EINVAL,                        "inconsistent mount"},
+       {EINVAL,                        "not mounted"},
+       {EINVAL,                        "not in union"},
+       {EIO,                           "mount rpc error"},
+       {EIO,                           "mounted device shut down"},
+       {EPERM,                         "mounted directory forbids creation"},
+       {ENOENT,                        "does not exist"},
+       {ENXIO,                         "unknown device in # filename"},
+       {ENOTDIR,                       "not a directory"},
+       {EISDIR,                        "file is a directory"},
+       {EINVAL,                        "bad character in file name"},
+       {EINVAL,                        "file name syntax"},
+       {EPERM,                         "permission denied"},
+       {EPERM,                         "inappropriate use of fd"},
+       {EINVAL,                        "bad arg in system call"},
+       {EBUSY,                         "device or object already in use"},
+       {EIO,                           "i/o error"},
+       {EIO,                           "read or write too large"},
+       {EIO,                           "read or write too small"},
+       {EADDRINUSE,            "network port not available"},
+       {ESHUTDOWN,                     "write to hungup stream"},
+       {ESHUTDOWN,                     "i/o on hungup channel"},
+       {EINVAL,                        "bad process or channel control request"},
+       {EBUSY,                         "no free devices"},
+       {ESRCH,                         "process exited"},
+       {ECHILD,                        "no living children"},
+       {EIO,                           "i/o error in demand load"},
+       {ENOMEM,                        "virtual memory allocation failed"},
+       {EBADF,                         "fd out of range or not open"},
+       {EMFILE,                        "no free file descriptors"},
+       {ESPIPE,                        "seek on a stream"},
+       {ENOEXEC,                       "exec header invalid"},
+       {ETIMEDOUT,                     "connection timed out"},
+       {ECONNREFUSED,          "connection refused"},
+       {ECONNREFUSED,          "connection in use"},
+       {ERESTART,                      "interrupted"},
+       {ENOMEM,                        "kernel allocate failed"},
+       {EINVAL,                        "segments overlap"},
+       {EIO,                           "i/o count too small"},
+       {EINVAL,                        "bad attach specifier"},
+
+       /* from exhausted() calls in kernel */
+       {ENFILE,                        "no free file descriptors"},
+       {EBUSY,                         "no free mount devices"},
+       {EBUSY,                         "no free mount rpc buffer"},
+       {EBUSY,                         "no free segments"},
+       {ENOMEM,                        "no free memory"},
+       {ENOBUFS,                       "no free Blocks"},
+       {EBUSY,                         "no free routes"},
+
+       /* from ken */
+       {EINVAL,                        "attach -- bad specifier"},
+       {EBADF,                         "unknown fid"},
+       {EINVAL,                        "bad character in directory name"},
+       {EBADF,                         "read/write -- on non open fid"},
+       {EIO,                           "read/write -- count too big"},
+       {EIO,                           "phase error -- directory entry not allocated"},
+       {EIO,                           "phase error -- qid does not match"},
+       {EACCES,                        "access permission denied"},
+       {ENOENT,                        "directory entry not found"},
+       {EINVAL,                        "open/create -- unknown mode"},
+       {ENOTDIR,                       "walk -- in a non-directory"},
+       {ENOTDIR,                       "create -- in a non-directory"},
+       {EIO,                           "phase error -- cannot happen"},
+       {EEXIST,                        "create -- file exists"},
+       {EINVAL,                        "create -- . and .. illegal names"},
+       {ENOTEMPTY,                     "remove -- directory not empty"},
+       {EINVAL,                        "attach -- privileged user"},
+       {EPERM,                         "wstat -- not owner"},
+       {EPERM,                         "wstat -- not in group"},
+       {EINVAL,                        "create/wstat -- bad character in file name"},
+       {EBUSY,                         "walk -- too many (system wide)"},
+       {EROFS,                         "file system read only"},
+       {ENOSPC,                        "file system full"},
+       {EINVAL,                        "read/write -- offset negative"},
+       {EBUSY,                         "open/create -- file is locked"},
+       {EBUSY,                         "close/read/write -- lock is broken"},
+
+       /* from sockets */
+       {ENOTSOCK,                      "not a socket"},
+       {EPROTONOSUPPORT,       "protocol not supported"},
+       {ECONNREFUSED,          "connection refused"},
+       {EAFNOSUPPORT,          "address family not supported"},
+       {ENOBUFS,                       "insufficient buffer space"},
+       {EOPNOTSUPP,            "operation not supported"},
+       {EADDRINUSE,            "address in use"},
+
+       /* other */
+       {EEXIST,                        "file already exists"},
+       {EEXIST,                        "is a directory"},
+       {ENOTEMPTY,                     "directory not empty"},
+       };
+
+       int r, i, n, best;
+       char msg[ERRMAX];
+
+       rerrstr(msg, sizeof(msg));
+
+       r = -EIO;       /* default when nothing in the table matches */
+       best = 0;
+       for(i=0; i<nelem(t); i++){
+               if(strstr(msg, t[i].msg) == nil)
+                       continue;
+               /* strictly longer only: ties keep the earlier table entry */
+               n = strlen(t[i].msg);
+               if(n > best){
+                       best = n;
+                       r = -t[i].num;
+               }
+       }
+
+       trace("mkerror(%s): %E", msg, r);
+       return r;
+}
+
+/*
+ * Fallback for unimplemented system calls: traces the name of the
+ * current syscall and fails with -ENOSYS.
+ */
+int sys_nosys(void)
+{
+       int err;
+
+       err = -ENOSYS;
+       trace("syscall %s not implemented", current->syscall);
+       return err;
+}
diff --git a/linux_emul_base/exec.c b/linux_emul_base/exec.c
new file mode 100644 (file)
index 0000000..73d4669
--- /dev/null
@@ -0,0 +1,647 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <tos.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Elfhdr          Elfhdr;
+typedef struct Proghdr Proghdr;
+typedef struct ElfEx   ElfEx;
+
+/*
+ * ELF file header.  loadelf() reads it directly from the start of the
+ * file, so the field order and sizes must match the on-disk layout.
+ */
+struct Elfhdr {
+       uchar   ident[16];      /* starts with the "\x7fELF" magic */
+       ushort  type;           /* ElfT* file type */
+       ushort  machine;
+       ulong   version;
+       ulong   entry;          /* entry point address */
+       ulong   phoff;          /* file offset of the program header table */
+       ulong   shoff;
+       ulong   flags;
+       ushort  ehsize;
+       ushort  phentsize;      /* size of one program header entry */
+       ushort  phnum;          /* number of program header entries */
+       ushort  shentsize;
+       ushort  shnum;
+       ushort  shstrndx;
+};
+
+/*
+ * ELF program header entry, read from the file as an array by loadelf().
+ */
+struct Proghdr {
+       ulong   type;           /* ElfP* segment type */
+       ulong   offset;         /* file offset of the segment data */
+       ulong   vaddr;          /* virtual address of the segment */
+       ulong   paddr;
+       ulong   filesz;         /* bytes present in the file */
+       ulong   memsz;          /* bytes occupied in memory */
+       ulong   flags;          /* ElfPF* permission bits */
+       ulong   align;  
+};
+
+/*
+ * Result of loadelf(): what setupstack() and kexecve() need to start
+ * the loaded image.
+ */
+struct ElfEx
+{
+       ulong   ientry;         /* interpreter entry point, or entry if there is no interpreter */
+       ulong   ibase;          /* interpreter load address, 0 if there is no interpreter */
+
+       ulong   entry;          /* entry point of the executable itself */
+       ulong   base;           /* load address of the executable */
+
+       ulong   phdr;           /* set to load address + hdr.phoff by loadelf() */
+       ulong   phnum;          /* copied from hdr.phnum */
+       ulong   phent;          /* copied from hdr.phentsize */
+};
+
+/*
+ * Zero the memory from addr up to the next page boundary as computed by
+ * pagealign().  Does nothing when addr is already page aligned.
+ */
+static void
+padzero(ulong addr)
+{
+       ulong tail;
+
+       tail = pagealign(addr) - addr;
+       if(tail == 0)
+               return;
+       memset((void*)addr, 0, tail);
+}
+
+/*
+ * Constants for interpreting Elfhdr and Proghdr fields: file types
+ * (Elfhdr.type), machine architectures (Elfhdr.machine), program segment
+ * types (Proghdr.type) and segment permission bits (Proghdr.flags).
+ */
+enum {
+       /* file types */
+       ElfTNone = 0,
+       ElfTReloc = 1,
+       ElfTExec = 2,
+       ElfTShared = 3,
+       ElfTCore = 4,
+       ElfTMax = 5,
+
+       /* machine architectures */
+       ElfMNone = 0,
+       ElfM32 = 1,
+       ElfMSparc = 2,
+       ElfM386 = 3,
+       ElfM68 = 4,
+       ElfM88 = 5,
+       ElfM860 = 7,
+       ElfMMips = 8,
+       ElfMMax = 9,
+
+       /* program segment types */
+       ElfPNull = 0,
+       ElfPLoad = 1,
+       ElfPDynamic = 2,
+       ElfPInterp = 3,
+       ElfPNote = 4,
+       ElfPShlib = 5,
+       ElfPPhdr = 6,
+       ElfPMax = 7,
+
+       /* program segment flags */
+       ElfPFX = 1,
+       ElfPFW = 2,
+       ElfPFR = 4,
+};
+
+/*
+ * Map the ELF image `file' into the current address space.
+ *
+ * Every ElfPLoad segment is mapped with sys_mmap().  An ElfPInterp
+ * segment names an interpreter, which is loaded by a recursive call with
+ * depth+1.  depth is 0 for the executable itself; only then are
+ * mapdata(), sys_brk() and current->codestart/codeend set up.
+ *
+ * On success *ex is filled in and 0 is returned.  On failure -1 is
+ * returned with the error string set by werrstr().
+ */
+static int
+loadelf(char *file, ElfEx *ex, int depth)
+{
+       int fd;
+       int i, l;
+       int mapprot;
+       int mapflags;
+       ulong mapbase;
+       ulong loadaddr;
+       ulong bss;
+
+       Elfhdr hdr;
+       Proghdr *phdr;
+       char *interpreter;
+
+       interpreter = nil;
+       phdr = nil;
+
+       if((fd = sys_open(file, O_RDONLY, 0)) < 0){
+               werrstr("cant open %s", file);
+               goto errout;
+       }
+
+       if(sys_read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)){
+               werrstr("cant read elf header");
+               goto errout;
+       }
+
+       if(memcmp(hdr.ident, "\x7fELF", 4)!=0){
+               werrstr("no elf magic");
+               goto errout;
+       }
+
+       /*
+        * The table is indexed as Proghdr[] below, so an entry size other
+        * than sizeof(Proghdr) would read past the allocation or misparse
+        * the entries.
+        */
+       if(hdr.phentsize != sizeof(Proghdr)){
+               werrstr("bad program header size");
+               goto errout;
+       }
+
+       l = hdr.phnum * hdr.phentsize;
+       phdr = kmalloc(l);
+       sys_lseek(fd, hdr.phoff, 0);
+       if(sys_read(fd, phdr, l) != l){
+               werrstr("cant read program headers");
+               goto errout;
+       }
+
+       loadaddr = 0;
+       mapbase = 0;
+       mapflags = MAP_PRIVATE;
+       /* everything except shared objects is mapped at its stated vaddr */
+       if(hdr.type != ElfTShared)
+               mapflags |= MAP_FIXED;
+
+       trace("loadelf(): phnum=%d", hdr.phnum);
+
+       bss = 0;
+       for(i=0; i<hdr.phnum; i++){
+               Proghdr *p;
+
+               p = &phdr[i];
+               if(p->type == ElfPInterp){
+                       if(interpreter){
+                               werrstr("multiple interpeter sections");
+                               goto errout;
+                       }
+                       l = p->filesz;
+
+                       /* one extra byte so the path is always NUL terminated */
+                       interpreter = kmalloc(l+1);
+                       sys_lseek(fd, p->offset, 0);
+                       if(sys_read(fd, interpreter, l)!=l){
+                               werrstr("cant read interpreter section");
+                               goto errout;
+                       }
+                       interpreter[l] = '\0';
+               }
+
+               if(p->type == ElfPLoad){
+                       ulong a;
+                       int diff;
+
+                       trace("loadelf(): phdr %d: vaddr=%lux memsz=%lux filesz=%lux offset=%lux flags=%lux",
+                               i,
+                               p->vaddr,
+                               p->memsz,
+                               p->filesz,
+                               p->offset,
+                               p->flags);
+
+                       mapprot = 0;
+                       if(p->flags & ElfPFR)
+                               mapprot |= PROT_READ;
+                       if(p->flags & ElfPFW)
+                               mapprot |= PROT_WRITE;
+                       if(p->flags & ElfPFX)
+                               mapprot |= PROT_EXEC;
+
+                       /* the segment holding the entry point is always made executable */
+                       if(hdr.entry >= p->vaddr && hdr.entry < p->vaddr + p->memsz)
+                               mapprot |= PROT_EXEC;
+
+                       /* offset of vaddr within its page */
+                       diff = p->vaddr - (p->vaddr & ~(PAGESIZE-1));
+
+                       /* have to call mapdata() before we do the first mmap */
+                       if(loadaddr == 0 && depth == 0){
+                               if(hdr.type == ElfTShared){
+                                       mapbase = pagealign((ulong)end + 0x4000000);
+                                       mapflags |= MAP_FIXED;
+                               }
+                               mapdata((mapbase + p->vaddr) - diff);
+                       }
+
+                       a = sys_mmap(
+                               (mapbase + p->vaddr) - diff,
+                               p->filesz + diff,
+                               mapprot,
+                               mapflags,
+                               fd,
+                               (p->offset - diff)/PAGESIZE);
+
+                       /* small negative values are error codes, not addresses */
+                       if(((int)a < 0) && ((int)a > -EMAX)){
+                               werrstr("mmap failed: %E", (int)a);
+                               goto errout;
+                       }
+                       if(loadaddr == 0)
+                               loadaddr = a;
+                       /* first mapping of a shared object fixes the base for the rest */
+                       if(hdr.type == ElfTShared && mapbase == 0){
+                               mapbase = a + diff;
+                               mapflags |= MAP_FIXED;
+                       }
+                       if(mapprot & PROT_WRITE)
+                               padzero(mapbase + p->vaddr + p->filesz);
+                       /* track the highest segment end of the main executable */
+                       if(depth == 0)
+                               if(mapbase + p->vaddr + p->memsz > bss)
+                                       bss = mapbase + p->vaddr + p->memsz;
+               } else {
+                       trace("loadelf(): phdr %d: type=%lux", i, p->type);
+               }
+       }
+
+       ex->base = loadaddr;
+       ex->entry = hdr.entry + ((hdr.type == ElfTShared) ? loadaddr : 0);
+
+       ex->phdr = loadaddr + hdr.phoff;
+       ex->phent = hdr.phentsize;
+       ex->phnum = hdr.phnum;
+
+       if(depth == 0){
+               sys_brk(pagealign(bss));
+
+               current->codestart = loadaddr;
+               current->codeend = bss;
+       }
+
+       if(interpreter){
+               ElfEx interpex;
+
+               if(loadelf(interpreter, &interpex, depth+1) < 0){
+                       werrstr("cant load interpreter: %r");
+                       goto errout;
+               }
+               free(interpreter);
+
+               ex->ientry = interpex.entry;
+               ex->ibase = interpex.base;
+       } else {
+               ex->ientry = ex->entry;
+               ex->ibase = 0;  /* no interpreter */
+       }
+
+       sys_close(fd);
+       free(phdr);
+       return 0;
+
+errout:
+       if(fd >= 0)
+               sys_close(fd);
+       free(interpreter);
+       free(phdr);
+       return -1;
+}
+
+
+/*
+ * Keys for the auxiliary vector entries that setupstack() writes after
+ * the environment pointers.  Values count up from AT_NULL = 0 except
+ * where set explicitly.
+ */
+enum {
+       AT_NULL,
+       AT_IGNORE,
+       AT_EXECFD,
+       AT_PHDR,
+       AT_PHENT,
+       AT_PHNUM,
+       AT_PAGESZ,
+       AT_BASE,
+       AT_FLAGS,
+       AT_ENTRY,
+       AT_NOTELF,
+       AT_UID,
+       AT_EUID,
+       AT_GID,
+       AT_EGID,
+       AT_PLATFORM,
+       AT_HWCAP,
+       AT_CLKTCK,
+       AT_SECURE = 23,
+
+       AT_SYSINFO = 32,
+       AT_SYSINFO_EHDR = 33,
+};
+
+/*
+ * Build the initial user stack for a freshly loaded program.
+ *
+ * Layout, from the returned pointer upwards: argc, argc+1 argv pointers
+ * (nil terminated), envc+1 envp pointers (nil terminated), 16 auxiliary
+ * vector pairs, then the argv and envp string data.  argv and envp may
+ * each be nil, which counts as an empty list.
+ *
+ * Returns the new stack pointer, which is also stored in
+ * current->stackstart.  Returns nil with the error string set on failure.
+ */
+static void*
+setupstack(ElfEx *ex, char *argv[], char *envp[])
+{
+       int envc;
+       int argc;
+
+       char **dargv;
+       char **denv;
+
+       ulong *stack;
+       ulong *p;
+       char *x;
+       int i, n;
+
+       /*
+        * calculate the size we need on stack
+        */
+       argc=0;
+       while(argv && argv[argc]) argc++;
+
+       envc=0;
+       while(envp && envp[envc]) envc++;
+
+       n = 0;
+       n += sizeof(ulong);                     // argc
+       n += (argc+1)*sizeof(char*);    // argv + nil
+       n += (envc+1)*sizeof(char*);    // envp + nil
+       n += 16*(2*sizeof(ulong));      // aux
+
+       for(i=0; i<argc; i++)
+               n += (strlen(argv[i])+1);
+       for(i=0; i<envc; i++)
+               n += (strlen(envp[i])+1);
+
+       /* refuse if the data would leave less than a page of the USTACK bytes */
+       if(USTACK - n < PAGESIZE){
+               werrstr("too many arguments passed on stack");
+               return nil;
+       }
+               
+       /* NOTE(review): mapstack() presumably returns the top of the new stack -- confirm */
+       stack = mapstack(USTACK);
+
+       /* small negative values are error codes, not addresses */
+       if(((int)stack < 0) && ((int)stack > -EMAX)){
+               werrstr("mapstack failed: %E", (int)stack);
+               return nil;
+       }
+       /* reserve n bytes below the returned address, rounded down to 8 bytes */
+       stack = (ulong*)(((ulong)stack - n) & ~7);
+
+       current->stackstart = (ulong)stack;
+
+       p = stack;
+
+       *p++ = argc;
+
+       /* pointer arrays are reserved here and filled in once the strings are copied */
+       dargv = (char**)p;
+       p += (argc + 1);
+
+       denv = (char**)p;
+       p += (envc + 1);
+
+       /* exactly 16 pairs, matching the space counted for aux above */
+#define AUXENT(k, v)  {p[0]=k; p[1]=v; p+=2;}
+       AUXENT(AT_PAGESZ, PAGESIZE);
+       AUXENT(AT_CLKTCK, HZ);
+       AUXENT(AT_PHDR, ex->phdr);
+       AUXENT(AT_PHENT, ex->phent);
+       AUXENT(AT_PHNUM, ex->phnum);
+       AUXENT(AT_BASE, ex->ibase);
+       AUXENT(AT_FLAGS, 0);
+       AUXENT(AT_ENTRY, ex->entry);
+       AUXENT(AT_UID, current->uid);
+       AUXENT(AT_EUID, current->uid);
+       AUXENT(AT_GID, current->gid);
+       AUXENT(AT_EGID, current->gid);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+#undef AUXENT
+
+       /* string data follows the aux vector */
+       x = (char*)p;
+
+       /* copy each string, record its new address, advance past its NUL */
+       for(i=0; i<argc; i++)
+               x += (strlen(dargv[i] = strcpy(x, argv[i])) + 1);
+       dargv[argc] = 0;
+       for(i=0; i<envc; i++)
+               x += (strlen(denv[i] = strcpy(x, envp[i])) + 1);
+       denv[envc] = 0;
+
+       return stack;
+}
+
+/*
+ * Duplicate a nil-terminated string vector into one kmalloc() block:
+ * the pointer array (with its terminating nil) comes first and the
+ * string bytes follow, so a single free() releases everything.
+ * Returns nil when a is nil.
+ */
+static char**
+copystrings(char *a[])
+{
+       char **r;
+       char *p;
+       int cnt, size, k;
+
+       if(a == nil)
+               return nil;
+
+       /* one pointer slot per string plus one for the terminating nil */
+       size = sizeof(a[0]);
+       for(cnt = 0; a[cnt] != nil; cnt++)
+               size += sizeof(a[0]) + (strlen(a[cnt]) + 1);
+
+       r = kmalloc(size);
+
+       /* string storage starts right behind the pointer array */
+       p = (char*)(r + cnt + 1);
+       for(k = 0; k < cnt; k++){
+               strcpy(p, a[k]);
+               r[k] = p;
+               p += strlen(p) + 1;
+       }
+       r[cnt] = nil;
+       return r;
+}
+
+/*
+ * Set the process name and the saved command line of the current process.
+ *
+ * The process name passed to setprocname() is `name' followed by each
+ * argv string, separated by single spaces.  current->comm is then set to
+ * `exe' and each argv string, every one NUL terminated, followed by one
+ * extra NUL; current->ncomm is the total byte count.  The previous
+ * current->comm is freed.
+ *
+ * argv may be nil, which is treated as an empty argument list.
+ */
+static void
+setcomm(char *exe, char *name, char *argv[])
+{
+       char *buf, *p;
+       int i, n;
+
+       /* one buffer is sized to hold either of the two strings built below */
+       n = strlen(exe) + strlen(name) +2;
+       for(i=0; argv && argv[i]; i++)
+               n += strlen(argv[i])+1;
+
+       buf = kmalloc(n);
+
+       p = buf;
+       p += strlen(strcpy(p, name));
+       for(i=0; argv && argv[i]; i++){
+               p += strlen(strcpy(p, " "));
+               p += strlen(strcpy(p, argv[i]));
+       }
+       setprocname(buf);
+
+       /* comm contains the full exe name + argv */
+       p = buf;
+       p += strlen(strcpy(p, exe));
+       *p++ = 0;
+       for(i=0; argv && argv[i]; i++){
+               p += strlen(strcpy(p, argv[i]));
+               *p++ = 0;
+       }
+       *p++ = 0;
+
+       free(current->comm);
+       current->comm = buf;
+       current->ncomm = p - buf;
+}
+
+/*
+ * Leave note-handler context while keeping the caller's registers.
+ * kexecve() calls this when current->innote is set, before the user
+ * stack is destroyed.
+ *
+ * pc, sp and ax are saved from ureg, notejmp() is called inside a
+ * setjmp(), and the three saved registers are written back to ureg.
+ */
+static void
+clinote(struct Ureg *ureg)
+{
+       jmp_buf jmp;
+       ulong pc;
+       ulong sp;
+       ulong ax;
+
+       pc = ureg->pc;
+       sp = ureg->sp;
+       ax = ureg->ax;
+
+       /* NOTE(review): notejmp() presumably resumes at the setjmp() with a nonzero value -- confirm */
+       if(!setjmp(jmp))
+               notejmp(ureg, jmp, 1);
+
+       /* put back the register values saved above */
+       ureg->pc = pc;
+       ureg->sp = sp;
+       ureg->ax = ax;
+}
+
+/*
+ * Arguments of sys_execve(), bundled so they can be handed to kexecve()
+ * through the single pointer argument of onstack().
+ */
+struct kexecveargs
+{
+       char            *name;
+       char            **argv;
+       char            **envp;
+};
+
+#pragma profile off
+
+/*
+ * Body of execve(), run on kstack via onstack() from sys_execve().
+ * arg points to a struct kexecveargs.
+ *
+ * The file is checked with sys_access(), opened, and its start is read
+ * into a scratch buffer.  "#!" scripts are resolved by rewriting name and
+ * argv inside the scratch buffer and retrying with the interpreter.
+ * Once an ELF file is found, name/argv/envp are copied, the old process
+ * image is torn down, and the new one is loaded and entered through
+ * retuser().
+ *
+ * `phase' records how far execution got, so that errout frees exactly
+ * what was allocated:
+ *   0    only the scratch buffer b exists
+ *   1-3  name, argv and envp copies have been made (in that order)
+ *   3    past the point of no return; any failure kills the process
+ *   4    the new image is ready to run
+ *
+ * Returns a negative errno for failures before the point of no return.
+ */
+static int
+kexecve(void *arg)
+{
+       struct kexecveargs *args;
+       Ufile *f;
+       ElfEx ex;
+       Ureg u;
+       int r, n, l;
+       char *b, *p, *e, *x, **a;
+       void *stack;
+       char *name, *exe;
+       char **argv;
+       char **envp;
+       int phase;
+
+       args =  arg;
+       name = args->name;
+       argv = args->argv;
+       envp = args->envp;
+
+       phase = 0;
+       n = 8192;
+       b = kmalloc(n);
+       p = b;
+       e = b + n;
+again:
+       if(r = sys_access(name, 05)){
+               if(r > 0)
+                       r = -EACCES;
+               goto errout;
+       }
+       if((r = sys_open(name, O_RDONLY, 0)) < 0)
+               goto errout;
+       exe = "/dev/null";
+       if(f = fdgetfile(r)){
+               if(f->path != nil){
+                       /*
+                        * Copy the path including its NUL.  strncpy() would
+                        * leave the buffer unterminated when the path does
+                        * not fit, so fail instead of truncating.
+                        */
+                       l = strlen(f->path) + 1;
+                       if(l >= e - p){
+                               putfile(f);
+                               sys_close(r);
+                               r = -ENAMETOOLONG;
+                               goto errout;
+                       }
+                       memmove(p, f->path, l);
+                       exe = p;
+                       p += l;
+               }
+               putfile(f);
+       }
+       /* leave one byte free for the terminator written in the "#!" case */
+       n = sys_read(r, p, (e-p)-1);
+       sys_close(r);
+
+       r = -ENOEXEC;
+       if(n < 4)
+               goto errout;
+
+       if(memcmp(p, "#!", 2) == 0){
+               p[n] = 0;
+
+               r = -ENAMETOOLONG;
+               if((x = strchr(p, '\n')) == nil)
+                       goto errout;
+               *x = 0;
+
+               /* the new argv array is built in the buffer right after the #! line */
+               a = (char**)&x[1];
+               n = (e - (char*)a) / sizeof(a[0]);
+               if(n < 2)
+                       goto errout;
+               n = getfields(&p[2], a, n, 1, "\t\r\n ");
+               if(n < 1)
+                       goto errout;
+               r = -E2BIG;
+               if(&a[n+1] >= (char**)e)
+                       goto errout;
+               /* the script path takes the place of the original argv[0] */
+               a[n++] = name;
+               if(argv != nil){
+                       argv++;
+                       while(*argv){
+                               if(&a[n+1] >= (char**)e)
+                                       goto errout;
+                               a[n++] = *argv++;
+                       }
+               }
+               a[n++] = 0;
+               p = (char*)&a[n];
+               if(e - p < 4)
+                       goto errout;
+               argv = a;
+               name = argv[0];
+
+               goto again;
+       }
+
+       if(memcmp(p, "\x7fELF", 4)!=0)
+               goto errout;
+
+       /*
+        * The contents of envp[] or argv[] may be stored in b[], or on the
+        * stack or bss of the calling linux process, all of which are
+        * destroyed by free(b) and exitmem()... so we need to copy them
+        * temporarily.
+        */
+       r = -ENOMEM;
+       name = kstrdup(name);
+       phase++;
+       if(argv)
+               argv = copystrings(argv);
+       phase++;
+       if(envp)
+               envp = copystrings(envp);
+       phase++;
+
+       /* get out of the note before we destroy user stack */
+       if(current->innote){
+               clinote(current->ureg);
+               current->innote = 0;
+       }
+
+       /* this is the point of no return! */
+       qlock(&proctab);
+       zapthreads();
+       exitmem();
+       exitsignal();
+
+       initmem();
+       initsignal();
+       inittls();
+       qunlock(&proctab);
+
+       closexfds();
+
+       setcomm(exe, name, argv);
+
+       if(loadelf(name, &ex, 0) < 0){
+               trace("kexecve(): loadelf failed: %r");
+               goto errout;
+       }
+
+       if((stack = setupstack(&ex, argv, envp)) == nil){
+               trace("kexecve(): setupstack failed: %r");
+               goto errout;
+       }
+
+       memset(&u, 0, sizeof(u));
+       u.sp = (ulong)stack;
+       u.pc = (ulong)ex.ientry;
+       current->ureg = &u;
+       current->syscall = nil;
+       phase++;
+
+       trace("kexecve(): startup pc=%lux sp=%lux", current->ureg->pc, current->ureg->sp);
+
+errout:
+       /* release the temporaries; each case deliberately falls through */
+       switch(phase){
+       default:        free(envp);
+               /* fall through */
+       case 2: free(argv);
+               /* fall through */
+       case 1: free(name);
+               /* fall through */
+       case 0: free(b);
+       }
+       /*
+        * phase 4: enter the new image.
+        * phase 3: the old image is already gone, so kill the process.
+        */
+       switch(phase){
+       case 4: retuser();
+               /* fall through */
+       case 3: exitproc(current, SIGKILL, 1);
+       }
+       return r;
+}
+
+/*
+ * execve system call entry: packs the three arguments into a
+ * struct kexecveargs and runs kexecve() on kstack through onstack().
+ */
+int sys_execve(char *name, char *argv[], char *envp[])
+{
+       struct kexecveargs ka;
+
+       trace("sys_execve(%s, %p, %p)", name, argv, envp);
+
+       ka.envp = envp;
+       ka.argv = argv;
+       ka.name = name;
+
+       return onstack(kstack, kexecve, &ka);
+}
+
+#pragma profile on
diff --git a/linux_emul_base/file.c b/linux_emul_base/file.c
new file mode 100644 (file)
index 0000000..8c048d5
--- /dev/null
@@ -0,0 +1,760 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Fd Fd;
+typedef struct Fdtab Fdtab;
+
+/* One slot of a descriptor table: an open file plus per-descriptor flags. */
+struct Fd
+{
+       int             flags;  /* FD_CLOEXEC is the bit tested in this file */
+       Ufile           *file;  /* nil while the slot is free */
+};
+
+/*
+ * Reference-counted, lock-protected descriptor table.  The unnamed Ref
+ * and QLock members let a Fdtab* be handed directly to incref()/decref()
+ * and qlock()/qunlock().
+ */
+struct Fdtab
+{
+       Ref;
+       QLock;
+       int             lastfd; /* last fd closed by sys_close() (-1 at creation); newfd() tries it first */
+       int             nfd;    /* number of slots in fd[], free or occupied */
+       Fd              *fd;    /* slot array, enlarged CHUNK entries at a time */
+};
+
+/*
+ * Take one more reference on file and hand it back.
+ * A nil file is passed through untouched.
+ */
+Ufile*
+getfile(Ufile *file)
+{
+       if(file == nil)
+               return nil;
+       incref(file);
+       return file;
+}
+
+/*
+ * Drop one reference to file.  When decref() reports that no references
+ * remain, the owning device's close hook (if any) is called, then the
+ * path string, the chain of Udirent records hanging off rdaux, and the
+ * Ufile itself are freed.  A nil file is ignored.
+ */
+void
+putfile(Ufile *file)
+{
+       Udirent *ent;
+
+       if(file == nil || decref(file))
+               return;
+       trace("putfile(): closing %p %s", file, file->path);
+       if(devtab[file->dev]->close != nil)
+               devtab[file->dev]->close(file);
+       free(file->path);
+       /* unlink and free the rdaux entries one at a time */
+       for(ent = file->rdaux; ent != nil; ent = file->rdaux){
+               file->rdaux = ent->next;
+               free(ent);
+       }
+       free(file);
+}
+
+/*
+ * Allocate an empty descriptor table holding a single reference.
+ * lastfd starts at -1, so newfd() has no reuse hint to try.
+ */
+static Fdtab*
+newfdtab(void)
+{
+       Fdtab *t;
+
+       t = kmallocz(sizeof(*t), 1);
+       t->fd = nil;
+       t->nfd = 0;
+       t->lastfd = -1;
+       t->ref = 1;
+
+       return t;
+}
+
+/* number of slots by which a descriptor table's fd[] array is enlarged */
+enum {
+       CHUNK   = 64,
+};
+
+/*
+ * Append one zeroed slot to tab and return its index, or -EMFILE once
+ * the table already holds MAXFD slots.  The backing array is enlarged by
+ * CHUNK slots each time nfd reaches a multiple of CHUNK.
+ * Assumes the caller has acquired tab's lock.
+ */
+static int
+grow1(Fdtab *tab)
+{
+       int slot;
+
+       slot = tab->nfd;
+       if(slot >= MAXFD)
+               return -EMFILE;
+       if(slot % CHUNK == 0)
+               tab->fd = krealloc(tab->fd, sizeof(tab->fd[0]) * (slot + CHUNK));
+       memset(&tab->fd[slot], 0, sizeof(tab->fd[0]));
+       tab->nfd = slot + 1;
+       return slot;
+}
+
+/*
+ * Look up descriptor fd in proc's table and return the file with an
+ * extra reference taken.  Returns nil when proc has no table, fd is out
+ * of range, or the slot is empty.  Release the result with putfile().
+ */
+Ufile *procfdgetfile(Uproc *proc, int fd)
+{
+       Fdtab *tab;
+       Ufile *file;
+
+       tab = proc->fdtab;
+       if(tab == nil)
+               return nil;
+       file = nil;
+       qlock(tab);
+       if(fd >= 0 && fd < tab->nfd)
+               file = getfile(tab->fd[fd].file);
+       qunlock(tab);
+       return file;
+}
+
+/* Shorthand for procfdgetfile() applied to the calling process (current). */
+Ufile*
+fdgetfile(int fd)
+{
+       return procfdgetfile(current, fd);
+}
+
+/*
+ * Install file in a free slot of the current process's descriptor table
+ * and return the slot index.  The slot named by lastfd is tried first,
+ * then the table is scanned from 0, and finally it is grown by one slot.
+ * The caller's reference to file is always consumed: it is stored in the
+ * table on success, and dropped with putfile() when grow1() fails, in
+ * which case grow1()'s negative error code is returned.
+ */
+int
+newfd(Ufile *file, int flags)
+{
+       Fdtab *tab;
+       int fd;
+
+       tab = current->fdtab;
+       qlock(tab);
+       fd = tab->lastfd;
+       if(fd < 0 || fd >= tab->nfd || tab->fd[fd].file != nil){
+               /* hint unusable: take the first empty slot, else extend */
+               for(fd=0; fd<tab->nfd; fd++)
+                       if(tab->fd[fd].file == nil)
+                               break;
+               if(fd >= tab->nfd)
+                       fd = grow1(tab);
+       }
+       if(fd >= 0){
+               tab->fd[fd].flags = flags;
+               tab->fd[fd].file = file;
+               file = nil;
+       }
+       qunlock(tab);
+       putfile(file);
+
+       return fd;
+}
+
+/*
+ * Obtain a descriptor table for a new process.  With copy == 0 the
+ * existing table is shared (one more reference is taken); otherwise a
+ * fresh table is built under tab's lock, with every occupied slot
+ * duplicated and a new reference taken on each file.
+ */
+static Fdtab*
+getfdtab(Fdtab *tab, int copy)
+{
+       Fdtab *nt;
+       Ufile *file;
+       int i, nalloc;
+
+       if(copy == 0){
+               incref(tab);
+               return tab;
+       }
+       qlock(tab);
+       nt = newfdtab();
+       nt->lastfd = tab->lastfd;
+       nt->nfd = tab->nfd;
+       /* round the slot count up to a whole number of CHUNKs */
+       nalloc = ((tab->nfd+CHUNK-1)/CHUNK)*CHUNK;
+       nt->fd = kmallocz(sizeof(nt->fd[0]) * nalloc, 1);
+       for(i=0; i<nt->nfd; i++){
+               file = tab->fd[i].file;
+               if(file != nil){
+                       incref(file);
+                       nt->fd[i].file = file;
+                       nt->fd[i].flags = tab->fd[i].flags;
+               }
+       }
+       qunlock(tab);
+       return nt;
+}
+
+/*
+ * Drop one reference to tab.  When decref() reports none remain, every
+ * occupied slot is cleared and its file released with putfile(), then
+ * the slot array and the table itself are freed.
+ */
+static void
+putfdtab(Fdtab *tab)
+{
+       Ufile *file;
+       int i;
+
+       if(decref(tab) != 0)
+               return;
+       for(i=0; i<tab->nfd; i++){
+               file = tab->fd[i].file;
+               if(file != nil){
+                       tab->fd[i].file = nil;
+                       putfile(file);
+               }
+       }
+       free(tab->fd);
+       free(tab);
+}
+
+/*
+ * dup2(2): make descriptor new refer to the same open file as old.
+ *
+ * A negative new is the internal convention used by sys_dup(): the file
+ * is installed in any free slot through newfd().  Otherwise the table is
+ * grown until slot new exists, whatever file occupied it is released,
+ * and FD_CLOEXEC is cleared on the slot.
+ *
+ * Returns the new descriptor, -EBADF if old is not open or new is not
+ * below MAXFD, or the error from grow1().
+ */
+int sys_dup2(int old, int new)
+{
+       Ufile *file;
+       Fdtab *tab;
+       int err;
+
+       trace("sys_dup2(%d, %d)", old, new);
+
+       tab = current->fdtab;
+
+       if((file = fdgetfile(old)) == nil)
+               return -EBADF;
+       if(new < 0)
+               return newfd(file, 0);
+       if(new >= MAXFD){
+               /* release the reference fdgetfile() took; it used to leak here */
+               putfile(file);
+               return -EBADF;
+       }
+       if(new == old){
+               /* Linux leaves the descriptor (and its FD_CLOEXEC bit) untouched */
+               putfile(file);
+               return new;
+       }
+       qlock(tab);
+       while(new >= tab->nfd){
+               err = grow1(tab);
+               if(err < 0){
+                       qunlock(tab);
+                       putfile(file);
+                       return err;
+               }
+       }
+       if(tab->fd[new].file != nil)
+               putfile(tab->fd[new].file);
+       /* the slot takes over the reference obtained from fdgetfile() */
+       tab->fd[new].file = file;
+       tab->fd[new].flags &= ~FD_CLOEXEC;
+       qunlock(tab);
+
+       return new;
+}
+
+/*
+ * dup(2): duplicate fd onto a free descriptor.  Passing -1 as the target
+ * makes sys_dup2() hand the file to newfd().
+ */
+int sys_dup(int fd)
+{
+       return sys_dup2(fd, -1);
+}
+
+/*
+ * Lock record that sys_fcntl() treats the F_GETLK argument as pointing to.
+ * Only l_type is written by this file.
+ */
+struct linux_flock
+{
+       short   l_type;
+       short   l_whence;
+       ulong   l_start;
+       ulong   l_len;
+       int             l_pid;
+}; 
+
+/*
+ * Variant of linux_flock with uvlong start/length, used for F_GETLK64.
+ * Only l_type is written by this file.
+ */
+struct linux_flock64
+{
+       short   l_type;
+       short   l_whence;
+       uvlong  l_start;
+       uvlong  l_len;
+       int             l_pid;
+};
+
+/* l_type values; sys_fcntl() only ever stores F_UNLCK */
+enum {
+       F_RDLCK,
+       F_WRLCK,
+       F_UNLCK,
+};
+
+/*
+ * fcntl(2) for the current process.
+ *
+ * Returns -EBADF when fd is not open and -EINVAL for an unknown cmd or
+ * an out-of-range F_DUPFD argument; otherwise the result depends on cmd
+ * as described at each case.  The reference obtained from fdgetfile()
+ * is released at `out' unless F_DUPFD handed it to a new slot.
+ */
+int sys_fcntl(int fd, int cmd, int arg)
+{
+       int ret;
+       Ufile *file;
+       Fdtab *tab;
+
+       trace("sys_fcntl(%d, %lux, %lux)", fd, (ulong)cmd, (ulong)arg);
+
+       tab = current->fdtab;
+
+       ret = -EBADF;
+       if((file = fdgetfile(fd)) == nil)
+               goto out;
+       ret = -EINVAL;
+       switch(cmd){
+       default:
+               trace("sys_fcntl() cmd %lux not implemented", (ulong)cmd);
+               break;
+
+       /* duplicate fd onto the first free descriptor numbered arg or above */
+       case F_DUPFD:
+               if(arg < 0 || arg >= MAXFD)
+                       break;
+               qlock(tab);
+               for(ret=arg; ret<tab->nfd; ret++)
+                       if(tab->fd[ret].file == nil)
+                               goto found;
+               /* no free slot at or above arg: grow until index arg exists */
+               do {
+                       if((ret = grow1(tab)) < 0)
+                               break;
+               } while(ret < arg);
+found:
+               if(ret >= 0){
+                       tab->fd[ret].file = file;
+                       tab->fd[ret].flags = tab->fd[fd].flags & ~FD_CLOEXEC;
+                       /* reference now owned by the slot; keep `out' from dropping it */
+                       file = nil;
+               }
+               qunlock(tab);
+               break;
+
+       /* read or replace the descriptor flags; only FD_CLOEXEC is kept */
+       case F_GETFD:
+       case F_SETFD:
+               qlock(tab);
+               if(cmd == F_GETFD){
+                       ret = tab->fd[fd].flags & FD_CLOEXEC;
+               } else {
+                       tab->fd[fd].flags = (arg & FD_CLOEXEC);
+                       ret = 0;
+               }
+               qunlock(tab);
+               break;
+
+       case F_GETFL:
+               ret = file->mode;
+               break;
+       /*
+        * NOTE(review): the whole mode word is replaced by arg, access-mode
+        * bits included.  Linux F_SETFL only changes the status flags;
+        * confirm whether callers depend on file->mode keeping its access mode.
+        */
+       case F_SETFL:
+               trace("sys_fcntl() changing mode from %o to %o", file->mode, arg);
+               file->mode = arg;
+               ret = 0;
+               break;
+
+       /*
+        * Record locks are not implemented: a query always reports the
+        * region as unlocked and a set request succeeds without effect.
+        * arg is used as the address of the caller's lock record.
+        */
+       case F_GETLK:
+               ((struct linux_flock*)arg)->l_type = F_UNLCK;
+               /* fall through */
+       case F_SETLK:
+       case F_SETLKW:
+               ret = 0;
+               break;
+
+       case F_GETLK64:
+               ((struct linux_flock64*)arg)->l_type = F_UNLCK;
+               /* fall through */
+       case F_SETLK64:
+               ret = 0;
+               break;
+       }
+out:
+       putfile(file);
+       return ret;
+}
+
+/*
+ * close(2): empty slot fd of the current process's table and release the
+ * file it held.  The slot index is remembered in lastfd as a reuse hint
+ * for newfd().  The file is released only after the table lock has been
+ * dropped.  Returns 0, or -EBADF when fd does not name an open file.
+ */
+int sys_close(int fd)
+{
+       Fdtab *tab;
+       Ufile *file;
+
+       trace("sys_close(%d)", fd);
+
+       tab = current->fdtab;
+       file = nil;
+       qlock(tab);
+       if(fd >= 0 && fd < tab->nfd && (file = tab->fd[fd].file) != nil){
+               tab->fd[fd].file = nil;
+               tab->lastfd = fd;
+       }
+       qunlock(tab);
+
+       if(file == nil)
+               return -EBADF;
+       putfile(file);
+       return 0;
+}
+
+/*
+ * ioctl(2): forward the request to the ioctl hook of the device that
+ * owns fd.  Returns -EBADF when fd is not open and -ENOTTY when the
+ * device has no ioctl hook.
+ */
+int sys_ioctl(int fd, int cmd, void *arg)
+{
+       Ufile *file;
+       int ret;
+
+       trace("sys_ioctl(%d, %lux, %p)", fd, (ulong)cmd, arg);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       if(devtab[file->dev]->ioctl != nil)
+               ret = devtab[file->dev]->ioctl(file, cmd, arg);
+       else
+               ret = -ENOTTY;
+       putfile(file);
+       return ret;
+}
+
+/*
+ * Read up to len bytes at offset off through the device's read hook,
+ * without touching file->off.  For an O_NONBLOCK file whose device has a
+ * poll hook, -EAGAIN is returned when poll does not report POLLIN.
+ * A device without a read hook yields 0.
+ */
+int preadfile(Ufile *file, void *buf, int len, vlong off)
+{
+       if((file->mode & O_NONBLOCK) != 0
+       && devtab[file->dev]->poll != nil
+       && (devtab[file->dev]->poll(file, nil) & POLLIN) == 0){
+               trace("readfile(): nonblocking read blocked");
+
+               return -EAGAIN;
+       }
+       if(devtab[file->dev]->read != nil)
+               return devtab[file->dev]->read(file, buf, len, off);
+       return 0;
+}
+
+/*
+ * Read at the file's current offset via preadfile() and advance the
+ * offset by the number of bytes read.  Zero and negative results are
+ * returned without moving the offset.
+ */
+int readfile(Ufile *file, void *buf, int len)
+{
+       int n;
+
+       n = preadfile(file, buf, len, file->off);
+       if(n > 0)
+               file->off += n;
+       return n;
+}
+
+/*
+ * Write len bytes at offset off through the device's write hook, without
+ * touching file->off.  For an O_APPEND file whose device has a size hook,
+ * off is replaced by the current size; a negative size is returned as
+ * the error.  A device without a write hook yields 0.
+ */
+int pwritefile(Ufile *file, void *buf, int len, vlong off)
+{
+       if(devtab[file->dev]->write == nil)
+               return 0;
+       if((file->mode & O_APPEND) != 0 && devtab[file->dev]->size != nil){
+               off = devtab[file->dev]->size(file);
+               if(off < 0)
+                       return (int)off;
+       }
+       return devtab[file->dev]->write(file, buf, len, off);
+}
+
+/*
+ * Write len bytes at the file's current offset and advance the offset by
+ * the number of bytes written.  For an O_APPEND file whose device has a
+ * size hook, the offset is first moved to the current size (a negative
+ * size is returned as the error).  A zero-length write returns 0 after
+ * that adjustment; a device without a write hook yields 0.
+ */
+int writefile(Ufile *file, void *buf, int len)
+{
+       int n;
+       vlong end;
+
+       if(devtab[file->dev]->write == nil)
+               return 0;
+       if((file->mode & O_APPEND) != 0 && devtab[file->dev]->size != nil){
+               end = devtab[file->dev]->size(file);
+               if(end < 0)
+                       return (int)end;
+               file->off = end;
+       }
+       if(len == 0)
+               return 0;
+       n = devtab[file->dev]->write(file, buf, len, file->off);
+       if(n > 0)
+               file->off += n;
+       return n;
+}
+
+/*
+ * read(2): read from fd at its current offset via readfile().
+ * Returns -EBADF when fd is not open.
+ */
+int sys_read(int fd, void *buf, int len)
+{
+       Ufile *file;
+       int n;
+
+       trace("sys_read(%d, %p, %x)", fd, buf, len);
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       n = readfile(file, buf, len);
+       putfile(file);
+       return n;
+}
+
+/*
+ * write(2): write to fd at its current offset via writefile().
+ * Returns -EBADF when fd is not open.
+ */
+int sys_write(int fd, void *buf, int len)
+{
+       Ufile *file;
+       int n;
+
+       trace("sys_write(%d, %p, %x)", fd, buf, len);
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       n = writefile(file, buf, len);
+       putfile(file);
+
+       return n;
+}
+
+/*
+ * pread: read from fd at the explicit offset off via preadfile(),
+ * leaving the file's own offset alone.  Returns -EBADF when fd is not
+ * open.
+ */
+int sys_pread64(int fd, void *buf, int len, ulong off)
+{
+       Ufile *file;
+       int n;
+
+       trace("sys_pread(%d, %p, %x, %lux)", fd, buf, len, off);
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       n = preadfile(file, buf, len, off);
+       putfile(file);
+       return n;
+}
+
+/*
+ * pwrite: write to fd at the explicit offset off via pwritefile(),
+ * leaving the file's own offset alone.  Returns -EBADF when fd is not
+ * open.
+ */
+int sys_pwrite64(int fd, void *buf, int len, ulong off)
+{
+       Ufile *file;
+       int n;
+
+       trace("sys_pwrite(%d, %p, %x, %lux)", fd, buf, len, off);
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       n = pwritefile(file, buf, len, off);
+       putfile(file);
+       return n;
+}
+
+/* one element of the vector handed to sys_readv()/sys_writev() */
+struct linux_iovec
+{
+       void            *base;  /* start of the buffer */
+       ulong   len;    /* buffer length in bytes */
+};
+
+/*
+ * writev(2): write the n buffers described by vec, in order, with
+ * writefile().  Stops at the first error or short write.  Returns the
+ * total number of bytes written; an error code is returned only when
+ * nothing had been written before the failure.  -EBADF when fd is not
+ * open.
+ */
+int sys_writev(int fd, void *vec, int n)
+{
+       struct linux_iovec *v;
+       Ufile *file;
+       int total, i, w;
+
+       trace("sys_writev(%d, %p, %d)", fd, vec, n);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       v = vec;
+       total = 0;
+       for(i=0; i<n; i++){
+               w = writefile(file, v[i].base, v[i].len);
+               if(w < 0){
+                       /* surface the error only if no data went out yet */
+                       if(total == 0)
+                               total = w;
+                       break;
+               }
+               total += w;
+               /* short write: stop instead of moving to the next buffer */
+               if(w < v[i].len)
+                       break;
+       }
+       putfile(file);
+
+       return total;
+}
+
+/*
+ * readv(2): fill the n buffers described by vec, in order, with
+ * readfile().  Stops at the first error or short read.  Returns the
+ * total number of bytes read; an error code is returned only when
+ * nothing had been read before the failure.  -EBADF when fd is not open.
+ */
+int sys_readv(int fd, void *vec, int n)
+{
+       struct linux_iovec *v;
+       Ufile *file;
+       int total, i, r;
+
+       trace("sys_readv(%d, %p, %d)", fd, vec, n);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       v = vec;
+       total = 0;
+       for(i=0; i<n; i++){
+               r = readfile(file, v[i].base, v[i].len);
+               if(r < 0){
+                       /* surface the error only if no data came in yet */
+                       if(total == 0)
+                               total = r;
+                       break;
+               }
+               total += r;
+               /* short read: stop instead of moving to the next buffer */
+               if(r < v[i].len)
+                       break;
+       }
+       putfile(file);
+
+       return total;
+}
+
+/*
+ * Reposition file->off.  whence follows the Linux numbering: 0 is
+ * relative to the start, 1 to the current offset, 2 to the size reported
+ * by the device's size hook.
+ *
+ * Returns 0 on success, -ESPIPE when the device has no size hook,
+ * -EINVAL for an unknown whence or when the resulting offset would be
+ * negative (file->off is left unchanged in that case), or the negative
+ * value returned by the size hook.
+ */
+int seekfile(Ufile *file, vlong off, int whence)
+{
+       vlong base;
+
+       if(devtab[file->dev]->size == nil)
+               return -ESPIPE;
+
+       switch(whence){
+       default:
+               return -EINVAL;
+       case 0:
+               base = 0;
+               break;
+       case 1:
+               base = file->off;
+               break;
+       case 2:
+               base = devtab[file->dev]->size(file);
+               if(base < 0)
+                       return (int)base;
+               break;
+       }
+       /* never leave the file positioned before its start */
+       if(base + off < 0)
+               return -EINVAL;
+       file->off = base + off;
+       return 0;
+}
+
+/*
+ * lseek(2): reposition fd's offset with seekfile() and return the new
+ * offset, or a negative error code cast to ulong.
+ *
+ * The offset arrives as an unsigned register value but is a signed
+ * quantity for the caller, so it is sign-extended before being widened
+ * to vlong.  Widening the ulong directly would turn a negative relative
+ * offset such as -1 into a forward seek of nearly 4 GiB.
+ */
+ulong sys_lseek(int fd, ulong off, int whence)
+{
+       Ufile *file;
+       int ret;
+
+       trace("sys_lseek(%d, %lux, %d)", fd, off, whence);
+
+       if((file = fdgetfile(fd)) == nil)
+               return (ulong)-EBADF;
+       ret = seekfile(file, (vlong)(long)off, whence);
+       if(ret == 0)
+               ret = file->off;
+       putfile(file);
+
+       return ret;
+}
+
+/*
+ * _llseek: reposition fd's offset using a 64-bit offset passed as two
+ * 32-bit halves.  On success the new offset is stored through res when
+ * res is non-nil.  Returns seekfile()'s result, or -EBADF when fd is not
+ * open.
+ */
+int sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence)
+{
+       Ufile *file;
+       vlong off;
+       int ret;
+
+       trace("sys_llseek(%d, %lux, %lux, %p, %d)", fd, hioff, looff, res, whence);
+
+       off = ((vlong)hioff<<32) | ((vlong)looff);
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       ret = seekfile(file, off, whence);
+       if(ret == 0 && res != nil)
+               *res = file->off;
+       putfile(file);
+
+       return ret;
+}
+
+/*
+ * umask(2): install the low nine permission bits of umask as the current
+ * process's mask and return the previous mask.
+ */
+int sys_umask(int umask)
+{
+       int prev;
+
+       trace("sys_umask(%#o)", umask);
+
+       prev = current->umask;
+       current->umask = umask & 0777;
+       return prev;
+}
+
+/*
+ * Make the directory referred to by f the current process's working
+ * directory.  current->cwd is replaced by a copy of fsrootpath(f->path);
+ * when f lives on ROOTDEV and chdir() on its path succeeds, current->kcwd
+ * is replaced by a copy of f->path as well.
+ *
+ * Returns 0 on success, -ENOTDIR when f has no path, its device has no
+ * fstat hook, or it is not a directory, or the error from fstat.
+ */
+int
+chdirfile(Ufile *f)
+{
+       Ustat s;
+       int err;
+
+       trace("chdirfile(%s)", f->path);
+
+       err = -ENOTDIR;
+       if(f->path == nil)
+               return err;
+       if(devtab[f->dev]->fstat == nil)
+               return err;
+       if((err = devtab[f->dev]->fstat(f, &s)) < 0)
+               return err;
+       err = -ENOTDIR;
+       /*
+        * Compare only the file-type bits.  Masking with ~0777 left the
+        * setuid/setgid/sticky bits (07000) in the comparison, so a
+        * directory carrying any of them was rejected as ENOTDIR.
+        */
+       if((s.mode & ~07777) != S_IFDIR)
+               return err;
+       free(current->cwd);
+       current->cwd = kstrdup(fsrootpath(f->path));
+       if(f->dev == ROOTDEV && chdir(f->path) == 0){
+               free(current->kcwd);
+               current->kcwd = kstrdup(f->path);
+       }
+       return 0;
+}
+
+/*
+ * fchdir(2): change the working directory to the one open on fd.
+ * Returns chdirfile()'s result, or -EBADF when fd is not open.
+ */
+int
+sys_fchdir(int fd)
+{
+       Ufile *dir;
+       int ret;
+
+       trace("sys_fchdir(%d)", fd);
+
+       dir = fdgetfile(fd);
+       if(dir == nil)
+               return -EBADF;
+       ret = chdirfile(dir);
+       putfile(dir);
+       return ret;
+}
+
+/*
+ * fchown(2): forward to the fchown hook of the device that owns fd.
+ * Returns -EBADF when fd is not open and -EPERM when the device has no
+ * fchown hook.
+ */
+int
+sys_fchown(int fd, int uid, int gid)
+{
+       Ufile *f;
+       int ret;
+
+       trace("sys_fchown(%d, %d, %d)", fd, uid, gid);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       if(devtab[f->dev]->fchown != nil)
+               ret = devtab[f->dev]->fchown(f, uid, gid);
+       else
+               ret = -EPERM;
+       putfile(f);
+
+       return ret;
+}
+
+/*
+ * fchmod(2): forward to the fchmod hook of the device that owns fd.
+ * Returns -EBADF when fd is not open and -EPERM when the device has no
+ * fchmod hook.
+ */
+int
+sys_fchmod(int fd, int mode)
+{
+       Ufile *f;
+       int ret;
+
+       trace("sys_fchmod(%d, %#o)", fd, mode);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       if(devtab[f->dev]->fchmod != nil)
+               ret = devtab[f->dev]->fchmod(f, mode);
+       else
+               ret = -EPERM;
+       putfile(f);
+
+       return ret;
+}
+
+/*
+ * ftruncate(2): forward to the ftruncate hook of the device that owns
+ * fd, widening size to uvlong.  Returns -EBADF when fd is not open and
+ * -EPERM when the device has no ftruncate hook.
+ */
+int
+sys_ftruncate(int fd, ulong size)
+{
+       Ufile *f;
+       int ret;
+
+       trace("sys_ftruncate(%d, %lux)", fd, size);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       if(devtab[f->dev]->ftruncate != nil)
+               ret = devtab[f->dev]->ftruncate(f, (uvlong)size);
+       else
+               ret = -EPERM;
+       putfile(f);
+
+       return ret;
+}
+
+/* Give the current process an empty descriptor table and a umask of 022. */
+void initfile(void)
+{
+       current->umask = 022;
+       current->fdtab = newfdtab();
+}
+
+/*
+ * Detach proc from its descriptor table, if it has one, and drop the
+ * reference proc held on it.
+ */
+void exitfile(Uproc *proc)
+{
+       Fdtab *tab;
+
+       tab = proc->fdtab;
+       if(tab == nil)
+               return;
+       proc->fdtab = nil;
+       putfdtab(tab);
+}
+
+/*
+ * Set up new's descriptor table from the current process's table: a
+ * private duplicate when copy is nonzero, otherwise the same table with
+ * one more reference.  If the current process has no table, neither
+ * does new.
+ */
+void clonefile(Uproc *new, int copy)
+{
+       Fdtab *tab;
+
+       tab = current->fdtab;
+       if(tab != nil)
+               new->fdtab = getfdtab(tab, copy);
+       else
+               new->fdtab = nil;
+}
+
+/*
+ * Close every descriptor of the current process that has FD_CLOEXEC set:
+ * the slot is emptied, its flags cleared, and the file released.  Does
+ * nothing when the process has no descriptor table.
+ */
+void closexfds(void)
+{
+       Fdtab *tab;
+       Ufile *f;
+       int i;
+
+       tab = current->fdtab;
+       if(tab == nil)
+               return;
+       qlock(tab);
+       for(i=0; i<tab->nfd; i++){
+               f = tab->fd[i].file;
+               if(f != nil && (tab->fd[i].flags & FD_CLOEXEC) != 0){
+                       tab->fd[i].flags = 0;
+                       tab->fd[i].file = nil;
+
+                       putfile(f);
+               }
+       }
+       qunlock(tab);
+}
+
+/* flock(2) stub: traces the call and reports success without locking anything. */
+int sys_flock(int fd, int cmd)
+{
+       trace("sys_flock(%d, %d)", fd, cmd);
+       return 0;
+}
+
+/* fsync(2) stub: traces the call and reports success without flushing anything. */
+int sys_fsync(int fd)
+{
+       trace("sys_fsync(%d)", fd);
+       return 0;
+}
+
diff --git a/linux_emul_base/fns.h b/linux_emul_base/fns.h
new file mode 100644 (file)
index 0000000..d6005d5
--- /dev/null
@@ -0,0 +1,311 @@
+/* error */
+int mkerror(void);
+#pragma varargck type "E" int
+int Efmt(Fmt *e);
+int sys_nosys(void);
+
+/* linuxcall */
+int linuxcall(void);
+
+/* trap */
+void inittrap(void);
+void retuser(void);
+
+/* bits */
+void incref(Ref *);
+int decref(Ref *);
+void jumpstart(ulong addr, ulong *stack);
+void jumpureg(void *ureg);
+void linux_sigreturn(void);
+void linux_rtsigreturn(void);
+
+/* trace */
+void inittrace(void);
+void exittrace(Uproc *proc);
+void clonetrace(Uproc *new, int copy);
+void tprint(char *fmt, ...);
+#pragma varargck argpos tprint 1
+#define trace if(debug)tprint
+
+/* proc */
+void initproc(void);
+void exitproc(Uproc *proc, int code, int group);
+void stopproc(Uproc *proc, int code, int group);
+void contproc(Uproc *proc, int code, int group);
+int procfork(void (*fproc)(void *aux), void *aux, int flags);
+Uproc* getproc(int tid);
+Uproc* getprocn(int n);
+int threadcount(int pid);
+void zapthreads(void);
+void setprocname(char *s);
+int notifyme(int on);
+void wakeme(int on);
+int sleepproc(QLock *l, int flags);
+Uwait* addwaitq(Uwaitq *q);
+void delwaitq(Uwait *w);
+int sleepq(Uwaitq *q, QLock *l, int flags);
+int wakeq(Uwaitq *q, int nwake);
+int requeue(Uwaitq *q1, Uwaitq *q2, int nrequeue);
+int killproc(Uproc *p, Usiginfo *info, int group);
+void setalarm(vlong t);
+
+int sys_waitpid(int pid, int *pexit, int opt);
+int sys_wait4(int pid, int *pexit, int opt, void *prusage);
+int sys_exit(int code);
+int sys_exit_group(int code);
+int sys_linux_clone(int flags, void *newstack, int *parenttidptr, int *tlsdescr, void *childtidptr);
+int sys_fork(void);
+int sys_vfork(void);
+int sys_getpid(void);
+int sys_getppid(void);
+int sys_gettid(void);
+int sys_setpgid(int pid, int pgid);
+int sys_getpgid(int pid);
+int sys_setpgrp(int pid);
+int sys_getpgrp(void);
+int sys_getuid(void);
+int sys_getgid(void);
+int sys_setgid(int gid);
+int sys_setuid(int uid);
+int sys_setresuid(int ruid, int euid, int suid);
+int sys_getresuid(int *ruid, int *euid, int *suid);
+int sys_setresgid(int rgid, int egid, int sgid);
+int sys_getresgid(int *rgid, int *egid, int *sgid);
+int sys_setreuid(int ruid, int euid);
+int sys_setregid(int rgid, int egid);
+int sys_uname(void *);
+int sys_personality(ulong p);
+int sys_setsid(void);
+int sys_getsid(int pid);
+int sys_getgroups(int size, int *groups);
+int sys_setgroups(int size, int *groups);
+
+int sys_kill(int pid, int sig);
+int sys_tkill(int tid, int sig);
+int sys_tgkill(int pid, int tid, int sig);
+int sys_rt_sigqueueinfo(int pid, int sig, void *info);
+
+int sys_set_tid_address(int *tidptr);
+
+int sys_sched_setscheduler(int pid, int policy, void *param);
+int sys_sched_getscheduler(int pid);
+int sys_sched_setparam(int pid, void *param);
+int sys_sched_getparam(int pid, void *param);
+int sys_sched_yield(void);
+
+int sys_getrlimit(long resource, void *rlim);
+int sys_setrlimit(long resource, void *rlim);
+
+/* signal */
+void initsignal(void);
+void exitsignal(void);
+void clonesignal(Uproc *new, int copyhand, int newproc);
+void settty(Ufile *tty);
+Ufile* gettty(void);
+#pragma varargck type "S" int
+int Sfmt(Fmt *f);
+
+int wantssignal(Uproc *proc, int sig);
+int ignoressignal(Uproc *proc, int sig);
+int signalspending(Uproc *proc);
+
+void handlesignals(void);
+int sendsignal(Uproc *proc, Usiginfo *info, int group);
+
+void siginfo2linux(Usiginfo *, void *);
+void linux2siginfo(void *, Usiginfo *);
+
+int sys_sigaltstack(void *stk, void *ostk);
+int sys_rt_sigaction(int sig, void *pact, void *poact, int setsize);
+int sys_rt_sigpending(uchar *set, int setsize);
+int sys_rt_sigprocmask(int how, uchar *act, uchar *oact, int setsize);
+int sys_rt_sigsuspend(uchar *set, int setsize);
+int sys_sigreturn(void);
+int sys_rt_sigreturn(void);
+
+int sys_setitimer(int which, void *value, void *ovalue);
+int sys_getitimer(int which, void *value);
+int sys_alarm(long seconds);
+
+/* file */
+void initfile(void);
+void exitfile(Uproc *proc);
+void clonefile(Uproc *new, int copy);
+void closexfds(void);
+Ufile *procfdgetfile(Uproc *proc, int fd);
+Ufile* fdgetfile(int fd);
+Ufile* getfile(Ufile *file);
+void putfile(Ufile *file);
+int newfd(Ufile *file, int flags);
+int chdirfile(Ufile *file);
+int readfile(Ufile *file, void *buf, int len);
+int writefile(Ufile *file, void *buf, int len);
+int preadfile(Ufile *file, void *buf, int len, vlong off);
+int pwritefile(Ufile *file, void *buf, int len, vlong off);
+int sys_dup(int fd);
+int sys_dup2(int old, int new);
+int sys_fcntl(int fd, int cmd, int arg);
+int sys_close(int fd);
+int sys_ioctl(int fd, int cmd, void *arg);
+int sys_read(int fd, void *buf, int len);
+int sys_readv(int fd, void *vec, int n);
+int sys_pread64(int fd, void *buf, int len, ulong off);
+int sys_write(int fd, void *buf, int len);
+int sys_pwrite64(int fd, void *buf, int len, ulong off);
+int sys_writev(int fd, void *vec, int n);
+ulong sys_lseek(int fd, ulong off, int whence);
+int sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence);
+int sys_umask(int umask);
+int sys_flock(int fd, int cmd);
+int sys_fsync(int fd);
+int sys_fchdir(int fd);
+int sys_getcwd(char *buf, int len);
+int sys_fchmod(int fd, int mode);
+int sys_fchown(int fd, int uid, int gid);
+int sys_ftruncate(int fd, ulong size);
+
+/* poll */
+void pollwait(Ufile *f, Uwaitq *q, void *t);
+int sys_poll(void *p, int nfd, long timeout);
+int sys_select(int nfd, ulong *rfd, ulong *wfd, ulong *efd, void *ptv);
+
+/* mem */
+void* kmalloc(int size);
+void* kmallocz(int size, int zero);
+void* krealloc(void *ptr, int size);
+char* kstrdup(char *s);
+char* ksmprint(char *fmt, ...);
+#pragma varargck argpos ksmprint 1
+
+ulong pagealign(ulong addr);
+
+void initmem(void);
+void exitmem(void);
+void clonemem(Uproc *new, int copy);
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe);
+void* mapstack(int size);
+void mapdata(ulong base);
+void unmapuserspace(void);
+int okaddr(void *ptr, int len, int write);
+
+ulong sys_linux_mmap(void *a);
+ulong sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff);
+int sys_munmap(ulong addr, ulong len);
+ulong sys_brk(ulong bk);
+int sys_mprotect(ulong addr, ulong len, int prot);
+int sys_msync(ulong addr, ulong len, int flags);
+ulong sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr);
+
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3);
+
+/* exec */
+int sys_execve(char *name, char *argv[], char *envp[]);
+
+/* time */
+void inittime(void);
+int sys_time(long *p);
+int sys_gettimeofday(void *tvp, void *tzp);
+int sys_clock_gettime(int clock, void *t);
+int sys_nanosleep(void *rqp, void *rmp);
+int proctimes(Uproc *p, ulong *t);
+int sys_times(void *times);
+
+/* tls */
+void inittls(void);
+void clonetls(Uproc *new);
+
+int sys_set_thread_area(void *pinfo);
+int sys_get_thread_area(void *pinfo);
+int sys_modify_ldt(int func, void *data, int count);
+
+/* bufproc */
+void *newbufproc(int fd);
+void freebufproc(void *bp);
+int readbufproc(void *bp, void *data, int len, int peek, int noblock);
+int pollbufproc(void *bp, Ufile *file, void *tab);
+int nreadablebufproc(void *bp);
+
+/* main */
+void panic(char *msg, ...);
+int onstack(long *stk, int (*func)(void *arg), void *arg);
+void profme(void);
+
+/* stat */
+int ufstat(int fd, Ustat *ps);
+Udirent *newdirent(char *path, char *name, int mode);
+
+int sys_getxattr(char *path, char *name, void *value, int size);
+int sys_lgetxattr(char *path, char *name, void *value, int size);
+int sys_fgetxattr(int fd, char *name, void *value, int size);
+int sys_setxattr(char *path, char *name, void *value, int flags, int size);
+int sys_lsetxattr(char *path, char *name, void *value, int flags, int size);
+int sys_fsetxattr(int fd, char *name, void *value, int size, int flags);
+
+int sys_linux_fstat(int fd, void *st);
+int sys_linux_fstat64(int fd, void *st);
+int sys_linux_getdents(int fd, void *buf, int nbuf);
+int sys_linux_getdents64(int fd, void *buf, int nbuf);
+int sys_linux_lstat(char *path, void *st);
+int sys_linux_lstat64(char *path, void *st);
+int sys_linux_stat(char *path, void *st);
+int sys_linux_stat64(char *path, void *st);
+
+int sys_statfs(char *name, void *pstatfs);
+
+/* fs */
+void fsmount(Udev *dev, char *path);
+
+char* allocpath(char *base, char *prefix, char *name);
+char* fullpath(char *base, char *name);
+char* shortpath(char *base, char *path);
+char* fsfullpath(char *path);
+char* fsrootpath(char *path);
+char* basepath(char *p, char **ps);
+ulong hashpath(char *s);
+
+int fsaccess(char *path, int mode);
+int fschmod(char *path, int mode);
+int fschown(char *path, int uid, int gid, int link);
+int fslink(char *old, char *new, int sym);
+int fsmkdir(char *path, int mode);
+int fsopen(char *path, int mode, int perm, Ufile **pf);
+int fsreadlink(char *path, char *buf, int len);
+int fsrename(char *old, char *new);
+int fsstat(char *path, int link, Ustat *ps);
+int fstruncate(char *path, vlong size);
+int fsunlink(char *path, int rmdir);
+int fsutime(char *path, int atime, int mtime);
+
+int sys_access(char *name, int mode);
+int sys_chdir(char *name);
+int sys_chroot(char *name);
+int sys_chmod(char *name, int mode);
+int sys_chown(char *name, int uid, int gid);
+int sys_creat(char *name, int perm);
+int sys_lchown(char *name, int uid, int gid);
+int sys_link(char *old, char *new);
+int sys_open(char *name, int mode, int perm);
+int sys_readlink(char *name, char *buf, int len);
+int sys_rename(char *from, char *to);
+int sys_rmdir(char *name);
+int sys_symlink(char *old, char *new);
+int sys_truncate(char *name, ulong size);
+int sys_unlink(char *name);
+int sys_utime(char *name, void *times);
+int sys_utimes(char *name, void *tvp);
+int sys_mkdir(char *name, int mode);
+
+/* drivers */
+void rootdevinit(void);
+void sockdevinit(void);
+int sys_linux_socketcall(int call, int *arg);
+void pipedevinit(void);
+int sys_pipe(int *fds);
+void fddevinit(void);
+void ptsdevinit(void);
+void dspdevinit(void);
+void miscdevinit(void);
+void ptydevinit(void);
+void consdevinit(void);
+void procdevinit(void);
+
diff --git a/linux_emul_base/fs.c b/linux_emul_base/fs.c
new file mode 100644 (file)
index 0000000..4283991
--- /dev/null
@@ -0,0 +1,758 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Mount Mount;
+
+/*
+ * One mount table entry: dev handles every path that equals path or
+ * continues it with a '/' (see path2mount).  npath caches strlen(path);
+ * path is stored inline after the struct.
+ */
+struct Mount
+{
+       Mount   *next;
+       Udev            *dev;
+       int             npath;
+       char            path[];
+};
+
+/* head of the mount list; fsmount keeps it ordered longest path first */
+static Mount *mtab;
+
+/*
+ * Register dev as the handler for path.  The mount list is kept ordered
+ * by descending path length (ties in descending strcmp order), so a scan
+ * from the head meets the longest matching prefix first.  A nil dev is
+ * silently ignored.
+ */
+void
+fsmount(Udev *dev, char *path)
+{
+       Mount *ent, *cur, **link;
+       int len;
+
+       if(dev == nil)
+               return;
+
+       len = strlen(path);
+       ent = kmalloc(sizeof(*ent) + len + 1);
+       strcpy(ent->path, path);
+       ent->npath = len;
+       ent->dev = dev;
+       ent->next = nil;
+
+       /* skip every entry that has to stay in front of the new one */
+       link = &mtab;
+       while((cur = *link) != nil){
+               if(len > cur->npath)
+                       break;
+               if(len == cur->npath && strcmp(ent->path, cur->path) >= 0)
+                       break;
+               link = &cur->next;
+       }
+       ent->next = cur;
+       *link = ent;
+}
+
+/*
+ * Hash the string s: starting from 0, each byte c updates the hash as
+ * h = h*13 + (c - 'a').  The empty string hashes to 0.
+ */
+ulong
+hashpath(char *s)
+{
+       ulong sum;
+
+       sum = 0;
+       while(*s != 0){
+               sum = (sum * 13) + (*s - 'a');
+               s++;
+       }
+       return sum;
+}
+
+/*
+ * Split p at its last '/'.  Returns a kmalloc'ed copy of the directory
+ * part ("/" when the only slash is the first character) and, when ps is
+ * not nil, stores a kstrdup'ed copy of the final element in *ps.
+ * If p contains no '/' or ends in '/', returns nil and sets *ps to nil.
+ */
+char*
+basepath(char *p, char **ps)
+{
+       char *slash, *dir;
+       int len;
+
+       slash = strrchr(p, '/');
+       if(slash == nil || slash[1] == 0){
+               if(ps)
+                       *ps = nil;
+               return nil;
+       }
+       if(ps)
+               *ps = kstrdup(slash+1);
+       len = slash - p;
+       if(len == 0)
+               len = 1;        /* keep the leading '/' as the directory */
+       dir = kmalloc(len+1);
+       memmove(dir, p, len);
+       dir[len] = 0;
+       return dir;
+}
+
+/*
+ * Build base + "/" + prefix + name in a buffer obtained from kmalloc.
+ * prefix may be nil.  The '/' separator is omitted when both prefix and
+ * name are empty.
+ */
+char*
+allocpath(char *base, char *prefix, char *name)
+{
+       char *buf, *w;
+       int nbase, nname, npre;
+
+       nbase = strlen(base);
+       nname = strlen(name);
+       npre = 0;
+       if(prefix)
+               npre = strlen(prefix);
+
+       buf = kmalloc(nbase+nname+npre+2);
+       w = buf;
+       memmove(w, base, nbase);
+       w += nbase;
+       if(nname || npre)
+               *w++ = '/';
+       if(npre){
+               memmove(w, prefix, npre);
+               w += npre;
+       }
+       /* nname+1 also copies the terminating NUL */
+       memmove(w, name, nname+1);
+       return buf;
+}
+
+/*
+ * Make name absolute.  Names starting with '/' or '#' are copied as they
+ * are; anything else is appended to base.  The result is passed through
+ * cleanname() and belongs to the caller.  Returns nil when name is
+ * relative and base is nil.
+ */
+char*
+fullpath(char *base, char *name)
+{
+       char *res;
+
+       if(*name == '/' || *name == '#')
+               res = kstrdup(name);
+       else if(base)
+               res = allocpath(base, nil, name);
+       else
+               return nil;
+       if(res != nil)
+               cleanname(res);
+       return res;
+}
+
+/*
+ * Strip the directory prefix base from path.
+ *
+ * If path lies at or below base, returns a pointer to the remainder of
+ * path (without a leading '/'), or "." when path names base itself.
+ * Otherwise path is returned unchanged.
+ *
+ * The prefix only counts when it ends on a path element boundary, so
+ * base "/a/b" does not match "/a/bc".  A base that is empty or already
+ * ends in '/' (for example "/") matches every path it is a prefix of.
+ */
+char*
+shortpath(char *base, char *path)
+{
+       int n;
+
+       n = strlen(base);
+       /* a successful compare of n bytes also proves path has at least n bytes */
+       if(strncmp(path, base, n) != 0)
+               return path;
+       if(n > 0 && base[n-1] != '/' && path[n] != 0 && path[n] != '/')
+               return path;
+       path += n;
+       if(*path == '/')
+               path++;
+       if(*path == 0)
+               path = ".";
+       return path;
+}
+
+/*
+ * Turn a path given by the emulated process into a full path: it is
+ * made absolute relative to current->cwd and, when current->root is set,
+ * that root is put in front of it.  The result is allocated and must be
+ * freed by the caller; nil is returned when fullpath() fails.
+ */
+char*
+fsfullpath(char *path)
+{
+       char *abs, *root, *rooted;
+
+       abs = fullpath(current->cwd, path);
+       if(abs == nil)
+               return nil;
+       root = current->root;
+       if(root == nil)
+               return abs;
+       /* abs+1 skips the first character so allocpath supplies the separator */
+       rooted = allocpath(root, nil, abs+1);
+       free(abs);
+       return rooted;
+}
+
+/*
+ * Inverse of the root handling in fsfullpath(): given a full path, return
+ * it as seen from current->root.  No memory is allocated; the result is
+ * either path itself, a pointer into path, or the constant "/".
+ *
+ *  - no root set:            path unchanged
+ *  - path is the root:       "/"
+ *  - path below the root:    pointer to the '/' that follows the root
+ *  - anything else:          path unchanged
+ */
+char*
+fsrootpath(char *path)
+{
+       char *root, *rel;
+
+       root = current->root;
+       if(root == nil)
+               return path;
+       rel = shortpath(root, path);
+       /*
+        * compare the whole remainder: a name that merely starts with
+        * a dot (".profile") is not the root directory.
+        */
+       if(strcmp(rel, ".") == 0)
+               return "/";
+       /* only step back onto a real separator */
+       if(rel > path && rel[-1] == '/')
+               return rel-1;
+       return path;
+}
+
+/*
+ * Find the first mount entry whose path is a prefix of path ending on an
+ * element boundary (end of string or '/').  Returns nil if none matches.
+ */
+static Mount*
+path2mount(char *path)
+{
+       Mount *m;
+       char c;
+
+       for(m = mtab; m != nil; m = m->next){
+               if(strncmp(path, m->path, m->npath) != 0)
+                       continue;
+               c = path[m->npath];
+               if(c == '\0' || c == '/')
+                       return m;
+       }
+       return nil;
+}
+
+/* Return the device mounted for path, or nil when no mount matches. */
+static Udev*
+path2dev(char *path)
+{
+       Mount *m;
+
+       m = path2mount(path);
+       if(m == nil)
+               return nil;
+       return m->dev;
+}
+
+/*
+ * Enter a path operation.  current->linkloop counts the nesting depth;
+ * once it exceeds 8 the call fails with -ELOOP, which is returned and
+ * also stored in *perr when perr is not nil.  On success the depth is
+ * incremented and 0 is returned; pair every success with fsleave().
+ */
+static int
+fsenter(int *perr)
+{
+       if(current->linkloop > 8){
+               if(perr != nil)
+                       *perr = -ELOOP;
+               return -ELOOP;
+       }
+       current->linkloop++;
+       return 0;
+}
+
+/* Leave a path operation: undo the depth increment done by fsenter(). */
+static void
+fsleave(void)
+{
+       current->linkloop -= 1;
+}
+
+/*
+ * getcwd: copy current->cwd, including its terminating NUL, into buf.
+ * Returns the number of bytes copied, or -ERANGE when len is too small.
+ */
+int sys_getcwd(char *buf, int len)
+{
+       char *dir;
+       int need;
+
+       trace("sys_getcwd(%p, %x)", buf, len);
+
+       dir = current->cwd;
+       need = strlen(dir) + 1;
+       if(len < need)
+               return -ERANGE;
+       memmove(buf, dir, need);
+       return need;
+}
+
+/*
+ * Open path through the device mounted for it.  *pf is cleared first and
+ * is filled in by the device's open handler.  Returns the handler's
+ * result, -ENOENT when there is no device or no open handler, or -ELOOP
+ * from fsenter().
+ */
+int
+fsopen(char *path, int mode, int perm, Ufile **pf)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsopen(%s, %#o, %#o)", path, mode, perm);
+
+       *pf = nil;
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->open)
+               rc = d->open(path, mode, perm, pf);
+       else
+               rc = -ENOENT;
+       fsleave();
+       return rc;
+}
+
+/*
+ * Check access to path.  Returns -ENOENT when no device is mounted for
+ * it, 0 when the device has no access handler, and otherwise whatever the
+ * handler returns (or -ELOOP from fsenter()).
+ */
+int
+fsaccess(char *path, int mode)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsaccess(%s, %#o)", path, mode);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d == nil)
+               rc = -ENOENT;
+       else if(d->access == nil)
+               rc = 0;
+       else
+               rc = d->access(path, mode);
+       fsleave();
+
+       return rc;
+}
+
+/* access: resolve name with fsfullpath() and hand it to fsaccess(). */
+int sys_access(char *name, int mode)
+{
+       char *full;
+       int rc;
+
+       trace("sys_access(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsaccess(full, mode);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * open: resolve name, open it with fsopen() and install the resulting
+ * file with newfd().  Returns newfd()'s result on success, the fsopen()
+ * error otherwise, or -EFAULT when the name cannot be resolved.
+ */
+int sys_open(char *name, int mode, int perm)
+{
+       Ufile *file;
+       char *full;
+       int rc;
+
+       trace("sys_open(%s, %#o, %#o)", name, mode, perm);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsopen(full, mode, perm, &file);
+       free(full);
+
+       if(rc != 0)
+               return rc;
+       return newfd(file, FD_CLOEXEC);
+}
+
+/*
+ * creat: create (or truncate) name and open it for writing.
+ * Linux defines creat(path, mode) as
+ * open(path, O_CREAT|O_WRONLY|O_TRUNC, mode); without O_WRONLY the access
+ * mode would be O_RDONLY (0) and the new descriptor could not be written.
+ * Returns whatever sys_open() returns.
+ */
+int sys_creat(char *name, int perm)
+{
+       trace("sys_create(%s, %#o)", name, perm);
+
+       return sys_open(name, O_CREAT|O_WRONLY|O_TRUNC, perm);
+}
+
+/*
+ * Stat path through its device.  *ps is zeroed before the device's stat
+ * handler fills it in; link is passed through to the handler.  Returns
+ * the handler's result, -EPERM when there is no device or no stat
+ * handler, or -ELOOP from fsenter().
+ */
+int
+fsstat(char *path, int link, Ustat *ps)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsstat(%s, %d)", path, link);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d == nil || d->stat == nil){
+               rc = -EPERM;
+       } else {
+               memset(ps, 0, sizeof(Ustat));
+               rc = d->stat(path, link, ps);
+       }
+       fsleave();
+       return rc;
+}
+
+/*
+ * chdir: open the resolved name read-only and pass the file to
+ * chdirfile(); the file reference is released again afterwards.
+ */
+int
+sys_chdir(char *name)
+{
+       Ufile *dir;
+       char *full;
+       int rc;
+
+       trace("sys_chdir(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsopen(full, O_RDONLY, 0, &dir);
+       free(full);
+       if(rc != 0)
+               return rc;
+       rc = chdirfile(dir);
+       putfile(dir);
+       return rc;
+}
+
+/*
+ * chroot: make the directory name the new current->root.
+ *
+ * name is resolved with fsfullpath() like in every other path system
+ * call, so relative names work and a root that is already in effect is
+ * honoured (the new root is looked up inside the old one).  The target
+ * must open, have a path and stat as a directory.  A new root of "/" is
+ * stored as nil, meaning "no root".
+ *
+ * Returns 0 on success, -EFAULT when name cannot be resolved, -ENOTDIR
+ * when the target is not a usable directory, or the error from
+ * fsopen()/fstat.
+ */
+int sys_chroot(char *name)
+{
+       Ufile *f;
+       Ustat s;
+       int err;
+
+       trace("sys_chroot(%s)", name);
+
+       if((name = fsfullpath(name)) == nil)
+               return -EFAULT;
+       f = nil;
+       err = fsopen(name, O_RDONLY, 0, &f);
+       free(name);
+       if(err < 0)
+               goto out;
+       err = -ENOTDIR;
+       if(f->path == nil)
+               goto out;
+       if(devtab[f->dev]->fstat == nil)
+               goto out;
+       if((err = devtab[f->dev]->fstat(f, &s)) < 0)
+               goto out;
+       err = -ENOTDIR;
+       if((s.mode & ~0777) != S_IFDIR)
+               goto out;
+       err = 0;
+       free(current->root);
+       if(strcmp(f->path, "/") == 0){
+               current->root = nil;
+       } else {
+               current->root = kstrdup(f->path);
+       }
+out:
+       /* f is still nil when the open itself failed */
+       if(f != nil)
+               putfile(f);
+       return err;
+}
+
+/*
+ * Change owner and group of path through its device; link is passed on
+ * to the handler unchanged.  Returns the handler's result, -EPERM when
+ * there is no device or no chown handler, or -ELOOP from fsenter().
+ */
+int
+fschown(char *path, int uid, int gid, int link)
+{
+       Udev *d;
+       int rc;
+
+       trace("fschown(%s, %d, %d, %d)", path, uid, gid, link);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->chown)
+               rc = d->chown(path, uid, gid, link);
+       else
+               rc = -EPERM;
+       fsleave();
+       return rc;
+}
+
+/* chown: resolve name and call fschown() with link = 0. */
+int sys_chown(char *name, int uid, int gid)
+{
+       char *full;
+       int rc;
+
+       trace("sys_chown(%s, %d, %d)", name, uid, gid);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fschown(full, uid, gid, 0);
+       free(full);
+
+       return rc;
+}
+
+/* lchown: resolve name and call fschown() with link = 1. */
+int sys_lchown(char *name, int uid, int gid)
+{
+       char *full;
+       int rc;
+
+       trace("sys_lchown(%s, %d, %d)", name, uid, gid);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fschown(full, uid, gid, 1);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Read the link at path into buf (at most len bytes) through its device.
+ * Returns the handler's result, -EPERM when there is no device or no
+ * readlink handler, or -ELOOP from fsenter().
+ */
+int
+fsreadlink(char *path, char *buf, int len)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsreadlink(%s)", path);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->readlink)
+               rc = d->readlink(path, buf, len);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* readlink: resolve name and hand it to fsreadlink(). */
+int sys_readlink(char *name, char *buf, int len)
+{
+       char *full;
+       int rc;
+
+       trace("sys_readlink(%s, %p, %x)", name, buf, len);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsreadlink(full, buf, len);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Rename old to new.  Both paths must be served by the same device,
+ * otherwise -EXDEV is returned.  Returns -EPERM when old has no device or
+ * the device has no rename handler, -ELOOP from fsenter(), or the
+ * handler's result.
+ */
+int
+fsrename(char *old, char *new)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsrename(%s, %s)", old, new);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(old);
+       if(d == nil || d->rename == nil)
+               rc = -EPERM;
+       else if(d != path2dev(new))
+               rc = -EXDEV;
+       else
+               rc = d->rename(old, new);
+       fsleave();
+
+       return rc;
+}
+
+
+/* rename: resolve both names and hand them to fsrename(). */
+int sys_rename(char *from, char *to)
+{
+       char *src, *dst;
+       int rc;
+
+       trace("sys_rename(%s, %s)", from, to);
+
+       src = fsfullpath(from);
+       if(src == nil)
+               return -EFAULT;
+       dst = fsfullpath(to);
+       if(dst == nil){
+               free(src);
+               return -EFAULT;
+       }
+       rc = fsrename(src, dst);
+       free(src);
+       free(dst);
+
+       return rc;
+}
+
+/*
+ * Create the directory path through its device.  Returns the handler's
+ * result, -EPERM when there is no device or no mkdir handler, or -ELOOP
+ * from fsenter().
+ */
+int
+fsmkdir(char *path, int mode)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsmkdir(%s, %#o)", path, mode);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+
+       d = path2dev(path);
+       if(d != nil && d->mkdir)
+               rc = d->mkdir(path, mode);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* mkdir: resolve name and hand it to fsmkdir(). */
+int sys_mkdir(char *name, int mode)
+{
+       char *full;
+       int rc;
+
+       trace("sys_mkdir(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsmkdir(full, mode);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Set the access and modification times of path through its device.
+ * Returns the handler's result, -EPERM when there is no device or no
+ * utime handler, or -ELOOP from fsenter().
+ */
+int
+fsutime(char *path, int atime, int mtime)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsutime(%s, %d, %d)", path, atime, mtime);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->utime)
+               rc = d->utime(path, atime, mtime);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* argument of sys_utime(): the access and modification times to set */
+struct linux_utimbuf
+{
+       long    atime;
+       long    mtime;
+};
+
+/*
+ * utime: set the times of name from the linux_utimbuf at times, or set
+ * both to the current time when times is nil.
+ */
+int sys_utime(char *name, void *times)
+{
+       struct linux_utimbuf *ub = times;
+       char *full;
+       long at, mt;
+       int rc;
+
+       trace("sys_utime(%s, %p)", name, times);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       if(ub == nil){
+               at = mt = time(0);
+       }else{
+               at = ub->atime;
+               mt = ub->mtime;
+       }
+       rc = fsutime(full, at, mt);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * utimes: tvp points at two linux_timeval entries, [0] the access time
+ * and [1] the modification time; only tv_sec is used.  When tvp is nil
+ * both times are set to the current time.
+ */
+int sys_utimes(char *name, void *tvp)
+{
+       struct linux_timeval *tv = tvp;
+       char *full;
+       long at, mt;
+       int rc;
+
+       trace("sys_utimes(%s, %p)", name, tvp);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       if(tv == nil){
+               at = mt = time(0);
+       }else{
+               at = tv[0].tv_sec;
+               mt = tv[1].tv_sec;
+       }
+       rc = fsutime(full, at, mt);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Change the mode of path through its device.  Returns the handler's
+ * result, -EPERM when there is no device or no chmod handler, or -ELOOP
+ * from fsenter().
+ */
+int
+fschmod(char *path, int mode)
+{
+       Udev *d;
+       int rc;
+
+       trace("fschmod(%s, %#o)", path, mode);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->chmod)
+               rc = d->chmod(path, mode);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* chmod: resolve name and hand it to fschmod(). */
+int sys_chmod(char *name, int mode)
+{
+       char *full;
+       int rc;
+
+       trace("sys_chmod(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fschmod(full, mode);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Truncate path to size bytes through its device.  Returns the handler's
+ * result, -EPERM when there is no device or no truncate handler, or
+ * -ELOOP from fsenter().
+ */
+int
+fstruncate(char *path, vlong size)
+{
+       Udev *d;
+       int rc;
+
+       trace("fstruncate(%s, %llx)", path, size);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->truncate)
+               rc = d->truncate(path, size);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* truncate: resolve name and hand it to fstruncate(). */
+int sys_truncate(char *name, ulong size)
+{
+       char *full;
+       int rc;
+
+       trace("sys_truncate(%s, %lux)", name, size);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fstruncate(full, size);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Remove path through its device; rmdir is passed to the handler
+ * unchanged (sys_unlink passes 0, sys_rmdir passes 1).  Returns the
+ * handler's result, -EPERM when there is no device or no unlink handler,
+ * or -ELOOP from fsenter().
+ */
+int
+fsunlink(char *path, int rmdir)
+{
+       Udev *d;
+       int rc;
+
+       trace("fsunlink(%s, %d)", path, rmdir);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(path);
+       if(d != nil && d->unlink)
+               rc = d->unlink(path, rmdir);
+       else
+               rc = -EPERM;
+       fsleave();
+
+       return rc;
+}
+
+/* unlink: resolve name and call fsunlink() with rmdir = 0. */
+int sys_unlink(char *name)
+{
+       char *full;
+       int rc;
+
+       trace("sys_unlink(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsunlink(full, 0);
+       free(full);
+
+       return rc;
+}
+
+/* rmdir: resolve name and call fsunlink() with rmdir = 1. */
+int sys_rmdir(char *name)
+{
+       char *full;
+       int rc;
+
+       trace("sys_rmdir(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       rc = fsunlink(full, 1);
+       free(full);
+
+       return rc;
+}
+
+/*
+ * Create the link new referring to old through the device that serves
+ * new.  With sym zero, old must be served by the same device, otherwise
+ * -EXDEV is returned; with sym non-zero old is not looked up at all.
+ * Returns -EPERM when new has no device or the device has no link
+ * handler, -ELOOP from fsenter(), or the handler's result.
+ */
+int
+fslink(char *old, char *new, int sym)
+{
+       Udev *d;
+       int rc;
+
+       trace("fslink(%s, %s, %d)", old, new, sym);
+
+       if(fsenter(&rc) < 0)
+               return rc;
+       d = path2dev(new);
+       if(d == nil || d->link == nil)
+               rc = -EPERM;
+       else if(sym || d == path2dev(old))
+               rc = d->link(old, new, sym);
+       else
+               rc = -EXDEV;
+       fsleave();
+
+       return rc;
+}
+
+/* link: resolve both names and call fslink() with sym = 0. */
+int sys_link(char *old, char *new)
+{
+       char *src, *dst;
+       int rc;
+
+       trace("sys_link(%s, %s)", old, new);
+
+       src = fsfullpath(old);
+       if(src == nil)
+               return -EFAULT;
+       dst = fsfullpath(new);
+       if(dst == nil){
+               free(src);
+               return -EFAULT;
+       }
+       rc = fslink(src, dst, 0);
+       free(src);
+       free(dst);
+
+       return rc;
+}
+
+/*
+ * symlink: only the link name new is resolved; the target old is passed
+ * to fslink() exactly as given, with sym = 1.
+ */
+int sys_symlink(char *old, char *new)
+{
+       char *dst;
+       int rc;
+
+       trace("sys_symlink(%s, %s)", old, new);
+
+       dst = fsfullpath(new);
+       if(dst == nil)
+               return -EFAULT;
+       rc = fslink(old, dst, 1);
+       free(dst);
+
+       return rc;
+}
+
diff --git a/linux_emul_base/linux b/linux_emul_base/linux
new file mode 100755 (executable)
index 0000000..e262acf
--- /dev/null
@@ -0,0 +1,114 @@
+#!/bin/rc
+
+# remember our own name for messages and for re-invoking ourselves
+arg0=$0
+# environment set up for the emulated programs
+DISPLAY=:0
+HOME=/tmp
+PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/X11R6/bin:/usr/games
+
+# defaults, overridden by options below:
+#      x       non-empty when -startx was given
+#      e       emulator binary (-e)
+#      r       linux root directory (-r)
+x=''
+e=/bin/linuxemu
+r=/sys/lib/linux
+
+# print the arguments, prefixed with the script name, on standard error
+fn eprint {
+       echo $arg0: $* >[1=2]
+}
+
+# print the usage line on standard error and exit with status 'usage'
+fn usage {
+       echo usage: $arg0 [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...] >[1=2]
+       exit usage
+}
+
+# extract options
+# options handled here (-h -r -e -startx -display) are consumed; -u and -g
+# (with their argument) and any other -option are collected in $o and
+# passed on to the emulator unchanged.
+o=()
+while(~ $1 -*){
+       switch($1){
+       case -h
+               usage
+       case -r
+               shift
+               r=$1
+       case -e
+               shift
+               e=$1
+       case -startx
+               x=1
+       case -display
+               shift
+               DISPLAY=$1
+       case -[ug]
+               o=($o $1 $2)
+               shift
+       case -*
+               o=($o $1)
+       }
+       shift
+}
+
+# a command to run is mandatory
+switch($#*){
+case 0
+       usage
+}
+
+# -startx: start an X server on a free display, run ourselves again
+# (without -startx) against that display, then kill the server's note group.
+if(! ~ $x ''){
+       # find free local display
+       # the numbers 0..32 are merged with the display numbers taken from the
+       # existing /srv/UD.X* names; numbers that occur exactly once are kept
+       # and the first of them is used.
+       d=(`{{seq 0 32; {echo /srv/UD.X* | sed 's!/srv/UD\.X!!g; s!\ !\
+       !g; s!\*!!g;'}} | sort | uniq -c | awk '/^\ *1\ /{print $2}'})
+       d=$d(1)
+       X11/equis -ac :$d &
+       # $apid is the pid of the background X server started above
+       k=/proc/$apid/notepg
+       $arg0 -e $e -r $r -display :$d $o $*
+       {echo kill >$k} >/dev/null >[2=1]
+       exit
+}
+
+# rewrite the path so it is still accessible after binding $r to /:
+# an absolute path that exists now but does not exist below $r is printed
+# with a /9 prefix (/ is bound to $r/9 further down); any other path is
+# printed unchanged.
+fn ninepath {
+       if(~ $1 /* && test -e $1 && ! test -e $r/$1){
+               echo /9$1
+       }
+       if not {
+               echo $1
+       }
+}
+
+# make the root and emulator paths absolute relative to the current
+# directory and check them
+w=`{pwd}
+r=`{cleanname -d $w $r}
+if(! test -d $r){
+       eprint bad rootpath: $r
+       exit rootpath
+}
+e=`{cleanname -d $w $e}
+if(! test -x $e){
+       eprint bad emubin: $e
+       exit emubin
+}
+# translate emulator, working directory and every command argument to the
+# names they will have after the root change; $a becomes the final command
+# line: emulator, passed-through options, command and arguments
+e=`{ninepath $e}
+p=`{ninepath $w}
+a=($e $o)
+while(! ~ $#* 0){
+       x=`{ninepath $1}
+       a=($a $"x)
+       shift
+}
+
+# bind the required plan9 stuff
+# work in a private copy of the name space so the binds below stay local
+rfork n
+mntgen $r
+bind -a '#P' /dev
+# make the listed plan9 directories visible below $r; /tmp, /env and /srv
+# are bound with -c (creation allowed), and the plan9 root itself appears
+# as $r/9
+for(d in /9 /dev /proc /net /env /srv /n /mnt /tmp){
+       t=$r^$d
+       switch($d){
+       case /tmp /env /srv
+               bind -c $d $t
+       case /9
+               bind / $t
+       case *
+               bind $d $t
+       }
+}
+
+# change root and run the emulator
+builtin cd /
+bind $r /
+# $p is the original working directory as translated by ninepath
+builtin cd $p
+exec $a
diff --git a/linux_emul_base/linux.h b/linux_emul_base/linux.h
new file mode 100644 (file)
index 0000000..99bf1de
--- /dev/null
@@ -0,0 +1,352 @@
+/* open() flag bits (octal); O_ACCMODE masks the access mode */
+enum {
+       O_ACCMODE       = 0003,
+       O_RDONLY        = 00,
+       O_WRONLY        = 01,
+       O_RDWR          = 02,
+       O_CREAT         = 0100,
+       O_EXCL          = 0200,
+       O_NOCTTY        = 0400,
+       O_TRUNC         = 01000,
+       O_APPEND        = 02000,
+       O_NONBLOCK      = 04000,
+       O_NDELAY        = 04000,
+       O_SYNC          = 010000,
+       FASYNC          = 020000,
+};
+
+/* file descriptor flag (passed to newfd() by sys_open) */
+enum {
+       FD_CLOEXEC = 1,
+};
+
+/* fcntl() command numbers; F_DUPFD..F_GETSIG count up from 0 */
+enum {
+       F_DUPFD         = 0,
+       F_GETFD,
+       F_SETFD,
+       F_GETFL,
+       F_SETFL,
+       F_GETLK,
+       F_SETLK,
+       F_SETLKW,
+       F_SETOWN,
+       F_GETOWN,
+       F_GETSIG,
+       F_GETLK64       = 12,
+       F_SETLK64       = 13,
+};
+
+/* file mode bits (octal): S_IFMT masks the file type, S_IS* are the set-id and sticky bits */
+enum {
+       S_IFMT                  = 0170000,
+       S_IFSOCK                = 0140000,
+       S_IFLNK                 = 0120000,
+       S_IFREG                 = 0100000,
+       S_IFBLK                 = 0060000,
+       S_IFDIR                 = 0040000,
+       S_IFCHR                 = 0020000,
+       S_IFIFO                 = 0010000,
+       S_ISUID                 = 0004000,
+       S_ISGID                 = 0002000,
+       S_ISVTX                 = 0001000,
+};
+
+/* memory mapping constants: PROT_* protection bits, MAP_* flags (MAP_TYPE masks the sharing type), MREMAP_* flags */
+enum {
+       PROT_READ               = 0x01,
+       PROT_WRITE              = 0x02,
+       PROT_EXEC               = 0x04,
+       PROT_SEM                = 0x08,
+       PROT_NONE               = 0x00,
+       PROT_GROWSDOWN  = 0x01000000,
+       PROT_GROWSUP    = 0x02000000,
+       MAP_SHARED              = 0x01,
+       MAP_PRIVATE             = 0x02,
+       MAP_TYPE                = 0x0f,
+       MAP_FIXED               = 0x10,
+       MAP_ANONYMOUS   = 0x20,
+
+       MREMAP_MAYMOVE  = 1,
+       MREMAP_FIXED    = 2,
+};
+
+/* clone() flag bits */
+enum {
+       CLONE_VM                                = 0x00000100,
+       CLONE_FS                                = 0x00000200,
+       CLONE_FILES                             = 0x00000400,
+       CLONE_SIGHAND                   = 0x00000800,
+       CLONE_PTRACE                    = 0x00002000,
+       CLONE_VFORK                             = 0x00004000,
+       CLONE_PARENT                    = 0x00008000,
+       CLONE_THREAD                    = 0x00010000,
+       CLONE_NEWNS                             = 0x00020000,
+       CLONE_SYSVSEM                   = 0x00040000,
+       CLONE_SETTLS                    = 0x00080000,
+       CLONE_PARENT_SETTID             = 0x00100000,
+       CLONE_CHILD_CLEARTID    = 0x00200000,
+       CLONE_DETACHED                  = 0x00400000,
+       CLONE_UNTRACED                  = 0x00800000,
+       CLONE_CHILD_SETTID              = 0x01000000,
+       CLONE_STOPPED                   = 0x02000000,
+};
+
+/*
+ * error numbers.  They are defined positive; the system call code returns
+ * them negated (for example -ENOENT).  The numbers 41 and 58 are not
+ * assigned here.  NOTE(review): EMAX looks like an end marker one past
+ * the last code -- confirm against its users.
+ */
+enum {
+       EPERM                   = 1,
+       ENOENT                  = 2,
+       ESRCH                   = 3,
+       EINTR                   = 4,
+       EIO                             = 5,
+       ENXIO                   = 6,
+       E2BIG                   = 7,
+       ENOEXEC                 = 8,
+       EBADF                   = 9,
+       ECHILD                  = 10,
+       EAGAIN                  = 11,
+       ENOMEM                  = 12,
+       EACCES                  = 13,
+       EFAULT                  = 14,
+       ENOTBLK                 = 15,
+       EBUSY                   = 16,
+       EEXIST                  = 17,
+       EXDEV                   = 18,
+       ENODEV                  = 19,
+       ENOTDIR                 = 20,
+       EISDIR                  = 21,
+       EINVAL                  = 22,
+       ENFILE                  = 23,
+       EMFILE                  = 24,
+       ENOTTY                  = 25,
+       ETXTBSY                 = 26,
+       EFBIG                   = 27,
+       ENOSPC                  = 28,
+       ESPIPE                  = 29,
+       EROFS                   = 30,
+       EMLINK                  = 31,
+       EPIPE                   = 32,
+       EDOM                    = 33,
+       ERANGE                  = 34,
+       EDEADLK                 = 35,
+       ENAMETOOLONG    = 36,
+       ENOLCK                  = 37,
+       ENOSYS                  = 38,
+       ENOTEMPTY               = 39,
+       ELOOP                   = 40,
+       ENOMSG                  = 42,
+       EIDRM                   = 43,
+       ECHRNG                  = 44,
+       EL2NSYNC                = 45,
+       EL3HLT                  = 46,
+       EL3RST                  = 47,
+       ELNRNG                  = 48,
+       EUNATCH                 = 49,
+       ENOCSI                  = 50,
+       EL2HLT                  = 51,
+       EBADE                   = 52,
+       EBADR                   = 53,
+       EXFULL                  = 54,
+       ENOANO                  = 55,
+       EBADRQC                 = 56,
+       EBADSLT                 = 57,
+       EBFONT                  = 59,
+       ENOSTR                  = 60,
+       ENODATA                 = 61,
+       ETIME                   = 62,
+       ENOSR                   = 63,
+       ENONET                  = 64,
+       ENOPKG                  = 65,
+       EREMOTE                 = 66,
+       ENOLINK                 = 67,
+       EADV                    = 68,
+       ESRMNT                  = 69,
+       ECOMM                   = 70,
+       EPROTO                  = 71,
+       EMULTIHOP               = 72,
+       EDOTDOT                 = 73,
+       EBADMSG                 = 74,
+       EOVERFLOW               = 75,
+       ENOTUNIQ                = 76,
+       EBADFD                  = 77,
+       EREMCHG                 = 78,
+       ELIBACC                 = 79,
+       ELIBBAD                 = 80,
+       ELIBSCN                 = 81,
+       ELIBMAX                 = 82,
+       ELIBEXEC                = 83,
+       EILSEQ                  = 84,
+       ERESTART                = 85,
+       ESTRPIPE                = 86,
+       EUSERS                  = 87,
+       ENOTSOCK                = 88,
+       EDESTADDRREQ    = 89,
+       EMSGSIZE                = 90,
+       EPROTOTYPE              = 91,
+       ENOPROTOOPT             = 92,
+       EPROTONOSUPPORT = 93,
+       ESOCKTNOSUPPORT = 94,
+       EOPNOTSUPP              = 95,
+       EPFNOSUPPORT    = 96,
+       EAFNOSUPPORT    = 97,
+       EADDRINUSE              = 98,
+       EADDRNOTAVAIL   = 99,
+       ENETDOWN                = 100,
+       ENETUNREACH             = 101,
+       ENETRESET               = 102,
+       ECONNABORTED    = 103,
+       ECONNRESET              = 104,
+       ENOBUFS                 = 105,
+       EISCONN                 = 106,
+       ENOTCONN                = 107,
+       ESHUTDOWN               = 108,
+       ETOOMANYREFS    = 109,
+       ETIMEDOUT               = 110,
+       ECONNREFUSED    = 111,
+       EHOSTDOWN               = 112,
+       EHOSTUNREACH    = 113,
+       EALREADY                = 114,
+       EINPROGRESS             = 115,
+       ESTALE                  = 116,
+       EUCLEAN                 = 117,
+       ENOTNAM                 = 118,
+       ENAVAIL                 = 119,
+       EISNAM                  = 120,
+       EREMOTEIO               = 121,
+       EDQUOT                  = 122,
+       ENOMEDIUM               = 123,
+       EMEDIUMTYPE             = 124,
+       EMAX                    = 125,
+};
+
+/* alternative names that share the number of another error code */
+#define EWOULDBLOCK    EAGAIN
+#define EDEADLOCK      EDEADLK
+#define ENOATTR                ENODATA
+
+/* poll()/epoll event bits */
+enum {
+       POLLIN                  = (1<<0),
+       POLLPRI                 = (1<<1),
+       POLLOUT                 = (1<<2),
+       POLLERR                 = (1<<3),
+       POLLHUP                 = (1<<4),
+       POLLNVAL                = (1<<5),
+       POLLRDNORM              = (1<<6),
+       POLLRDBAND              = (1<<7),
+       POLLWRNORM              = (1<<8),
+       POLLWRBAND              = (1<<9),
+       POLLMSG                 = (1<<10),
+       POLLREMOVE              = (1<<11),
+       POLLRDHUP               = 0x2000,
+       EPOLLONESHOT    = (1<<30),
+       EPOLLET                 = (1<<31),
+};
+
+/*
+ * signal numbers.  Several names share a number (SIGABRT/SIGIOT,
+ * SIGIO/SIGPOLL/SIGLOST).  SIGRT1..SIGRT8 follow the classic signals;
+ * SIGMAX is one past SIGRT8.
+ */
+enum {
+       SIGHUP          = 1,
+       SIGINT          = 2,
+       SIGQUIT         = 3,
+       SIGILL          = 4,
+       SIGTRAP         = 5,
+       SIGABRT         = 6,
+       SIGIOT          = 6,
+       SIGBUS          = 7,
+       SIGFPE          = 8,
+       SIGKILL         = 9,
+       SIGUSR1         = 10,
+       SIGSEGV         = 11,
+       SIGUSR2         = 12,
+       SIGPIPE         = 13,
+       SIGALRM         = 14,
+       SIGTERM         = 15,
+       SIGSTKFLT       = 16,
+       SIGCHLD         = 17,
+       SIGCONT         = 18,
+       SIGSTOP         = 19,
+       SIGTSTP         = 20,
+       SIGTTIN         = 21,
+       SIGTTOU         = 22,
+       SIGURG          = 23,
+       SIGXCPU         = 24,
+       SIGXFSZ         = 25,
+       SIGVTALRM       = 26,
+       SIGPROF         = 27,
+       SIGWINCH        = 28,
+       SIGIO           = 29,
+       SIGPOLL         = 29,
+       SIGLOST         = 29,
+       SIGPWR          = 30,
+       SIGSYS          = 31,
+
+       SIGRT1          = 32,
+       SIGRT2          = 33,
+       SIGRT3          = 34,
+       SIGRT4          = 35,
+       SIGRT5          = 36,
+       SIGRT6          = 37,
+       SIGRT7          = 38,
+       SIGRT8          = 39,
+
+       SIGMAX          = 40,
+};
+
+/* SI_* codes; presumably the si_code values describing the origin of a signal -- confirm against signal.c */
+enum {
+       SI_USER                 = 0,
+       SI_QUEUE                = -1,
+       SI_TIMER                = -2,
+       SI_MESGQ                = -3,
+       SI_ASYNCIO              = -4,
+       SI_SIGIO                = -5,
+       SI_TKILL                = -6,
+       SI_DETHREAD             = -7,
+};
+
+/*
+ * ILL_* codes, counting up from 1.
+ * NOTE(review): Linux spells the fifth one ILL_PRVOPC; ILL_PROVOPC may be
+ * a typo, but other files may reference this name, so it is left as it is.
+ */
+enum {
+       ILL_ILLOPC              = 1,
+       ILL_ILLOPN,
+       ILL_ILLADR,
+       ILL_ILLTRP,
+       ILL_PROVOPC,
+       ILL_PRVREG,
+       ILL_COPROC,
+       ILL_BADSTK,
+};
+
+/* FPE_* codes, counting up from 1 */
+enum {
+       FPE_INTDIV              = 1,
+       FPE_INTOVF,
+       FPE_FLTDIV,
+       FPE_FLTOVF,
+       FPE_FLTUND,
+       FPE_FLTRES,
+       FPE_FLTINV,
+       FPE_FLTSUB, 
+};
+
+/* wait option bits; WUNTRACED and WSTOPPED share a value */
+enum {
+       WNOHANG =0x00000001,
+       WUNTRACED       =0x00000002,
+       WSTOPPED        =0x00000002,
+       WEXITED         =0x00000004,
+       WCONTINUED      =0x00000008,
+       WNOWAIT =0x01000000,
+       WNOTHREAD       =0x20000000,
+       WALL            =0x40000000,
+       WCLONE          =0x80000000,
+};
+
+/* time value as seconds plus microseconds; sys_utimes() reads tv_sec only */
+struct linux_timeval
+{
+       long    tv_sec;
+       long    tv_usec;
+};
+
+/* time value as seconds plus nanoseconds */
+struct linux_timespec
+{
+       long    tv_sec;
+       long    tv_nsec;
+};
+
+/*
+ * Segment descriptor description exchanged with the emulated program;
+ * presumably the argument of set_thread_area/modify_ldt -- confirm
+ * against the callers.  The field order follows Linux's struct user_desc.
+ *
+ * Every bit-field is unsigned, as in Linux: a signed 2-bit contents field
+ * would read back the values 2 and 3 as negative numbers.
+ */
+struct linux_user_desc {
+       uint  entry_number;
+       ulong base_addr;
+       uint  limit;
+       uint  seg_32bit:1;
+       uint  contents:2;
+       uint  read_exec_only:1;
+       uint  limit_in_pages:1;
+       uint  seg_not_present:1;
+       uint  useable:1;
+};
diff --git a/linux_emul_base/linuxcall.c b/linux_emul_base/linuxcall.c
new file mode 100644 (file)
index 0000000..26e4bbe
--- /dev/null
@@ -0,0 +1,79 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Linuxcall Linuxcall;
+
+/*
+ * One system call table entry.
+ *     name    name recorded in the process (p->syscall) while the call runs
+ *     func    the handler, stored untyped
+ *     stub    adapter that fetches the arguments from the registers in
+ *             the Ureg and calls func with them
+ */
+struct Linuxcall
+{
+       char    *name;
+       void    *func;
+       int     (*stub)(Ureg *, void *);
+};
+
+/*
+ * Argument marshalling stubs: fcallN casts func to a function taking N
+ * int arguments and calls it with the first N of the saved registers
+ * bx, cx, dx, si, di, bp.
+ */
+static int
+fcall0(Ureg *, void *func)
+{
+       int (*fn)(void);
+
+       fn = (int (*)(void))func;
+       return fn();
+}
+
+static int
+fcall1(Ureg *u, void *func)
+{
+       int (*fn)(int);
+
+       fn = (int (*)(int))func;
+       return fn(u->bx);
+}
+
+static int
+fcall2(Ureg *u, void *func)
+{
+       int (*fn)(int, int);
+
+       fn = (int (*)(int, int))func;
+       return fn(u->bx, u->cx);
+}
+
+static int
+fcall3(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int);
+
+       fn = (int (*)(int, int, int))func;
+       return fn(u->bx, u->cx, u->dx);
+}
+
+static int
+fcall4(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int);
+
+       fn = (int (*)(int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si);
+}
+
+static int
+fcall5(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int, int);
+
+       fn = (int (*)(int, int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si, u->di);
+}
+
+static int
+fcall6(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int, int, int);
+
+       fn = (int (*)(int, int, int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si, u->di, u->bp);
+}
+
+#include "linuxcalltab.out"
+
+/* fallback entry used by linuxcall() for numbers outside linuxcalltab */
+static Linuxcall nocall = {
+       .name = "nosys",
+       .func = sys_nosys,
+       .stub = fcall0,
+};
+
+/*
+ * Deliver the result of a system call to the current process.
+ *
+ * For -ERESTART the registers are left untouched and the call name is
+ * remembered in p->restart->syscall.  For every other value the result is
+ * stored in ax, pc is advanced by 2 (the length of the INT 0x80
+ * instruction checked in linuxcall) and the call bookkeeping is cleared.
+ */
+static void
+linuxret(int errno)
+{
+       Uproc *proc;
+       Ureg *regs;
+
+       proc = current;
+       regs = proc->ureg;
+       trace("linuxret(%lux: %s, %lux: %E)", regs->pc, proc->syscall, (ulong)errno, errno);
+       if(errno != -ERESTART){
+               regs->ax = (ulong)errno;
+               regs->pc += 2;
+               proc->restart->syscall = nil;
+               proc->syscall = nil;
+               return;
+       }
+       proc->restart->syscall = proc->syscall;
+}
+
+
+/*
+ * Dispatch the system call the current process is stopped at.
+ *
+ * The instruction at pc must be INT 0x80 (bytes CD 80), otherwise -1 is
+ * returned and nothing is done.  The call number is taken from ax and
+ * looked up in linuxcalltab; numbers outside the table use nocall.  The
+ * entry's stub fetches the arguments and runs the handler, and the result
+ * is delivered through linuxret().  Returns 0 once a call was dispatched.
+ */
+int
+linuxcall(void)
+{
+       Uproc *p;
+       Ureg *u;
+       Linuxcall *c;
+       uchar *pc;
+
+       p = current;
+       u = p->ureg;
+
+       /* CD 80 = INT 0x80 */
+       pc = (uchar*)u->pc;
+       if(pc[0] != 0xcd || pc[1] != 0x80){
+               trace("linuxcall(): not a syscall pc=%lux sp=%lux", u->pc, u->sp);
+               return -1;
+       }
+       /*
+        * range check the index, not the resulting address: for a huge
+        * ax the address computation wraps around and can land below
+        * the end of the table.
+        */
+       if((ulong)u->ax < nelem(linuxcalltab))
+               c = &linuxcalltab[u->ax];
+       else
+               c = &nocall;
+       p->syscall = c->name;
+       p->sysret = linuxret;
+       if(p->restart->syscall)
+               trace("linuxcall(): restarting %s", p->syscall);
+       linuxret(c->stub(u, c->func));
+       return 0;
+}
diff --git a/linux_emul_base/linuxcalltab b/linux_emul_base/linuxcalltab
new file mode 100644 (file)
index 0000000..031dbf0
--- /dev/null
@@ -0,0 +1,286 @@
+0      0    restart_syscall                sys_nosys
+1      1    exit                           sys_exit
+2      0    fork                           sys_fork
+3      3    read                           sys_read
+4      3    write                          sys_write
+5      3    open                           sys_open
+6      1    close                          sys_close
+7      3    waitpid                        sys_waitpid
+8      2    creat                          sys_creat
+9      2    link                           sys_link
+10     1    unlink                         sys_unlink
+11     3    execve                         sys_execve
+12     1    chdir                          sys_chdir
+13     1    time                           sys_time
+14     0    mknod                          sys_nosys
+15     2    chmod                          sys_chmod
+16     3    lchown                         sys_lchown
+17     0    break                          sys_nosys
+18     0    oldstat                        sys_nosys
+19     3    lseek                          sys_lseek
+20     0    getpid                         sys_getpid
+21     0    mount                          sys_nosys
+22     0    umount                         sys_nosys
+23     1    setuid                         sys_setuid
+24     0    getuid                         sys_getuid
+25     0    stime                          sys_nosys
+26     0    ptrace                         sys_nosys
+27     1    alarm                          sys_alarm
+28     0    oldfstat                       sys_nosys
+29     0    pause                          sys_nosys
+30     2    utime                          sys_utime
+31     0    stty                           sys_nosys
+32     0    gtty                           sys_nosys
+33     2    access                         sys_access
+34     0    nice                           sys_nosys
+35     0    ftime                          sys_nosys
+36     0    sync                           sys_nosys
+37     2    kill                           sys_kill
+38     2    rename                         sys_rename
+39     2    mkdir                          sys_mkdir
+40     1    rmdir                          sys_rmdir
+41     1    dup                            sys_dup
+42     1    pipe                           sys_pipe
+43     1    times                          sys_times
+44     0    prof                           sys_nosys
+45     1    brk                            sys_brk
+46     1    setgid                         sys_setgid
+47     0    getgid                         sys_getgid
+48     0    signal                         sys_nosys
+49     0    geteuid                        sys_nosys
+50     0    getegid                        sys_nosys
+51     0    acct                           sys_nosys
+52     0    umount2                        sys_nosys
+53     0    lock                           sys_nosys
+54     3    ioctl                          sys_ioctl
+55     3    fcntl                          sys_fcntl
+56     0    mpx                            sys_nosys
+57     2    setpgid                        sys_setpgid
+58     0    ulimit                         sys_nosys
+59     0    oldolduname                    sys_nosys
+60     1    umask                          sys_umask
+61     1    chroot                         sys_chroot
+62     0    ustat                          sys_nosys
+63     2    dup2                           sys_dup2
+64     0    getppid                        sys_getppid
+65     0    getpgrp                        sys_getpgrp
+66     0    setsid                         sys_setsid
+67     0    sigaction                      sys_nosys
+68     0    sgetmask                       sys_nosys
+69     0    ssetmask                       sys_nosys
+70     0    setreuid                       sys_nosys
+71     0    setregid                       sys_nosys
+72     0    sigsuspend                     sys_nosys
+73     0    sigpending                     sys_nosys
+74     0    sethostname                    sys_nosys
+75     2    setrlimit                      sys_setrlimit
+76     2    getrlimit                      sys_getrlimit
+77     0    getrusage                      sys_nosys
+78     2    gettimeofday                   sys_gettimeofday
+79     0    settimeofday                   sys_nosys
+80     0    getgroups                      sys_nosys
+81     0    setgroups                      sys_nosys
+82     0    select                         sys_nosys
+83     2    symlink                        sys_symlink
+84     0    oldlstat                       sys_nosys
+85     3    readlink                       sys_readlink
+86     0    uselib                         sys_nosys
+87     0    swapon                         sys_nosys
+88     0    reboot                         sys_nosys
+89     0    readdir                        sys_nosys
+90     1    mmap                           sys_linux_mmap
+91     2    munmap                         sys_munmap
+92     2    truncate                       sys_truncate
+93     2    ftruncate                      sys_ftruncate
+94     2    fchmod                         sys_fchmod
+95     3    fchown                         sys_fchown
+96     0    getpriority                    sys_nosys
+97     0    setpriority                    sys_nosys
+98     0    profil                         sys_nosys
+99     2    statfs                         sys_statfs
+100    0    fstatfs                        sys_nosys
+101    0    ioperm                         sys_nosys
+102    2    socketcall                     sys_linux_socketcall
+103    0    syslog                         sys_nosys
+104    3    setitimer                      sys_setitimer
+105    2    getitimer                      sys_getitimer
+106    2    stat                           sys_linux_stat
+107    2    lstat                          sys_linux_lstat
+108    2    fstat                          sys_linux_fstat
+109    0    olduname                       sys_nosys
+110    0    iopl                           sys_nosys
+111    0    vhangup                        sys_nosys
+112    0    idle                           sys_nosys
+113    0    vm86old                        sys_nosys
+114    4    wait4                          sys_wait4
+115    0    swapoff                        sys_nosys
+116    0    sysinfo                        sys_nosys
+117    0    ipc                            sys_nosys
+118    1    fsync                          sys_fsync
+119    0    sigreturn                      sys_sigreturn
+120    5    clone                          sys_linux_clone
+121    0    setdomainname                  sys_nosys
+122    1    uname                          sys_uname
+123    3    modify_ldt                     sys_modify_ldt
+124    0    adjtimex                       sys_nosys
+125    3    mprotect                       sys_mprotect
+126    0    sigprocmask                    sys_nosys
+127    0    create_module                  sys_nosys
+128    0    init_module                    sys_nosys
+129    0    delete_module                  sys_nosys
+130    0    get_kernel_syms                sys_nosys
+131    0    quotactl                       sys_nosys
+132    1    getpgid                        sys_getpgid
+133    1    fchdir                         sys_fchdir
+134    0    bdflush                        sys_nosys
+135    0    sysfs                          sys_nosys
+136    1    personality                    sys_personality
+137    0    afs_syscall                    sys_nosys
+138    0    setfsuid                       sys_nosys
+139    0    setfsgid                       sys_nosys
+140    5    _llseek                        sys_llseek
+141    3    getdents                       sys_linux_getdents
+142    5    _newselect                     sys_select
+143    2    flock                          sys_flock
+144    3    msync                          sys_msync
+145    3    readv                          sys_readv
+146    3    writev                         sys_writev
+147    1    getsid                         sys_getsid
+148    0    fdatasync                      sys_nosys
+149    0    _sysctl                        sys_nosys
+150    0    mlock                          sys_nosys
+151    0    munlock                        sys_nosys
+152    0    mlockall                       sys_nosys
+153    0    munlockall                     sys_nosys
+154    2    sched_setparam                 sys_sched_setparam
+155    2    sched_getparam                 sys_sched_getparam
+156    3    sched_setscheduler             sys_sched_setscheduler
+157    1    sched_getscheduler             sys_sched_getscheduler
+158    0    sched_yield                    sys_sched_yield
+159    0    sched_get_priority_max         sys_nosys
+160    0    sched_get_priority_min         sys_nosys
+161    0    sched_rr_get_interval          sys_nosys
+162    2    nanosleep                      sys_nanosleep
+163    5    mremap                         sys_mremap
+164    3    setresuid                      sys_setresuid
+165    3    getresuid                      sys_getresuid
+166    0    vm86                           sys_nosys
+167    0    query_module                   sys_nosys
+168    3    poll                           sys_poll
+169    0    nfsservctl                     sys_nosys
+170    3    setresgid                      sys_setresgid
+171    3    getresgid                      sys_getresgid
+172    0    prctl                          sys_nosys
+173    0    rt_sigreturn                   sys_rt_sigreturn
+174    4    rt_sigaction                   sys_rt_sigaction
+175    4    rt_sigprocmask                 sys_rt_sigprocmask
+176    2    rt_sigpending                  sys_rt_sigpending
+177    0    rt_sigtimedwait                sys_nosys
+178    3    rt_sigqueueinfo                sys_rt_sigqueueinfo
+179    2    rt_sigsuspend                  sys_rt_sigsuspend
+180    4    pread64                        sys_pread64
+181    4    pwrite64                       sys_pwrite64
+182    3    chown                          sys_chown
+183    2    getcwd                         sys_getcwd
+184    0    capget                         sys_nosys
+185    0    capset                         sys_nosys
+186    2    sigaltstack                    sys_sigaltstack
+187    0    sendfile                       sys_nosys
+188    0    getpmsg                        sys_nosys
+189    0    putpmsg                        sys_nosys
+190    0    vfork                          sys_vfork
+191    0    ugetrlimit                     sys_nosys
+192    6    mmap2                          sys_mmap
+193    2    truncate64                     sys_truncate
+194    2    ftruncate64                    sys_ftruncate
+195    2    stat64                         sys_linux_stat64
+196    2    lstat64                        sys_linux_lstat64
+197    2    fstat64                        sys_linux_fstat64
+198    3    lchown32                       sys_lchown
+199    0    getuid32                       sys_getuid
+200    0    getgid32                       sys_getgid
+201    0    geteuid32                      sys_getuid
+202    0    getegid32                      sys_getgid
+203    2    setreuid32                     sys_setreuid
+204    2    setregid32                     sys_setregid
+205    2    getgroups32                    sys_getgroups
+206    2    setgroups32                    sys_setgroups
+207    3    fchown32                       sys_fchown
+208    3    setresuid32                    sys_setresuid
+209    3    getresuid32                    sys_getresuid
+210    3    setresgid32                    sys_setresgid
+211    3    getresgid32                    sys_getresgid
+212    3    chown32                        sys_chown
+213    1    setuid32                       sys_setuid
+214    1    setgid32                       sys_setgid
+215    0    setfsuid32                     sys_nosys
+216    0    setfsgid32                     sys_nosys
+217    0    pivot_root                     sys_nosys
+218    0    mincore                        sys_nosys
+219    0    madvise                        sys_nosys
+220    3    getdents64                     sys_linux_getdents64
+221    3    fcntl64                        sys_fcntl
+224    0    gettid                         sys_gettid
+225    0    readahead                      sys_nosys
+226    5    setxattr                       sys_setxattr
+227    5    lsetxattr                      sys_lsetxattr
+228    5    fsetxattr                      sys_fsetxattr
+229    4    getxattr                       sys_getxattr
+230    4    lgetxattr                      sys_lgetxattr
+231    4    fgetxattr                      sys_fgetxattr
+232    0    listxattr                      sys_nosys
+233    0    llistxattr                     sys_nosys
+234    0    flistxattr                     sys_nosys
+235    0    removexattr                    sys_nosys
+236    0    lremovexattr                   sys_nosys
+237    0    fremovexattr                   sys_nosys
+238    2    tkill                          sys_tkill
+239    0    sendfile64                     sys_nosys
+240    6    futex                          sys_futex
+241    0    sched_setaffinity              sys_nosys
+242    0    sched_getaffinity              sys_nosys
+243    1    set_thread_area                sys_set_thread_area
+244    1    get_thread_area                sys_get_thread_area
+245    0    io_setup                       sys_nosys
+246    0    io_destroy                     sys_nosys
+247    0    io_getevents                   sys_nosys
+248    0    io_submit                      sys_nosys
+249    0    io_cancel                      sys_nosys
+250    0    fadvise64                      sys_nosys
+252    1    exit_group                     sys_exit_group
+253    0    lookup_dcookie                 sys_nosys
+254    0    epoll_create                   sys_nosys
+255    0    epoll_ctl                      sys_nosys
+256    0    epoll_wait                     sys_nosys
+257    0    remap_file_pages               sys_nosys
+258    1    set_tid_address                sys_set_tid_address
+259    0    timer_create                   sys_nosys
+260    0    timer_settime                  sys_nosys
+261    0    timer_gettime                  sys_nosys
+262    0    timer_getoverrun               sys_nosys
+263    0    timer_delete                   sys_nosys
+264    0    clock_settime                  sys_nosys
+265    2    clock_gettime                  sys_clock_gettime
+266    0    clock_getres                   sys_nosys
+267    0    clock_nanosleep                sys_nosys
+268    0    statfs64                       sys_nosys
+269    0    fstatfs64                      sys_nosys
+270    3    tgkill                         sys_tgkill
+271    2    utimes                         sys_utimes
+272    0    fadvise64_64                   sys_nosys
+273    0    vserver                        sys_nosys
+274    0    mbind                          sys_nosys
+275    0    get_mempolicy                  sys_nosys
+276    0    set_mempolicy                  sys_nosys
+277    0    mq_open                        sys_nosys
+278    0    mq_unlink                      sys_nosys
+279    0    mq_timedsend                   sys_nosys
+280    0    mq_timedreceive                sys_nosys
+281    0    mq_notify                      sys_nosys
+282    0    mq_getsetattr                  sys_nosys
+283    0    sys_kexec_load                 sys_nosys
+284    0    waitid                         sys_nosys
+285    0    setaltroot                     sys_nosys
+286    0    add_key                        sys_nosys
+287    0    request_key                    sys_nosys
+288    0    keyctl                         sys_nosys
diff --git a/linux_emul_base/linuxcalltab.awk b/linux_emul_base/linuxcalltab.awk
new file mode 100755 (executable)
index 0000000..d750cdb
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/awk -f
+#
+# Turn the syscall table (columns: number, argument count, name,
+# handler) into the C initializer for linuxcalltab[].
+# Numbers must be ascending; gaps are filled with sys_nosys entries
+# named "nosys<N>".  Lines starting with '#' are skipped.
+#
+BEGIN {
+       nsys = 0
+       broken = 0
+}
+
+/^#/ {
+       next
+}
+
+{
+       i=$1
+       if(nsys > i){
+               print "BROKEN TABLE: "nsys" > "i
+               # exit still runs the END rule: flag the failure so END
+               # emits no table and the build sees a non-zero status.
+               broken = 1
+               exit 1
+       }
+       while(nsys < i){
+               sysarg[nsys] = 0
+               sysnam[nsys] = "nosys"nsys
+               sysfun[nsys] = "sys_nosys"
+               nsys++;
+       }
+       sysarg[nsys] = $2
+       sysnam[nsys] = $3
+       sysfun[nsys] = $4
+       nsys++
+}
+
+END {
+       if(broken)
+               exit 1
+       print "static Linuxcall linuxcalltab[] = {"
+       for(i=0; i<nsys; i++){
+               print " {       /* "i" */"
+               print "         .name = \""sysnam[i]"\","
+               print "         .func = "sysfun[i]","
+               print "         .stub = fcall"sysarg[i]","
+               print " },"
+       }
+       print "};"
+       print ""
+}
diff --git a/linux_emul_base/main.c b/linux_emul_base/main.c
new file mode 100644 (file)
index 0000000..2472e6b
--- /dev/null
@@ -0,0 +1,259 @@
+#include <u.h>
+#include <libc.h>
+#include <tos.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* Terminate the process with a nil exit status. */
+static void
+die(void)
+{
+       exits(nil);
+}
+
+/*
+ * Build a nil-terminated array of "name=value" strings from the
+ * entries of /env.
+ *
+ * Entries named "fn#...", "timezone" and "0" are skipped, as are
+ * entries whose value cannot be read.  Returns nil if /env cannot be
+ * opened; if the directory cannot be read an empty array is returned.
+ * The array and its strings are allocated with kmalloc/ksmprint.
+ */
+static char**
+readenv(void)
+{
+       char **env;
+       int fd, n, i, c;
+       Dir *d;
+
+       if((fd = open("/env", OREAD)) < 0)
+               return nil;
+       d = nil;
+       n = dirreadall(fd, &d);
+       close(fd);
+       /* a failed read must not shrink the array below its terminator slot */
+       if(n < 0)
+               n = 0;
+       env = kmalloc(sizeof(env[0]) * (n + 1));
+       c = 0;
+       for(i=0; i<n; i++){
+               char *v;
+               char *k;
+
+               k = d[i].name;
+
+               /* filter out entries that are not passed on */
+               if(strncmp(k, "fn#", 3) == 0)
+                       continue;
+               if(strcmp(k, "timezone") == 0)
+                       continue;
+               if(strcmp(k, "0")==0)
+                       continue;
+
+               if((v = getenv(k)) == nil)
+                       continue;
+               env[c] = ksmprint("%s=%s", k, v);
+               /* release the value on every path, not only on success */
+               free(v);
+               if(env[c] == nil)
+                       continue;
+
+               c++;
+       }
+       env[c] = 0;
+
+       free(d);
+
+       return env;
+}
+
+/*
+ * Parameter block that onstack() passes across its stack switch.
+ */
+struct onstackargs
+{
+       long            *stk;           /* base of the stack to run on */
+       void            *arg;           /* argument handed to func */
+       int             (*func)(void *);        /* function to run on stk */
+       int             ret;            /* func's return value */
+       jmp_buf jmp;            /* context to longjmp back to when func is done */
+};
+
+/*
+ * Call func(arg) with the stack pointer inside the KSTACK-byte region
+ * starting at stk, and return func's result.
+ *
+ * If the caller is already running inside that region (judged by the
+ * address of a local variable) func is called directly.  Otherwise a
+ * jmp_buf is captured, its saved stack pointer is redirected to just
+ * below the top of stk, and longjmp enters that context; there
+ * onstack() is called again (now taking the direct path), the result
+ * is stored in the parameter block and control longjmps back.
+ *
+ * The region is zeroed before the switch, so anything stored in stk
+ * is lost.
+ */
+int
+onstack(long *stk, int (*func)(void *), void *arg)
+{
+       struct onstackargs a, *args;
+       jmp_buf jmp;
+       long *sp;
+
+       /* &a approximates the current stack pointer */
+       sp = (long*)&a;
+       if((long*)sp >= stk && (long*)sp < stk+(KSTACK / sizeof(long)))
+               return func(arg);
+
+       /*
+        * Reached a second time via longjmp(jmp, (int)args) below, with
+        * the stack pointer patched to point into stk.
+        * NOTE(review): the parameter block pointer travels through
+        * longjmp's int value, which assumes a pointer fits in an int.
+        */
+       if(args = (struct onstackargs*)setjmp(jmp)){
+               args->ret = onstack(args->stk, args->func, args->arg);
+               longjmp(args->jmp, 1);
+       }
+
+       /* start 16 longs below the top of the new stack */
+       sp = &stk[(KSTACK / sizeof(long))-16];
+       jmp[JMPBUFSP] = (long)sp;
+
+       memset(stk, 0, KSTACK);
+
+       args = &a;
+       args->stk = stk;
+       args->func = func;
+       args->arg = arg;
+
+       /* remember where to come back to, then switch stacks */
+       if(!setjmp(args->jmp))
+               longjmp(jmp, (int)args);
+
+       return args->ret;
+}
+
+#pragma profile off
+
+/*
+ * Run fn(arg): directly when _tos->prof.what is 0, otherwise through
+ * prof() with the value 2000 and the current prof.what setting.
+ */
+static void
+proff(void (*fn)(void*), void *arg)
+{
+       if(_tos->prof.what != 0){
+               prof(fn, arg, 2000, _tos->prof.what);
+               return;
+       }
+       fn(arg);
+}
+
+/*
+ * Callback run by proff(); arg is the jmp_buf set up in profme().
+ * Saves the current context in exitjmp and jumps back to profme().
+ * When exitjmp is later longjmp'ed to, setjmp returns non-zero and
+ * this function simply returns to its caller.
+ */
+static void
+profexitjmpfn(void *arg)
+{
+       /*
+        * we are now called by the profiling function on the profstack.
+        * save the current continuation so we can return here on exit.
+        */
+       if(!setjmp(exitjmp))
+               longjmp((long*)arg, 1); /* return from profme() */
+}
+
+/*
+ * Body that profme() runs via onstack(): calls profexitjmpfn through
+ * proff(), then terminates once proff() returns.  It never returns a
+ * value despite the int return type required by onstack().
+ */
+static int
+profmeprofstack(void *arg)
+{
+       proff(profexitjmpfn, arg);
+       for(;;) die();
+}
+
+#pragma profile on
+
+static long *profstack;
+
+/*
+ * Run profmeprofstack() on profstack via onstack(), passing a local
+ * jmp_buf.  profexitjmpfn() longjmps to that jmp_buf, which makes
+ * setjmp return non-zero here and lets profme() return to its caller.
+ */
+void
+profme(void)
+{
+       jmp_buf j;
+
+       if(!setjmp(j))
+               onstack(profstack, profmeprofstack, j);
+}
+
+
+/*
+ * Print "PANIC: " followed by the formatted message on fd 2 and stop.
+ * With debug set the process abort()s; otherwise "kill" is written to
+ * this process's /proc/<pid>/notepg file (if it can be opened) and
+ * the process exits with status "panic".
+ */
+static void
+vpanic(char *msg, va_list arg)
+{
+       char path[32];
+       int notefd;
+
+       fprint(2, "PANIC: ");
+       vfprint(2, msg, arg);
+       fprint(2, "\n");
+
+       if(debug)
+               abort();
+
+       snprint(path, sizeof(path), "/proc/%d/notepg", getpid());
+       notefd = open(path, OWRITE);
+       if(notefd >= 0){
+               write(notefd, "kill", 4);
+               close(notefd);
+       }
+       exits("panic");
+}
+
+/*
+ * Report a fatal error: printf-style front end to vpanic(), which
+ * prints the message and ends the process.
+ */
+void
+panic(char *msg, ...)
+{
+       va_list arg;
+
+       va_start(arg, msg);
+       vpanic(msg, arg);
+       va_end(arg);
+}
+
+/* Print the command line synopsis on fd 2 and exit with status "usage". */
+void usage(void)
+{
+       fprint(2, "usage: linuxemu [-d] [-u uid] [-g gid] cmd [args]\n");
+       exits("usage");
+}
+
+/*
+ * Storage that main() keeps in its own stack frame and publishes
+ * through the globals profstack, kstack, pcurrent and exitjmp.
+ */
+struct mainstack
+{
+       long            profstack[KSTACK / sizeof(long)];       /* stack used by profme() */
+       long            kstack[KSTACK / sizeof(long)];  /* assigned to the global kstack */
+       Uproc   *proc;          /* slot that pcurrent points at */
+       jmp_buf exitjmp;        /* context jumped to in order to exit */
+};
+
+/*
+ * Entry point: linuxemu [-d] [-u uid] [-g gid] cmd [args]
+ *
+ * Parses the options, initializes the emulated devices, sets up the
+ * per-process globals, creates the initial process, points fds 0-2 of
+ * the emulated process at /dev/cons and executes cmd with the
+ * environment from readenv().  Code after sys_execve() only handles
+ * its error return.
+ */
+void main(int argc, char *argv[])
+{
+       struct mainstack ms;
+       int err;
+       int uid, gid;
+       int fd;
+
+       /* custom print verbs used by trace and error output */
+       fmtinstall('E', Efmt);
+       fmtinstall('S', Sfmt);
+
+       uid = 0;
+       gid = 0;
+       debug = 0;
+
+       ARGBEGIN {
+       case 'd':
+               debug++;
+               break;
+       case 'u':
+               uid = atoi(EARGF(usage()));
+               break;
+       case 'g':
+               gid = atoi(EARGF(usage()));
+               break;
+       default:
+               usage();
+       } ARGEND
+
+       /* a command to run is mandatory */
+       if(argc < 1)
+               usage();
+
+       rootdevinit();
+       procdevinit();
+       ptydevinit();
+       consdevinit();
+       dspdevinit();
+       miscdevinit();
+       sockdevinit();
+       pipedevinit();
+
+       /* publish the storage held in this frame through the globals */
+       kstack = ms.kstack;
+       profstack = ms.profstack;
+       exitjmp = ms.exitjmp;
+       pcurrent = &ms.proc;
+       current = nil;
+
+       /* a later longjmp(exitjmp, ...) lands here and terminates */
+       if(setjmp(exitjmp))
+               die();
+
+       initproc();
+       current->uid = uid;
+       current->gid = gid;
+
+       /* emulated console */
+       sys_close(0);
+       if((fd = sys_open("/dev/cons", O_RDWR, 0)) != 0)
+               fprint(2, "cant open console for stdin\n");
+       sys_close(1);
+       if(sys_dup(fd) != 1)
+               fprint(2, "cant dup stdout\n");
+       sys_close(2);
+       if(sys_dup(fd) != 2)
+               fprint(2, "cant dup stderr\n");
+
+       /* set the descriptor flags of fds 0-2 to 0 */
+       sys_fcntl(0, F_SETFD, 0);
+       sys_fcntl(1, F_SETFD, 0);
+       sys_fcntl(2, F_SETFD, 0);
+
+       err = sys_execve(*argv, argv, readenv());
+
+       /* sys_execve returned: report the error and leave via exitjmp */
+       fprint(2, "%s: %E\n", *argv, err);
+       longjmp(exitjmp, 1);
+}
diff --git a/linux_emul_base/mem.c b/linux_emul_base/mem.c
new file mode 100644 (file)
index 0000000..996cafb
--- /dev/null
@@ -0,0 +1,1538 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Range Range;
+typedef struct Area Area;
+typedef struct Filemap Filemap;
+typedef struct Futex Futex;
+typedef struct Seg Seg;
+typedef struct Space Space;
+
+/*
+ * Segment types; also the index into Space.seg[] and segname[].
+ * keep in order, lowest base address first
+ */
+enum {
+       SEGDATA,
+       SEGPRIVATE,
+       SEGSHARED,
+       SEGSTACK,
+       SEGMAX,
+};
+
+/* printable segment names, indexed by segment type; keep in step with the enum */
+static char *segname[SEGMAX] = { "data", "private", "shared", "stack" };
+
+/* Address range; its length is computed as top - base. */
+struct Range
+{
+       ulong   base;
+       ulong   top;
+};
+
+/*
+ * File mapping referenced by one or more Areas.  Reference counted:
+ * duparea() takes a reference and freearea() drops one.
+ */
+struct Filemap
+{
+       Range   addr;           /* address range of the mapping */
+
+       Filemap *next;          /* link on a Seg's freefilemap list */
+
+       char            *path;          /* released with free() on last unref */
+       ulong   offset;         /* file offset corresponding to addr.base */
+       int             mode;
+       Ufile           *file;          /* backing file; may be nil */
+
+       Ref;
+};
+
+/*
+ * Futex attached to an Area; doubles as a wait queue (Uwaitq) and is
+ * reference counted (Ref).
+ */
+struct Futex
+{
+       ulong   *addr;
+
+       Futex   *next;          /* next futex on the same area */
+       Futex   **link;         /* address of the pointer that points at us; nil when unlinked */
+
+       Ref;
+       Uwaitq;
+};
+
+/*
+ * One contiguous piece of a segment with uniform protection,
+ * kept on the segment's address-ordered, doubly linked area list.
+ */
+struct Area
+{
+       Range   addr;
+
+       Area    *next;          /* next higher area */
+       Area    *prev;          /* previous lower area */
+       Seg             *seg;                   /* segment we belong to */
+
+       int             prot;           /* PROT_* bits */
+
+       Filemap         *filemap;       /* file mapping, or nil */
+       Futex   *futex;         /* list of futexes in this area */
+};
+
+/*
+ * A segment: reference counted, protected by its QLock, holding the
+ * list of areas plus per-segment free lists of recycled objects.
+ */
+struct Seg
+{
+       Ref;
+       QLock;
+
+       Range   addr;
+       ulong   limit;                  /* maximum address this segment can grow */
+
+       Area    *areas;         /* ordered by address */
+
+       int             type;                   /* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */
+
+       Area            *freearea;      /* recycled Area structures */
+       Filemap *freefilemap;   /* recycled Filemap structures */
+       Futex   *freefutex;     /* recycled Futex structures */
+};
+
+/*
+ * An address space: reference counted, protected by its QLock, with
+ * one optional segment per segment type.
+ */
+struct Space
+{
+       Ref;
+       QLock;
+
+       ulong   brk;
+       Seg             *seg[SEGMAX];   /* indexed by segment type; nil if absent */
+};
+
+
+/*
+ * malloc() that panics instead of returning nil.  The block is tagged
+ * with the pc of kmalloc's caller.
+ */
+void*
+kmalloc(int size)
+{
+       void *mem;
+
+       if((mem = malloc(size)) == nil)
+               panic("kmalloc: out of memory");
+       setmalloctag(mem, getcallerpc(&size));
+       return mem;
+}
+/*
+ * realloc() that panics when a request for size > 0 fails.  For
+ * size <= 0 the realloc result is returned unchecked.  A successful
+ * block is tagged with the pc of krealloc's caller.
+ */
+void*
+krealloc(void *ptr, int size)
+{
+       void *mem;
+
+       mem = realloc(ptr, size);
+       if(size <= 0)
+               return mem;
+       if(mem == nil)
+               panic("krealloc: out of memory");
+       setmalloctag(mem, getcallerpc(&ptr));
+       return mem;
+}
+
+/*
+ * mallocz() that panics instead of returning nil; zero is passed on
+ * unchanged.  The block is tagged with the pc of kmallocz's caller.
+ */
+void*
+kmallocz(int size, int zero)
+{
+       void *mem;
+
+       if((mem = mallocz(size, zero)) == nil)
+               panic("kmallocz: out of memory");
+       setmalloctag(mem, getcallerpc(&size));
+       return mem;
+}
+
+/*
+ * Return a kmalloc'ed copy of the string s, tagged with the pc of
+ * kstrdup's caller.  s must not be nil.
+ */
+char*
+kstrdup(char *s)
+{
+       char *copy;
+       int len;
+
+       len = strlen(s);
+       copy = kmalloc(len+1);
+       memmove(copy, s, len);
+       copy[len] = 0;
+       setmalloctag(copy, getcallerpc(&s));
+       return copy;
+}
+
+/*
+ * Format into a freshly allocated string, like smprint(), but panic
+ * instead of returning nil.  The result is tagged with the pc of
+ * ksmprint's caller and must be released with free().
+ *
+ * vsmprint sizes the buffer to fit, so long results (for example
+ * large environment values) are no longer cut off at a fixed 4096
+ * bytes.
+ */
+char*
+ksmprint(char *fmt, ...)
+{
+       va_list args;
+       char *p;
+
+       va_start(args, fmt);
+       p = vsmprint(fmt, args);
+       va_end(args);
+       if(p == nil)
+               panic("ksmprint: out of memory");
+       setmalloctag(p, getcallerpc(&fmt));
+       return p;
+}
+
+/*
+ * Round addr up to the next multiple of PAGESIZE (unchanged if it is
+ * already aligned).  PAGESIZE is assumed to be a power of two.
+ */
+ulong
+pagealign(ulong addr)
+{
+       ulong mask;
+
+       mask = PAGESIZE-1;
+       addr += mask;
+       return addr & ~mask;
+}
+
+/*
+ * Write the part of range r that lies inside area a and inside its
+ * file mapping back to the mapped file.
+ *
+ * Nothing is done for areas without a file mapping, mappings without
+ * a file, or areas that are not writable.
+ */
+static void
+syncarea(Area *a, Range r)
+{
+       Filemap *f;
+
+       f = a->filemap;
+       if(f == nil || f->file == nil)
+               return;
+       if((a->prot & PROT_WRITE) == 0)
+               return;
+
+       /* clip r to the area, then to the mapping */
+       if(r.base < a->addr.base)
+               r.base = a->addr.base;
+       if(r.top > a->addr.top)
+               r.top = a->addr.top;
+       if(r.base < f->addr.base)
+               r.base = f->addr.base;
+       if(r.top > f->addr.top)
+               r.top = f->addr.top;
+
+       /*
+        * An empty intersection would make the unsigned length below
+        * wrap around to a huge value.
+        */
+       if(r.base >= r.top)
+               return;
+
+       pwritefile(f->file, (void*)r.base, r.top - r.base,
+               (r.base - f->addr.base) + f->offset);
+}
+
+/*
+ * Insert area a into seg's area list, which is kept sorted by base
+ * address: a goes in front of the first area whose base is greater
+ * than a's, or at the end if there is none.  Also sets a->seg.
+ */
+static void
+linkarea(Seg *seg, Area *a)
+{
+       Area *before, *after;
+
+       /* find the insertion point: before < a <= after */
+       before = nil;
+       for(after = seg->areas; after != nil; after = after->next){
+               if(after->addr.base > a->addr.base)
+                       break;
+               before = after;
+       }
+
+       a->seg = seg;
+       a->prev = before;
+       a->next = after;
+       if(after != nil)
+               after->prev = a;
+       if(before != nil)
+               before->next = a;
+       else
+               seg->areas = a;
+}
+
+/*
+ * Return an unlinked copy of area a: same address range, protection
+ * and file mapping (with an extra reference), no futexes, and no
+ * segment.  The Area structure is taken from a->seg's free list when
+ * one is available, otherwise allocated.
+ */
+static Area *
+duparea(Area *a)
+{
+       Area *copy;
+       Seg *seg;
+
+       seg = a->seg;
+       copy = seg->freearea;
+       if(copy != nil)
+               seg->freearea = copy->next;
+       else
+               copy = kmalloc(sizeof(Area));
+
+       copy->addr = a->addr;
+       copy->next = nil;
+       copy->prev = nil;
+       copy->seg = nil;
+       copy->prot = a->prot;
+       copy->filemap = a->filemap;
+       if(copy->filemap != nil)
+               incref(copy->filemap);
+       copy->futex = nil;
+       return copy;
+}
+
+/*
+ * Tear down area a and put it on its segment's free list.
+ *
+ * A file mapping is synced and unreferenced; on the last reference
+ * its path and file are released and the Filemap is recycled.  All
+ * futexes are unlinked from the area and their waiters woken.  The
+ * area is then removed from the segment's area list.
+ * a->seg must be valid.
+ */
+static void
+freearea(Area *a)
+{
+       Filemap *f;
+       Futex *x;
+       Seg *seg;
+
+       seg = a->seg;
+       if(f = a->filemap){
+               /* write the area back before letting go of the mapping */
+               syncarea(a, a->addr);
+               a->filemap = nil;
+               if(!decref(f)){
+                       free(f->path);
+                       putfile(f->file);
+                       f->next = seg->freefilemap;
+                       seg->freefilemap = f;
+               }
+       }
+       /* pop every futex off the area's list and wake whoever waits on it */
+       while(x = a->futex){
+               if(a->futex = x->next)
+                       x->next->link = &a->futex;
+               x->link = nil;
+               x->next = nil;
+               wakeq(x, MAXPROC);
+       }
+       /* unlink from the segment's doubly linked area list */
+       if(a->prev == nil){
+               if(seg->areas = a->next)
+                       a->next->prev = nil;
+       } else {
+               if(a->prev->next = a->next)
+                       a->next->prev = a->prev;
+       }
+
+       /* recycle the Area structure */
+       a->next = seg->freearea;
+       seg->freearea = a;
+}
+
+/*
+ * Allocate a Seg of the given type covering addr, with one reference.
+ * When class is non-nil the address range is first attached with
+ * segattach(attr, class, ...); a failure to attach at exactly
+ * addr.base is fatal.
+ */
+static Seg *
+allocseg(int type, Range addr, ulong limit, int attr, char *class)
+{
+       Seg *seg;
+       void *base;
+
+       if(class != nil){
+               base = (void*)addr.base;
+               trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top);
+               if(segattach(attr, class, base, addr.top - addr.base) != base)
+                       panic("allocseg: segattach %s segment: %r", segname[type]);
+       }
+
+       seg = kmallocz(sizeof(Seg), 1);
+       seg->type = type;
+       seg->addr = addr;
+       seg->limit = limit;
+       seg->ref = 1;
+
+       return seg;
+}
+
+/*
+ * Duplicate a segment.  With copy == 0 the segment is shared: its
+ * reference count is bumped and old itself is returned.  Otherwise a
+ * new Seg with the same type, range and limit is allocated (without
+ * attaching memory) and given copies of all of old's areas, in order.
+ * A nil segment yields nil.
+ */
+static Seg *
+dupseg(Seg *old, int copy)
+{
+       Seg *new;
+       Area *a, *x, *last, **tail;
+
+       if(old == nil)
+               return nil;
+       if(!copy){
+               incref(old);
+               return old;
+       }
+
+       new = allocseg(old->type, old->addr, old->limit, 0, nil);
+       last = nil;
+       tail = &new->areas;
+       for(a = old->areas; a != nil; a = a->next){
+               x = duparea(a);
+               x->seg = new;
+               x->prev = last;
+               *tail = x;              /* append after the previous copy */
+               tail = &x->next;
+               last = x;
+       }
+
+       return new;
+}
+
+/*
+ * Obtain an address space derived from old.  With copy == 0 old is
+ * shared (one more reference).  Otherwise a new Space is built under
+ * old's lock: every segment is copied with dupseg() except the
+ * SEGSHARED one, which is shared; brk is carried over.
+ */
+static Space *
+getspace(Space *old, int copy)
+{
+       Space *new;
+       Seg *seg;
+       int t;
+
+       if(copy == 0){
+               incref(old);
+               return old;
+       }
+
+       new = kmallocz(sizeof(Space), 1);
+       new->ref = 1;
+
+       qlock(old);
+       for(t = 0; t < SEGMAX; t++){
+               seg = old->seg[t];
+               if(seg == nil)
+                       continue;
+               qlock(seg);
+               new->seg[t] = dupseg(seg, t != SEGSHARED);
+               qunlock(seg);
+       }
+       new->brk = old->brk;
+       qunlock(old);
+
+       return new;
+}
+
+/*
+ * Drop a reference to space; on the last one release its segments.
+ *
+ * Each segment is unreferenced; when that was its last reference all
+ * its areas are freed, the recycling lists are emptied and the Seg is
+ * freed.  The segment's address is segdetach()ed in either case, and
+ * a failing segdetach is fatal.
+ */
+static void
+putspace(Space *space)
+{
+       Seg *seg;
+       int t;
+       Area *a;
+       Filemap *f;
+       Futex *x;
+       void *addr;
+
+       if(decref(space))
+               return;
+       for(t=0; t<SEGMAX; t++){
+               if(seg = space->seg[t]){
+                       /* remember the base: seg may be freed below */
+                       addr = (void*)seg->addr.base;
+                       if(!decref(seg)){
+                               qlock(seg);
+                               /* mark all areas as free */
+                               while(a = seg->areas)
+                                       freearea(a);
+
+                               /* clear the free lists */
+                               while(a = seg->freearea){
+                                       seg->freearea = a->next;
+                                       free(a);
+                               }
+                               while(f = seg->freefilemap){
+                                       seg->freefilemap = f->next;
+                                       free(f);
+                               }
+                               while(x = seg->freefutex){
+                                       seg->freefutex = x->next;
+                                       free(x);
+                               }
+                               /* the lock is freed together with the segment */
+                               free(seg);
+                       }
+                       if(segdetach(addr) < 0)
+                               panic("putspace: segdetach %s segment: %r", segname[t]);
+               }
+       }
+       free(space);
+}
+
+/*
+ * Two areas may be merged when neither has a file mapping or futexes
+ * and both have the same protection.  Returns 1 if so, else 0.
+ */
+static int
+canmerge(Area *a, Area *b)
+{
+       if(a->filemap != nil || a->futex != nil)
+               return 0;
+       if(b->filemap != nil || b->futex != nil)
+               return 0;
+       return a->prot == b->prot;
+}
+
+/*
+ * Grow area a over its directly adjacent neighbours where canmerge()
+ * allows it. The absorbed neighbour is released with freearea().
+ * The predecessor is handled before the successor.
+ */
+static void
+mergearea(Area *a)
+{
+       Area *nb;
+
+       /* swallow the predecessor if it ends exactly where a begins */
+       nb = a->prev;
+       if(nb != nil && nb->addr.top == a->addr.base && canmerge(nb, a)){
+               a->addr.base = nb->addr.base;
+               freearea(nb);
+       }
+
+       /* swallow the successor if it begins exactly where a ends */
+       nb = a->next;
+       if(nb != nil && nb->addr.base == a->addr.top && canmerge(nb, a)){
+               a->addr.top = nb->addr.top;
+               freearea(nb);
+       }
+}
+
+/*
+ * Look for unused address space between the areas of seg.
+ *
+ * fixed != 0: succeed only if the range *r lies completely inside
+ * one gap; *r is left untouched.
+ * fixed == 0: pick the smallest gap that is at least as large as *r
+ * and rewrite *r so that it occupies the top end of that gap.
+ *
+ * Returns 1 when a hole was found, 0 otherwise. The scan walks
+ * seg->areas front to back and treats the space between consecutive
+ * areas (and up to seg->addr.top) as gaps.
+ */
+static int
+findhole(Seg *seg, Range *r, int fixed)
+{
+       Range gap;
+       Area *a;
+       ulong want;
+       ulong best;
+       ulong len;
+
+       want = r->top - r->base;
+       best = ~0;
+       gap.base = seg->addr.base;
+       for(a = seg->areas;; a = a->next){
+               /* the gap ends at the next area, or at the segment top */
+               if(a != nil)
+                       gap.top = a->addr.base;
+               else
+                       gap.top = seg->addr.top;
+
+               if(gap.top > gap.base){
+                       if(fixed){
+                               /* gaps only move upward: r can no longer fit */
+                               if(gap.base > r->base)
+                                       return 0;
+                               if(r->base >= gap.base && r->top <= gap.top)
+                                       return 1;
+                       } else {
+                               len = gap.top - gap.base;
+                               if(len >= want && len < best){
+                                       r->base = gap.top - want;
+                                       r->top = gap.top;
+                                       best = len;
+                                       /* exact fit, cannot do better */
+                                       if(best == want)
+                                               return 1;
+                               }
+                       }
+               }
+               if(a == nil)
+                       break;
+               gap.base = a->addr.top;
+       }
+
+       if(fixed)
+               return 0;
+       return best != ~0;
+}
+
+/*
+ * Wake every waiter on the futexes of area a whose address lies in
+ * [addr.base, addr.top) and unlink those futexes from the area's list.
+ */
+static void
+wakefutexarea(Area *a, Range addr)
+{
+       Futex *cur, *nxt;
+       ulong ua;
+
+       cur = a->futex;
+       while(cur != nil){
+               /* grab the successor first, cur gets unlinked below */
+               nxt = cur->next;
+               ua = (ulong)cur->addr;
+               if(ua >= addr.base && ua < addr.top){
+                       *cur->link = nxt;
+                       if(nxt != nil)
+                               nxt->link = cur->link;
+                       cur->link = nil;
+                       cur->next = nil;
+
+                       trace("wakefutexarea: fu=%p addr=%p", cur, cur->addr);
+                       wakeq(cur, MAXPROC);
+               }
+               cur = nxt;
+       }
+}
+
+/*
+ * makehole removes the range r from every area of seg that overlaps
+ * it and gives the pages back with segfree(). An area fully covered
+ * by r is freed, a partially covered area is trimmed, and an area
+ * that contains r in its middle is split in two. Futexes inside the
+ * removed part are woken before the area is changed.
+ *
+ * The loop stops at the first area that starts at or above r.top, so
+ * it relies on seg->areas being ordered by ascending address.
+ * The callers in this file (unmapspace, mapspace) hold the seg qlock.
+ */
+static void
+makehole(Seg *seg, Range r)
+{
+       Area *a, *b, *x;
+       Range f;
+
+       for(a = seg->areas; a; a = x){
+               /* fetch the successor now: a may be freed or get a new neighbour */
+               x = a->next;
+
+               if(a->addr.top <= r.base)
+                       continue;
+               if(a->addr.base >= r.top)
+                       break;
+
+               /* f = intersection of r with this area */
+               f = r;
+               if(f.base < a->addr.base)
+                       f.base = a->addr.base;
+               if(f.top > a->addr.top)
+                       f.top = a->addr.top;
+
+               wakefutexarea(a, f);
+               if(f.base == a->addr.base){
+                       if(f.top == a->addr.top){
+                               /* whole area goes away */
+                               freearea(a);
+                       } else {
+                               /* cut off the front */
+                               a->addr.base = f.top;
+                       }
+               } else if(f.top == a->addr.top){
+                       /* cut off the tail */
+                       a->addr.top = f.base;
+               } else {
+                       /* hole in the middle: b becomes the part above the hole */
+                       b = duparea(a);
+                       b->addr.base = f.top;
+
+                       a->addr.top = f.base;
+                       linkarea(seg, b);
+               }
+
+               if(segfree((void*)f.base, f.top - f.base) < 0)
+                       panic("makehole: segfree %s segment: %r", segname[seg->type]);
+       }
+}
+
+/*
+ * Find the segment of space whose address range contains addr.
+ * The matching segment is returned with its qlock held; the caller
+ * has to qunlock() it. Returns nil, with nothing locked, when no
+ * segment matches.
+ */
+static Seg*
+addr2seg(Space *space, ulong addr)
+{
+       Seg *s;
+       int i;
+
+       for(i = 0; i < SEGMAX; i++){
+               s = space->seg[i];
+               if(s != nil){
+                       qlock(s);
+                       if(s->addr.base <= addr && addr < s->addr.top)
+                               return s;
+                       qunlock(s);
+               }
+       }
+       return nil;
+}
+
+/*
+ * Return the area of seg that contains addr, or nil if addr falls
+ * into a gap. No locking is done here.
+ */
+static Area*
+addr2area(Seg *seg, ulong addr)
+{
+       Area *cur;
+
+       cur = seg->areas;
+       while(cur != nil){
+               if(cur->addr.base <= addr && addr < cur->addr.top)
+                       break;
+               cur = cur->next;
+       }
+       return cur;
+}
+
+/*
+ * okaddr checks whether the len bytes starting at ptr are mapped in
+ * the current process with read permission (write == 0) or write
+ * permission (write != 0). Returns 1 if the whole range is covered by
+ * consecutive areas of one segment carrying the required protection,
+ * 0 otherwise.
+ *
+ * Addresses in the first page are always rejected. A negative len or
+ * a range that wraps around the top of the address space is rejected
+ * as well; without that guard ptr+len could wrap to a small value and
+ * compare as "inside" the first area.
+ */
+int
+okaddr(void *ptr, int len, int write)
+{
+       ulong start, end, addr;
+       Space *space;
+       Seg *seg;
+       Area *a;
+       int need;
+       int ok;
+
+       ok = 0;
+       start = (ulong)ptr;
+       end = start + len;
+       addr = start;
+       if(start < PAGESIZE)
+               goto out;
+       if(len < 0 || end < start)
+               goto out;
+       need = write ? PROT_WRITE : PROT_READ;
+       if(space = current->mem){
+               qlock(space);
+               /* addr2seg returns the segment locked */
+               if(seg = addr2seg(space, addr)){
+                       /* walk area by area until the end of the range is covered */
+                       while(a = addr2area(seg, addr)){
+                               if((a->prot & need) == 0)
+                                       break;
+                               if(end <= a->addr.top){
+                                       ok = 1;
+                                       break;
+                               }
+                               /* continue with the area that starts right after this one */
+                               addr = a->addr.top;
+                       }
+                       qunlock(seg);
+               }
+               qunlock(space);
+       }
+out:
+       /* report the requested range, not the position the walk stopped at */
+       trace("okaddr(%lux-%lux, %d) -> %d", start, end, write, ok);
+       return ok;
+}
+
+/*
+ * Remove the range r from all segments of space that overlap it.
+ * Each segment is locked while it is inspected. The scan ends at the
+ * first segment that starts at or above r.top, i.e. it expects the
+ * segment slots to be ordered by ascending address.
+ */
+static void
+unmapspace(Space *space, Range r)
+{
+       Seg *s;
+       int i;
+       int past;
+
+       for(i = 0; i < SEGMAX; i++){
+               s = space->seg[i];
+               if(s == nil)
+                       continue;
+
+               qlock(s);
+               past = (s->addr.base >= r.top);
+               if(!past && s->addr.top > r.base)
+                       makehole(s, r);
+               qunlock(s);
+
+               if(past)
+                       break;
+       }
+}
+
+/*
+ * mapspace reserves the address range r in one segment of space and
+ * returns a new Area describing it.
+ *
+ * The segment is chosen from flags and r.base: mappings without
+ * MAP_PRIVATE go to SEGSHARED; private ones go to SEGSTACK, SEGDATA
+ * or SEGPRIVATE depending on where r.base lies. If r already fits a
+ * free hole it is used as is. With MAP_FIXED, existing areas in the
+ * way are punched out (refused for the shared segment). Without
+ * MAP_FIXED the best fitting hole is used, or the range is placed at
+ * the current segment top and the segment is grown with segbrk(),
+ * bounded by seg->limit.
+ *
+ * On success the area is linked into the segment and the segment
+ * stays LOCKED; the caller has to qunlock it (callers in this file
+ * reach it through a->seg). On failure nil is returned, the segment
+ * is unlocked and *perr, if perr is non-nil, is set to -ENOMEM.
+ *
+ * NOTE(review): seg[SEGSTACK] and seg[SEGDATA] are dereferenced
+ * without a nil check in the MAP_PRIVATE branch -- presumably both
+ * exist before any private mapping below the stack is requested.
+ */
+static Area*
+mapspace(Space *space, Range r, int flags, int prot, int *perr)
+{
+       Seg *seg;
+       Area *a;
+       Range f;
+       int t;
+
+       /* pick the target segment */
+       if(flags & MAP_PRIVATE){
+               if(r.base >= space->seg[SEGSTACK]->addr.base){
+                       t = SEGSTACK;
+               } else if(r.base >= space->seg[SEGDATA]->addr.base && 
+                       r.base < space->seg[SEGDATA]->limit){
+                       t = SEGDATA;
+               } else {
+                       t = SEGPRIVATE;
+               }
+       } else {
+               t = SEGSHARED;
+       }
+
+       if((seg = space->seg[t]) == nil)
+               goto nomem;
+
+       qlock(seg);
+       /* first try to honour the requested address */
+       if((r.base >= seg->addr.base) && (r.top <= seg->limit)){
+               /* entirely above the current top: only needs segbrk below */
+               if(r.base >= seg->addr.top)
+                       goto addrok;
+
+               /* f = the part of r that lies inside the attached segment */
+               f = r;
+               if(f.top > seg->addr.top)
+                       f.top = seg->addr.top;
+               if(findhole(seg, &f, 1))
+                       goto addrok;
+               if(flags & MAP_FIXED){
+                       if(seg->type == SEGSHARED){
+                               trace("mapspace(): cant make hole %lux-%lux in shared segment",
+                                       f.base, f.top);
+                               goto nomem;
+                       }
+                       /* evict whatever is mapped there */
+                       makehole(seg, f);
+                       goto addrok;
+               }               
+       }
+
+       if(flags & MAP_FIXED){
+               trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment", 
+                       r.base, r.top, segname[seg->type]);
+               goto nomem;
+       }
+
+       /* address is only a hint: take any hole that is large enough */
+       if(findhole(seg, &r, 0))
+               goto addrok;
+
+       /* no hole: relocate r to start at the current segment top */
+       r.top -= r.base;
+       r.base = seg->addr.top;
+       r.top += r.base;
+
+addrok:
+       trace("mapspace(): addr %lux-%lux", r.base, r.top);
+
+       /* grow the segment if the range reaches beyond its current top */
+       if(r.top > seg->addr.top){
+               if(r.top > seg->limit){
+                       trace("mapspace(): area top %lux over %s segment limit %lux",
+                               r.top, segname[seg->type], seg->limit);
+                       goto nomem;
+               }
+               trace("mapspace(): segbrk %s segment %lux-%lux -> %lux",
+                       segname[seg->type], seg->addr.base, seg->addr.top, r.top);
+               if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){
+                       trace("mapspace(): segbrk failed: %r");
+                       goto nomem;
+               }
+               seg->addr.top = r.top;
+       }
+
+       /* reuse an Area from the segment's free list if possible */
+       if(a = seg->freearea){
+               seg->freearea = a->next;
+       } else {
+               a = kmalloc(sizeof(Area));
+       }
+       a->addr = r;
+       a->prot = prot;
+       a->filemap = nil;
+       a->futex = nil;
+
+       /* presumably linkarea sets the remaining link fields (seg/prev/next) */
+       linkarea(seg, a);
+
+       /* keep seg locked */
+       return a;
+
+nomem:
+       if(seg != nil)
+               qunlock(seg);
+       if(perr) *perr = -ENOMEM;
+       return nil;
+}
+
+/*
+ * brkspace moves the program break of space to bk and returns the
+ * resulting break. If the request cannot be satisfied (no data
+ * segment, bk below the segment base, another mapping in the way, or
+ * mapspace failing) the old break is returned unchanged.
+ *
+ * Shrinking unmaps the pages between the new and the old break;
+ * growing maps them as an anonymous private read/write/exec area and
+ * merges it with its neighbours. Only page-aligned differences cause
+ * mapping changes; the exact value bk is stored in space->brk.
+ *
+ * The local seg tracks which segment lock is currently held (nil if
+ * none) so that the common exit at out: releases it exactly once.
+ */
+static ulong
+brkspace(Space *space, ulong bk)
+{
+       Seg *seg;
+       Area *a;
+       ulong old, new;
+       Range r;
+
+       if((seg = space->seg[SEGDATA]) == nil)
+               goto out;
+
+       qlock(seg);
+       /* break not yet inside the data segment: start it at the segment top */
+       if(space->brk < seg->addr.base)
+               space->brk = seg->addr.top;
+
+       if(bk < seg->addr.base)
+               goto out;
+
+       old = pagealign(space->brk);
+       new = pagealign(bk);
+
+       if(old != new){
+               if(bk < space->brk){
+                       /* shrink: unmapspace takes the segment locks itself */
+                       r.base = new;
+                       r.top = old;
+                       qunlock(seg);
+                       seg = nil;
+
+                       unmapspace(space, r);
+               } else {
+                       r.base = old;
+                       r.top = new;
+
+                       trace("brkspace(): new mapping %lux-%lux", r.base, r.top);
+                       /*
+                        * refuse to grow into (or within one page of) an
+                        * existing area; the walk starts at the area holding
+                        * the last page below the old break.
+                        * NOTE(review): if no area contains old - PAGESIZE
+                        * the check is skipped entirely -- confirm intended.
+                        */
+                       for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){
+                               if(a->addr.top <= r.base)
+                                       continue;
+                               if(a->addr.base > r.top + PAGESIZE)
+                                       break;
+
+                               trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top);
+                               goto out;
+                       }
+                       /* mapspace locks the segment itself */
+                       qunlock(seg);
+                       seg = nil;
+
+                       a = mapspace(space, r,
+                               MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED,
+                               PROT_READ|PROT_WRITE|PROT_EXEC, nil);
+
+                       if(a == nil)
+                               goto out;
+
+                       /* mapspace returned with a's segment locked */
+                       seg = a->seg;
+                       mergearea(a);
+               }
+       }
+
+       if(space->brk != bk){
+               trace("brkspace: set new brk %lux", bk);
+               space->brk = bk;
+       }
+
+out:
+       if(seg != nil)
+               qunlock(seg);
+
+       return space->brk;
+}
+
+/*
+ * remapspace implements the core of mremap: resize the mapping at
+ * addr from oldlen to newlen bytes (both rounded with pagealign).
+ *
+ * Shrinking unmaps the tail. Growing is only done in place: the
+ * mapping must end exactly at the end of its area, the new end must
+ * stay below seg->limit and below the next area, and the segment is
+ * extended with segbrk() if needed. Moving a mapping (MREMAP_FIXED to
+ * a different address, or when in-place growth is impossible) is not
+ * implemented and yields -ENOMEM.
+ *
+ * Returns addr on success, otherwise a negated errno (-EINVAL,
+ * -EFAULT, -ENOMEM) converted to ulong.
+ */
+static ulong
+remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags)
+{
+       Area *a;
+       Seg *seg;
+       int move;
+       Range r;
+
+       if(pagealign(addr) != addr)
+               return -EINVAL;
+
+       oldlen = pagealign(oldlen);
+       newlen = pagealign(newlen);
+
+       /* reject wrap-around; the second test also rejects newlen == 0 */
+       if((addr + oldlen) < addr)
+               return -EINVAL;
+       if((addr + newlen) <= addr)
+               return -EINVAL;
+
+       move = 0;
+       if(flags & MREMAP_FIXED){
+               if(pagealign(newaddr) != newaddr)
+                       return -EINVAL;
+               if((flags & MREMAP_MAYMOVE) == 0)
+                       return -EINVAL;
+               /* old and new range must not overlap */
+               if((newaddr <= addr) && ((newaddr+newlen)  > addr))
+                       return -EINVAL;
+               if((addr <= newaddr) && ((addr+oldlen) > newaddr))
+                       return -EINVAL;
+               move = (newaddr != addr);
+       }
+
+       if(newlen < oldlen){
+               /* shrink: drop the tail, then continue as a same-size request */
+               r.base = addr + newlen;
+               r.top = addr + oldlen;
+
+               unmapspace(space, r);
+
+               oldlen = newlen;
+       }
+
+       if((newlen == oldlen) && !move)
+               return addr;
+
+       /* addr2seg returns the segment locked; every exit below unlocks it */
+       if((seg = addr2seg(space, addr)) == nil)
+               return -EFAULT;
+
+       if((a = addr2area(seg, addr)) == nil)
+               goto fault;
+       if(a->addr.top < (addr + oldlen))
+               goto fault;
+
+       /* conditions under which the mapping cannot grow in place */
+       if(move)
+               goto domove;
+       if((addr + oldlen) != a->addr.top)
+               goto domove;
+       if((addr + newlen) > seg->limit)
+               goto domove;
+       if(a->next != nil)
+               if((addr + newlen) > a->next->addr.base)
+                       goto domove;
+
+       if((addr + newlen) > seg->addr.top){
+               trace("remapspace(): segbrk %s segment %lux-%lux -> %lux", 
+                       segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen));
+               if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){
+                       trace("remapspace(): segbrk: %r");
+                       goto domove;
+               }
+
+               seg->addr.top = (addr + newlen);
+       }
+       a->addr.top = (addr + newlen);
+       mergearea(a);
+       qunlock(seg);
+
+       return addr;
+
+domove:
+       trace("remapspace(): domove not implemented");
+       if(seg != nil)
+               qunlock(seg);
+       return -ENOMEM;
+
+fault:
+       if(seg != nil)
+               qunlock(seg);
+       return -EFAULT;
+}
+
+/*
+ * syncspace calls syncarea() on every area that overlaps the range r,
+ * starting with the area that contains r.base. Nothing happens if
+ * r.base is not inside any segment or area.
+ *
+ * The walk stops at the first area that begins at or above r.top.
+ * (The previous exit test compared r.base against the area top, which
+ * can never become true for later areas, so every remaining area of
+ * the segment was visited.)
+ */
+static void
+syncspace(Space *space, Range r)
+{
+       Seg *seg;
+       Area *a;
+
+       /* addr2seg returns the segment locked */
+       if((seg = addr2seg(space, r.base)) == nil)
+               return;
+
+       for(a = addr2area(seg, r.base); a != nil; a = a->next){
+               if(a->addr.base >= r.top)
+                       break;
+               syncarea(a, r);
+       }
+       qunlock(seg);
+}
+
+/*
+ * mapstack maps size bytes (rounded with pagealign) at the very top of
+ * the stack segment of the current process and returns a pointer just
+ * past the mapping, i.e. the initial top-of-stack. Returns nil if the
+ * mapping fails.
+ */
+void*
+mapstack(int size)
+{
+       Space *space;
+       ulong a;
+
+       space = current->mem;
+       a = space->seg[SEGSTACK]->addr.top;
+       size = pagealign(size);
+       a = sys_mmap(a - size, size,
+               PROT_READ|PROT_WRITE,
+               MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
+
+       /*
+        * sys_mmap reports failure as (ulong)-errno, never as 0. A
+        * successful result is page aligned, so anything in the top
+        * 4095 values (the range Linux reserves for -errno) is an error.
+        */
+       if(a == 0 || a >= (ulong)-4095)
+               return nil;
+
+       return (void*)(a + size);
+}
+
+/*
+ * mapdata creates the segments of the current process that do not
+ * exist yet and fixes up the growth limits of all segments.
+ *
+ * base is rounded with pagealign; the data segment starts there, the
+ * private segment 0x10000000 above it and the shared segment
+ * 0x10000000 below a point one page under the stack segment. Each new
+ * segment starts out one page long. Every segment's limit is set to
+ * one page below the base of the segment in the next slot.
+ */
+void
+mapdata(ulong base)
+{
+       Space *space;
+       Range r;
+       ulong top;
+       char *class;
+       int t;
+
+       space = current->mem;
+       base = pagealign(base);
+       top = space->seg[SEGSTACK]->addr.base - PAGESIZE;
+
+       for(t = 0; t < SEGMAX; t++){
+               if(space->seg[t] == nil){
+                       if(t == SEGDATA)
+                               r.base = base;
+                       else if(t == SEGPRIVATE)
+                               r.base = base + 0x10000000;
+                       else if(t == SEGSHARED)
+                               r.base = top - 0x10000000;
+                       r.top = r.base + PAGESIZE;
+
+                       class = (t == SEGSHARED) ? "shared" : "memory";
+                       space->seg[t] = allocseg(t, r, r.top, 0, class);
+               }
+
+               /* the previous segment may grow up to one page below this one */
+               if(t > 0 && space->seg[t-1] != nil)
+                       space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE;
+       }
+}
+
+/*
+ * unmapuserspace is called from kprocfork to get rid of the linux
+ * memory segments used by the calling process before current is set
+ * to zero. All segments are only segdetach()ed; the data structures
+ * describing them are left intact and stay valid for the calling
+ * (linux) process.
+ */
+void
+unmapuserspace(void)
+{
+       Space *space;
+       Seg *s;
+       int i;
+
+       space = current->mem;
+       qlock(space);
+       for(i = 0; i < SEGMAX; i++){
+               s = space->seg[i];
+               if(s != nil && segdetach((void*)s->addr.base) < 0)
+                       panic("unmapuserspace: segdetach %s segment: %r", segname[s->type]);
+       }
+       qunlock(space);
+}
+
+/*
+ * hack:
+ * The contents of the segment covering r are written out to a
+ * temporary file, the segment is detached, a new segment of the given
+ * class is attached at the same address and the contents are read
+ * back. Surprisingly this even works seamlessly with the Plan 9 Bss.
+ *
+ * Between segdetach() and the end of the read-back loop the memory in
+ * r is not valid. Failures in that window crash on purpose by writing
+ * through a nil pointer instead of calling panic().
+ * NOTE(review): presumably because panic/trace would touch the data
+ * that is currently detached -- confirm.
+ */
+static void
+convertseg(Range r, ulong attr, char *class)
+{
+       char name[64];
+       ulong p;
+       int n;
+       int fd;
+       ulong len;
+
+       /* ORCLOSE: the backing file is removed again when fd is closed */
+       snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid());
+       fd = create(name, ORDWR|ORCLOSE, 0600);
+       if(fd < 0)
+               panic("convertseg: cant create %s: %r", name);
+
+       len = r.top - r.base;
+
+       if(len > 0){
+               n = write(fd, (void*)r.base, len);
+               if(n != len)
+                       panic("convertseg: write: %r");
+       }
+
+       /*
+        * copy string to stack because its memory gets detached :-)
+        * name is reused as the class argument for segattach below.
+        * NOTE(review): strncpy does not terminate name if class is
+        * 64 bytes or longer; callers in this file pass "shared".
+        */
+       strncpy(name, class, sizeof(name));
+
+       trace("detaching %lux-%lux", r.base, r.top);
+
+       /* point of no return */
+       if(segdetach((void*)r.base) < 0)
+               panic("convertseg: segdetach: %r");
+       if(segattach(attr, name, (void*)r.base, len) != (void*)r.base)
+               *((int*)0) = 0;
+
+       p = 0;
+       while(p < len) {
+               /*
+                * we use pread directly to avoid hitting profiling code until
+                * data segment is read back again. pread is unprofiled syscall
+                * stub.
+                */
+               n = pread(fd, (void*)(r.base + p), len - p, (vlong)p);
+               if(n <= 0)
+                       *((int*)0) = 0;
+               p += n;
+       }
+
+       /* everything is back to normal again */
+       trace("segment %lux-%lux reattached as %s", r.base, r.top, class);
+
+       close(fd);
+}
+
+/*
+ * initmem creates a fresh Space for the current process and allocates
+ * its linux stack segment.
+ *
+ * It scans /proc/<pid>/segment in fixed-size records to find the
+ * Plan 9 stack segment; the linux stack segment is placed so that it
+ * ends one page below it. On the very first call the Plan 9 Data and
+ * Bss segments are additionally converted into "shared" segments via
+ * convertseg().
+ */
+void initmem(void)
+{
+       Space *space;
+       Range r, x;
+       char buf[80];
+       int fd;
+       int n;
+
+       /* set until the first call has completed */
+       static int firsttime = 1;
+
+       space = kmallocz(sizeof(Space), 1);
+       space->ref = 1;
+
+       snprint(buf, sizeof(buf), "/proc/%d/segment", getpid());
+       if((fd = open(buf, OREAD)) < 0)
+               panic("initspace: cant open %s: %r", buf);
+
+       /* size of one record read from the segment file (33 bytes) */
+       n = 10 + 9 + 9 + 4 + 1;
+       x.base = x.top = 0;
+       while(readn(fd, buf, n)==n){
+               char *name;
+
+               /* cut the record into NUL terminated fields */
+               buf[8] = 0;
+               buf[18] = 0;
+               buf[28] = 0;
+               buf[33] = 0;
+       
+               /* name at offset 0, base and top as hex at offsets 9 and 19 */
+               name = &buf[0];
+               r.base = strtoul(&buf[9], nil, 16);
+               r.top = strtoul(&buf[19], nil, 16);
+
+               trace("initspace(): %s %lux-%lux", name, r.base, r.top);
+
+               if(firsttime){
+                       /*
+                        * convert Plan9 data+bss segments into shared segments so
+                        * that the memory of emulator data structures gets shared across 
+                        * all processes. This only happens if initspace() is called the first time.
+                        */
+                       if(strstr(name, "Data")==name)
+                               convertseg(r, 0, "shared");
+                       if(strstr(name, "Bss")==name)
+                               convertseg(r, 0, "shared");
+               }
+
+               if(strstr(name, "Stack")==name){
+                       /* linux stack range: ends one page below the Plan 9 stack */
+                       x.top = r.base - PAGESIZE;
+                       x.base = x.top - pagealign((MAXPROC / 4) * USTACK);
+
+                       /* later calls only need the stack record */
+                       if(!firsttime)
+                               break;
+               }
+       }
+       close(fd);
+       firsttime = 0;
+
+       /*
+        * allocate the linux stack
+        * NOTE(review): if no Stack record was seen, x is still 0-0 here.
+        */
+       space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory");
+
+       current->mem = space;
+}
+
+/*
+ * Detach the current process from its address space and drop the
+ * reference it held. Does nothing if the process has no Space.
+ */
+void exitmem(void)
+{
+       Space *space;
+
+       space = current->mem;
+       if(space == nil)
+               return;
+
+       /* clear the pointer first, putspace may free the Space */
+       current->mem = nil;
+       putspace(space);
+}
+
+/*
+ * Give the process new an address space derived from the current
+ * process: whatever getspace() returns for the current Space and the
+ * copy flag. If the current process has no Space, new gets none.
+ */
+void clonemem(Uproc *new, int copy)
+{
+       Space *parent;
+
+       parent = current->mem;
+       if(parent != nil)
+               new->mem = getspace(parent, copy);
+       else
+               new->mem = nil;
+}
+
+/*
+ * procmemstat sums up the sizes of all mapped areas of proc.
+ *
+ * Returns the total size in bytes. Each output pointer may be nil; a
+ * non-nil one receives the bytes mapped in the matching segment:
+ *   pdat  SEGDATA      plib  SEGPRIVATE
+ *   pshr  SEGSHARED    pstk  SEGSTACK
+ * pexe receives the bytes of all areas that have PROT_EXEC set,
+ * regardless of segment. All outputs are zeroed first, so they are
+ * valid even if proc has no address space (return value 0).
+ */
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe)
+{
+       Space *space;
+       ulong size, z;
+       int i;
+
+       if(pdat) *pdat = 0;
+       if(plib) *plib = 0;
+       if(pshr) *pshr = 0;
+       if(pstk) *pstk = 0;
+       if(pexe) *pexe = 0;
+
+       if((space = proc->mem) == nil)
+               return 0;
+
+       size = 0;
+       qlock(space);
+       for(i=0; i<SEGMAX; i++){
+               Area *a;
+               Seg *seg;
+               if((seg = space->seg[i]) == nil)
+                       continue;
+               qlock(seg);
+               for(a = seg->areas; a; a = a->next){
+                       z = a->addr.top - a->addr.base;
+                       switch(i){
+                       case SEGDATA:
+                               if(pdat)
+                                       *pdat += z;
+                               /* was missing: data fell through into the library count */
+                               break;
+                       case SEGPRIVATE:
+                               if(plib)
+                                       *plib += z;
+                               break;
+                       case SEGSHARED:
+                               if(pshr)
+                                       *pshr += z;
+                               break;
+                       case SEGSTACK:
+                               if(pstk)
+                                       *pstk += z;
+                               break;
+                       }
+                       if(pexe && (a->prot & PROT_EXEC))
+                               *pexe += z;
+                       size += z;
+               }
+               qunlock(seg);
+       }
+       qunlock(space);
+
+       return size;
+}
+
+/*
+ * Argument block of sys_linux_mmap: the mmap arguments are passed
+ * through a pointer to this structure instead of individually.
+ * The fields are handed to sys_mmap in this order.
+ */
+struct linux_mmap_args {
+       ulong addr;
+       int len;
+       int prot;
+       int flags;
+       int fd;
+       ulong offset;   /* in bytes; sys_linux_mmap requires it to be page aligned */
+};
+
+/*
+ * mmap variant taking its arguments in a struct linux_mmap_args.
+ * The byte offset has to be page aligned (else -EINVAL); it is
+ * converted to a page offset and the request is passed to sys_mmap.
+ */
+ulong
+sys_linux_mmap(void *a)
+{
+       struct linux_mmap_args *args;
+       ulong off;
+
+       args = a;
+       off = args->offset;
+       if(pagealign(off) != off)
+               return -EINVAL;
+
+       return sys_mmap(args->addr, args->len, args->prot,
+               args->flags, args->fd, off / PAGESIZE);
+}
+
+/*
+ * sys_mmap maps len bytes (rounded with pagealign) at addr in the
+ * current process.
+ *
+ * addr must be page aligned and the range must not be empty or wrap
+ * (-EINVAL). Unless MAP_ANONYMOUS is set, fd must name an open file
+ * (-EBADF). The address range is reserved with mapspace(); for file
+ * mappings a Filemap recording path, offset and mode is attached to
+ * the area and the file contents are read into memory right away.
+ * pgoff is the file offset in units of PAGESIZE.
+ *
+ * Returns the base address of the mapping, or a negated errno
+ * converted to ulong.
+ */
+ulong
+sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff)
+{
+       Space *space;
+       Seg *seg;
+       Range r;
+       ulong o;
+       int e, n;
+       Area *a;
+       Filemap *f;
+       Ufile *file;
+
+       trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff);
+
+       if(pagealign(addr) != addr)
+               return (ulong)-EINVAL;
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(r.top <= r.base)
+               return (ulong)-EINVAL;
+
+       file = nil;
+       if((flags & MAP_ANONYMOUS)==0)
+               if((file = fdgetfile(fd))==nil)
+                       return (ulong)-EBADF;
+
+       space = current->mem;
+       qlock(space);
+       if((a = mapspace(space, r, flags, prot, &e)) == nil){
+               qunlock(space);
+               /* NOTE(review): file is nil for anonymous mappings; relies on putfile accepting nil */
+               putfile(file);
+               return (ulong)e;
+       }
+
+       /* mapspace returned with the area's segment locked */
+       seg = a->seg;
+       r = a->addr;
+
+       if(flags & MAP_ANONYMOUS){
+               mergearea(a);
+               qunlock(seg);
+               qunlock(space);
+
+               return r.base;
+       }
+
+       o = pgoff * PAGESIZE;
+
+       /* take a Filemap from the segment's free list or allocate one */
+       if(f = seg->freefilemap)
+               seg->freefilemap = f->next;
+       if(f == nil)
+               f = kmalloc(sizeof(Filemap));
+       f->ref = 1;
+       f->addr = r;
+       f->next = nil;
+       f->path = kstrdup(file->path);
+       f->offset = o;
+       /* keep a reference to the file only if it was not opened read-only */
+       if((f->mode = file->mode) != O_RDONLY){
+               f->file = getfile(file);
+       } else {
+               f->file = nil;
+       }
+       a->filemap = f;
+       qunlock(seg);
+       qunlock(space);
+
+       trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top);
+
+       /* populate the mapping; a short file or read error just stops the copy */
+       addr = r.base;
+       while(addr < r.top){
+               n = preadfile(file, (void*)addr, r.top - addr, o);
+               if(n == 0)
+                       break;
+               if(n < 0){
+                       trace("read failed at offset %lux for address %lux failed: %r", o, addr);
+                       break;
+               }
+               addr += n;
+               o += n;
+       }
+
+       putfile(file);
+
+       return r.base;
+}
+
+/*
+ * Unmap len bytes (rounded with pagealign) starting at addr from the
+ * current process. addr must be page aligned and the range must not
+ * be empty or wrap around, otherwise -EINVAL. Returns 0 on success.
+ */
+int sys_munmap(ulong addr, ulong len)
+{
+       Space *space;
+       Range r;
+
+       trace("sys_munmap(%lux, %lux)", addr, len);
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(pagealign(addr) != addr || r.top <= r.base)
+               return -EINVAL;
+
+       space = current->mem;
+       qlock(space);
+       unmapspace(space, r);
+       qunlock(space);
+
+       return 0;
+}
+
+/*
+ * brk system call: ask brkspace() to move the program break of the
+ * current process to bk, holding the Space lock meanwhile.
+ * Returns whatever brkspace() reports as the resulting break.
+ */
+ulong
+sys_brk(ulong bk)
+{
+       Space *space;
+       ulong cur;
+
+       trace("sys_brk(%lux)", bk);
+
+       space = current->mem;
+
+       qlock(space);
+       cur = brkspace(space, bk);
+       qunlock(space);
+
+       return cur;
+}
+
+/*
+ * sys_mprotect records the protection prot for the range
+ * [addr, addr+len) of the current process. len is rounded with
+ * pagealign; addr must be page aligned and len non-zero (-EINVAL).
+ *
+ * Areas that only partly lie inside the range are split so that the
+ * new protection applies to the requested part only. Returns 0 if at
+ * least one area was found at addr, -ENOMEM otherwise.
+ *
+ * NOTE(review): only the Area bookkeeping is updated here; no call
+ * that changes the protection of the underlying pages is visible.
+ */
+int sys_mprotect(ulong addr, ulong len, int prot)
+{
+       Space *space;
+       Seg *seg;
+       Area *a, *b;
+       int err;
+
+       trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot);
+
+       len = pagealign(len);
+       if(pagealign(addr) != addr)
+               return -EINVAL;
+       if(len == 0)
+               return -EINVAL;
+
+       err = -ENOMEM;
+       space = current->mem;
+       qlock(space);
+       /* addr2seg returns the segment locked */
+       if(seg = addr2seg(space, addr)){
+               for(a = addr2area(seg, addr); a!=nil; a=a->next){
+                       if(addr + len <= a->addr.base)
+                               break;
+                       err = 0;
+                       if(a->prot == prot)
+                               continue;
+                       /* wakes the futexes of the whole area, not just the affected part */
+                       wakefutexarea(a, a->addr);
+                       if(a->addr.base < addr){
+                               /* split off the part below the range; b keeps the old prot */
+                               b = duparea(a);
+                               a->addr.base = addr;
+                               b->addr.top = addr;
+                               linkarea(seg, b);
+                       }
+                       if(a->addr.top > addr + len){
+                               /* split off the part above the range; b keeps the old prot */
+                               b = duparea(a);
+                               a->addr.top = addr + len;
+                               b->addr.base = addr + len;
+                               linkarea(seg, b);
+                       }
+                       trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot);
+                       a->prot = prot;
+               }
+               qunlock(seg);
+       }
+       qunlock(space);
+
+       return err;
+}
+
+/*
+ * msync system call: run syncspace() over len bytes (rounded with
+ * pagealign) starting at addr. addr must be page aligned and the
+ * range must not be empty or wrap around, otherwise -EINVAL.
+ * flags is only traced. Returns 0 on success.
+ */
+int sys_msync(ulong addr, ulong len, int flags)
+{
+       Space *space;
+       Range r;
+
+       trace("sys_msync(%lux, %lux, %x)", addr, len, flags);
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(pagealign(addr) != addr || r.top <= r.base)
+               return -EINVAL;
+
+       space = current->mem;
+
+       qlock(space);
+       syncspace(space, r);
+       qunlock(space);
+
+       return 0;
+}
+
+/*
+ * mremap system call: forwards to remapspace() with the Space of the
+ * current process locked. Note the different argument order: the
+ * system call passes flags before newaddr, remapspace takes newaddr
+ * first. Returns the address of the mapping or a negated errno
+ * converted to ulong.
+ */
+ulong
+sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr)
+{
+       Space *space;
+       ulong r;        /* same type as remapspace's result; was int */
+
+       trace("sys_mremap(%lux, %lux, %lux, %x, %lux)",
+               addr, oldlen, newlen, flags, newaddr);
+
+       space = current->mem;
+       qlock(space);
+       r = remapspace(space, addr, oldlen, newlen, newaddr, flags);
+       qunlock(space);
+
+       return r;
+}
+
+/*
+ * Operation codes accepted by sys_futex in its op argument.
+ * The values are implicit (0..4) and follow the order listed here.
+ */
+enum {
+       FUTEX_WAIT,             /* sleep while *addr still equals val */
+       FUTEX_WAKE,             /* wake up to val waiters */
+       FUTEX_FD,               /* not handled by sys_futex (-ENOSYS) */
+       FUTEX_REQUEUE,          /* wake some waiters, move others to addr2 */
+       FUTEX_CMP_REQUEUE,      /* like FUTEX_REQUEUE, but only if *addr equals val3 */
+};
+
+/*
+ * sys_futex implements the futex operations FUTEX_WAIT, FUTEX_WAKE,
+ * FUTEX_REQUEUE and FUTEX_CMP_REQUEUE on the word at addr.
+ *
+ * Futex objects hang off the Area that contains addr and are matched
+ * by address. The segment containing addr stays locked from the
+ * addr2seg() call until the common exit at out:.
+ *
+ * Returns a negated errno on failure (-EFAULT if addr is not mapped,
+ * -ENOSYS for unknown ops, -EWOULDBLOCK, -ETIMEDOUT, -EAGAIN ...),
+ * otherwise the result of the sleep/wake primitives.
+ * For the requeue operations ptime is not a pointer but carries the
+ * integer val2.
+ */
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3)
+{
+       Space *space;
+       Seg *seg;
+       Area *a;
+       Futex *fu, *fu2;
+       int err, val2;
+       vlong timeout;
+
+       trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3);
+
+       seg = nil;
+       err = -EFAULT;
+       if((space = current->mem) == 0)
+               goto out;
+
+       qlock(space);
+       /* addr2seg returns the segment locked; it is released at out: */
+       if((seg = addr2seg(space, (ulong)addr)) == nil){
+               qunlock(space);
+               goto out;
+       }
+       qunlock(space);
+       if((a = addr2area(seg, (ulong)addr)) == nil)
+               goto out;
+       /* look for an existing futex at addr; fu stays nil if there is none */
+       for(fu = a->futex; fu; fu = fu->next)
+               if(fu->addr == addr)
+                       break;
+
+       switch(op){
+       case FUTEX_WAIT:
+               trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr);
+
+               if(fu == nil){
+                       /* first waiter: create the futex and push it on the area's list */
+                       if(fu = seg->freefutex){
+                               seg->freefutex = fu->next;
+                       } else {
+                               fu = kmallocz(sizeof(Futex), 1);
+                       }
+                       fu->ref = 1;
+                       fu->addr = addr;
+                       if(fu->next = a->futex)
+                               fu->next->link = &fu->next;
+                       fu->link = &a->futex;
+                       a->futex = fu;
+               } else {
+                       incref(fu);
+               }
+
+               err = 0;
+               timeout = 0;
+               if(ptime != nil){
+                       struct linux_timespec *ts = ptime;
+                       vlong now;
+
+                       wakeme(1);
+                       now = nsec();
+                       /* a restarted call keeps the absolute deadline computed the first time */
+                       if(current->restart->syscall){
+                               timeout = current->restart->futex.timeout;
+                       } else {
+                               timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec;
+                       }
+                       if(now < timeout){
+                               current->timeout = timeout;
+                               setalarm(timeout);
+                       } else {
+                               err = -ETIMEDOUT;
+                       }
+               }
+               if(err == 0){
+                       if(*addr != val){
+                               err = -EWOULDBLOCK;
+                       } else {
+                               /* presumably sleepq releases seg while sleeping -- TODO confirm */
+                               err = sleepq(fu, seg, 1);
+                       }
+               }
+               if(ptime != nil){
+                       current->timeout = 0;
+                       wakeme(0);
+               }
+               /* remember the deadline for the restarted call */
+               if(err == -ERESTART)
+                       current->restart->futex.timeout = timeout;
+
+               if(!decref(fu)){
+                       /* last user: unlink (unless already unlinked) and recycle */
+                       if(fu->link){
+                               if(*fu->link = fu->next)
+                                       fu->next->link = fu->link;
+                               fu->link = nil;
+                               fu->next = nil;
+                       }
+                       fu->next = seg->freefutex;
+                       seg->freefutex = fu;
+               }
+               break;
+
+       case FUTEX_WAKE:
+               trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr);
+               err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+               break;
+
+       case FUTEX_CMP_REQUEUE:
+               trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr);
+               /*
+                * the FUTEX_REQUEUE label sits inside this if body on
+                * purpose: both operations share the code after the if,
+                * CMP_REQUEUE only adds the value check in front of it.
+                */
+               if(*addr != val3){
+                       err = -EAGAIN;
+                       break;
+       case FUTEX_REQUEUE:
+                       trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr);
+               }
+               err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+               if(err > 0){
+                       /* for requeue the fourth argument carries val2, not a timespec */
+                       val2 = (int)ptime;
+
+                       /* BUG: fu2 has to be in the same segment as fu */
+                       if(a = addr2area(seg, (ulong)addr2)){
+                               for(fu2 = a->futex; fu2; fu2 = fu2->next){
+                                       if(fu2->addr == addr2){
+                                               err += requeue(fu, fu2, val2);
+                                               break;
+                                       }
+                               }
+                       }
+               }
+               break;
+
+       default:
+               err = -ENOSYS;
+       }
+
+out:
+       if(seg)
+               qunlock(seg);
+       return err;
+}
diff --git a/linux_emul_base/miscdev.c b/linux_emul_base/miscdev.c
new file mode 100644 (file)
index 0000000..21ee4aa
--- /dev/null
@@ -0,0 +1,156 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <mp.h>
+#include <libsec.h>
+
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* Kind of pseudo device behind a Miscfile; path2m() maps paths to these. */
+enum
+{
+       Mnull,          /* /dev/null */
+       Mzero,          /* /dev/zero */
+       Mfull,          /* /dev/full */
+       Mrandom,        /* /dev/random */
+       Murandom,       /* /dev/urandom */
+       Mmax,           /* one past the last kind */
+};
+
+/* Open file on one of the pseudo devices served by this driver. */
+typedef struct Miscfile Miscfile;
+struct Miscfile
+{
+       Ufile;          /* unnamed Ufile member; code here casts Ufile* to Miscfile* */
+       int     m;      /* M* kind chosen by openmisc() */
+};
+
+/*
+ * Translate a device path into its M* kind.
+ * Returns -1 when the path is not one of the nodes handled here.
+ */
+static int
+path2m(char *path)
+{
+       static struct {
+               char    *name;
+               int     kind;
+       } tab[] = {
+               { "/dev/null",          Mnull },
+               { "/dev/zero",          Mzero },
+               { "/dev/full",          Mfull },
+               { "/dev/random",        Mrandom },
+               { "/dev/urandom",       Murandom },
+       };
+       int i;
+
+       for(i = 0; i < nelem(tab); i++)
+               if(strcmp(path, tab[i].name) == 0)
+                       return tab[i].kind;
+       return -1;
+}
+
+/*
+ * Open handler: allocate a Miscfile for path and hand it back through pf.
+ * The third (unnamed) argument is not used.
+ * Returns 0 on success, -ENOENT when path is not one of our devices.
+ */
+static int
+openmisc(char *path, int mode, int, Ufile **pf)
+{
+       Miscfile *mf;
+       int kind;
+
+       kind = path2m(path);
+       if(kind < 0)
+               return -ENOENT;
+
+       mf = kmallocz(sizeof(*mf), 1);
+       mf->m = kind;
+       mf->dev = MISCDEV;
+       mf->fd = -1;            /* no Plan 9 descriptor backs these files */
+       mf->path = kstrdup(path);
+       mf->mode = mode;
+       mf->ref = 1;
+       *pf = mf;
+       return 0;
+}
+
+/*
+ * Close handler.  openmisc() sets fd to -1 and keeps no other per-file
+ * resource in this driver, so there is nothing to release; always returns 0.
+ */
+static int
+closemisc(Ufile *)
+{
+       return 0;
+}
+
+/*
+ * Read len bytes from a pseudo device into buf; the offset is ignored.
+ *   /dev/null              end of file (returns 0)
+ *   /dev/zero, /dev/full   buf is filled with zero bytes
+ *   /dev/random            bytes from genrandom()
+ *   /dev/urandom           bytes from prng()
+ * Returns the number of bytes stored, or -EIO for an unknown kind.
+ */
+static int
+readmisc(Ufile *f, void *buf, int len, vlong)
+{
+       switch(((Miscfile*)f)->m){
+       case Mnull:
+               return 0;
+       case Mzero:
+       case Mfull:
+               /* Linux /dev/full reads like /dev/zero; only writes to it fail */
+               memset(buf, 0, len);
+               return len;
+       case Mrandom:
+               genrandom(buf, len);
+               return len;
+       case Murandom:
+               prng(buf, len);
+               return len;
+       default:
+               return -EIO;
+       }
+}
+
+/*
+ * Write handler: the data is discarded.  /dev/full refuses every write
+ * with -ENOSPC, the other devices report len bytes written, and an
+ * unknown kind yields -EIO.
+ */
+static int
+writemisc(Ufile *f, void *, int len, vlong)
+{
+       int kind;
+
+       kind = ((Miscfile*)f)->m;
+       if(kind == Mfull)
+               return -ENOSPC;
+       if(kind == Mnull || kind == Mzero || kind == Mrandom || kind == Murandom)
+               return len;
+       return -EIO;
+}
+
+/*
+ * Stat handler: describe path as a world read/writable character device
+ * owned by the calling process.  All three timestamps are derived from
+ * boottime.  Returns 0, or -ENOENT when path is not one of our devices.
+ */
+static int
+statmisc(char *path, int, Ustat *s)
+{
+       if(path2m(path) < 0)
+               return -ENOENT;
+
+       s->ino = hashpath(path);
+       s->mode = 0666 | S_IFCHR;
+       s->size = 0;
+       s->dev = 0;
+       s->rdev = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+       return 0;
+}
+
+/*
+ * Fstat handler: stat the open file by the path recorded at open time.
+ * Returns whatever fsstat() returns.
+ */
+static int
+fstatmisc(Ufile *f, Ustat *s)
+{
+       return fsstat(f->path, 0, s);
+}
+
+/* Driver method table for the pseudo devices; installed by miscdevinit(). */
+static Udev miscdev =
+{
+       .open = openmisc,
+       .read = readmisc,
+       .write = writemisc,
+       .close = closemisc,
+       .stat = statmisc,
+       .fstat = fstatmisc,
+};
+
+/*
+ * Register the driver in devtab, mount it on each device path it serves
+ * and seed rand() from truerand().
+ */
+void miscdevinit(void)
+{
+       static char *nodes[] = {
+               "/dev/null",
+               "/dev/zero",
+               "/dev/full",
+               "/dev/random",
+               "/dev/urandom",
+       };
+       int i;
+
+       devtab[MISCDEV] = &miscdev;
+
+       for(i = 0; i < nelem(nodes); i++)
+               fsmount(&miscdev, nodes[i]);
+
+       srand(truerand());
+}
diff --git a/linux_emul_base/mkfile b/linux_emul_base/mkfile
new file mode 100644 (file)
index 0000000..422112c
--- /dev/null
@@ -0,0 +1,67 @@
+# mkfile for linuxemu: builds the emulator binary, the generated system
+# call table and an acid type library, and installs the `linux' rc script.
+</$objtype/mkfile
+
+TARG=linuxemu
+BIN=$home/bin/$objtype
+RCBIN=$home/bin/rc
+CFLAGS=-FTVw
+
+# objects linked into $TARG
+OFILES=\
+       bits.$O \
+       bufproc.$O \
+       error.$O \
+       exec.$O \
+       file.$O \
+       fs.$O \
+       main.$O \
+       mem.$O \
+       poll.$O \
+       proc.$O \
+       signal.$O \
+       stat.$O \
+       time.$O \
+       tls.$O \
+       trace.$O \
+       trap.$O \
+       linuxcall.$O \
+       consdev.$O \
+       dspdev.$O \
+       miscdev.$O \
+       pipedev.$O \
+       ptydev.$O \
+       rootdev.$O \
+       sockdev.$O \
+       procdev.$O \
+
+
+HFILES=fns.h dat.h linux.h
+
+CLEANFILES=linuxcalltab.out linuxdat.acid
+
+</sys/src/cmd/mkone
+
+# linuxcalltab.out is generated by running linuxcalltab.awk over linuxcalltab
+linuxcalltab.out:      linuxcalltab linuxcalltab.awk
+       ./linuxcalltab.awk <linuxcalltab >$target
+
+# linuxcall.$O must be rebuilt when the generated table changes
+# (presumably linuxcall.c includes linuxcalltab.out - verify)
+linuxcall.$O:  linuxcalltab.out
+
+# acid library: main.c is compiled with -a, every other source with -aa.
+# NOTE(review): the prerequisite list names fewer sources than the recipe
+# reads, so edits to the other files do not trigger a rebuild.
+linuxdat.acid: $HFILES main.c trace.c signal.c mem.c file.c
+       rm -f $target
+       for(i in main.c){
+               $CC -a $i >>$target
+       }
+       for(i in bufproc.c error.c exec.c file.c fs.c mem.c poll.c \
+               proc.c signal.c stat.c time.c tls.c trace.c trap.c \
+               consdev.c dspdev.c miscdev.c pipedev.c \
+               ptydev.c rootdev.c sockdev.c procdev.c){
+               $CC -aa $i >>$target
+       }
+
+# the `linux' script in this directory is copied into $RCBIN
+$RCBIN/linux:  linux
+       cp linux $RCBIN/linux
+       
+acid:V:        linuxdat.acid
+
+# adds the script as a prerequisite of the install target
+install:V:     $RCBIN/linux
+       
+
+
diff --git a/linux_emul_base/pipedev.c b/linux_emul_base/pipedev.c
new file mode 100644 (file)
index 0000000..dd4f5f6
--- /dev/null
@@ -0,0 +1,202 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* One end of a pipe created by sys_pipe(). */
+typedef struct Pipe Pipe;
+
+struct Pipe
+{
+       Ufile;                  /* unnamed Ufile member; fd holds the Plan 9 pipe descriptor */
+       void    *bufproc;       /* handle from newbufproc(); created on first use by bufprocpipe() */
+       ulong atime;            /* time(nil) of the last successful read */
+       ulong mtime;            /* time(nil) of the last successful write */
+       int ino;                /* inode number handed out by sys_pipe() */
+};
+
+enum{
+       Maxatomic = 64*1024,    /* largest chunk pipewrite() passes to a single write() */
+};
+
+/*
+ * Write len bytes from buf to the Plan 9 descriptor fd in chunks of at
+ * most Maxatomic bytes.  Each chunk is bracketed by notifyme(); when
+ * notifyme(1) reports a pending notification the chunk is not written
+ * and -ERESTART is used as the error.
+ * Returns the number of bytes written.  If nothing was written before a
+ * failure, the negative error (-ERESTART or mkerror()) is returned.
+ */
+int
+pipewrite(int fd, void *buf, int len)
+{
+       uchar *p, *e;
+       int err, n;
+
+       p = buf;
+       e = p + len;
+       while(p < e){
+               n = e - p;
+               if(n > Maxatomic)
+                       n = Maxatomic;
+               if(notifyme(1))
+                       err = -ERESTART;
+               else {
+                       err = write(fd, p, n);
+                       notifyme(0);
+                       if(err < 0)
+                               err = mkerror();
+               }
+               if(err < 0){
+                       /* report the error only if no data went out at all */
+                       if(p == (uchar*)buf)
+                               return err;
+                       break;
+               }
+               /* no progress: stop instead of retrying the same chunk forever */
+               if(err == 0)
+                       break;
+               p += err;
+       }
+       return p - (uchar*)buf;
+}
+
+/*
+ * Close handler: close the Plan 9 descriptor first, then release the
+ * buffer process handle.  Always returns 0.
+ */
+static int
+closepipe(Ufile *file)
+{
+       Pipe *p;
+
+       p = (Pipe*)file;
+       close(p->fd);
+       freebufproc(p->bufproc);
+       return 0;
+}
+
+/*
+ * Return the buffer process handle of pipe, creating it with
+ * newbufproc() on first use.
+ */
+static void*
+bufprocpipe(Pipe *pipe)
+{
+       void *bp;
+
+       bp = pipe->bufproc;
+       if(bp != nil)
+               return bp;
+       bp = newbufproc(pipe->fd);
+       pipe->bufproc = bp;
+       return bp;
+}
+
+/* Poll handler: delegate to the pipe's buffer process (created if needed). */
+static int
+pollpipe(Ufile *file, void *tab)
+{
+       Pipe *p;
+       void *bp;
+
+       p = (Pipe*)file;
+       bp = bufprocpipe(p);
+       return pollbufproc(bp, p, tab);
+}
+
+/*
+ * Read handler.  Non-blocking pipes, and pipes that already have a
+ * buffer process, read through readbufproc(); otherwise read() is called
+ * directly on the descriptor, bracketed by notifyme().
+ * Returns the byte count or a negative error; atime is refreshed when
+ * data was read.
+ */
+static int
+readpipe(Ufile *file, void *buf, int len, vlong)
+{
+       Pipe *p;
+       int n;
+
+       p = (Pipe*)file;
+       if((p->mode & O_NONBLOCK) || p->bufproc != nil){
+               n = readbufproc(bufprocpipe(p), buf, len, 0, (p->mode & O_NONBLOCK));
+       } else if(notifyme(1)){
+               return -ERESTART;
+       } else {
+               n = read(p->fd, buf, len);
+               notifyme(0);
+               if(n < 0)
+                       n = mkerror();
+       }
+       if(n > 0)
+               p->atime = time(nil);
+       return n;
+}
+
+/*
+ * Write handler: hand the data to pipewrite() and refresh mtime when at
+ * least one byte was written.  Returns pipewrite()'s result.
+ */
+static int
+writepipe(Ufile *file, void *buf, int len, vlong)
+{
+       Pipe *p;
+       int n;
+
+       p = (Pipe*)file;
+       n = pipewrite(p->fd, buf, len);
+       if(n <= 0)
+               return n;
+       p->mtime = time(nil);
+       return n;
+}
+
+/*
+ * Ioctl handler.  Only request 0x541B (Linux FIONREAD) is supported: the
+ * number of readable bytes reported by nreadablebufproc() is stored in
+ * the int that arg points to.
+ * Returns 0 on success, -EINVAL for a nil arg, the negative result of
+ * nreadablebufproc() (with *arg set to 0), or -ENOTTY for other requests.
+ */
+static int
+ioctlpipe(Ufile *file, int cmd, void *arg)
+{
+       Pipe *p;
+       int avail;
+
+       if(cmd != 0x541B)
+               return -ENOTTY;
+       if(arg == nil)
+               return -EINVAL;
+
+       p = (Pipe*)file;
+       avail = nreadablebufproc(bufprocpipe(p));
+       if(avail < 0){
+               *((int*)arg) = 0;
+               return avail;
+       }
+       *((int*)arg) = avail;
+       return 0;
+}
+
+/*
+ * pipe(2) emulation: create a Plan 9 pipe, wrap each end in a Pipe and
+ * store the two new Linux descriptors in fds[0] and fds[1].
+ * Returns 0 on success, mkerror() when pipe() fails, or the negative
+ * value returned by newfd().
+ */
+int sys_pipe(int *fds)
+{
+       Pipe *file;
+       int p[2];
+       int i, fd;
+       static int ino = 0x1234;
+
+       trace("sys_pipe(%p)", fds);
+
+       if(pipe(p) < 0)
+               return mkerror();
+
+       for(i=0; i<2; i++){
+               file = kmallocz(sizeof(Pipe), 1);
+               file->ref = 1;
+               file->mode = O_RDWR;
+               file->dev = PIPEDEV;
+               file->fd =  p[i];
+               file->ino = ino++;
+               file->atime = file->mtime = time(nil);
+               if((fd = newfd(file, 0)) < 0){
+                       /*
+                        * Once fds[0] exists it owns p[0]: closing it is
+                        * expected to run closepipe(), which closes p[0].
+                        * Only close p[0] directly while no file wraps it,
+                        * otherwise the descriptor would be closed twice.
+                        */
+                       if(i > 0)
+                               sys_close(fds[0]);
+                       else
+                               close(p[0]);
+                       close(p[1]);
+                       /* NOTE(review): file is not released here - confirm whether newfd() frees it on failure */
+                       return fd;
+               }
+               fds[i] = fd;
+       }
+       return 0;
+}
+
+/*
+ * Fill s for a pipe: FIFO with mode 0666, owned by the calling process,
+ * size 0, with the inode number and times recorded in the Pipe.
+ * Other fields of s are left untouched.
+ */
+static void
+fillstat(Pipe *pipe, Ustat *s)
+{
+       s->mode = S_IFIFO | 0666;
+       s->ino = pipe->ino;
+       s->size = 0;
+       s->atime = pipe->atime;
+       s->mtime = pipe->mtime;
+       s->uid = current->uid;
+       s->gid = current->gid;
+}
+
+/* Fstat handler: describe the pipe through fillstat(); always returns 0. */
+static int
+fstatpipe(Ufile *file, Ustat *s)
+{
+       fillstat((Pipe*)file, s);
+       return 0;
+}
+
+/* Driver method table for pipes; there is no open method, pipes come from sys_pipe(). */
+static Udev pipedev = 
+{
+       .read = readpipe,
+       .write = writepipe,
+       .poll = pollpipe,
+       .close = closepipe,
+       .ioctl = ioctlpipe,
+       .fstat = fstatpipe,
+};
+
+/* Register the pipe driver in devtab. */
+void pipedevinit(void)
+{
+       devtab[PIPEDEV] = &pipedev;
+}
diff --git a/linux_emul_base/poll.c b/linux_emul_base/poll.c
new file mode 100644 (file)
index 0000000..79fb8f5
--- /dev/null
@@ -0,0 +1,250 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Called from driver poll methods: queue the current process on q and
+ * push the new wait entry onto the list whose head t points to.  The
+ * entry holds a reference to f.  Does nothing if any argument is nil.
+ */
+void pollwait(Ufile *f, Uwaitq *q, void *t)
+{
+       Uwait **head, *w;
+
+       if(f != nil && t != nil && q != nil){
+               head = t;
+               w = addwaitq(q);
+               w->file = getfile(f);
+               w->next = *head;
+               *head = w;
+       }
+}
+
+/* Pop every wait entry off the list at *p and give it to delwaitq(). */
+static void
+clearpoll(Uwait **p)
+{
+       Uwait *w;
+
+       for(w = *p; w != nil; w = *p){
+               *p = w->next;
+               delwaitq(w);
+       }
+}
+
+/* One entry of the array passed to sys_poll(). */
+struct linux_pollfd
+{
+       int                     fd;             /* descriptor to check; negative entries are ignored */
+       short           events;         /* requested POLL* bits */
+       short           revents;        /* POLL* bits found, written by sys_poll() */
+};
+
+/*
+ * poll(2) emulation.  p points to nfd linux_pollfd entries; timeout is
+ * in milliseconds (it is multiplied by 1000000 to obtain nanoseconds).
+ * Returns the number of entries with non-zero revents, 0 when the
+ * timeout expires first, -EINVAL for a negative nfd, or the negative
+ * value from sleepproc() when the wait is interrupted.
+ */
+int sys_poll(void *p, int nfd, long timeout)
+{
+       int i, e, err;
+       Uwait *tab;
+       Ufile *file;
+       vlong now, t;
+       struct linux_pollfd *fds = p;
+
+       trace("sys_poll(%p, %d, %ld)", p, nfd, timeout);
+
+       if(nfd < 0)
+               return -EINVAL;
+
+       t = 0;
+       wakeme(1);
+       if(timeout > 0){
+               now = nsec();
+               /* presumably non-zero when this call is a restart: reuse the saved absolute deadline */
+               if(current->restart->syscall){
+                       t = current->restart->poll.timeout;
+               } else {
+                       t = now + timeout*1000000LL;
+               }
+               if(now < t){
+                       current->timeout = t;
+                       setalarm(t);
+               }
+       }
+
+       tab = nil;
+       for(;;){
+               /* drop wait entries left over from the previous pass */
+               clearpoll(&tab);
+
+               err = 0;
+               for(i=0; i<nfd; i++){
+                       e = 0;
+                       if(fds[i].fd >= 0){
+                               e = POLLNVAL;
+                               if(file = fdgetfile(fds[i].fd)){
+                                       if(devtab[file->dev]->poll == nil){
+                                               /* drivers without a poll method count as readable and writable */
+                                               e = POLLIN|POLLOUT;
+                                       } else {
+                                               /* register for wakeups only while nothing is ready yet */
+                                               e = devtab[file->dev]->poll(file, (err == 0) ? &tab : nil);
+                                       }
+                                       putfile(file);
+                                       /* POLLERR and POLLHUP are reported even when not requested */
+                                       e &= fds[i].events | POLLERR | POLLHUP;
+                               }
+                       }
+                       if(fds[i].revents = e){
+                               trace("sys_poll(): fd %d is ready with %x", fds[i].fd, fds[i].revents);
+                               err++;
+                       }
+               }
+               if(err > 0)
+                       break;
+               if(timeout >= 0 && current->timeout == 0){
+                       trace("sys_poll(): timeout");
+                       break;
+               }
+               if((err = sleepproc(nil, 1)) < 0){
+                       trace("sys_poll(): interrupted");
+                       /* remember the deadline so a restarted call can reuse it */
+                       current->restart->poll.timeout = t;
+                       break;
+               }
+       }
+       clearpoll(&tab);
+       wakeme(0);
+
+       if(timeout > 0)
+               current->timeout = 0;
+
+       return err;
+}
+
+/*
+ * select(2) emulation.  rfd, wfd and efd are bitmaps of ulong words (any
+ * may be nil); only descriptors below nfd are examined.  ptv points to a
+ * linux_timeval or is nil for no timeout.
+ * On return the three sets hold only the ready descriptors and, when a
+ * timeout was given, tv holds the time that was left.
+ * Returns the number of bits set in the result sets, 0 on timeout,
+ * -EINVAL for bad arguments, or the negative value from sleepproc()
+ * when the wait is interrupted.
+ */
+int sys_select(int nfd, ulong *rfd, ulong *wfd, ulong *efd, void *ptv)
+{
+       int i, p, e, w, nwrd, nbits, fd, err;
+       ulong m;
+       Uwait *tab;
+       Ufile *file;
+       vlong now, t;
+       struct linux_timeval *tv = ptv;
+       struct {
+               int fd;
+               int ret;
+       } *ardy, astk[16];
+
+       trace("sys_select(%d, %p, %p, %p, %p)", nfd, rfd, wfd, efd, ptv);
+
+       if(nfd < 0)
+               return -EINVAL;
+
+       if(tv != nil)
+               if(tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+                       return -EINVAL;
+
+       /* number of ulong words needed to hold nfd bits */
+       nwrd = (nfd + (8 * sizeof(m))-1) / (8 * sizeof(m));
+
+       /* count the descriptors below nfd that appear in any set */
+       nbits = 0;
+       fd = 0;
+       for(w=0; w<nwrd; w++)
+               for(m=1; m; m<<=1, fd++)
+                       if(fd < nfd && ((rfd && rfd[w] & m) || (wfd && wfd[w] & m) || (efd && efd[w] & m)))
+                               nbits++;
+
+       /* ready list: on the stack when small, otherwise allocated */
+       if(nbits > nelem(astk)){
+               ardy = kmalloc(nbits * sizeof(ardy[0]));
+       } else {
+               ardy = astk;
+       }
+
+       t = 0;
+       wakeme(1);
+       if(tv != nil){
+               now = nsec();
+               /* presumably non-zero when this call is a restart: reuse the saved absolute deadline */
+               if(current->restart->syscall){
+                       t = current->restart->select.timeout;
+               } else {
+                       t = now + tv->tv_sec*1000000000LL + tv->tv_usec*1000;
+               }
+               if(now < t){
+                       current->timeout = t;
+                       setalarm(t);
+               }
+       }
+
+       tab = nil;
+       for(;;){
+               /* drop wait entries left over from the previous pass */
+               clearpoll(&tab);
+
+               fd = 0;
+               err = 0;
+               for(w=0; w<nwrd; w++){
+                       for(m=1; m; m<<=1, fd++){
+                               /* bits at or above nfd are not part of the request */
+                               if(fd >= nfd)
+                                       break;
+                               p = 0;
+                               if(rfd && rfd[w] & m)
+                                       p |= POLLIN;
+                               if(wfd && wfd[w] & m)
+                                       p |= POLLOUT;
+                               if(efd && efd[w] & m)
+                                       p |= POLLERR;
+                               if(!p || ((file = fdgetfile(fd)) == nil))
+                                       continue;
+                               if(devtab[file->dev]->poll == nil){
+                                       /* drivers without a poll method count as readable and writable */
+                                       e = POLLIN|POLLOUT;
+                               } else {
+                                       /* register for wakeups only while nothing is ready yet */
+                                       e = devtab[file->dev]->poll(file, (err == 0) ? &tab : nil);
+                               }
+                               putfile(file);
+                               if(e &= p) {
+                                       ardy[err].fd = fd;
+                                       ardy[err].ret = e;
+                                       if(++err == nbits)
+                                               break;
+                               }
+                       }
+               }
+               if(err > 0)
+                       break;
+               if(tv != nil && current->timeout == 0){
+                       trace("sys_select(): timeout");
+                       break;
+               }
+               if((err = sleepproc(nil, 1)) < 0){
+                       trace("sys_select(): interrupted");
+                       /* remember the deadline so a restarted call can reuse it */
+                       current->restart->select.timeout = t;
+                       break;
+               }
+       }
+       clearpoll(&tab);
+       wakeme(0);
+
+       if(tv != nil){
+               /* report the time that was left until the deadline */
+               current->timeout = 0;
+               t -= nsec();
+               if(t < 0)
+                       t = 0;
+               tv->tv_sec = (long)(t/1000000000LL);
+               tv->tv_usec = (long)((t%1000000000LL)/1000);
+       }
+
+       if(err >= 0){
+               /* rewrite the sets so that only ready descriptors remain */
+               if(rfd) memset(rfd, 0, nwrd*sizeof(m));
+               if(wfd) memset(wfd, 0, nwrd*sizeof(m));
+               if(efd) memset(efd, 0, nwrd*sizeof(m));
+
+               nbits = 0;
+               for(i=0; i<err; i++){
+                       e = ardy[i].ret;
+                       fd = ardy[i].fd;
+                       w =  fd / (8 * sizeof(m));
+                       /* shift an unsigned one: 1 << 31 overflows a signed int */
+                       m = (ulong)1 << (fd % (8 * sizeof(m)));
+                       if(rfd && (e & POLLIN)){
+                               rfd[w] |= m;
+                               nbits++;
+                       }
+                       if(wfd && (e & POLLOUT)){
+                               wfd[w] |= m;
+                               nbits++;
+                       }
+                       if(efd && (e & POLLERR)){
+                               efd[w] |= m;
+                               nbits++;
+                       }
+               }
+               err = nbits;
+       }
+
+       if(ardy != astk)
+               free(ardy);
+
+       return err;
+}
diff --git a/linux_emul_base/proc.c b/linux_emul_base/proc.c
new file mode 100644 (file)
index 0000000..b950e06
--- /dev/null
@@ -0,0 +1,1777 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* note file of the timer process, opened by initproc(); setalarm() writes "interrupt" to it */
+static int timernotefd;
+static void timerproc(void*);
+
+/* Map a process id to its slot in proctab.proc: (pid - 1) modulo MAXPROC. */
+static int
+pidhash(int pid)
+{
+       int slot;
+
+       slot = pid - 1;
+       return slot % MAXPROC;
+}
+
+/*
+ * Look up the process with thread id tid.
+ * Returns nil when tid is not positive or its slot holds another id.
+ */
+Uproc*
+getproc(int tid)
+{
+       Uproc *p;
+
+       if(tid <= 0)
+               return nil;
+       p = &proctab.proc[pidhash(tid)];
+       return (p->tid == tid) ? p : nil;
+}
+
+/*
+ * Return the process in slot n of the process table, or nil when the
+ * slot is unused (tid <= 0) or n lies outside 0..MAXPROC-1.
+ */
+Uproc*
+getprocn(int n)
+{
+       Uproc *p;
+
+       /* reject indices outside the table instead of reading past it */
+       if(n < 0 || n >= MAXPROC)
+               return nil;
+       p = &proctab.proc[n];
+       if(p->tid > 0)
+               return p;
+       return nil;
+}
+
+/*
+ * Claim a free slot in the process table.  Up to MAXPROC consecutive ids
+ * are taken from proctab.nextpid; the first one whose slot is unused
+ * becomes the new tid, pid, pgid and psid.
+ * Returns the slot, or nil when every candidate slot is in use.
+ */
+static Uproc*
+allocproc(void)
+{
+       Uproc *p;
+       int tries, tid;
+
+       for(tries = MAXPROC; tries > 0; tries--){
+               tid = proctab.nextpid++;
+               p = &proctab.proc[pidhash(tid)];
+               if(p->tid > 0)
+                       continue;
+
+               proctab.alloc++;
+               p->tid = tid;
+               p->pid = tid;
+               p->pgid = tid;
+               p->psid = tid;
+               return p;
+       }
+
+       trace("allocproc(): out of processes");
+       return nil;
+}
+
+/*
+ * Release everything owned by p (cached wait entries, trace state and
+ * the comm/root/cwd/kcwd strings), zero the slot and drop the count of
+ * allocated processes.
+ */
+static void
+freeproc(Uproc *p)
+{
+       Uwait *w, *nxt;
+
+       for(w = p->freewait; w != nil; w = nxt){
+               nxt = w->next;
+               free(w);
+       }
+       p->freewait = nil;
+
+       exittrace(p);
+       free(p->comm);
+       free(p->root);
+       free(p->cwd);
+       free(p->kcwd);
+       memset(p, 0, sizeof(*p));
+       proctab.alloc--;
+}
+
+/*
+ * Set up the first emulated process: allocate its table slot, open its
+ * Plan 9 note and args files, run the subsystem initializers, start the
+ * timer process and record the working directory and start time.
+ */
+void initproc(void)
+{
+       Uproc *p;
+       char buf[1024];
+       int pid;
+
+       /* ids handed out by allocproc() start here */
+       proctab.nextpid = 10;
+
+       p = allocproc();
+       p->kpid = getpid();
+       /* notefd is what wakeup() writes "interrupt" to */
+       snprint(buf, sizeof(buf), "/proc/%d/note", p->kpid);
+       p->notefd = open(buf, OWRITE);
+       /* argsfd is what setprocname() writes to */
+       snprint(buf, sizeof(buf), "/proc/%d/args", p->kpid);
+       p->argsfd = open(buf, ORDWR);
+
+       current = p;
+
+       inittrace();
+       inittime();
+       initsignal();
+       initmem();
+       inittls();
+       initfile();
+
+       if((pid = procfork(timerproc, nil, 0)) < 0)
+               panic("initproc: unable to fork timerproc: %r");
+
+       /* setalarm() pokes the timer process through this note file */
+       snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+       timernotefd = open(buf, OWRITE);
+
+       current->root = nil;
+       current->cwd = kstrdup(getwd(buf, sizeof(buf)));
+       current->kcwd = kstrdup(current->cwd);
+       current->linkloop = 0;
+       current->starttime = nsec();
+
+       inittrap();
+}
+
+/*
+ * Write s to the args file of the calling Plan 9 process.  With a
+ * current Uproc its already open argsfd is used; otherwise
+ * /proc/<pid>/args is opened for this one write.
+ */
+void
+setprocname(char *s)
+{
+       char path[32];
+       int fd;
+
+       if(current != nil){
+               write(current->argsfd, s, strlen(s));
+               return;
+       }
+
+       snprint(path, sizeof(path), "/proc/%d/args", getpid());
+       fd = open(path, OWRITE);
+       if(fd < 0)
+               return;
+       write(fd, s, strlen(s));
+       close(fd);
+}
+
+/*
+ * Note handler installed by kprocfork(): notes starting with "interrupt"
+ * are acknowledged with NCONT, anything else with NDFLT.
+ * (the second noted() is only reached if the first does not apply;
+ * noted() is not expected to return - see notify(2))
+ */
+static void
+intrnote(void *, char *msg)
+{
+       if(strncmp(msg, "interrupt", 9) == 0)
+               noted(NCONT);
+       noted(NDFLT);
+}
+
+/* Arguments procfork() passes to kprocfork() through onstack(). */
+struct kprocforkargs
+{
+       int     flags;                  /* extra rfork() flags, or'ed with RFPROC|RFMEM */
+       void    (*func)(void *aux);     /* body of the new process */
+       void    *aux;                   /* argument handed to func */
+};
+
+/*
+ * Runs on kstack (see procfork()).  Forks with RFPROC|RFMEM plus the
+ * requested flags.  The caller gets rfork()'s result back; the new
+ * process installs intrnote, unmaps user space, clears current, runs
+ * func(aux) and then leaves through exitjmp.
+ */
+static int
+kprocfork(void *arg)
+{
+       struct kprocforkargs args;
+       int pid;
+
+       /* take a private copy: arg points at a local of procfork() */
+       memmove(&args, arg, sizeof(args));
+
+       /* non-zero: the caller's side of the fork (or an rfork error) */
+       if((pid = rfork(RFPROC|RFMEM|args.flags)) != 0)
+               return pid;
+
+       notify(intrnote);
+
+       unmapuserspace();
+       current = nil;
+
+       profme();
+       args.func(args.aux);
+       longjmp(exitjmp, 1);
+       return -1;
+}
+
+/*
+ * procfork starts a kernel process running on kstack.
+ * that process will have linux memory segments (stack, private,
+ * shared) unmapped but plan9 segments (text, bss, stack) shared.
+ * there is no Uproc associated with it! current will be set to nil so
+ * you can't call sys_????() functions in here.
+ * procfork returns the plan9 pid. (useful for posting notes)
+ */
+int procfork(void (*func)(void *aux), void *aux, int flags)
+{
+       struct kprocforkargs a;
+
+       /* bundle the arguments; kprocfork() copies them before forking */
+       a.func = func;
+       a.aux = aux;
+       a.flags = flags;
+
+       return onstack(kstack, kprocfork, &a);
+}
+
+/* value sleepproc() and wakeup() treat as "rendezvous was interrupted" */
+static void *Intr = (void*)~0;
+
+/* values of Uproc.state; they are compared by address, the text is not examined */
+static char Notifyme[] = "notifyme";
+static char Wakeme[] = "wakeme";
+static char Xchange[] = "xchange";
+
+/* messages delivered by wakeup(); sleepproc() returns 0 only for Wakeup */
+static char Wakeup[] = "wakeup";
+static char Abort[] = "abort";
+
+/*
+ * Bracket a blocking Plan 9 call.  notifyme(1) returns 1 without
+ * changing the state when the process is already notified or has
+ * signals pending; otherwise it moves an idle process to Notifyme and
+ * returns 0.  notifyme(0) resets the state to nil and returns 0.
+ */
+int notifyme(int on)
+{
+       Uproc *p;
+       int pending;
+
+       p = current;
+       pending = 0;
+       qlock(p);
+       if(!on){
+               p->state = nil;
+       } else if(p->notified || signalspending(p)){
+               pending = 1;
+       } else if(p->state == nil){
+               p->state = Notifyme;
+       }
+       qunlock(p);
+       return pending;
+}
+
+/*
+ * wakeme(1) moves an idle process (state nil) to Wakeme so that
+ * wakeup() can leave a message for it; wakeme(0) resets the state to nil.
+ */
+void wakeme(int on)
+{
+       Uproc *p;
+
+       p = current;
+       qlock(p);
+       if(!on)
+               p->state = nil;
+       else if(p->state == nil)
+               p->state = Wakeme;
+       qunlock(p);
+}
+
+/*
+ * Put the current process to sleep until wakeup() delivers a message.
+ * l, if not nil, is released while sleeping and re-acquired before
+ * returning.  With non-zero flags the sleep is skipped when signals are
+ * pending.
+ * Returns 0 when the message received is Wakeup, -ERESTART otherwise.
+ */
+int sleepproc(QLock *l, int flags)
+{
+       Uproc *p;
+       void *ret;
+       char *x;
+
+       p = current;
+       qlock(p);
+       x = p->state;
+       if(x == nil || x == Wakeme){
+               /* nothing delivered yet: remember the old state and block */
+               p->xstate = x;
+               p->state = Xchange;
+               if(l != nil)
+                       qunlock(l);
+               qunlock(p);
+               if(flags && signalspending(p)){
+                       ret = Intr;
+               } else {
+                       ret = rendezvous(p, Xchange);
+               }
+               if(ret == Intr){
+                       qlock(p);
+                       if(p->state != Xchange){
+                               /*
+                                * wakeup() has already taken us out of Xchange and
+                                * will rendezvous with us: keep trying until we meet it
+                                */
+                               while((ret = rendezvous(p, Xchange)) == Intr)
+                                       ;
+                       } else {
+                               /* nobody woke us: restore the state we had on entry */
+                               p->state = x;
+                       }
+                       qunlock(p);
+               }
+               if(l != nil)
+                       qlock(l);
+       } else {
+               /* state holds something other than nil/Wakeme: take it as the message */
+               p->state = Wakeme;
+               ret = x;
+               qunlock(p);
+       }
+       return (ret == Wakeup) ? 0 : -ERESTART;
+}
+
+/*
+ * Deliver message m to proc according to its current state.
+ * Returns 1 when something was delivered (a message or an "interrupt"
+ * note), 0 otherwise or when proc is nil.
+ */
+static int
+wakeup(Uproc *proc, char *m, int force)
+{
+       char *x;
+
+       if(proc != nil){
+               qlock(proc);
+               x = proc->state;
+
+               if(x == Wakeme){
+                       /* not sleeping yet: leave m for its next sleepproc() */
+                       proc->state = m;
+                       qunlock(proc);
+                       return 1;
+               }
+               if(x == Xchange){
+                       /* inside sleepproc(): restore its saved state and hand m over */
+                       proc->state = proc->xstate;
+                       proc->xstate = nil;
+                       qunlock(proc);
+                       while(rendezvous(proc, m) == Intr)
+                               ;
+                       return 1;
+               }
+               if((m != Wakeup) && (proc->notified == 0)){
+                       if(x == Notifyme)
+                               proc->state = nil;
+                       if(x == Notifyme || force){
+                               /* break it out of a Plan 9 call by posting a note */
+                               proc->notified = 1;
+                               qunlock(proc);
+                               write(proc->notefd, "interrupt", 9);
+                               return 1;
+                       }
+               }
+               qunlock(proc);
+       }
+       return 0;
+}
+
+/*
+ * Queue the current process on q.  The wait entry comes from the
+ * process's freewait cache when possible, otherwise from kmalloc(); it
+ * is pushed on the front of q under q's lock.  Returns the entry.
+ */
+Uwait* addwaitq(Uwaitq *q)
+{
+       Uproc *self;
+       Uwait *w;
+
+       self = current;
+       w = self->freewait;
+       if(w != nil)
+               self->freewait = w->next;
+       else
+               w = kmalloc(sizeof(*w));
+
+       w->proc = self;
+       w->file = nil;
+       w->next = nil;
+       w->q = q;
+
+       qlock(q);
+       w->nextq = q->w;
+       q->w = w;
+       qunlock(q);
+
+       return w;
+}
+
+/*
+ * Undo addwaitq(): unlink w from its queue under the queue lock, drop
+ * its file reference, clear it and park it on the current process's
+ * freewait cache for reuse.
+ */
+void delwaitq(Uwait *w)
+{
+       Uwaitq *q;
+       Uwait **link;
+
+       q = w->q;
+       qlock(q);
+       link = &q->w;
+       while(*link != nil && *link != w)
+               link = &(*link)->nextq;
+       if(*link == w)
+               *link = w->nextq;
+       qunlock(q);
+
+       w->q = nil;
+       w->nextq = nil;
+
+       w->proc = nil;
+       putfile(w->file);
+       w->file = nil;
+
+       w->next = current->freewait;
+       current->freewait = w;
+}
+
+/*
+ * Move up to nrequeue waiters from the front of q1 to the front of q2.
+ * Returns the number moved, or 0 when both locks could not be obtained
+ * within 1000 attempts.
+ */
+int requeue(Uwaitq *q1, Uwaitq *q2, int nrequeue)
+{
+       int n;
+       Uwait *w;
+
+       n = 1000;
+       for(;;){
+               /* take q1, then only try q2; back off and retry if q2 is busy */
+               qlock(q1);
+               if(canqlock(q2))
+                       break;
+               qunlock(q1);    
+               if(--n <= 0)
+                       return 0;
+               sleep(0);
+       }
+       /* both queues are locked from here on */
+       n = 0;
+       while((w = q1->w) && (n < nrequeue)){
+               q1->w = w->nextq;
+               w->q = q2;
+               w->nextq = q2->w;
+               q2->w = w;
+               n++;
+       }
+       qunlock(q2);
+       qunlock(q1);
+       return n;
+}
+
+/*
+ * Walk q and send Wakeup to its waiters until nwake of them have
+ * accepted it or the list ends.  Returns the number woken; a nil q
+ * yields 0.
+ */
+int wakeq(Uwaitq *q, int nwake)
+{
+       int woken;
+       Uwait *w;
+
+       if(q == nil)
+               return 0;
+
+       woken = 0;
+       qlock(q);
+       w = q->w;
+       while(w != nil && woken < nwake){
+               woken += wakeup(w->proc, Wakeup, 0);
+               w = w->nextq;
+       }
+       qunlock(q);
+       return woken;
+}
+
+/*
+ * Sleep on q: queue the current process, sleep with sleepproc(l, flags)
+ * and unqueue again.  Returns sleepproc()'s result.
+ */
+int sleepq(Uwaitq *q, QLock *l, int flags)
+{
+       Uwait *entry;
+       int r;
+
+       entry = addwaitq(q);
+       r = sleepproc(l, flags);
+       delwaitq(entry);
+       return r;
+}
+
+static Uproc *alarmq;  /* processes with an alarm set, earliest first, linked through Uproc.alarmq */
+
+/*
+ * Arm proc's alarm for absolute time t and keep alarmq sorted by
+ * expiry.  A request that is not earlier than an alarm already set is
+ * ignored.
+ * Returns non-zero when t is earlier than the previous head of the queue
+ * (or the queue was empty), 0 otherwise.
+ */
+int
+procsetalarm(Uproc *proc, vlong t)
+{
+       Uproc **pp;
+       int earliest;
+
+       if(proc->alarm && t >= proc->alarm)
+               return 0;
+
+       /* decided before the queue is modified */
+       earliest = (alarmq == nil) || (t < alarmq->alarm);
+
+       /* unlink proc if it is already queued */
+       pp = &alarmq;
+       while(*pp != nil && *pp != proc)
+               pp = &(*pp)->alarmq;
+       if(*pp == proc)
+               *pp = proc->alarmq;
+
+       /* insert in front of the first entry that expires later than t */
+       pp = &alarmq;
+       while(*pp != nil && (*pp)->alarm <= t)
+               pp = &(*pp)->alarmq;
+       proc->alarm = t;
+       proc->alarmq = *pp;
+       *pp = proc;
+       return earliest;
+}
+
+/*
+ * Arm the current process's alarm for absolute time t and, when
+ * procsetalarm() asks for it, post "interrupt" to the timer process.
+ */
+void
+setalarm(vlong t)
+{
+       int kick;
+
+       qlock(&proctab);
+       kick = procsetalarm(current, t);
+       if(kick)
+               write(timernotefd, "interrupt", 9);
+       qunlock(&proctab);
+}
+
+/* signal.c */
+extern void alarmtimer(Uproc *proc, vlong now);
+
+/*
+ * Body of the timer process started by initproc().  While any process is
+ * allocated it expires the alarms at the head of alarmq: a process whose
+ * timeout has passed is sent Wakeup, one whose timeout is still ahead is
+ * re-queued for that time, and alarmtimer() is called for each expired
+ * entry.  It then sleeps until the next alarm is due, at most 2000 ms.
+ */
+static void
+timerproc(void*)
+{
+       Uproc *h;
+       vlong now, left;
+       long m;
+
+       setprocname("timerproc()");
+
+       while(proctab.alloc > 0){
+               qlock(&proctab);
+               m = 2000;
+               now = nsec();
+               while(h = alarmq){
+                       if(now < h->alarm){
+                               /*
+                                * compare in vlong and never exceed the idle
+                                * delay: a far-off alarm must not overflow m
+                                */
+                               left = (h->alarm - now) / 1000000;
+                               if(left < m)
+                                       m = left;
+                               break;
+                       }
+                       alarmq = h->alarmq;
+                       h->alarm = 0;
+                       h->alarmq = nil;
+                       if(h->timeout){
+                               if(now >= h->timeout){
+                                       h->timeout = 0;
+                                       wakeup(h, Wakeup, 0);
+                               } else
+                                       procsetalarm(h, h->timeout);
+                       }
+                       alarmtimer(h, now);
+               }
+               qunlock(&proctab);
+               /* NOTE(review): m is in milliseconds and is rescaled by 1000/HZ here - confirm the unit sleep() expects */
+               sleep((m + (1000/HZ-1))/(1000/HZ));
+       }
+}
+
+/*
+static void
+timerproc(void *)
+{
+       Uproc *p;
+       vlong expire, now, wake, dead;
+       int err, i, alive;
+       char c;
+
+       setprocname("timerproc()");
+       dead = 0;
+       for(;;){
+               qlock(&proctab);
+               now = nsec();
+               wake = now + 60000000000LL;
+               alive = 0;
+               for(i=0; i<MAXPROC; i++){
+                       if((p = getprocn(i)) == nil)
+                               continue;
+                       if(p->wstate & WEXITED)
+                               continue;
+                       if(p->kpid <= 0)
+                               continue;
+
+                       if(now >= dead){
+                               if(read(p->argsfd, &c, 1) < 0){
+                                       err = mkerror();
+                                       if(err != -EINTR && err != -ERESTART){
+                                               p->kpid = 0;
+                                               qunlock(&proctab);
+                                               exitproc(p, SIGKILL, 1);
+                                               qlock(&proctab);
+                                               continue;       
+                                       }       
+                               }
+                       }
+                       alive++;
+                       expire = p->timeout;
+                       if(expire > 0){
+                               if(now >= expire){
+                                       p->timeout = 0;
+                                       wakeup(p, Wakeup, 0);
+                               } else {
+                                       if(expire < wake)
+                                               wake = expire;
+                               }
+                       }
+                       expire = alarmtimer(p, now, wake);
+                       if(expire < wake)
+                               wake = expire;
+               }
+               qunlock(&proctab);
+
+               if(now >= dead)
+                       dead = now + 5000000000LL;
+               if(dead < wake)
+                       wake = dead;
+               if(alive == 0)
+                       break;
+               wake -= now;
+
+               sleep(wake/1000000LL);
+       }
+}
+*/
+
+/*
+ * waitpid(2) emulation.  pid selects the children to consider: > 0 that
+ * pid, 0 the caller's process group, < -1 process group -pid, -1 any.
+ * Returns the pid of a child with a pending event and stores its exit
+ * code through pexit (if not nil); returns 0 with WNOHANG when no child
+ * has an event, -ECHILD when no child matches, or the negative value
+ * from sleepproc() when the wait is interrupted.
+ */
+int sys_waitpid(int pid, int *pexit, int opt)
+{
+       int i, n, m, status;
+       Uproc *p;
+
+       trace("sys_waitpid(%d, %p, %d)", pid, pexit, opt);
+
+       /* m is the set of wevent bits the caller is interested in */
+       m = WEXITED;
+       if(opt & WUNTRACED)
+               m |= WSTOPPED;
+       if(opt & WCONTINUED)
+               m |= WCONTINUED;
+
+       qlock(&proctab);
+       for(;;){
+               /* n counts the children that match the selection */
+               n = 0;
+               for(i=0; i<MAXPROC; i++){
+                       if((p = getprocn(i)) == nil)
+                               continue;
+                       if(p == current)
+                               continue;
+                       /* children whose exit signal is not SIGCHLD need WALL or WCLONE */
+                       if((p->exitsignal != SIGCHLD) && (opt & (WALL|WCLONE))==0)
+                               continue;
+                       if(p->ppid != current->pid)
+                               continue;
+                       if(pid > 0){
+                               if(p->pid != pid)
+                                       continue;
+                       } else if(pid == 0){
+                               if(p->pgid != current->pgid)
+                                       continue;
+                       } else if(pid < -1){
+                               if(p->pgid != -pid)
+                                       continue;
+                       }
+                       n++;
+                       trace("sys_waitpid(): child %d wstate %x", p->pid, p->wstate);
+                       if(p->wevent & m)
+                               goto found;
+               }
+               if(n == 0){
+                       qunlock(&proctab);
+                       trace("sys_waitpid(): no children we can wait for");
+                       return -ECHILD;
+               }
+               if(opt & WNOHANG){
+                       qunlock(&proctab);
+                       trace("sys_waitpid(): no exited/stoped/cont children");
+                       return 0;
+               }
+               /* sleepproc() drops proctab while asleep and re-takes it before returning */
+               if((i = sleepproc(&proctab, 1)) < 0){
+                       qunlock(&proctab);
+                       return i;
+               }
+       }
+
+found:
+       /* reached with proctab still locked */
+       pid = p->pid;
+       status = p->exitcode;
+       /* clear the events being reported */
+       p->wevent &= ~(p->wevent & m);
+       if(p->wstate & WEXITED){
+               trace("sys_waitpid(): found zombie %d exitcode %d", pid, status);
+               freeproc(p);
+       }
+       qunlock(&proctab);
+       if(pexit)
+               *pexit = status;
+       return pid;
+}
+
+struct linux_rusage { /* record that sys_wait4() zero fills for the caller */
+    struct linux_timeval ru_utime; /* user time used */ 
+    struct linux_timeval ru_stime; /* system time used */ 
+    long   ru_maxrss;        /* maximum resident set size */ 
+    long   ru_ixrss;         /* integral shared memory size */ 
+    long   ru_idrss;         /* integral unshared data size */ 
+    long   ru_isrss;         /* integral unshared stack size */ 
+    long   ru_minflt;        /* page reclaims */ 
+    long   ru_majflt;        /* page faults */ 
+    long   ru_nswap;         /* swaps */ 
+    long   ru_inblock;       /* block input operations */ 
+    long   ru_oublock;       /* block output operations */ 
+    long   ru_msgsnd;        /* messages sent */ 
+    long   ru_msgrcv;        /* messages received */ 
+    long   ru_nsignals;      /* signals received */ 
+    long   ru_nvcsw;         /* voluntary context switches */ 
+    long   ru_nivcsw;        /* involuntary context switches */ 
+}; 
+
+/*
+ * wait4: behaves like sys_waitpid(). no resource usage is
+ * collected, so a non-nil prusage is simply zero filled.
+ */
+int sys_wait4(int pid, int *pexit, int opt, void *prusage)
+{
+       struct linux_rusage *usage;
+       int r;
+
+       trace("sys_wait4(%d, %p, %d, %p)", pid, pexit, opt, prusage);
+
+       usage = prusage;
+       r = sys_waitpid(pid, pexit, opt);
+       if(usage == nil)
+               return r;
+
+       memset(usage, 0, sizeof(struct linux_rusage));
+       return r;
+}
+
+/*
+ * count the process table entries whose pid equals pid,
+ * i.e. the threads of that thread group.
+ */
+int
+threadcount(int pid)
+{
+       Uproc *t;
+       int slot, cnt;
+
+       cnt = 0;
+       slot = 0;
+       while(slot < MAXPROC){
+               t = getprocn(slot++);
+               if(t == nil || t->pid != pid)
+                       continue;
+               cnt++;
+       }
+       return cnt;
+}
+
+int
+killproc(Uproc *p, Usiginfo *info, int group)
+{      /*
+        * Queue the signal described by info on p via sendsignal() and
+        * wake a thread up to handle it. When sendsignal() returns a
+        * value <= 0 that value is returned and nobody is woken;
+        * otherwise the result is 0.
+        */
+       int i, n;
+       Uproc *w;
+       int sig, err;
+
+       if((err = sendsignal(p, info, group)) <= 0)
+               return err;
+       w = p;  /* thread to wake; defaults to the target itself */
+       sig = info->signo;
+       if(group && !wantssignal(w, sig)){
+               /* target does not want it: look for another thread of the same pid that does */
+               for(i=1, n = p->tid + 1; i<MAXPROC; i++, n++){
+                       if((p = getprocn(pidhash(n))) == nil)
+                               continue;
+                       if(p->pid != w->pid)
+                               continue;
+                       if(!wantssignal(p, info->signo))
+                               continue;
+                       w = p;
+                       break;
+               }
+       }
+       wakeup(w, Abort, (sig == SIGKILL || sig == SIGSTOP || sig == SIGALRM)); /* last argument is 1 only for KILL, STOP and ALRM */
+       return 0;
+}
+
+enum   /* values sendexitsignal() stores in si.code for SIGCHLD */
+{
+       CLD_EXITED              = 1,
+       CLD_KILLED,
+       CLD_DUMPED,
+       CLD_TRAPPED,
+       CLD_STOPPED,
+       CLD_CONTINUED,
+};
+
+/*
+ * queue the exit signal sig for proc into the parent process.
+ * unlike killproc(), this does not wake the parent up.
+ * for SIGCHLD the siginfo code is derived from the low byte
+ * of code and the child's pid, uid and status are filled in.
+ */
+static int
+sendexitsignal(Uproc *parent, Uproc *proc, int sig, int code)
+{
+       Usiginfo info;
+       int low;
+
+       memset(&info, 0, sizeof(info));
+       info.signo = sig;
+       if(sig == SIGCHLD){
+               low = code & 0xFF;
+               if(low == 0)
+                       info.code = CLD_EXITED;
+               else if(low == SIGSTOP)
+                       info.code = CLD_STOPPED;
+               else if(low == SIGCONT)
+                       info.code = CLD_CONTINUED;
+               else if(low == SIGKILL)
+                       info.code = CLD_KILLED;
+               else
+                       info.code = CLD_DUMPED;
+               info.chld.pid = proc->pid;
+               info.chld.uid = proc->uid;
+               info.chld.status = code;
+       }
+       return sendsignal(parent, &info, 1);
+}
+
+/*
+ * wake every thread that belongs to the same thread group
+ * as p, p itself included. must be called with proctab locked.
+ */
+static void
+wakeupall(Uproc *p, char *m, int force)
+{
+       Uproc *t;
+       int group, first, k;
+
+       group = p->pid;
+       first = p->tid;
+       for(k = 0; k < MAXPROC; k++){
+               t = getprocn(pidhash(first + k));
+               if(t == nil || t->pid != group)
+                       continue;
+               wakeup(t, m, force);
+       }
+}
+
+static void
+zap(void *)
+{      /* hook installed by zapthreads(): the running thread exits with code 0, group flag 0 */
+       exitproc(current, 0, 0);
+}
+
+void
+zapthreads(void)
+{      /*
+        * Make every other thread of the current thread group exit.
+        * Each pass installs zap() as traceproc (with current as
+        * tracearg) on the siblings that still have kpid > 0 and wakes
+        * them; the loop repeats until a pass finds no such thread.
+        * kexitproc() calls this while holding proctab.
+        */
+       Uproc *p;
+       int i, n, z;
+
+       for(;;){
+               z = 0;  /* threads zapped in this pass */
+               for(i=1, n = current->tid+1; i<MAXPROC; i++, n++){
+                       if((p = getprocn(pidhash(n))) == nil)
+                               continue;
+                       if(p->pid != current->pid || p == current)
+                               continue;
+                       if(p->kpid <= 0)
+                               continue;
+
+                       trace("zapthreads() zapping thread %p", p);
+                       p->tracearg = current;  /* kexitproc() wakes this proc once the thread is freed */
+                       p->traceproc = zap;
+                       wakeup(p, Abort, 1);
+                       z++;
+               }
+               if(z == 0)
+                       break;
+               sleepproc(&proctab, 0);
+       }
+}
+
+struct kexitprocargs   /* argument bundle exitproc() hands to kexitproc() */
+{
+       Uproc   *proc;
+       int             code;
+       int             group;
+};
+
+#pragma profile off
+
+static int
+kexitproc(void *arg)
+{      /*
+        * Tear down args->proc. Releases files and the note/args fds,
+        * unlinks the proc from alarmq, orphans its children, and then
+        * either leaves the proc as a zombie for its parent (last
+        * thread of a SIGCHLD child whose parent does not ignore
+        * SIGCHLD) or frees it right away. When proc is the current
+        * process the function leaves through longjmp(exitjmp, 1)
+        * instead of returning; otherwise it returns 0.
+        */
+       struct kexitprocargs *args;
+       Uproc *proc;
+       int code, group;
+       Uproc *parent, *child, **pp;
+       int i;
+
+       args = arg;
+       proc = args->proc;
+       code = args->code;
+       group = args->group;
+
+       if(proc == current){
+               trace("kexitproc: cleartidptr = %p", proc->cleartidptr);
+               if(okaddr(proc->cleartidptr, sizeof(*proc->cleartidptr), 1))
+                       *proc->cleartidptr = 0;
+               sys_futex((ulong*)proc->cleartidptr, 1, MAXPROC, nil, nil, 0);  /* NOTE(review): op 1 is presumably a futex wake of up to MAXPROC waiters; runs even when okaddr() failed - confirm sys_futex validates */
+
+               qlock(&proctab);
+               exitsignal();
+               qunlock(&proctab);
+
+               exitmem();
+       }
+
+       exitfile(proc);
+
+       close(proc->notefd); proc->notefd = -1;
+       close(proc->argsfd); proc->argsfd = -1;
+
+       qlock(&proctab);
+
+       for(pp = &alarmq; *pp; pp = &((*pp)->alarmq)){  /* unlink proc from the alarmq list */
+               if(*pp == proc){
+                       *pp = proc->alarmq;
+                       proc->alarmq = nil;
+                       break;
+               }
+       }
+
+       /* reparent children, and reap when zombies */
+       for(i=0; i<MAXPROC; i++){
+               if((child = getprocn(i)) == nil)
+                       continue;
+               if(child->ppid != proc->pid)
+                       continue;
+               child->ppid = 0;
+               if(child->wstate & WEXITED)
+                       freeproc(child);
+       }
+
+       /* if we got zapped, just free the proc and wakeup zapper */
+       if((proc == current) && (proc->traceproc == zap) && (parent = proc->tracearg)){
+               freeproc(proc);
+               wakeup(parent, Wakeup, 0);
+               goto zapped;
+       }
+
+       if(group && proc == current)
+               zapthreads();
+
+       parent = getproc(proc->ppid);
+       if((threadcount(proc->pid)==1) && parent && 
+               (proc->exitsignal == SIGCHLD) && !ignoressignal(parent, SIGCHLD)){
+
+               /* we are zombie */
+               proc->exitcode = code;
+               proc->wstate = WEXITED;
+               proc->wevent = proc->wstate;
+               if(proc == current){
+                       current->kpid = 0;
+                       sendexitsignal(parent, proc, proc->exitsignal, code);
+                       wakeupall(parent, Abort, 0);
+                       qunlock(&proctab);
+                       longjmp(exitjmp, 1);
+               } else {
+                       sendexitsignal(parent, proc, proc->exitsignal, code);
+               }
+       } else {
+               /* we are clone */
+               if(parent && proc->exitsignal > 0)
+                       sendexitsignal(parent, proc, proc->exitsignal, code);
+               freeproc(proc);
+       }
+       if(parent)
+               wakeupall(parent, Abort, 0);
+
+zapped:
+       qunlock(&proctab);
+
+       if(proc == current)     /* only a pointer comparison: proc may already have been freed above */
+               longjmp(exitjmp, 1);
+
+       return 0;
+}
+
+/*
+ * terminate proc with wait status code; group is passed on to
+ * kexitproc(). when proc is the calling process the work is
+ * run through onstack() on kstack, otherwise kexitproc() is
+ * called directly.
+ */
+void exitproc(Uproc *proc, int code, int group)
+{
+       struct kexitprocargs a;
+
+       trace("exitproc(%p, %d, %d)", proc, code, group);
+
+       a.group = group;
+       a.code = code;
+       a.proc = proc;
+
+       if(proc != current){
+               kexitproc(&a);
+               return;
+       }
+       onstack(kstack, kexitproc, &a);
+}
+
+struct kstoparg        /* handed to stop() through tracearg by stopproc() */
+{
+       Uproc   *stopper;
+       int             code;
+};
+
+/*
+ * trace hook installed by stopproc(): put the running thread
+ * into the stopped state using the code stored in the
+ * struct kstoparg passed as aux.
+ */
+static void
+stop(void *aux)
+{
+       stopproc(current, ((struct kstoparg*)aux)->code, 0);
+}
+
+void stopproc(Uproc *proc, int code, int group)
+{      /*
+        * Put proc into the WSTOPPED wait state with exitcode code.
+        * If proc is the current process and was told to stop by
+        * another thread (traceproc == stop), the hook is cleared, the
+        * stopper is woken and nothing else happens. With group set,
+        * the stop() hook is installed on every other thread of the
+        * group that is not yet stopped or exited, repeating until a
+        * pass finds none. Finally the parent's threads are woken, and
+        * with group set it is sent SIGCHLD unless it ignores it.
+        */
+       struct kstoparg *arg;
+       Uproc *p, *parent;
+       int i, n, z;
+
+       trace("stopproc(%p, %d, %d)", proc, code, group);
+
+       qlock(&proctab);
+       proc->exitcode = code;
+       proc->wstate = WSTOPPED;
+       proc->wevent = proc->wstate;
+
+       if((proc == current) && (proc->traceproc == stop) && (arg = proc->tracearg)){
+               proc->traceproc = nil;
+               proc->tracearg = nil;
+               wakeup(arg->stopper, Wakeup, 0);
+               qunlock(&proctab);
+               return;
+       }
+
+       /* put all threads in the stopped state */
+       arg = nil;
+       while(group){
+               if(arg == nil){
+                       arg = kmalloc(sizeof(*arg));    /* allocated once, shared by all threads being stopped */
+                       arg->stopper = current;
+                       arg->code = code;
+               }
+               z = 0;  /* threads asked to stop in this pass */
+               for(i=1, n = proc->tid+1; i<MAXPROC; i++, n++){
+                       if((p = getprocn(pidhash(n))) == nil)
+                               continue;
+                       if(p->pid != proc->pid || p == proc)
+                               continue;
+                       if(p->kpid <= 0)
+                               continue;
+                       if(p->wstate & (WSTOPPED | WEXITED))
+                               continue;
+
+                       trace("stopproc() stopping thread %p", p);
+                       p->tracearg = arg;
+                       p->traceproc = stop;
+                       wakeup(p, Abort, 1);
+                       z++;
+               }
+               if(z == 0)
+                       break;
+               sleepproc(&proctab, 0);
+       }
+       free(arg);      /* arg is still nil here when group is 0 */
+
+       if(parent = getproc(proc->ppid)){
+               if(group && !ignoressignal(parent, SIGCHLD))
+                       sendexitsignal(parent, proc, SIGCHLD, code);
+               wakeupall(parent, Abort, 0);
+       }
+       qunlock(&proctab);
+}
+
+void contproc(Uproc *proc, int code, int group)
+{      /*
+        * Put proc into the WCONTINUED wait state with exitcode code.
+        * With group set, every other thread of the group that is
+        * currently WSTOPPED gets the same state and is woken. Then
+        * the parent's threads are woken, and with group set the
+        * parent is sent SIGCHLD unless it ignores it.
+        */
+       Uproc *p, *parent;
+       int i, n;
+
+       trace("contproc(%p, %d, %d)", proc, code, group);
+
+       qlock(&proctab);
+       proc->exitcode = code;
+       proc->wstate = WCONTINUED;
+       proc->wevent = proc->wstate;
+       if(group){
+               for(i=1, n = proc->tid+1; i<MAXPROC; i++, n++){
+                       if((p = getprocn(pidhash(n))) == nil)
+                               continue;
+                       if(p->pid != proc->pid || p == proc)
+                               continue;
+                       if(p->kpid <= 0)
+                               continue;
+                       if((p->wstate & WSTOPPED) == 0) /* only stopped threads are resumed */
+                               continue;
+                       if(p->wstate & (WCONTINUED | WEXITED))
+                               continue;
+
+                       trace("contproc() waking thread %p", p);
+                       p->exitcode = code;
+                       p->wstate = WCONTINUED;
+                       p->wevent = p->wstate;
+                       wakeup(p, Wakeup, 0);
+               }
+       }
+       if(parent = getproc(proc->ppid)){
+               if(group && !ignoressignal(parent, SIGCHLD))
+                       sendexitsignal(parent, proc, SIGCHLD, code);
+               wakeupall(parent, Abort, 0);
+       }
+       qunlock(&proctab);
+}
+
+/*
+ * exit: terminate the calling thread only. the low byte of
+ * code is moved into bits 8..15 of the status handed to
+ * exitproc(). the -1 is only reached if exitproc() returns.
+ */
+int sys_exit(int code)
+{
+       int status;
+
+       trace("sys_exit(%d)", code);
+
+       status = (code & 0xFF) << 8;
+       exitproc(current, status, 0);
+       return -1;
+}
+
+/*
+ * exit_group: like sys_exit(), but passes group = 1 to
+ * exitproc() so the whole thread group is taken down.
+ */
+int sys_exit_group(int code)
+{
+       int status;
+
+       trace("sys_exit_group(%d)", code);
+
+       status = (code & 0xFF) << 8;
+       exitproc(current, status, 1);
+       return -1;
+}
+
+struct kcloneprocargs  /* sys_linux_clone() arguments as passed to kcloneproc() */
+{
+       int     flags;
+       void    *newstack;
+       int     *parenttidptr;
+       void    *tlsdescr;
+       int     *childtidptr;
+};
+
+static int
+kcloneproc(void *arg)
+{      /*
+        * Worker behind sys_linux_clone(). Allocates a Uproc, rforks,
+        * and in the parent fills in the new Uproc from current and
+        * the clone flags before meeting the child in rendezvous().
+        * The parent returns the new tid, or -EFAULT, -EAGAIN or
+        * mkerror() on failure. The child adopts the new Uproc as
+        * current and goes back to user mode through retuser().
+        */
+       struct kcloneprocargs args;
+       struct linux_user_desc tls;
+       Ureg ureg;
+       int rflags, pid, tid;
+       char buf[80];
+       Uproc *new;
+
+       memmove(&args, arg, sizeof(args));      /* private copies of the arguments and registers */
+       memmove(&ureg, current->ureg, sizeof(ureg));
+       if(args.flags & CLONE_SETTLS){
+               if(!okaddr(args.tlsdescr, sizeof(tls), 0))
+                       return -EFAULT;
+               memmove(&tls, args.tlsdescr, sizeof(tls));
+       }
+
+       qlock(&proctab);
+       if((new = allocproc()) == nil){
+               qunlock(&proctab);
+               return -EAGAIN;
+       }
+       tid = new->tid;
+
+       if(args.flags & CLONE_PARENT_SETTID){
+               if(!okaddr(args.parenttidptr, sizeof(*args.parenttidptr), 1)){
+                       freeproc(new);
+                       qunlock(&proctab);
+                       return -EFAULT;
+               }
+               *args.parenttidptr = tid;
+       }
+
+       rflags = RFPROC;
+       if(args.flags & CLONE_VM)
+               rflags |= RFMEM;
+
+       qlock(current); /* held by the parent until the rendezvous below has completed */
+       if((pid = rfork(rflags)) < 0){
+               freeproc(new);
+               qunlock(current);
+               qunlock(&proctab);
+
+               trace("kcloneproc(): rfork failed: %r");
+               return mkerror();
+       }
+
+       if(pid){
+               /* parent */
+               new->kpid = pid;
+               new->exitsignal = args.flags & 0xFF;    /* low byte of the clone flags is the exit signal */
+               new->innote = 0;
+               new->ureg = &ureg;
+               new->syscall = current->syscall;
+               new->sysret = current->sysret;
+               new->comm = nil;
+               new->ncomm = 0;
+               new->linkloop = 0;
+               new->root = current->root ? kstrdup(current->root) : nil;
+               new->cwd = kstrdup(current->cwd);
+               new->kcwd = kstrdup(current->kcwd);
+               new->starttime = nsec();
+
+               snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+               new->notefd = open(buf, OWRITE);
+               snprint(buf, sizeof(buf), "/proc/%d/args", pid);
+               new->argsfd = open(buf, ORDWR);
+
+               if(args.flags & (CLONE_THREAD | CLONE_PARENT)){
+                       new->ppid = current->ppid;      /* sibling of the caller */
+               } else {
+                       new->ppid = current->pid;
+               }
+
+               if(args.flags & CLONE_THREAD)
+                       new->pid = current->pid;        /* joins the caller's thread group */
+
+               new->cleartidptr = nil;
+               if(args.flags & CLONE_CHILD_CLEARTID)
+                       new->cleartidptr = args.childtidptr;
+
+               new->pgid = current->pgid;
+               new->psid = current->psid;
+               new->uid = current->uid;
+               new->gid = current->gid;
+
+               clonetrace(new, !(args.flags & CLONE_THREAD));
+               clonesignal(new, !(args.flags & CLONE_SIGHAND), !(args.flags & CLONE_THREAD));
+               clonemem(new, !(args.flags & CLONE_VM));
+               clonefile(new, !(args.flags & CLONE_FILES));
+               clonetls(new);
+               qunlock(&proctab);
+
+               while(rendezvous(new, 0) == (void*)~0)  /* retry while rendezvous() reports ~0 */
+                       ;
+
+               qunlock(current);
+
+               return tid;
+       } 
+
+       /* child */
+       current = new;
+       profme();
+
+       /* wait for parent to copy our resources */
+       while(rendezvous(new, 0) == (void*)~0)
+               ;
+
+       trace("kcloneproc(): hello world");
+
+       if(args.flags & CLONE_SETTLS)
+               sys_set_thread_area(&tls);
+
+       if(args.flags & CLONE_CHILD_SETTID)
+               if(okaddr(args.childtidptr, sizeof(*args.childtidptr), 1))
+                       *args.childtidptr = tid;
+
+       if(args.newstack != nil)
+               current->ureg->sp = (ulong)args.newstack;
+       current->sysret(0);     /* the child's syscall result is 0 */
+       retuser();
+
+       return -1;
+}
+
+#pragma profile on
+
+/*
+ * clone: create a new process or thread as described by flags.
+ * the arguments are bundled up and the real work is done by
+ * kcloneproc() through onstack() on kstack; its result is
+ * returned unchanged.
+ */
+int sys_linux_clone(int flags, void *newstack, int *parenttidptr, int *tlsdescr, void *childtidptr)
+{
+       struct kcloneprocargs a;
+
+       /* print the pointers in the same order as the parameter list */
+       trace("sys_linux_clone(%x, %p, %p, %p, %p)", flags, newstack, parenttidptr, tlsdescr, childtidptr);
+
+       a.flags = flags;
+       a.newstack = newstack;
+       a.parenttidptr = parenttidptr;
+       a.tlsdescr = tlsdescr;
+       a.childtidptr = childtidptr;
+
+       return onstack(kstack, kcloneproc, &a);
+}
+
+/*
+ * fork: a clone with SIGCHLD as the only flag and no new
+ * stack, tid pointers or tls descriptor.
+ */
+int sys_fork(void)
+{
+       int r;
+
+       trace("sys_fork()");
+
+       r = sys_linux_clone(SIGCHLD, nil, nil, nil, nil);
+       return r;
+}
+
+/*
+ * vfork: simply forwards to sys_fork().
+ */
+int sys_vfork(void)
+{
+       int r;
+
+       trace("sys_vfork()");
+
+       r = sys_fork();
+       return r;
+}
+
+/*
+ * getpid: the pid field of the calling process.
+ */
+int sys_getpid(void)
+{
+       int id;
+
+       trace("sys_getpid()");
+
+       id = current->pid;
+       return id;
+}
+
+/*
+ * getppid: the ppid field of the calling process.
+ */
+int sys_getppid(void)
+{
+       int id;
+
+       trace("sys_getppid()");
+
+       id = current->ppid;
+       return id;
+}
+
+/*
+ * gettid: the tid field of the calling process.
+ */
+int sys_gettid(void)
+{
+       int id;
+
+       trace("sys_gettid()");
+
+       id = current->tid;
+       return id;
+}
+
+/*
+ * setpgid: move every thread of process pid into process
+ * group pgid. pid == 0 names the calling process. pgid == 0
+ * means "use the pid of the target process", which makes the
+ * target the leader of a group of its own.
+ * returns 0 on success, -EINVAL for a negative pgid and
+ * -ESRCH when no table entry has the requested pid.
+ */
+int sys_setpgid(int pid, int pgid)
+{
+       int i, n;
+
+       trace("sys_setpgid(%d, %d)", pid, pgid);
+
+       if(pgid < 0)
+               return -EINVAL;
+
+       /* resolve pid first: a zero pgid defaults to the target's pid */
+       if(pid == 0)
+               pid = current->pid;
+       if(pgid == 0)
+               pgid = pid;
+
+       n = 0;
+       qlock(&proctab);
+       for(i=0; i<MAXPROC; i++){
+               Uproc *p;
+
+               if((p = getprocn(i)) == nil)
+                       continue;
+               if(p->pid != pid)
+                       continue;
+
+               p->pgid = pgid;
+               n++;
+       }
+       qunlock(&proctab);
+
+       return n ? 0 : -ESRCH;
+}
+
+/*
+ * getpgid: process group of process pid. pid == 0 answers
+ * for the caller without looking at the table. returns
+ * -ESRCH when no table entry has that pid.
+ */
+int sys_getpgid(int pid)
+{
+       Uproc *p;
+       int slot, res;
+
+       trace("sys_getpgid(%d)", pid);
+
+       if(pid == 0)
+               return current->pgid;
+
+       res = -ESRCH;
+       qlock(&proctab);
+       for(slot = 0; slot < MAXPROC; slot++){
+               p = getprocn(slot);
+               if(p != nil && p->pid == pid){
+                       res = p->pgid;
+                       break;
+               }
+       }
+       qunlock(&proctab);
+
+       return res;
+}
+
+/*
+ * getpgrp: same as sys_getpgid(0).
+ */
+int sys_getpgrp(void)
+{
+       int grp;
+
+       trace("sys_getpgrp()");
+
+       grp = sys_getpgid(0);
+       return grp;
+}
+
+/*
+ * getuid: the uid field of the calling process.
+ */
+int sys_getuid(void)
+{
+       int id;
+
+       trace("sys_getuid()");
+
+       id = current->uid;
+       return id;
+}
+
+/*
+ * getgid: the gid field of the calling process.
+ */
+int sys_getgid(void)
+{
+       int id;
+
+       trace("sys_getgid()");
+
+       id = current->gid;
+       return id;
+}
+
+/*
+ * setuid: record uid in the calling process without any
+ * permission check. always returns 0.
+ */
+int sys_setuid(int uid)
+{
+       Uproc *self;
+
+       trace("sys_setuid(%d)", uid);
+
+       self = current;
+       self->uid = uid;
+       return 0;
+}
+
+/*
+ * setgid: record gid in the calling process without any
+ * permission check. always returns 0.
+ */
+int sys_setgid(int gid)
+{
+       Uproc *self;
+
+       trace("sys_setgid(%d)", gid);
+
+       self = current;
+       self->gid = gid;
+       return 0;
+}
+
+int sys_setresuid(int ruid, int euid, int suid)
+{      /* stub: the ids are only traced, nothing is stored; always returns 0 */
+       trace("sys_setresuid(%d, %d, %d)", ruid, euid, suid);
+
+       return 0;
+}
+
+int sys_setresgid(int rgid, int egid, int sgid)
+{      /* stub: the ids are only traced, nothing is stored; always returns 0 */
+       trace("sys_setresgid(%d, %d, %d)", rgid, egid, sgid);
+
+       return 0;
+}
+int sys_setreuid(int ruid, int euid)
+{      /* stub: the ids are only traced, nothing is stored; always returns 0 */
+       trace("sys_setreuid(%d, %d)", ruid, euid);
+
+       return 0;
+}
+
+int sys_setregid(int rgid, int egid)
+{      /* stub: the ids are only traced, nothing is stored; always returns 0 */
+       trace("sys_setregid(%d, %d)", rgid, egid);
+
+       return 0;
+}
+
+/*
+ * getresuid: only one uid is kept per process, so the real,
+ * effective and saved ids all report current->uid.
+ * returns -EINVAL if any of the three pointers is nil.
+ */
+int sys_getresuid(int *ruid, int *euid, int *suid)
+{
+       int id;
+
+       trace("sys_getresuid(%p, %p, %p)", ruid, euid, suid);
+
+       if(ruid == nil || euid == nil || suid == nil)
+               return -EINVAL;
+
+       id = current->uid;
+       *ruid = id;
+       *euid = id;
+       *suid = id;
+
+       return 0;
+}
+
+/*
+ * getresgid: only one gid is kept per process, so the real,
+ * effective and saved ids all report current->gid.
+ * returns -EINVAL if any of the three pointers is nil.
+ */
+int sys_getresgid(int *rgid, int *egid, int *sgid)
+{
+       int id;
+
+       trace("sys_getresgid(%p, %p, %p)", rgid, egid, sgid);
+
+       if(rgid == nil || egid == nil || sgid == nil)
+               return -EINVAL;
+
+       id = current->gid;
+       *rgid = id;
+       *egid = id;
+       *sgid = id;
+
+       return 0;
+}
+
+/*
+ * setsid: make the calling process leader of a new session
+ * and process group. refused with -EPERM when the caller is
+ * already a process group leader. every table entry with the
+ * caller's pid gets pgid and psid set to that pid, then the
+ * tty is dropped with settty(nil). returns the caller's pgid.
+ */
+int sys_setsid(void)
+{
+       Uproc *t;
+       int slot;
+
+       trace("sys_setsid()");
+
+       if(current->pgid == current->pid)
+               return -EPERM;
+
+       qlock(&proctab);
+       for(slot = 0; slot < MAXPROC; slot++){
+               t = getprocn(slot);
+               if(t == nil || t->pid != current->pid)
+                       continue;
+               t->pgid = current->pid;
+               t->psid = current->pid;
+       }
+       qunlock(&proctab);
+
+       settty(nil);
+
+       return current->pgid;
+}
+
+/*
+ * getsid: session id of process pid, pid == 0 meaning the
+ * caller. the answer is the psid of the first table entry
+ * with that pid, whether or not the process is the session
+ * leader. returns -ESRCH when no entry has that pid.
+ */
+int sys_getsid(int pid)
+{
+       int i, sid;
+
+       trace("sys_getsid(%d)", pid);
+
+       sid = -ESRCH;
+       if(pid == 0)
+               pid = current->pid;
+       qlock(&proctab);
+       for(i=0; i<MAXPROC; i++){
+               Uproc *p;
+
+               if((p = getprocn(i)) == nil)
+                       continue;
+               if(p->pid != pid)
+                       continue;
+               /* members of a session report its id too, not only the leader */
+               sid = p->psid;
+               break;
+       }
+       qunlock(&proctab);
+
+       return sid;
+}
+
+int sys_getgroups(int size, int *groups)
+{      /* stub: reports zero supplementary groups; groups is never written */
+       trace("sys_getgroups(%d, %p)", size, groups);
+       if(size < 0)
+               return -EINVAL;
+       return 0;
+}
+
+int sys_setgroups(int size, int *groups)
+{      /* stub: the arguments are only traced; always returns 0 */
+       trace("sys_setgroups(%d, %p)", size, groups);
+       return 0;
+}
+
+struct linux_utsname   /* record filled in by sys_uname(); six 65 byte strings */
+{
+       char sysname[65];
+       char nodename[65];
+       char release[65];
+       char version[65];
+       char machine[65];
+       char domainname[65];
+};
+
+/*
+ * uname: fill the linux_utsname at a with fixed strings and
+ * the host's sysname() as node and domain name. always
+ * returns 0.
+ */
+int sys_uname(void *a)
+{
+       struct linux_utsname *p = a;
+
+       trace("sys_uname(%p)", a);
+
+       /*
+        * strncpy() does not terminate the destination when the
+        * source fills it. zero the record first and copy at most
+        * size-1 bytes so every field ends in a NUL even if
+        * sysname() is longer than the field.
+        */
+       memset(p, 0, sizeof(*p));
+       strncpy(p->sysname, "Linux", sizeof(p->sysname)-1);
+       strncpy(p->nodename, sysname(), sizeof(p->nodename)-1);
+       strncpy(p->release, "3.2.1", sizeof(p->release)-1);
+       strncpy(p->version, "linuxemu", sizeof(p->version)-1);
+       strncpy(p->machine, "i386", sizeof(p->machine)-1);
+       strncpy(p->domainname, sysname(), sizeof(p->domainname)-1);
+
+       return 0;
+}
+
+/*
+ * personality: only the values 0 and 0xffffffff are
+ * accepted (result 0); anything else yields -EINVAL.
+ */
+int sys_personality(ulong p)
+{
+       trace("sys_personality(%lux)", p);
+
+       if(p == 0 || p == 0xffffffff)
+               return 0;
+       return -EINVAL;
+}
+
+/*
+ * tkill: send sig to the single thread tid. returns -EINVAL
+ * for tid <= 0, -ESRCH when getproc() finds no such thread,
+ * otherwise the result of killproc().
+ */
+int sys_tkill(int tid, int sig)
+{
+       Usiginfo si;
+       Uproc *target;
+       int rc;
+
+       trace("sys_tkill(%d, %S)", tid, sig);
+
+       if(tid <= 0)
+               return -EINVAL;
+
+       rc = -ESRCH;
+       qlock(&proctab);
+       target = getproc(tid);
+       if(target != nil){
+               memset(&si, 0, sizeof(si));
+               si.signo = sig;
+               si.code = SI_TKILL;
+               si.kill.pid = current->tid;
+               si.kill.uid = current->uid;
+               rc = killproc(target, &si, 0);
+       }
+       qunlock(&proctab);
+       return rc;
+}
+
+/*
+ * tgkill: like sys_tkill(), but the thread tid must also
+ * belong to thread group pid; otherwise -ESRCH is returned.
+ */
+int sys_tgkill(int pid, int tid, int sig)
+{
+       Usiginfo si;
+       Uproc *target;
+       int rc;
+
+       trace("sys_tgkill(%d, %d, %S)", pid, tid, sig);
+
+       if(tid <= 0)
+               return -EINVAL;
+
+       rc = -ESRCH;
+       qlock(&proctab);
+       target = getproc(tid);
+       if(target != nil && target->pid == pid){
+               memset(&si, 0, sizeof(si));
+               si.signo = sig;
+               si.code = SI_TKILL;
+               si.kill.pid = current->tid;
+               si.kill.uid = current->uid;
+               rc = killproc(target, &si, 0);
+       }
+       qunlock(&proctab);
+       return rc;
+}
+
+/*
+ * rt_sigqueueinfo: queue sig for process pid with the
+ * siginfo converted from info by linux2siginfo(); signo and
+ * code are then overridden with sig and SI_QUEUE. returns
+ * -ESRCH when getproc() finds nothing, otherwise the result
+ * of killproc() with group = 1.
+ */
+int sys_rt_sigqueueinfo(int pid, int sig, void *info)
+{
+       Usiginfo si;
+       Uproc *target;
+       int rc;
+
+       trace("sys_rt_sigqueueinfo(%d, %S, %p)", pid, sig, info);
+
+       rc = -ESRCH;
+       qlock(&proctab);
+       target = getproc(pid);
+       if(target != nil){
+               memset(&si, 0, sizeof(si));
+               linux2siginfo(info, &si);
+               si.signo = sig;
+               si.code = SI_QUEUE;
+               rc = killproc(target, &si, 1);
+       }
+       qunlock(&proctab);
+       return rc;
+}
+
+enum { /* number of bits in one ulong word of the pidmap used by sys_kill() */
+       PIDMAPBITS1     = 8*sizeof(ulong),
+};
+
+/*
+ * kill: send sig to the processes selected by pid.
+ *     pid == 0        every process in the caller's process group
+ *     pid == -1       every process with pid > 1, except the
+ *                     calling thread
+ *     pid < -1        every process in process group -pid
+ *     otherwise       the process with that pid
+ * exited entries and entries with kpid <= 0 are skipped. a
+ * bitmap indexed by pidhash(pid) makes sure a thread group is
+ * signalled only once. with sig <= 0 nothing is sent but the
+ * matches are still counted. returns 0 if anything matched,
+ * -ESRCH otherwise.
+ */
+int sys_kill(int pid, int sig)
+{
+       int i, j, n;
+       Uproc *p;
+       Usiginfo si;
+       ulong pidmap[(MAXPROC + PIDMAPBITS1-1) / PIDMAPBITS1];
+       ulong m;
+
+       trace("sys_kill(%d, %S)", pid, sig);
+
+       n = 0;
+       memset(pidmap, 0, sizeof(pidmap));
+       qlock(&proctab);
+       for(i=0; i<MAXPROC; i++){
+               if((p = getprocn(i)) == nil)
+                       continue;
+               if(p->wstate & WEXITED)
+                       continue;
+               if(p->kpid <= 0)
+                       continue;
+
+               if(pid == 0){
+                       if(p->pgid != current->pgid)
+                               continue;
+               } else if(pid == -1){
+                       if(p->pid <= 1)
+                               continue;
+                       if(p->tid == current->tid)
+                               continue;
+               } else if(pid < -1) {
+                       if(p->pgid != -pid)
+                               continue;
+               } else {
+                       if(p->pid != pid)
+                               continue;
+               }
+
+               /* make sure we send only one signal per pid */
+               j = pidhash(p->pid);
+               /* shift a ulong: the bit index can reach PIDMAPBITS1-1 */
+               m = 1UL << (j % PIDMAPBITS1);
+               j /= PIDMAPBITS1;
+               if(pidmap[j] & m)
+                       continue;
+               pidmap[j] |= m;
+
+               if(sig > 0){
+                       memset(&si, 0, sizeof(si));
+                       si.signo = sig;
+                       si.code = SI_USER;
+                       si.kill.pid = current->tid;
+                       si.kill.uid = current->uid;
+                       killproc(p, &si, 1);
+               }
+               n++;
+       }
+       qunlock(&proctab);
+       if(n == 0)
+               return -ESRCH;
+       return 0;
+}
+
+/*
+ * set_tid_address: remember tidptr as the word kexitproc()
+ * clears when this thread exits. returns the caller's thread
+ * id, which differs from its pid for every thread but the
+ * first one of a thread group.
+ */
+int sys_set_tid_address(int *tidptr)
+{
+       trace("sys_set_tid_address(%p)", tidptr);
+
+       current->cleartidptr = tidptr;
+       return current->tid;
+}
+
+struct linux_sched_param       /* record written by sys_sched_getparam() */
+{
+       int sched_priority;
+};
+
+int sys_sched_setscheduler(int pid, int policy, void *param)
+{      /* stub: only checks that pid exists; policy and param are ignored */
+       trace("sys_sched_setscheduler(%d, %d, %p)", pid, policy, param);
+
+       if(getproc(pid) == nil) /* NOTE(review): looked up without qlock(&proctab), unlike sys_tkill() - confirm this is safe */
+               return -ESRCH;
+       return 0;
+}
+
+int sys_sched_getscheduler(int pid)
+{      /* stub: returns 0 for every existing pid, -ESRCH otherwise */
+       trace("sys_sched_getscheduler(%d)", pid);
+
+       if(getproc(pid) == nil) /* NOTE(review): looked up without qlock(&proctab), unlike sys_tkill() - confirm this is safe */
+               return -ESRCH;
+       return 0;
+}
+
+int sys_sched_setparam(int pid, void *param)
+{      /* stub: only checks that pid exists; param is ignored */
+       trace("sys_sched_setparam(%d, %p)", pid, param);
+
+       if(getproc(pid) == nil) /* NOTE(review): looked up without qlock(&proctab), unlike sys_tkill() - confirm this is safe */
+               return -ESRCH;
+       return 0;
+}
+
+/*
+ * sched_getparam: report a scheduling priority of 0 for any
+ * existing pid. returns -ESRCH when getproc() finds nothing
+ * and -EINVAL when param is nil.
+ */
+int sys_sched_getparam(int pid, void *param)
+{
+       struct linux_sched_param *out;
+
+       trace("sys_sched_getparam(%d, %p)", pid, param);
+
+       out = param;
+       if(getproc(pid) == nil)
+               return -ESRCH;
+       if(out == nil)
+               return -EINVAL;
+
+       out->sched_priority = 0;
+       return 0;
+}
+
+int sys_sched_yield(void)
+{      /* yields by calling sleep(0); always returns 0 */
+       trace("sys_sched_yield()");
+
+       sleep(0);
+       return 0;
+}
+
+enum { /* resource numbers accepted by sys_getrlimit() and sys_setrlimit() */
+       RLIMIT_CPU,
+       RLIMIT_FSIZE,
+       RLIMIT_DATA,
+       RLIMIT_STACK,
+       RLIMIT_CORE,
+       RLIMIT_RSS,
+       RLIMIT_NPROC,
+       RLIMIT_NOFILE,
+       RLIMIT_MEMLOCK,
+       RLIMIT_AS,
+       RLIMIT_LOCKS,
+       RLIMIT_SIGPENDING,
+       RLIMIT_MSGQUEUE,
+
+       RLIM_NLIMITS,   /* number of resources above; first invalid resource number */
+
+       RLIM_INFINITY           = ~0UL, /* limit value reported for unlimited resources */
+};
+
+struct linux_rlimit    /* pair of limits written by sys_getrlimit() */
+{
+       ulong   rlim_cur;
+       ulong   rlim_max;
+};
+
+/*
+ * getrlimit: report the fixed limits of the emulator.
+ * everything is RLIM_INFINITY except the stack (USTACK), the
+ * soft core limit (0), the process count (MAXPROC) and the
+ * file count (MAXFD). returns -EINVAL for a resource number
+ * outside 0..RLIM_NLIMITS-1 and -EFAULT for a nil rlim.
+ */
+int sys_getrlimit(long resource, void *rlim)
+{
+       struct linux_rlimit *r = rlim;
+
+       trace("sys_getrlimit(%ld, %p)", resource, rlim);
+
+       /* resource is signed: negative numbers are invalid too */
+       if(resource < 0 || resource >= RLIM_NLIMITS)
+               return -EINVAL;
+       if(rlim == nil)
+               return -EFAULT;
+
+       r->rlim_cur = RLIM_INFINITY;
+       r->rlim_max = RLIM_INFINITY;
+
+       switch(resource){
+       case RLIMIT_STACK:
+               r->rlim_cur = USTACK;
+               r->rlim_max = USTACK;
+               break;
+       case RLIMIT_CORE:
+               r->rlim_cur = 0;
+               break;
+       case RLIMIT_NPROC:
+               r->rlim_cur = MAXPROC;
+               r->rlim_max = MAXPROC;
+               break;
+       case RLIMIT_NOFILE:
+               r->rlim_cur = MAXFD;
+               r->rlim_max = MAXFD;
+               break;
+       }
+       return 0;
+}
+
+/*
+ * setrlimit: limits cannot be changed. after the same
+ * argument checks as sys_getrlimit() (-EINVAL for a resource
+ * number outside 0..RLIM_NLIMITS-1, -EFAULT for a nil rlim)
+ * the call always fails with -EPERM.
+ */
+int sys_setrlimit(long resource, void *rlim)
+{
+       trace("sys_setrlimit(%ld, %p)", resource, rlim);
+
+       /* resource is signed: negative numbers are invalid too */
+       if(resource < 0 || resource >= RLIM_NLIMITS)
+               return -EINVAL;
+       if(rlim == nil)
+               return -EFAULT;
+
+       return -EPERM;
+}
+
diff --git a/linux_emul_base/procdev.c b/linux_emul_base/procdev.c
new file mode 100644 (file)
index 0000000..1028a76
--- /dev/null
@@ -0,0 +1,732 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <mp.h>
+#include <libsec.h>
+
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Node kinds of the proc tree.  The values are used as indices
+ * into procdevtab[], so the order here has to match the order
+ * of the initializers there.  Everything from Qpid on belongs
+ * to a particular process.
+ */
+enum {
+       Qproc,
+       Qstat,
+       Qcpuinfo,
+       Qmeminfo,
+       Quptime,
+       Qloadavg,
+       Qself,
+       Qpid,
+       Qcwd,
+       Qcmdline,
+       Qenviron,
+       Qexe,
+       Qroot,
+       Qpidstat,
+       Qpidstatm,
+       Qstatus,
+       Qmaps,
+       Qfd,
+       Qfd1,
+       Qtask,
+       Qtask1,
+       Qmax,
+};
+
+/*
+ * Mode and name of every node kind, indexed by the Q* constants.
+ * The indentation of the initializers shows how the nodes nest.
+ * The "###" entries are placeholders for numeric names (pid, fd
+ * number, task id); path2q() matches those by their leading digit
+ * instead of comparing against the name.
+ */
+static struct {
+       int mode;
+       char *name;
+} procdevtab[] = {
+       0555|S_IFDIR,   "proc",
+               0444|S_IFREG,   "stat",
+               0444|S_IFREG,   "cpuinfo",
+               0444|S_IFREG,   "meminfo",
+               0444|S_IFREG,   "uptime",
+               0444|S_IFREG,   "loadavg",
+               0777|S_IFLNK,   "self",
+               0555|S_IFDIR,   "###",
+                       0777|S_IFLNK,   "cwd",
+                       0444|S_IFREG,   "cmdline",
+                       0444|S_IFREG,   "environ",
+                       0777|S_IFLNK,   "exe",
+                       0777|S_IFLNK,   "root",
+                       0444|S_IFREG,   "stat",
+                       0444|S_IFREG,   "statm",
+                       0444|S_IFREG,   "status",
+                       0444|S_IFREG,   "maps",
+                       0555|S_IFDIR,   "fd",
+                               0777|S_IFLNK,   "###",
+                       0555|S_IFDIR,   "task",
+                               0555|S_IFDIR,   "###",
+};
+
+/*
+ * An open file of the proc device.
+ *     q       node kind (Q* constant) the path resolved to
+ *     pid     process id taken from the path, -1 if it has none
+ *     lastoff offset just past the previous read; readproc()
+ *             regenerates the data when a read starts elsewhere
+ *     data    generated file content, nil until first needed
+ *     ndata   number of valid bytes in data
+ */
+typedef struct Procfile Procfile;
+struct Procfile
+{
+       Ufile;
+       int     q;
+       int     pid;
+       vlong lastoff;
+       char    *data;
+       int     ndata;
+};
+
+/*
+ * Translate a path into the index (Q* constant) of the procdevtab
+ * entry it names.  Returns -1 if the path matches no entry.
+ * The process id and descriptor number found in numeric components
+ * are stored through ppid and pfd (either may be nil); they are -1
+ * when the path contains none.
+ *
+ * The table is walked once from Qproc to Qmax while the path is
+ * consumed one component at a time.  The current component is
+ * terminated in place by overwriting its '/' with 0 and restored
+ * at the end of each iteration, so path has to be writable.
+ */
+static int
+path2q(char *path, int *ppid, int *pfd)
+{
+       int i, q, pid, fd;
+       char *x;
+
+       q = -1;
+       pid = -1;
+       fd = -1;
+       /* skip the first character (presumably the leading '/') */
+       path++;
+       for(i=Qproc; i<Qmax; i++){
+               if(x = strchr(path, '/'))
+                       *x = 0;
+               /* components starting with a digit match the "###" placeholders */
+               if(path[0]>='0' && path[0]<='9'){
+                       switch(i){
+                       case Qpid:
+                       case Qtask1:
+                               pid = atoi(path);
+                               goto match;
+                       case Qfd1:
+                               fd = atoi(path);
+                               goto match;
+                       }
+               }
+               if(strcmp(path, procdevtab[i].name) == 0){
+match:         if(x == nil){
+                               /* last component: this is the node */
+                               q = i;
+                               break;
+                       }
+                       /* "self" followed by more components acts like the caller's pid directory */
+                       if(i == Qself){ /* hack */
+                               pid = current->pid;
+                               i = Qpid;
+                       }
+                       if((procdevtab[i].mode & ~0777) == S_IFDIR){
+                               /* descend: continue with the next component */
+                               path = x+1;
+                               /* a task directory has the same children as a pid directory */
+                               if(i == Qtask1)
+                                       i = Qpid;
+                       }
+               }
+               if(x != nil)
+                       *x = '/';
+       }
+       if(ppid)
+               *ppid = pid;
+       if(pfd)
+               *pfd = fd;
+       return q;
+}
+
+/*
+ * the proc device also implements the functionality
+ * for /dev/std^(in out err) and /dev/fd. we just
+ * rewrite the path to the names used in /proc.
+ */
+/*
+ * Map the /dev aliases onto their /proc/self/fd names.
+ * The result is always a freshly allocated string, also
+ * when the path needs no rewriting.
+ */
+static char*
+rewritepath(char *path)
+{
+       if(strcmp(path, "/dev/stdin") == 0)
+               return kstrdup("/proc/self/fd/0");
+       if(strcmp(path, "/dev/stdout") == 0)
+               return kstrdup("/proc/self/fd/1");
+       if(strcmp(path, "/dev/stderr") == 0)
+               return kstrdup("/proc/self/fd/2");
+       /* whatever follows the "/dev/fd" prefix is appended after "fd" */
+       if(strncmp(path, "/dev/fd", 7) == 0)
+               return allocpath("/proc/self", "fd", path+7);
+       return kstrdup(path);
+}
+
+static int
+readlinkproc(char *path, char *buf, int len);
+
+/*
+ * Open a node of the proc device.
+ *
+ * Symbolic link nodes are resolved with readlinkproc() and the
+ * target is opened through fsopen().  All other nodes can only
+ * be opened read-only (-EPERM otherwise) and yield a Procfile
+ * stored in *pf.
+ *
+ * Returns 0 on success, -ENOENT when the path names no node, the
+ * process does not exist or a link has no target, or whatever
+ * fsopen() returns for a link target.
+ */
+static int
+openproc(char *path, int mode, int perm, Ufile **pf)
+{
+       char buf[256], *t;
+       int n, q, pid, err;
+       Procfile *f;
+
+       err = -ENOENT;
+       /* path is a private copy from here on; freed at out: unless handed to f */
+       path = rewritepath(path);
+       if((q = path2q(path, &pid, nil)) < 0)
+               goto out;
+       if((procdevtab[q].mode & ~0777) == S_IFLNK){
+               /* leave room for the terminating 0 added below */
+               n = readlinkproc(path, buf, sizeof(buf)-1);
+               if(n > 0){
+                       buf[n] = 0;
+                       err = fsopen(buf, mode, perm, pf);
+               }
+               goto out;
+       }
+       if((mode & O_ACCMODE) != O_RDONLY){
+               err = -EPERM;
+               goto out;
+       }
+       /* per-process nodes require the process to exist */
+       if(q >= Qpid){
+               qlock(&proctab);
+               if(getproc(pid) == nil){
+                       qunlock(&proctab);
+                       goto out;
+               }
+               qunlock(&proctab);
+       }
+
+       /* store the path with "self" replaced by the numeric pid */
+       /* hack */
+       if(strncmp(path, "/proc/self", 10) == 0){
+               t = ksmprint("/proc/%d%s", pid, path+10);
+               free(path); path = t;
+       }
+
+       f = kmallocz(sizeof(*f), 1);
+       f->ref = 1;
+       f->mode = mode;
+       /* the file takes over the path; clear ours so out: does not free it */
+       f->path = path; path = nil;
+       f->fd = -1;
+       f->dev = PROCDEV;
+       f->q = q;
+       f->pid = pid;
+       *pf = f;
+       err = 0;
+
+out:
+       free(path);
+       return err;
+}
+
+/*
+ * Close a proc file: release the generated content, if any.
+ * Always returns 0.
+ */
+static int
+closeproc(Ufile *file)
+{
+       Procfile *pf;
+
+       pf = (Procfile*)file;
+       /* free() accepts nil, so no guard is needed */
+       free(pf->data);
+       return 0;
+}
+
+/*
+ * Field indices of one /dev/sysstat line as split by
+ * getfields() in sysstat(); SSmax is the number of fields
+ * a valid line must have.
+ */
+enum {
+       SScpu,
+       SSswitches,
+       SSinterrupts,
+       SSsyscalls,
+       SSpagefaults,
+       SStlbmisses,
+       SStlbpurges,
+       SSloadavg,
+       SSidletime,
+       SSintrtime,
+       SSmax,
+};
+
+/*
+ * Build the cpu part of /proc/stat from /dev/sysstat.
+ *
+ * Returns an allocated string holding a summary "cpu" line, one
+ * "cpuN" line per parsed /dev/sysstat line, and the "btime" and
+ * "ctxt" lines; the caller frees it.  Returns nil when
+ * /dev/sysstat cannot be read.
+ *
+ * The total run time, idle time and load (100 - idle percentage
+ * of the first line) are stored through prun, pidle and pload
+ * when those are not nil.  run, idle and intr are static: each
+ * call adds the ticks elapsed since the previous call, weighted
+ * by the percentages found in the first /dev/sysstat line.
+ */
+static char*
+sysstat(ulong *prun, ulong *pidle, ulong *pload)
+{
+       char buf[1024], *p, *e, *t, *data;
+       ulong dt, swtch, user, sys, load;
+       static ulong run, idle, intr;
+       int n, fd;
+
+       data = nil;
+       swtch = user = sys = load = 0;
+
+       /* ticks elapsed since the previous call */
+       dt = (ulong)(((nsec() - boottime) * HZ) / 1000000000LL) - run;
+       run += dt;
+
+       n = 0;
+       if((fd = open("/dev/sysstat", OREAD)) >= 0){
+               n = read(fd, buf, sizeof(buf)-1);
+               close(fd);
+       }
+       if(n > 0){
+               buf[n] = 0;
+               p = buf;
+               while(e = strchr(p, '\n')){
+                       char *f[SSmax];
+                       *e = 0;
+                       if(getfields(p, f, SSmax, 1, "\t ") != SSmax)
+                               break;
+
+                       /* the first line feeds the accumulated counters and the summary */
+                       if(p == buf){
+                               swtch += atoi(f[SSswitches]);
+
+                               idle += (atoi(f[SSidletime]) * dt)/100;
+                               intr += (atoi(f[SSintrtime]) * dt)/100;
+
+                               load = 100-atoi(f[SSidletime]);
+
+                               user = run - idle - intr;
+                               sys = run - user;
+
+                               data = ksmprint("cpu %lud %lud %lud %lud %lud %lud %lud\n",
+                                       user, 0UL, sys, idle, 0UL, intr, 0UL);
+                       }
+                       t = ksmprint("%scpu%d %lud %lud %lud %lud %lud %lud %lud\n",
+                               data, atoi(f[SScpu]), user, 0UL, sys, idle, 0UL, intr, 0UL);
+                       free(data);
+                       data = t;
+
+                       p = e+1;
+               }
+               /*
+                * data is still nil when not even the first line could
+                * be parsed; never hand a nil pointer to %s.
+                */
+               t = ksmprint("%sbtime %lud\nctxt %lud\n", data != nil ? data : "",
+                               (ulong)(boottime/1000000000LL), swtch);
+               free(data);
+               data = t;
+       }
+       if(prun)
+               *prun = run;
+       if(pidle)
+               *pidle = idle;
+       if(pload)
+               *pload = load;
+
+       return data;
+}
+
+/*
+ * State string of a process as shown in /proc/<pid>/status.
+ * The first character alone is used for /proc/<pid>/stat.
+ * The result is a constant string and must not be freed.
+ */
+static char*
+procstat(Uproc *p)
+{
+       if(p->wstate & WEXITED)
+               return "Z (zombie)";
+       if(p->wstate & WSTOPPED)
+               return "T (stopped)";
+       if(p->state == nil)
+               return "R (running)";
+       return "S (sleeping)";
+}
+
+/*
+ * Command name of a process: the last path element of the
+ * comm string of the process found under p->pid, or "" when
+ * that process or its comm string is missing.  The result
+ * points into comm (or at a constant) and must not be freed.
+ */
+static char*
+procname(Uproc *p)
+{
+       Uproc *q;
+       char *slash;
+
+       q = getproc(p->pid);
+       if(q == nil || q->comm == nil)
+               return "";
+       slash = strrchr(q->comm, '/');
+       return slash != nil ? slash+1 : q->comm;
+}
+
+
+/*
+ * (Re)generate the content of proc file f into f->data and
+ * f->ndata, discarding any previous content.
+ *
+ * Per-process nodes (f->q >= Qpid) are produced with proctab
+ * locked; if the process is gone the file is left empty.
+ * Qenviron, Qmaps, Qcpuinfo and Qmeminfo produce no data.
+ */
+static void
+gendata(Procfile *f)
+{
+       char *s, *t;
+       int i, nproc, nready;
+       ulong tms[4];
+       Uproc *p;
+
+       f->ndata = 0;
+       if(s = f->data){
+               f->data = nil;
+               free(s);
+       }
+       s = nil;
+
+       if(f->q >= Qpid){
+               ulong vmsize, vmdat, vmlib, vmshr, vmstk, vmexe;
+
+               qlock(&proctab);
+               if((p = getproc(f->pid)) == nil){
+                       qunlock(&proctab);
+                       return;
+               }
+               switch(f->q){
+               case Qcmdline:
+                       p = getproc(p->pid);
+                       if(p == nil || p->comm == nil)
+                               break;
+                       /*
+                        * the data is what follows the first 0-terminated
+                        * string in comm; it is binary, so it bypasses the
+                        * strlen() at the end of this function.
+                        */
+                       i = strlen(p->comm)+1;
+                       if(i >= p->ncomm-2)
+                               break;
+                       f->ndata = p->ncomm-i-2;
+                       f->data = kmalloc(f->ndata);
+                       memmove(f->data, p->comm + i, f->ndata);
+                       qunlock(&proctab);
+                       return;
+
+               case Qenviron:
+                       break;
+               case Qpidstat:
+                       if(proctimes(p, tms) != 0)
+                               memset(tms, 0, sizeof(tms));
+                       vmsize = procmemstat(p, nil, nil, nil, nil, nil);
+                       s = ksmprint(
+                               "%d (%s) %c %d %d %d %d %d %lud %lud "
+                               "%lud %lud %lud %lud %lud %ld %ld %ld %ld %ld "
+                               "%ld %lud %lud %ld %lud %lud %lud %lud %lud %lud "
+                               "%lud %lud %lud %lud %lud %lud %lud %d %d\n",
+                               p->tid,
+                               procname(p),
+                               procstat(p)[0],
+                               p->ppid,
+                               p->pgid,
+                               p->psid,
+                               0,      /* tty */
+                               0,      /* tty pgrp */
+                               0UL,    /* flags */
+                               0UL, 0UL, 0UL, 0UL,     /* pagefault stats */
+                               tms[0], /* utime */
+                               tms[1], /* stime */
+                               tms[2], /* cutime */
+                               tms[3], /* cstime */
+                               0UL,    /* priority */
+                               0UL,    /* nice */
+                               0UL,    /* always 0UL */
+                               0UL,    /* time to next alarm */
+                               (ulong)(((p->starttime - boottime) * HZ) / 1000000000LL),
+                               vmsize, /* vm size in bytes */
+                               vmsize, /* vm working set */
+                               0UL,    /* rlim */
+                               p->codestart,
+                               p->codeend,
+                               p->stackstart,
+                               0UL,    /* SP */
+                               0UL,    /* PC */
+                               0UL,    /* pending signal mask */
+                               0UL,    /* blocked signal mask */
+                               0UL,    /* ignored signal mask */
+                               0UL,    /* caught signal mask */
+                               0UL,    /* wchan */
+                               0UL,    /* nswap */
+                               0UL,    /* nswap children */
+                               p->exitsignal,
+                               0);     /* cpu */
+                       break;
+               case Qpidstatm:
+                       vmsize = procmemstat(p, &vmdat, &vmlib, &vmshr, &vmstk, &vmexe);
+                       s = ksmprint("%lud %lud %lud %lud %lud %lud %lud\n",
+                               vmsize/PAGESIZE, vmsize/PAGESIZE, vmshr/PAGESIZE,
+                               vmexe/PAGESIZE, vmstk/PAGESIZE, vmlib/PAGESIZE, 0UL);
+                       break;
+               case Qstatus:
+                       s = ksmprint(
+                               "Name:\t%s\n"
+                               "State:\t%s\n"
+                               "Tgid:\t%d\n"
+                               "Pid:\t%d\n"
+                               "PPid:\t%d\n"
+                               "Uid:\t%d\t%d\t%d\t%d\n"
+                               "Gid:\t%d\t%d\t%d\t%d\n"
+                               "FDSize:\t%d\n"
+                               "Threads:\t%d\n",
+                               procname(p),
+                               procstat(p),
+                               p->pid,
+                               p->tid,
+                               p->ppid,
+                               p->uid, p->uid, p->uid, p->uid,
+                               p->gid, p->gid, p->gid, p->gid,
+                               MAXFD,
+                               threadcount(p->pid));
+                       break;
+               case Qmaps:
+                       break;
+               }
+               qunlock(&proctab);
+       } else {
+               ulong run, idle, load;
+
+               /* count the processes, and those not waiting on anything */
+               nproc = nready = 0;
+               qlock(&proctab);
+               for(i=0; i<MAXPROC; i++){
+                       p = getprocn(i);
+                       if(p == nil)
+                               continue;
+                       nproc++;
+                       if(p->state == nil)
+                               nready++;
+               }
+               /* from here on i holds the next pid to be handed out */
+               i = proctab.nextpid;
+               qunlock(&proctab);
+
+               switch(f->q){
+               case Qstat:
+                       /* sysstat() returns nil when /dev/sysstat is unreadable */
+                       s = sysstat(nil, nil, nil);
+                       t = ksmprint(
+                               "%s"
+                               "processes %d\n"
+                               "procs_running %d\n"
+                               "procs_blocked %d\n",
+                               s != nil ? s : "",
+                               i,
+                               nready,
+                               nproc-nready);
+                       free(s);
+                       s = t;
+                       break;
+               case Qcpuinfo:
+                       break;
+               case Qmeminfo:
+                       break;
+               case Quptime:
+                       free(sysstat(&run, &idle, nil));
+                       s = ksmprint("%lud.%lud %lud.%lud\n", run/HZ, run%HZ, idle/HZ, idle%HZ);
+                       break;
+               case Qloadavg:
+                       free(sysstat(nil, nil, &load));
+                       /* load is in hundredths: pad the fraction so 5 reads 0.05, not 0.5 */
+                       s = ksmprint("%lud.%.2lud 0 0 %d/%d %d\n", load/100, load%100, nready, nproc, i);
+                       break;
+               }
+       }
+
+       f->data = s;
+       f->ndata = s ? strlen(s) : 0;
+}
+
+/*
+ * Size of a proc file: the length of its generated content,
+ * which is produced first if none is cached.
+ */
+static vlong
+sizeproc(Ufile *file)
+{
+       Procfile *pf;
+
+       pf = (Procfile*)file;
+       if(pf->data == nil)
+               gendata(pf);
+       return pf->ndata;
+}
+
+/*
+ * Read up to len bytes at offset off from the generated content.
+ * The content is regenerated when nothing is cached or when the
+ * read does not continue where the previous one stopped.
+ * Returns the number of bytes copied, 0 at or past the end.
+ */
+static int
+readproc(Ufile *file, void *buf, int len, vlong off)
+{
+       Procfile *pf;
+       int n;
+
+       pf = (Procfile*)file;
+       if(pf->data == nil || off != pf->lastoff)
+               gendata(pf);
+       if(pf->data == nil || off >= pf->ndata)
+               return 0;
+
+       n = pf->ndata - off;
+       if(n > len)
+               n = len;
+       memmove(buf, pf->data + off, n);
+       pf->lastoff = off + n;
+       return n;
+}
+
+/*
+ * Read the target of a symbolic link node into buf.
+ *
+ * Returns the number of bytes stored (at most len, without a
+ * terminating 0), 0 when the node has no link target, or
+ * -ENOENT when the path names no node, the process does not
+ * exist or the descriptor has no file with a path.
+ */
+static int
+readlinkproc(char *path, char *buf, int len)
+{
+       int err, q, pid, fd;
+       char *data;
+       Uproc *p;
+       Ufile *a;
+
+       err = -ENOENT;
+       /* private copy of the path, freed at out: */
+       path = rewritepath(path);
+       if((q = path2q(path, &pid, &fd)) < 0)
+               goto out;
+       data = nil;
+       if(q >= Qpid){
+               /* per-process links: look at the process with proctab locked */
+               qlock(&proctab);
+               if((p = getproc(pid)) == nil){
+                       qunlock(&proctab);
+                       goto out;
+               }
+               switch(q){
+               case Qcwd:
+                       data = kstrdup(p->cwd);
+                       break;
+               case Qexe:
+                       p = getproc(p->pid);
+                       if(p == nil || p->comm == nil)
+                               break;
+                       data = kstrdup(p->comm);
+                       break;
+               case Qroot:
+                       data = kstrdup(p->root ? p->root : "/");
+                       break;
+               case Qfd1:
+                       a = procfdgetfile(p, fd);
+                       if(a == nil || a->path == nil){
+                               putfile(a);
+                               qunlock(&proctab);
+                               goto out;
+                       }
+                       data = kstrdup(a->path);
+                       putfile(a);
+                       break;
+               }
+               qunlock(&proctab);
+       } else {
+               switch(q){
+               case Qself:
+                       data = ksmprint("/proc/%d", current->pid);
+                       break;
+               }
+       }
+       /* err doubles as the byte count from here on */
+       err = 0;
+       if(data){
+               err = strlen(data);
+               if(err > len)
+                       err = len;
+               memmove(buf, data, err);
+               free(data);
+       }
+out:
+       free(path);
+       return err;
+}
+
+/*
+ * List the directory an open proc file refers to.  The entries
+ * are appended to the list at *pd; the number of entries added
+ * is returned.  A failing newdirent() ends the listing early.
+ *
+ *     Qproc           the fixed files up to the pid placeholder,
+ *                     then one directory per process
+ *     Qtask           one directory per thread of the process
+ *     Qpid, Qtask1    "task" (pid directories only), "fd" and the
+ *                     per-process files
+ *     Qfd             one link per descriptor that has a path
+ */
+static int
+readdirproc(Ufile *file, Udirent **pd)
+{
+       Procfile *f = (Procfile*)file;
+       char buf[12];
+       Uproc *p;
+       Ufile *a;
+       int n, i, mode;
+
+       n = 0;
+       switch(f->q){
+       case Qproc:
+               for(i=f->q+1; (procdevtab[i].mode & ~0777) != S_IFDIR; i++){
+                       if((*pd = newdirent(f->path, procdevtab[i].name, procdevtab[i].mode)) == nil)
+                               break;
+                       pd = &((*pd)->next);
+                       n++;
+               }
+               /* no break */
+       case Qtask:
+               /*
+                * The numeric entries are the pid (under /proc) or task
+                * placeholders of procdevtab.  The mode has to come from
+                * those entries: the loop index below runs over the
+                * process table and must not be used to index procdevtab.
+                */
+               mode = procdevtab[f->q == Qproc ? Qpid : Qtask1].mode;
+               qlock(&proctab);
+               for(i=0; i<MAXPROC; i++){
+                       p = getprocn(i);
+                       if(p == nil)
+                               continue;
+                       /* /proc only lists entries whose pid equals their tid */
+                       if((f->q == Qproc) && (p->pid != p->tid))
+                               continue;
+                       /* task only lists the threads of this process */
+                       if((f->q == Qtask) && (p->pid != f->pid))
+                               continue;
+                       snprint(buf, sizeof(buf), "%d", p->tid);
+                       if((*pd = newdirent(f->path, buf, mode)) == nil)
+                               break;
+                       pd = &((*pd)->next);
+                       n++;
+               }
+               qunlock(&proctab);
+               break;
+
+       case Qpid:
+               if((*pd = newdirent(f->path, procdevtab[Qtask].name, procdevtab[Qtask].mode)) == nil)
+                       break;
+               pd = &((*pd)->next);
+               n++;
+               /* no break */
+       case Qtask1:
+               if((*pd = newdirent(f->path, procdevtab[Qfd].name, procdevtab[Qfd].mode)) == nil)
+                       break;
+               pd = &((*pd)->next);
+               n++;
+               /* the per-process files up to the first directory entry (fd) */
+               for(i=Qpid+1; (procdevtab[i].mode & ~0777) != S_IFDIR; i++){
+                       if((*pd = newdirent(f->path, procdevtab[i].name, procdevtab[i].mode)) == nil)
+                               break;
+                       pd = &((*pd)->next);
+                       n++;
+               }
+               break;
+
+       case Qfd:
+               qlock(&proctab);
+               if((p = getproc(f->pid)) == nil){
+                       qunlock(&proctab);
+                       break;
+               }
+               for(i=0; i<MAXFD; i++){
+                       a = procfdgetfile(p, i);
+                       if(a == nil || a->path == nil){
+                               putfile(a);
+                               continue;
+                       }
+                       putfile(a);
+                       snprint(buf, sizeof(buf), "%d", i);
+                       if((*pd = newdirent(f->path, buf, procdevtab[Qfd1].mode)) == nil)
+                               break;
+                       pd = &((*pd)->next);
+                       n++;
+               }
+               qunlock(&proctab);
+               break;
+       }
+
+       return n;
+}
+
+/*
+ * Fill in *s for the node named by path.  The second argument
+ * is unused.
+ *
+ * Per-process nodes get the uid, gid and start time of their
+ * process; all other nodes get the caller's uid and gid and the
+ * boot time.  The size is always reported as 0.
+ *
+ * Returns 0 on success, -ENOENT when the path names no node, the
+ * process does not exist or the descriptor has no file with a path.
+ */
+static int
+statproc(char *path, int, Ustat *s)
+{
+       int q, pid, fd, uid, gid, err;
+       ulong ctime;
+       Uproc *p;
+       Ufile *a;
+
+       err = -ENOENT;
+       /* private copy of the path, freed at out: */
+       path = rewritepath(path);
+       if((q = path2q(path, &pid, &fd)) < 0)
+               goto out;
+       if(q >= Qpid){
+               qlock(&proctab);
+               if((p = getproc(pid)) == nil){
+                       qunlock(&proctab);
+                       goto out;
+               }
+               /* a descriptor link only exists while the descriptor has a file with a path */
+               if(q == Qfd1){
+                       a = procfdgetfile(p, fd);
+                       if(a == nil || a->path == nil){
+                               putfile(a);
+                               qunlock(&proctab);
+                               goto out;
+                       }
+                       putfile(a);
+               }
+               uid = p->uid;
+               gid = p->gid;
+               /* nanoseconds to seconds */
+               ctime = p->starttime/1000000000LL;
+               qunlock(&proctab);
+       } else {
+               uid = current->uid;
+               gid = current->gid;
+               ctime = boottime/1000000000LL;
+       }
+       err = 0;
+       s->mode = procdevtab[q].mode;
+       s->uid = uid;
+       s->gid = gid;
+       s->size = 0;
+       s->ino = hashpath(path);
+       s->dev = 0;
+       s->rdev = 0;
+       s->atime = s->mtime = s->ctime = ctime;
+out:
+       free(path);
+       return err;
+}
+
+/*
+ * Stat an open proc file by passing its path to fsstat().
+ */
+static int
+fstatproc(Ufile *f, Ustat *s)
+{
+       return fsstat(f->path, 0, s);
+}
+
+/*
+ * Operations of the proc device.  Only the operations listed
+ * here are provided; no write operation is set.
+ */
+static Udev procdev =
+{
+       .open = openproc,
+       .read = readproc,
+       .size = sizeproc,
+       .readlink = readlinkproc,
+       .readdir = readdirproc,
+       .close = closeproc,
+       .stat = statproc,
+       .fstat = fstatproc,
+};
+
+/*
+ * Register the proc device in devtab and mount it on /proc and
+ * on the /dev names that rewritepath() maps into /proc/self/fd.
+ */
+void procdevinit(void)
+{
+       devtab[PROCDEV] = &procdev;
+
+       fsmount(&procdev, "/proc");
+       fsmount(&procdev, "/dev/fd");
+       fsmount(&procdev, "/dev/stdin");
+       fsmount(&procdev, "/dev/stdout");
+       fsmount(&procdev, "/dev/stderr");
+}
diff --git a/linux_emul_base/ptydev.c b/linux_emul_base/ptydev.c
new file mode 100644 (file)
index 0000000..80de36f
--- /dev/null
@@ -0,0 +1,944 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Termios Termios;
+typedef struct Winsize Winsize;
+typedef struct Cbuf Cbuf;
+typedef struct Tty Tty;
+typedef struct Pty Pty;
+typedef struct Ptyfile Ptyfile;
+
+/* iflags: input mode bits, kept in Termios.iflag */
+enum {
+       IGNBRK  = 01,
+       BRKINT  = 02,
+       IGNPAR  = 04,
+       PARMRK  = 010,
+       INPCK   = 020,
+       ISTRIP  = 040,
+       INLCR   = 0100,
+       IGNCR   = 0200,
+       ICRNL   = 0400,
+       IUCLC   = 01000,
+       IXON    = 02000,
+       IXANY   = 04000,
+       IXOFF   = 010000,
+       IMAXBEL = 020000,
+       IUTF8   = 040000,
+};
+
+/*
+ * oflags: output mode bits, kept in Termios.oflag.
+ * The *DLY constants are field masks; the numbered constants
+ * after each mask are the values that field can take.
+ */
+enum {
+       OPOST   = 01,
+       OLCUC   = 02,
+       ONLCR   = 04,
+       OCRNL   = 010,
+       ONOCR   = 020,
+       ONLRET  = 040,
+       OFILL   = 0100,
+       OFDEL   = 0200,
+       NLDLY   = 0400,
+       NL0             = 0,
+       NL1             = 0400,
+       CRDLY   = 03000,
+       CR0             = 0,
+       CR1             = 01000,
+       CR2             = 02000,
+       CR3             = 03000,
+       TABDLY  = 014000,
+       TAB0    = 0,
+       TAB1    = 04000,
+       TAB2    = 010000,
+       TAB3    = 014000,
+       XTABS   = 014000,
+       BSDLY   = 020000,
+       BS0             = 0,
+       BS1             = 020000,
+       VTDLY   = 040000,
+       VT0             = 0,
+       VT1             = 040000,
+       FFDLY   = 0100000,
+       FF0             = 0,
+       FF1             = 0100000,
+};
+
+/*
+ * cflags: control mode bits (presumably for Termios.cflag --
+ * confirm).  CSIZE is the mask for the CS5..CS8 values.
+ */
+enum {
+       CSIZE   = 060,
+       CS5             = 0,
+       CS6             = 020,
+       CS7             = 040,
+       CS8             = 060,
+       CREAD   = 0200,
+       CLOCAL  = 04000,
+       HUPCL   = 02000,
+};
+
+/* lflags: local mode bits, kept in Termios.lflag */
+enum {
+       ISIG            = 01,
+       ICANON  = 02,
+       XCASE   = 04,
+       ECHO    = 010,
+       ECHOE   = 020,
+       ECHOK   = 040,
+       ECHONL  = 0100,
+       NOFLSH  = 0200,
+       TOSTOP  = 0400,
+       ECHOCTL = 01000,
+       ECHOPRT = 02000,
+       ECHOKE  = 04000,
+       FLUSH0  = 010000,
+       PENDIN  = 040000,
+       IEXTEN  = 0100000,
+};
+
+/*
+ * cc: indices into the control character array Termios.cc;
+ * NCCS is the size of that array.
+ */
+enum {
+       VINTR   = 0,
+       VQUIT,
+       VERASE,
+       VKILL,
+       VEOF,
+       VTIME,
+       VMIN,
+       VSWTCH,
+       VSTART,
+       VSTOP,
+       VSUSP,
+       VEOL,
+       VREPRINT,
+       VDISCARD,
+       VERASEW,
+       VLNEXT,
+       VEOL2,
+       NCCS,
+};
+
+/*
+ * Terminal settings of a Tty.
+ * NOTE(review): presumably laid out like the termios structure
+ * exchanged with Linux programs -- confirm before changing it.
+ */
+struct Termios
+{
+       int             iflag;          /* input modes */
+       int             oflag;          /* output modes */
+       int             cflag;          /* control modes */
+       int             lflag;          /* local modes */
+       uchar   cline;
+       uchar   cc[NCCS];       /* control characters */
+};
+
+/*
+ * Window size of a Tty.
+ * NOTE(review): presumably rows and columns in characters and
+ * px/py in pixels, as in the Linux winsize structure -- confirm.
+ */
+struct Winsize
+{
+       ushort  row;
+       ushort  col;
+       ushort  px;
+       ushort  py;
+};
+
+/*
+ * Character ring buffer used by cbput(), cbget() and cbfill().
+ * rp is the index of the next byte to read, wp the index of the
+ * next byte to write; the buffer is empty when they are equal.
+ * The indices wrap by masking with sizeof(cb)-1, so the size of
+ * cb must stay a power of two.  cbput() refuses to advance wp
+ * onto rp, so one slot always stays unused.
+ */
+struct Cbuf
+{
+       int     rp;
+       int     wp;
+       char    cb[256];
+};
+
+/*
+ * State of one terminal line discipline.
+ *     t       terminal settings
+ *     escaped set while the previous input character was an
+ *             escape (see ttywriteinput())
+ *     eol     number of line terminators (newline or VEOF) in rb;
+ *             counted up on input, down in ttyread()
+ *     pgid    process group that receives SIGINT and SIGQUIT
+ *     wb      output: filled by ttywrite() and echo()
+ *     rb      input: filled by ttywriteinput(), drained by ttyread()
+ */
+struct Tty
+{
+       Termios t;
+       Winsize winsize;
+
+       int             escaped;
+       int             eol;
+
+       int             pgid;
+
+       Cbuf            wb;
+       Cbuf            rb;
+};
+
+/*
+ * A pseudo terminal: a Tty (embedded anonymously) together with
+ * a reference count and a lock.
+ * NOTE(review): q[] presumably holds the read and write wait
+ * queues of the two ends (master and slave), and id presumably is
+ * the index in ptys[] -- confirm against the code using them.
+ */
+struct Pty
+{
+       Tty;
+
+       int     id;
+       int     closed;
+       int     locked;
+
+       struct {
+               Uwaitq r;
+               Uwaitq w;
+       }       q[2];
+
+       Ref;
+       QLock;
+};
+
+/*
+ * An open file on a pseudo terminal.
+ * NOTE(review): master presumably tells the master end from the
+ * slave end -- confirm.
+ */
+struct Ptyfile
+{
+       Ufile;
+
+       Pty     *pty;
+
+       int     master;
+};
+
+static Pty *ptys[64];
+
+/*
+ * Append character c to ring buffer b.
+ * Returns 0 on success, -1 when the buffer is full.
+ */
+int cbput(Cbuf *b, char c)
+{
+       int next;
+
+       next = (b->wp + 1) & (sizeof(b->cb)-1);
+       /* the write index never catches up with the read index */
+       if(next == b->rp)
+               return -1;
+       b->cb[b->wp] = c;
+       b->wp = next;
+       return 0;
+}
+
+/*
+ * Remove and return the oldest character of ring buffer b.
+ * Returns the character as a value from 0 to 255, or -1 when
+ * the buffer is empty.
+ */
+int cbget(Cbuf *b)
+{
+       uchar c;
+
+       if(b->rp == b->wp)
+               return -1;
+       /*
+        * fetch through an unsigned type: with a signed char, bytes
+        * from 0x80 up would be returned as negative values and be
+        * mistaken for the "empty" result by callers testing for < 0.
+        */
+       c = b->cb[b->rp];
+       b->rp = (b->rp + 1) & (sizeof(b->cb)-1);
+       return c;
+}
+
+/*
+ * Number of characters currently stored in ring buffer b.
+ */
+int cbfill(Cbuf *b)
+{
+       int used;
+
+       used = (b->wp - b->rp) & (sizeof(b->cb)-1);
+       return used;
+}
+
+/*
+ * Reset the terminal settings of t to their defaults: CR is
+ * mapped to NL on input, NL to CR NL on output, and canonical
+ * mode with echo is enabled.  All control characters are 0.
+ * If there is a current process, its process group becomes the
+ * terminal's process group.  The buffers are not touched.
+ */
+void ttyinit(Tty *t)
+{
+       memset(&t->t, 0, sizeof(t->t));
+
+       t->t.iflag = ICRNL;
+       t->t.oflag = OPOST|ONLCR|NL0|CR0|TAB0|BS0|VT0|FF0;
+       t->t.lflag = ICANON|IEXTEN|ECHO|ECHOE|ECHOK;
+
+       if(current)
+               t->pgid = current->pgid;
+}
+
+/*
+ * Write len bytes from buf to the output buffer of t, applying
+ * output processing when OPOST is set.  Stops when the output
+ * buffer is full.  Returns the number of bytes of buf consumed.
+ */
+int ttywrite(Tty *t, char *buf, int len)
+{
+       char *p, *e;
+
+       for(p=buf, e=buf+len; p<e; p++){
+               char c;
+
+               c = *p;
+               /* no output processing: pass the byte through */
+               if((t->t.oflag & OPOST) == 0) {
+                       if(cbput(&t->wb, c) < 0)
+                               break;
+                       continue;
+               }
+               switch(c) {
+               case '\n':
+                       /*
+                        * NOTE(review): if the CR fits but the NL does not, the
+                        * NL is not counted as consumed, so a caller retrying
+                        * the rest will emit a second CR.
+                        */
+                       if(t->t.oflag & ONLCR) {
+                               if(cbput(&t->wb, '\r') < 0)
+                                       goto done;
+                       }
+                       if(cbput(&t->wb, c) < 0)
+                               goto done;
+                       break;
+                       
+               case '\t':
+                       /*
+                        * tabs become 8 spaces; a full buffer is ignored
+                        * here, so spaces can be lost while the tab still
+                        * counts as consumed.
+                        */
+                       if((t->t.oflag & TAB3) == TAB3) {
+                               int tab;
+
+                               tab = 8;
+                               while(tab--)
+                                       cbput(&t->wb, ' ');
+                               break;
+                       }
+                       /* Fall Through */
+               default:
+                       /* map lower case to upper case */
+                       if(t->t.oflag & OLCUC)
+                               if(c >= 'a' && c <= 'z')
+                                       c = 'A' + (c-'a');
+                       if(cbput(&t->wb, c) < 0)
+                               goto done;
+               }
+       }
+done:
+       return p-buf;
+}
+
+/*
+ * Tell whether ttyread() would return data.  In canonical mode
+ * that requires a complete line, otherwise any buffered input.
+ * Returns 1 and, if n is not nil, stores the number of buffered
+ * characters in *n; returns 0 without touching *n otherwise.
+ */
+int ttycanread(Tty *t, int *n)
+{
+       int avail, ready;
+
+       avail = cbfill(&t->rb);
+       if(t->t.lflag & ICANON)
+               ready = t->eol != 0;
+       else
+               ready = avail != 0;
+       if(!ready)
+               return 0;
+       if(n != nil)
+               *n = avail;
+       return 1;
+}
+
+/*
+ * Read up to len bytes of input from t into buf and return the
+ * number of bytes stored.
+ *
+ * In canonical mode nothing is returned until a complete line is
+ * buffered, and at most one line is returned per call: a newline
+ * ends the line and is included, a 0 byte ends it and is dropped.
+ * Otherwise whatever is buffered is returned.
+ */
+int ttyread(Tty *t, char *buf, int len)
+{
+       char *p, *e;
+
+       if((t->t.lflag & ICANON) && t->eol == 0)
+               return 0;
+
+       for(p=buf, e=buf+len; p<e; p++){
+               int c;
+
+               /* a negative result means the buffer is empty */
+               if((c = cbget(&t->rb)) < 0)
+                       break;
+
+               /* each terminator taken out of the buffer lowers the line count */
+               if(c==0 || c=='\n'){
+                       t->eol--;
+                       if(t->t.lflag & ICANON){
+                               if(c == 0)
+                                       break;
+                               *p++ = c;
+                               break;
+                       }
+               }
+
+               *p = c;
+       }
+       return p-buf;
+}
+
+
+/*
+ * Echo input character c to the output buffer of t.
+ * With ECHO set, CR, NL and TAB are translated according to the
+ * output flags and everything else is copied unchanged.  Without
+ * ECHO only a newline is echoed, and only if both ECHONL and
+ * ICANON are set.  A full output buffer is silently ignored.
+ */
+static void
+echo(Tty *t, char c)
+{
+       if(t->t.lflag & ECHO) {
+               switch(c) {
+               case '\r':
+                       if(t->t.oflag & OCRNL) {
+                               cbput(&t->wb, '\n');
+                               break;
+                       }
+                       cbput(&t->wb, c);
+                       break;
+               case '\n':
+                       if(t->t.oflag & ONLCR)
+                               cbput(&t->wb, '\r');
+                       cbput(&t->wb, '\n');
+                       break;
+               case '\t':
+                       /* tabs become 8 spaces */
+                       if((t->t.oflag & TAB3) == TAB3) {
+                               int tab;
+
+                               tab = 8;
+                               while(tab--)
+                                       cbput(&t->wb, ' ');
+                               break;
+                       }
+                       /* Fall Through */
+               default:
+                       cbput(&t->wb, c);
+                       break;
+               }
+       }
+       else
+       if(c == '\n' && (t->t.lflag&(ECHONL|ICANON)) == (ECHONL|ICANON)) {
+               if(t->t.oflag & ONLCR)
+                       cbput(&t->wb, '\r');
+               cbput(&t->wb, '\n');
+       }
+}
+
+/*
+ * Erase the most recently typed character from the input buffer
+ * of t and echo the erasure.  Characters of a completed line are
+ * never erased: a 0 byte or newline stops the erase.
+ * Returns 1 if a character was removed, 0 otherwise.
+ */
+static int
+bs(Tty *t)
+{
+       char c;
+       int x;
+
+       if(cbfill(&t->rb) == 0)
+               return 0;
+       /* index of the last character written */
+       x = (t->rb.wp-1)&(sizeof(t->rb.cb)-1);
+       c = t->rb.cb[x];
+       if(c == 0 || c == '\n')
+               return 0;
+       /* moving the write index back drops the character */
+       t->rb.wp = x;
+       echo(t, '\b');
+       /* with ECHOE, blank out the character on the screen as well */
+       if(t->t.lflag & ECHOE) {
+               echo(t, ' ');
+               echo(t, '\b');
+       }
+       return 1;
+}
+
+/*
+ * Feed len bytes from buf into the tty as if they had been typed
+ * (writepty calls this for the master side).  Per character it applies
+ * the iflag translations (ISTRIP, IGNCR/ICRNL/INLCR), sends SIGINT or
+ * SIGQUIT to the tty's process group under ISIG, handles erase, kill
+ * and backslash escaping in canonical mode, echoes, and queues the
+ * result in t->rb.
+ *
+ * Returns the number of bytes consumed from buf.  With IXON set the
+ * loop stops right after a VSTOP character, so the count can be short.
+ */
+int ttywriteinput(Tty *t, char *buf, int len)
+{
+       char *p, *e;
+
+       for(p=buf, e=buf+len; p<e; p++){
+               char c;
+
+               c = *p;
+
+               if(t->t.iflag & ISTRIP)
+                       c &= 0177;
+
+               /* VSTOP is consumed (p++) but nothing after it is processed */
+               if((t->t.iflag & IXON) && c == t->t.cc[VSTOP]) {
+                       p++;
+                       break;
+               }
+
+               /* CR/NL input mapping */
+               switch(c) {
+               case '\r':
+                       if(t->t.iflag & IGNCR)
+                               continue;
+                       if(t->t.iflag & ICRNL)
+                               c = '\n';
+                       break;
+               case '\n':
+                       if(t->t.iflag&INLCR)
+                               c = '\r';
+                       break;
+               }
+
+               /* signal characters are swallowed, even when no pgid is set */
+               if(t->t.lflag & ISIG) {
+                       if(c == t->t.cc[VINTR]){
+                               if(t->pgid > 0)
+                                       sys_kill(-t->pgid, SIGINT);
+                               continue;
+                       }
+                       if(c == t->t.cc[VQUIT]){
+                               if(t->pgid > 0)
+                                       sys_kill(-t->pgid, SIGQUIT);
+                               continue;
+                       }
+               }
+
+               /* line editing, unless the previous character was a backslash */
+               if((t->t.lflag & ICANON) && t->escaped == 0) {
+                       if(c == t->t.cc[VERASE]) {
+                               bs(t);
+                               continue;
+                       }
+                       if(c == t->t.cc[VKILL]) {
+                               /* erase back to the start of the current line */
+                               while(bs(t))
+                                       ;
+                               if(t->t.lflag & ECHOK)
+                                       echo(t, '\n');
+                               continue;
+                       }
+               }
+
+               /* count a completed line (unescaped VEOF or newline) */
+               if(t->escaped == 0 && (c == t->t.cc[VEOF] || c == '\n'))
+                       t->eol++;
+
+               /* raw mode: echo and queue the byte unchanged */
+               if((t->t.lflag & ICANON) == 0) {
+                       echo(t, c);
+                       cbput(&t->rb, c);
+                       continue;
+               }
+
+               /* back over the backslash that was echoed for the escape */
+               if(t->escaped) 
+                       echo(t, '\b');
+
+               if(c != t->t.cc[VEOF])
+                       echo(t, c);
+
+               if(c != '\\') {
+                       /* VEOF is stored as a 0 byte in the input buffer */
+                       if(c == t->t.cc[VEOF])
+                               c = 0;
+                       cbput(&t->rb, c);
+                       t->escaped = 0;
+                       continue;
+               }
+               /* backslash: the first one arms the escape, a second is queued literally */
+               if(t->escaped) {
+                       cbput(&t->rb, '\\');
+                       t->escaped = 0;
+               }
+               else
+                       t->escaped = 1;
+       }
+
+       return p-buf;
+}
+
+/*
+ * Report whether the tty's output buffer holds data.  When n is not
+ * nil it receives the fill count returned by cbfill.  Returns 1 if
+ * that count is positive, otherwise 0.
+ */
+int ttycanreadoutput(Tty *t, int *n)
+{
+       int pending;
+
+       pending = cbfill(&t->wb);
+       if(n != nil)
+               *n = pending;
+       if(pending > 0)
+               return 1;
+       return 0;
+}
+
+/*
+ * Drain up to len bytes from the tty's output buffer into buf, stopping
+ * as soon as cbget reports a negative value.  Returns the number of
+ * bytes stored.
+ */
+int ttyreadoutput(Tty *t, char *buf, int len)
+{
+       int n, ch;
+
+       n = 0;
+       while(n < len){
+               ch = cbget(&t->wb);
+               if(ch < 0)
+                       break;
+               buf[n++] = ch;
+       }
+       return n;
+}
+
+/*
+ * Poll handler for a pty file.  Registers the caller on this side's
+ * read queue and reports readiness: POLLOUT is always set, POLLIN when
+ * something can be read, and POLLIN|POLLHUP on the slave once the pty
+ * is marked closed with nothing left to read.  Files that carry no pty
+ * report 0.
+ */
+static int
+pollpty(Ufile *f, void *tab)
+{
+       Ptyfile *pf;
+       Pty *pty;
+       int events, ready;
+
+       pf = (Ptyfile*)f;
+       pty = pf->pty;
+       if(pty == nil)
+               return 0;
+
+       events = POLLOUT;
+       qlock(pty);
+       if(pf->master){
+               pollwait(pf, &pty->q[1].r, tab);
+               ready = ttycanreadoutput(pty, nil);
+       }else{
+               pollwait(pf, &pty->q[0].r, tab);
+               ready = ttycanread(pty, nil);
+       }
+       if(ready)
+               events |= POLLIN;
+       else if(!pf->master && pty->closed)
+               events |= POLLIN | POLLHUP;
+       qunlock(pty);
+
+       return events;
+}
+
+/*
+ * Read from a pty file.  The master reads what the slave wrote (tty
+ * output, via ttyreadoutput); the slave reads processed input (via
+ * ttyread).  When nothing is readable the call fails with -EIO on the
+ * slave of a closed pty, fails with -EAGAIN for O_NONBLOCK files, and
+ * otherwise sleeps on this side's read queue and retries.
+ *
+ * Returns the byte count from the tty read routine, or a negative
+ * error; -EPERM for files that carry no pty.
+ */
+static int
+readpty(Ufile *f, void *data, int len, vlong)
+{
+       int err;
+       Ptyfile *p = (Ptyfile*)f;
+
+       if(p->pty == nil)
+               return -EPERM;
+       qlock(p->pty);
+       for(;;){
+               if(p->master){
+                       err = ttycanreadoutput(p->pty, nil);
+               } else {
+                       err = ttycanread(p->pty, nil);
+               }
+               if(err > 0){
+                       if(p->master){
+                               err = ttyreadoutput(p->pty, (char*)data, len);
+                       }else{
+                               err = ttyread(p->pty, (char*)data, len);
+                       }
+               } else {
+                       if(p->master == 0 && p->pty->closed){
+                               err = -EIO;
+                       } else if(p->mode & O_NONBLOCK){        
+                               err = -EAGAIN;
+                       } else {
+                               /* sleepq returning 0 means try again; anything else is returned to the caller */
+                               /* NOTE(review): presumably sleepq drops and retakes the pty lock while asleep; confirm */
+                               if((err = sleepq(&p->pty->q[p->master].r, p->pty, 1)) == 0)
+                                       continue;
+                       }
+               }
+               /* wake writers on the opposite side before returning */
+               wakeq(&p->pty->q[!p->master].w, MAXPROC);
+               break;
+       }
+       qunlock(p->pty);
+
+       return err;
+}
+
+/*
+ * Write to a pty file.  Data written to the master is fed in as
+ * keyboard input (ttywriteinput); data written to the slave goes
+ * through ttywrite.  A zero-length write returns 0 at once.  If the tty
+ * routine accepts nothing the caller sleeps on this side's write queue
+ * and retries.
+ *
+ * Returns the count from the tty write routine, -EIO once the pty is
+ * marked closed, -EPERM for files that carry no pty, or whatever
+ * non-zero value sleepq returns.
+ */
+static int
+writepty(Ufile *f, void *data, int len, vlong)
+{
+       Ptyfile *p = (Ptyfile*)f;
+       int err;
+
+       if(p->pty == nil)
+               return -EPERM;
+       if(len == 0)
+               return len;
+
+       qlock(p->pty);
+       for(;;){
+               if(p->pty->closed){
+                       err = -EIO;
+                       break;
+               }
+               if(p->master){
+                       err = ttywriteinput(p->pty, (char*)data, len);
+               } else{
+                       err = ttywrite(p->pty, (char*)data, len);
+               }
+               if(err == 0){
+                       /* nothing accepted: wait for room, retry when sleepq returns 0 */
+                       if((err = sleepq(&p->pty->q[p->master].w, p->pty, 1)) == 0)
+                               continue;
+               } else {
+                       /* the write may have produced data for either side (e.g. echo) */
+                       if(ttycanread(p->pty, nil))
+                               wakeq(&p->pty->q[0].r, MAXPROC);
+                       if(ttycanreadoutput(p->pty, nil))
+                               wakeq(&p->pty->q[1].r, MAXPROC);
+               }
+               break;
+       }
+       qunlock(p->pty);
+
+       return err;
+}
+
+/*
+ * Close one end of a pty.  Closing the master marks the pty closed.
+ * The last reference clears the ptys[] slot and frees the pty;
+ * otherwise every sleeper on both sides is woken so it can see the new
+ * state.  Always returns 0.
+ */
+static int
+closepty(Ufile *f)
+{
+       Ptyfile *pf;
+       Pty *pty;
+       int side;
+
+       pf = (Ptyfile*)f;
+       pty = pf->pty;
+       if(pty == nil)
+               return 0;
+
+       qlock(pty);
+       if(pf->master)
+               pty->closed = 1;
+       if(decref(pty)){
+               /* still referenced: wake readers and writers of both sides */
+               for(side=0; side<2; side++){
+                       wakeq(&pty->q[side].r, MAXPROC);
+                       wakeq(&pty->q[side].w, MAXPROC);
+               }
+               qunlock(pty);
+               return 0;
+       }
+       ptys[pty->id] = nil;
+       qunlock(pty);
+       free(pty);
+       return 0;
+}
+
+/*
+ * Make tty the controlling terminal of the current process.  If a
+ * controlling terminal already exists, nothing changes: the result is 0
+ * when it is tty itself and -EPERM otherwise.  With none set, the pty's
+ * process group becomes the caller's and tty is installed.
+ */
+static int
+changetty(Ptyfile *tty)
+{
+       Ufile *cur;
+
+       cur = gettty();
+       if(cur == nil){
+               tty->pty->pgid = current->pgid;
+               settty(tty);
+               return 0;
+       }
+       /* only the identity of the existing terminal is needed */
+       putfile(cur);
+       if(cur != tty)
+               return -EPERM;
+       return 0;
+}
+
+/*
+ * ioctl handler for pty files.  cmd is the Linux ioctl number and arg
+ * the user argument, which is dereferenced as-is.  Returns 0 on
+ * success or a negative error: -ENOTTY for files without a pty and for
+ * unknown commands.  The pty lock is held across the switch.
+ */
+static int
+ioctlpty(Ufile *f, int cmd, void *arg)
+{
+       Ptyfile *p = (Ptyfile*)f;
+       int err, pid;
+
+       if(p->pty == nil)
+               return -ENOTTY;
+
+       trace("ioctlpty(%s, %lux, %p)", p->path, (ulong)cmd, arg);
+
+       err = 0;
+       qlock(p->pty);
+       switch(cmd){
+       default:
+               trace("ioctlpty: unknown: 0x%x", cmd);
+               err = -ENOTTY;
+               break;
+
+       case 0x5401:    /* TCGETS */
+               memmove(arg, &p->pty->t, sizeof(Termios));
+               break;
+
+       case 0x5402:    /* TCSETS */
+       case 0x5403:    /* TCSETSW */
+       case 0x5404:    /* TCSETSF */
+               memmove(&p->pty->t, arg, sizeof(Termios));
+               break;
+
+       case 0x5422:    // TIOCNOTTY
+               /*
+                * gettty() hands back a reference (changetty and the
+                * /dev/tty open path both treat it that way), so it is
+                * released whether or not it turns out to be this file.
+                * Only the pointer value is used after the put.
+                */
+               if((f = gettty()) != nil){
+                       putfile(f);
+                       if(f != p){
+                               err = -ENOTTY;
+                               break;
+                       }
+               }
+               settty(nil);
+               break;
+
+       case 0x540E:    // TIOCSCTTY
+               err = changetty(p);
+               break;
+
+       case 0x540F:    // TIOCGPGRP
+               *(int*)arg = p->pty->pgid;
+               break;
+
+       case 0x5410:    // TIOCSPGRP
+               p->pty->pgid = *(int*)arg;
+               break;
+
+       case 0x5413:    // TIOCGWINSZ
+               memmove(arg, &p->pty->winsize, sizeof(Winsize));
+               break;
+
+       case 0x5414:    // TIOCSWINSZ
+               if(memcmp(&p->pty->winsize, arg, sizeof(Winsize)) == 0)
+                       break;
+               memmove(&p->pty->winsize, arg, sizeof(Winsize));
+               if((pid = p->pty->pgid) > 0){
+                       /* drop the lock before signalling the process group */
+                       qunlock(p->pty);
+
+                       sys_kill(-pid, SIGWINCH);
+                       return 0;
+               }
+               break;
+
+       case 0x40045431:        // TIOCSPTLCK
+               /* only the master may lock or unlock the slave */
+               if(p->master)
+                       p->pty->locked = *(int*)arg;
+               break;
+
+       case 0x80045430:        // TIOCGPTN
+               *(int*)arg = p->pty->id;
+               break;
+
+       case 0x541B:    // FIONREAD
+               if(arg == nil)
+                       break;
+               /* err temporarily holds the readable byte count */
+               if(p->master){
+                       ttycanreadoutput(p->pty, &err);
+               } else {
+                       ttycanread(p->pty, &err);
+               }
+               if(err < 0){
+                       *((int*)arg) = 0;
+                       break;
+               }
+               *((int*)arg) = err;
+               err = 0;
+               break;
+       }
+       qunlock(p->pty);
+
+       return err;
+}
+
+/*
+ * Open one of the pty device paths and return a new file in *pf:
+ *   /dev/tty      the caller's controlling terminal (-ENOTTY if none);
+ *   /dev/pts      the directory of slaves (file with no pty, master -1);
+ *   /dev/ptmx     a new pty in the first free ptys[] slot, as master
+ *                 (-EBUSY when the table is full);
+ *   /dev/pts/N    the slave side of existing pty N (-ENOENT if absent,
+ *                 -EIO if it is closed or locked).
+ * Any other path yields -ENOENT.  perm is unused.  Returns 0 on
+ * success.
+ */
+static int
+openpty(char *path, int mode, int perm, Ufile **pf)
+{
+       Pty *pty;
+       Ptyfile *p;
+       int id;
+       int master;
+
+       USED(perm);
+
+       if(strcmp("/dev/tty", path)==0){
+               if(*pf = gettty())
+                       return 0;
+               return -ENOTTY;
+       } else if(strcmp("/dev/pts", path)==0){
+               pty = nil;
+               master = -1;
+       } else if(strcmp("/dev/ptmx", path)==0){
+               master = 1;
+               /* NOTE(review): the slot search and the store below take no lock; confirm callers serialise opens */
+               for(id=0; id<nelem(ptys); id++){
+                       if(ptys[id] == nil)
+                               break;
+               }
+               if(id == nelem(ptys))
+                       return -EBUSY;
+
+               pty = kmallocz(sizeof(*pty), 1);
+               pty->ref = 1;
+
+               ttyinit(pty);
+
+               ptys[pty->id = id] = pty;
+       } else {
+               master = 0;
+               if(strncmp("/dev/pts/", path, 9) != 0)
+                       return -ENOENT;
+               /* NOTE(review): atoi yields 0 for a non-numeric suffix, so such a path selects pty 0 */
+               id = atoi(path + 9);
+               if(id < 0 || id >= nelem(ptys))
+                       return -ENOENT;
+               if((pty = ptys[id]) == nil)
+                       return -ENOENT;
+
+               qlock(pty);
+               if(pty->closed || pty->locked){
+                       qunlock(pty);
+                       return -EIO;
+               }
+               incref(pty);
+               qunlock(pty);
+       }
+
+       p = kmallocz(sizeof(*p), 1);
+       p->dev = PTYDEV;
+       p->ref = 1;
+       p->fd = -1;
+       p->mode = mode;
+       p->path = kstrdup(path);
+       p->pty = pty;
+       p->master = master;
+
+       /* a slave opened without O_NOCTTY tries to become the controlling tty; the result is ignored */
+       if(!master && !(mode & O_NOCTTY))
+               changetty(p);
+
+       *pf = p;
+
+       return 0;
+}
+
+/*
+ * List the pty directory: one character-device entry, named by its
+ * decimal index, for every allocated slot in ptys[].  The list is
+ * returned through *pd.  Returns the number of entries built, or
+ * -EPERM when f is a pty end rather than the directory.  Listing stops
+ * early if newdirent returns nil.
+ */
+static int
+readdirpty(Ufile *f, Udirent **pd)
+{
+       Udirent **tail, *ent;
+       char name[12];
+       int id, count;
+
+       *pd = nil;
+       if(((Ptyfile*)f)->pty != nil)
+               return -EPERM;
+
+       tail = pd;
+       count = 0;
+       for(id=0; id<nelem(ptys); id++){
+               if(ptys[id] != nil){
+                       snprint(name, sizeof(name), "%d", id);
+                       ent = newdirent(f->path, name, S_IFCHR | 0666);
+                       *tail = ent;
+                       if(ent == nil)
+                               break;
+                       tail = &ent->next;
+                       count++;
+               }
+       }
+       return count;
+}
+
+/*
+ * fstat for an open pty file.  The pty directory (no pty attached) is
+ * reported as a 0777 directory with rdev 0; a pty end as a 0666
+ * character device with rdev 5<<8|2 for the master and 3<<8 for the
+ * slave.  Owner is the current process, size is 0, and all times are
+ * boottime converted to seconds.  Always returns 0.
+ */
+static int
+fstatpty(Ufile *f, Ustat *s)
+{
+       Ptyfile *p = (Ptyfile*)f;
+
+       if(p->pty == nil){
+               s->mode = 0777 | S_IFDIR;
+               s->rdev = 0;
+       } else {
+               s->mode = 0666 | S_IFCHR;
+               if(p->master)
+                       s->rdev = 5<<8 | 2;
+               else
+                       s->rdev = 3<<8;
+       }
+       s->ino = hashpath(p->path);
+       s->dev = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->size = 0;
+       s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+       return 0;
+}
+
+/*
+ * stat by path for the pty device.  Recognises /dev/tty, /dev/ptmx,
+ * /dev/pts and /dev/pts/N (N must name an allocated pty); anything
+ * else is -ENOENT.  The second parameter is unused.
+ *
+ * Every field that fstatpty fills in is filled in here as well, so the
+ * caller never sees stale dev/rdev values: dev is always 0, and rdev
+ * is 0 where no device number is assigned.  *s is not written on the
+ * error paths.
+ */
+static int
+statpty(char *path, int, Ustat *s)
+{
+       if(strcmp("/dev/tty", path)==0){
+               s->mode = 0666 | S_IFCHR;
+               s->rdev = 0;
+       } else if(strcmp("/dev/ptmx", path)==0){
+               s->mode = 0666 | S_IFCHR;
+               s->rdev = 5<<8 | 2;
+       } else if(strcmp("/dev/pts", path)==0){
+               s->mode = 0777 | S_IFDIR;
+               s->rdev = 0;
+       } else if(strncmp("/dev/pts/", path, 9)==0){
+               int id;
+
+               id = atoi(path + 9);
+               if(id < 0 || id >= nelem(ptys))
+                       return -ENOENT;
+               if(ptys[id] == nil)
+                       return -ENOENT;
+
+               s->mode = 0666 | S_IFCHR;
+               s->rdev = 3<<8;
+       } else {
+               return -ENOENT;
+       }
+       s->ino = hashpath(path);
+       s->dev = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->size = 0;
+       s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+       return 0;
+}
+
+/* chmod by path on the pty device: accepted and ignored, always 0. */
+static int
+chmodpty(char *path, int mode)
+{
+       USED(mode);
+       USED(path);
+       return 0;
+}
+
+/* chown by path on the pty device: accepted and ignored, always 0. */
+static int
+chownpty(char *path, int uid, int gid, int link)
+{
+       USED(link);
+       USED(gid);
+       USED(uid);
+       USED(path);
+       return 0;
+}
+
+/* fchmod on an open pty file: accepted and ignored, always 0. */
+static int
+fchmodpty(Ufile *f, int mode)
+{
+       USED(mode);
+       USED(f);
+       return 0;
+}
+
+/* fchown on an open pty file: accepted and ignored, always 0. */
+static int
+fchownpty(Ufile *f, int uid, int gid)
+{
+       USED(gid);
+       USED(uid);
+       USED(f);
+       return 0;
+}
+
+/* Operations table for the pty device; installed by ptydevinit. */
+static Udev ptydev =
+{
+       /* lifetime */
+       .open = openpty,
+       .close = closepty,
+
+       /* data and control */
+       .read = readpty,
+       .write = writepty,
+       .poll = pollpty,
+       .ioctl = ioctlpty,
+       .readdir = readdirpty,
+
+       /* metadata, by open file and by path */
+       .fstat = fstatpty,
+       .fchmod = fchmodpty,
+       .fchown = fchownpty,
+       .stat = statpty,
+       .chmod = chmodpty,
+       .chown = chownpty,
+};
+
+/*
+ * Register the pty device in devtab and mount it on the three paths
+ * that openpty and statpty recognise.
+ */
+void ptydevinit(void)
+{
+       devtab[PTYDEV] = &ptydev;
+       fsmount(&ptydev, "/dev/pts");
+       fsmount(&ptydev, "/dev/ptmx");
+       fsmount(&ptydev, "/dev/tty");
+}
diff --git a/linux_emul_base/rootdev.c b/linux_emul_base/rootdev.c
new file mode 100644 (file)
index 0000000..c0f3987
--- /dev/null
@@ -0,0 +1,1286 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Rfile Rfile;
+typedef struct Rpath Rpath;
+
+/*
+ * Open file on the root device: the generic Ufile (embedded, so an
+ * Rfile* can be used where a Ufile* is expected) plus directory state.
+ */
+struct Rfile
+{
+       Ufile;
+       /*
+        * NOTE(review): diraux is not referenced in the code visible here;
+        * presumably a cached directory listing (entries, cursor, count) - confirm.
+        */
+       struct
+       {
+               Dir     *d;
+               int     i;
+               int     n;
+       }               diraux;
+};
+
+/*
+ * Entry in rpathtab: one per path that is currently open through ropen.
+ */
+struct Rpath
+{
+       /* count of open files on this path; ropen increfs, rclose decrefs */
+       Ref;
+
+       /* next entry in the same hash bucket */
+       Rpath   *hash;
+
+       /* when set, rclose removes the file as the last reference goes away */
+       int             deleted;
+       /* the path itself, NUL-terminated, allocated together with the struct */
+       char            str[];
+};
+
+/* hash table of open paths, bucket chosen by hashpath(path); guarded by rpathtablk */
+static Rpath   *rpathtab[64];
+static QLock   rpathtablk;
+
+/*
+ * Find the rpathtab slot for path.  The returned pointer addresses
+ * either the link that holds the matching entry or, when there is no
+ * match, the nil link at the end of the bucket chain where a new entry
+ * can be stored.
+ */
+static Rpath **
+rpathent(char *path)
+{
+       Rpath **slot;
+
+       slot = &rpathtab[hashpath(path) % nelem(rpathtab)];
+       for(; *slot != nil; slot = &(*slot)->hash){
+               if(strcmp(path, (*slot)->str) == 0)
+                       break;
+       }
+       return slot;
+}
+
+/*
+ * Strip a leading ".udir.L." from name, if present.  The result points
+ * into name; nothing is allocated.
+ */
+static char*
+linkname(char *name)
+{
+       return strncmp(name, ".udir.L.", 8) == 0 ? name + 8 : name;
+}
+
+/*
+ * Build the path of the ".udir.<type>." file for name inside base,
+ * by handing the prefix and name to allocpath.  The result comes from
+ * allocpath; callers in this file release it with free.
+ */
+static char*
+udirpath(char *base, char *name, char type)
+{
+       char prefix[9];
+
+       memmove(prefix, ".udir.", 6);
+       prefix[6] = type;
+       prefix[7] = '.';
+       prefix[8] = 0;
+       return allocpath(base, prefix, name);
+}
+
+/*
+ * Look up the ".udir.<type>." file for name in directory base.
+ *
+ * With val == nil this is only an existence check.  Otherwise the
+ * file's contents are read into a freshly allocated, NUL-terminated
+ * buffer stored in *val, which the caller frees.
+ *
+ * Returns 0 on success and -1 if the file cannot be opened, sized or
+ * read; *val is left untouched on failure.
+ */
+static int
+udirget(char *base, char *name, char type, char **val)
+{
+       char *f, *b;
+       int n, r, s;
+       int fd;
+
+       r = -1;
+       f = udirpath(base, name, type);
+       if((fd = open(shortpath(current->kcwd, f), OREAD)) < 0)
+               goto out;
+       if(val == nil){
+               r = 0;
+               goto out;
+       }
+       /* seek to the end to learn the size */
+       if((s = seek(fd, 0, 2)) < 0)
+               goto out;
+       b = kmalloc(s+1);
+       n = 0;
+       if(s > 0){
+               seek(fd, 0, 0);
+               if((n = read(fd, b, s)) < 0){
+                       free(b);
+                       goto out;
+               }
+       }
+       b[n] = 0;
+
+       r = 0;
+       *val = b;
+out:
+       free(f);
+       /* the failed-open path arrives here with fd < 0; nothing to close then */
+       if(fd >= 0)
+               close(fd);
+       return r;
+}
+
+/*
+ * Resolve one emulated symlink in path.
+ *
+ * Works on a private copy of path.  Starting at the last '/' and moving
+ * left, the copy is cut at each slash so that b is the directory prefix
+ * and e the single component after it; udirget then looks for a
+ * ".udir.L." file for that component.  At the first hit the link text
+ * is combined with the prefix via fullpath, the untouched remainder of
+ * the path is appended, and the new path is returned.
+ *
+ * With link set and the hit on the final component, the path of the
+ * link file itself is returned instead of its target; if the component
+ * already carried the ".udir.L." prefix the result is nil.
+ *
+ * Returns a newly allocated path (callers in this file free it), or
+ * nil when no component is a symlink.
+ */
+static char*
+resolvepath1(char *path, int link)
+{
+       char *r, *b, *p, *o, *e;
+       char **a;
+
+       int n;
+       int i;
+
+       r = nil;
+       a = nil;
+       n = 0;
+
+       /* record the position of every '/' in the copy; a grows 16 slots at a time */
+       b = kstrdup(path);
+       for(p=b; *p; p++){
+               if(*p == '/'){
+                       if((n % 16) == 0)
+                               a = krealloc(a, sizeof(a[0]) * (n+16));
+                       a[n++] = p;
+               }
+       }
+
+       e = nil;
+       for(i=n-1; i>=0; i--){
+               char *t;
+               char *f;
+
+               /* o: the cut made in the previous round (nil on the first); e: this round's cut */
+               o = e;
+               e = a[i];
+               *e++ = 0;
+
+               f = linkname(e);
+               t = nil;
+
+               if(!udirget(b, f, 'L', &t)){
+                       if(t == nil)
+                               break;
+                       /* caller wants the link itself and it is the last component */
+                       if(link && o==nil){
+                               free(t);
+                               if(f != e)
+                                       break;
+                               t = udirpath(b, e, 'L');
+                       }
+                       r = fullpath(b, t);
+                       free(t);
+                       /* re-attach whatever followed the symlink component */
+                       if(o && o[1]){
+                               t = r;
+                               r = fullpath(t, &o[1]);
+                               free(t);
+                       }
+                       break;
+               }
+
+               /* not a link: step e back onto this cut and restore the previous cut's slash */
+               --e;
+               if(o) *o = '/';
+       }
+       free(b);
+       free(a);
+
+       return r;
+}
+
+/*
+ * Resolve emulated symlinks in path repeatedly until resolvepath1 finds
+ * none.  Gives up and returns nil once more than 8 substitutions have
+ * been made.  The result is always a fresh allocation, even when
+ * nothing was substituted.
+ */
+static char *
+resolvepath(char *path, int link)
+{
+       char *cur, *next;
+       int hops;
+
+       cur = kstrdup(path);
+       for(hops = 1; (next = resolvepath1(cur, link)) != nil; hops++){
+               if(hops > 8){
+                       free(next);
+                       free(cur);
+                       return nil;
+               }
+               free(cur);
+               cur = next;
+       }
+       return cur;
+}
+
+/*
+ * Open (and with O_CREAT possibly create) path on the underlying file
+ * system, returning a new Rfile through *pf.
+ *
+ * When the path, or its base directory for O_CREAT, cannot be reached
+ * directly, resolvepath1 is asked to substitute an emulated symlink and
+ * the open is retried on the resolved path through fsopen.  Every open
+ * that succeeds here is counted in rpathtab.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+ropen(char *path, int mode, int perm, Ufile **pf)
+{
+       Ufile *f;
+       int err;
+       char *s, *t;
+       int mode9, perm9;
+       int fd;
+       char *base;
+       char *name;
+       Rpath **prp;
+
+       trace("ropen(%s, %#o, %#o, ...)", path, mode, perm);
+
+       base = nil;
+       name = nil;
+       /* mode9: the low two access-mode bits; perm9: permission bits after the umask */
+       mode9 = mode & 3;
+       perm9 = (perm & ~current->umask) & 0777;
+
+       s = shortpath(current->kcwd, path);
+
+       if(mode & O_CREAT) {
+               Dir *d;
+
+               err = -EINVAL;
+               if((base = basepath(path, &name)) == nil)
+                       goto out;
+
+               /* resolve base directory */
+               if((d = dirstat(shortpath(current->kcwd, base))) == nil){
+                       err = mkerror();
+                       if(t = resolvepath1(base, 0)){
+                               free(base); base = t;
+                               t = allocpath(t, nil, name);
+                               err = fsopen(t, mode, perm, pf);
+                               free(t);
+                       }
+                       goto out;
+               }
+               err = -ENOTDIR;
+               if((d->mode & DMDIR) == 0){
+                       free(d);
+                       goto out;
+               }
+               free(d);
+
+               /* check if here is a symlink in the way */
+               t = udirpath(base, name, 'L');
+               if((fd = open(shortpath(current->kcwd, t), OREAD)) >= 0){
+                       free(t);
+                       close(fd);
+
+                       if(mode & O_EXCL){
+                               err = -EEXIST;
+                               goto out;
+                       }
+
+                       /* NOTE(review): if this resolve fails, err is still -ENOTDIR from above; confirm that is intended */
+                       if((t = resolvepath1(path, 0)) == nil)
+                               goto out;
+                       err = fsopen(t, mode, perm, pf);
+                       free(t);
+                       goto out;
+               }
+               free(t);
+
+               if(mode & (O_EXCL | O_TRUNC)){
+                       if(mode & O_EXCL)
+                               mode9 |= OEXCL;
+                       fd = create(s, mode9, perm9);
+               } else {
+                       /* try open first to avoid truncating existing the file */
+                       if((fd = open(s, mode9)) < 0)
+                               fd = create(s, mode9, perm9);
+               }
+               if(fd < 0){
+                       err = mkerror();
+                       goto out;
+               }
+       } else {
+               /* O_TRUNC is honoured only together with write access */
+               if(((mode & 3) == O_RDWR) || ((mode & 3) == O_WRONLY))
+                       if(mode & O_TRUNC)
+                               mode9 |= OTRUNC;
+
+               if((fd = open(s, mode9)) < 0){
+                       err = mkerror();
+                       if(t = resolvepath1(path, 0)){
+                               err = fsopen(t, mode, perm, pf);
+                               free(t);
+                       }
+                       goto out;
+               }
+       }
+
+       /* count this open in the path table, creating the entry on first use */
+       qlock(&rpathtablk);
+       prp = rpathent(path);
+       if(*prp != nil){
+               incref(*prp);
+       } else {
+               Rpath *rp;
+
+               rp = kmalloc(sizeof(*rp) + strlen(path) + 1);
+               rp->ref = 1;
+               rp->hash = nil;
+               rp->deleted = 0;
+               strcpy(rp->str, path);
+               *prp = rp;
+       }
+       qunlock(&rpathtablk);
+
+       f = kmallocz(sizeof(Rfile), 1);
+       f->ref = 1;
+       f->path = kstrdup(path);
+       f->dev = ROOTDEV;
+       f->mode = mode;
+       f->fd = fd;
+       f->off = 0;
+       *pf = f;
+
+       err = 0;
+
+out:
+       free(base);
+       free(name);
+
+       return err;
+}
+
+/*
+ * Close a root-device file.  Drops this file's reference on its
+ * rpathtab entry; when that was the last one the entry is unlinked and
+ * freed, and if it was marked deleted the file named by fd2path is
+ * removed.  The descriptor is closed afterwards.  Always returns 0.
+ */
+static int
+rclose(Ufile *f)
+{
+       static char path[1024]; /* protected by rpathtablk */
+       Rfile *file;
+       Rpath **slot, *rp;
+
+       file = (Rfile*)f;
+
+       qlock(&rpathtablk);
+       slot = rpathent(file->path);
+       rp = *slot;
+       if(decref(rp) == 0){
+               *slot = rp->hash;
+               if(rp->deleted && fd2path(file->fd, path, sizeof(path)) == 0)
+                       remove(shortpath(current->kcwd, path));
+               free(rp);
+       }
+       qunlock(&rpathtablk);
+
+       close(file->fd);
+       return 0;
+}
+
+/*
+ * Read up to len bytes at offset off into buf, issuing further preads
+ * until the request is filled or a pread returns 0 or fails.  Returns
+ * the number of bytes read, -ERESTART if notifyme(1) reports non-zero,
+ * or mkerror() if the last pread failed.
+ */
+static int
+rread(Ufile *f, void *buf, int len, vlong off)
+{
+       uchar *dst;
+       int got, total;
+
+       if(notifyme(1))
+               return -ERESTART;
+
+       dst = buf;
+       got = 0;
+       total = 0;
+       while(total < len){
+               got = pread(((Rfile*)f)->fd, dst + total, len - total, off + total);
+               if(got <= 0)
+                       break;
+               total += got;
+       }
+       notifyme(0);
+
+       if(got < 0)
+               return mkerror();
+       return total;
+}
+
+/*
+ * Write len bytes from buf at offset off with a single pwrite.
+ * Returns the pwrite count, -ERESTART if notifyme(1) reports non-zero,
+ * or mkerror() if the write failed.
+ */
+static int
+rwrite(Ufile *f, void *buf, int len, vlong off)
+{
+       int n;
+
+       if(notifyme(1))
+               return -ERESTART;
+       n = pwrite(((Rfile*)f)->fd, buf, len, off);
+       notifyme(0);
+
+       if(n >= 0)
+               return n;
+       return mkerror();
+}
+
+/*
+ * Size of the open file, obtained by seeking the descriptor to its end
+ * (whence 2); the seek result is returned as is.
+ */
+static vlong
+rsize(Ufile *f)
+{
+       vlong end;
+
+       end = seek(((Rfile*)f)->fd, 0, 2);
+       return end;
+}
+
+/*
+ * access(2)-style check of path against mode, a mask of 04 (read),
+ * 02 (write) and 01 (execute).
+ *
+ * If the path cannot be stat'ed directly, an emulated symlink is
+ * resolved and the question is passed on to fsaccess.  The execute bit
+ * is deliberately ignored, directories always pass, and otherwise the
+ * permission bits are checked before a trial open in the matching
+ * mode.
+ *
+ * Returns 0 if access is allowed, -EINVAL for bits outside 07,
+ * -EACCES if denied, or the error from the failed stat.
+ */
+static int
+raccess(char *path, int mode)
+{
+       /* open mode for each rwx combination, indexed by the access mask */
+       static char omode[] = {
+               0,                      // ---
+               OEXEC,          // --x
+               OWRITE,         // -w-
+               ORDWR,          // -wx
+               OREAD,          // r--
+               OEXEC,          // r-x
+               ORDWR,          // rw-
+               ORDWR           // rwx
+       };
+
+       int err;
+       int fd;
+       Dir *d;
+       char *s;
+
+       if(mode & ~07)
+               return -EINVAL;
+
+       s = shortpath(current->kcwd, path);
+       if((d = dirstat(s)) == nil){
+               err = mkerror();
+               if(path = resolvepath1(path, 0)){
+                       err = fsaccess(path, mode);
+                       free(path);
+               }
+               return err;
+       }
+
+       /* ignore the exec bit... firefox gets confused */
+       mode &= ~01;
+       if((mode == 0) || (d->mode & DMDIR)){
+               err = 0;
+       } else {
+               /* only the 02 and 04 bits can be set from here on */
+               err = -EACCES;
+               if((mode & 02) && ((d->mode & 0222) == 0))
+                       goto out;
+               if((mode & 04) && ((d->mode & 0444) == 0))
+                       goto out;
+               if((fd = open(s, omode[mode])) >= 0){
+                       close(fd);
+                       err = 0;
+               }
+       }
+out:
+       free(d);
+       return err;
+}
+
+/*
+ * Translate a Plan 9 Dir into a Linux st_mode value: the low nine
+ * permission bits plus a file-type bit chosen, in order, from the
+ * directory flag, a few well-known names, the ".udir.X." name prefix,
+ * and finally the Dir type character.  A ".udir." name with an
+ * unrecognised letter gets no type bit at all.
+ */
+static ulong
+dir2statmode(Dir *d)
+{
+       ulong perm;
+
+       perm = d->mode & 0777;
+
+       if(d->mode & DMDIR)
+               return perm | S_IFDIR;
+
+       /* names that are presented as character devices */
+       if(strcmp(d->name, "cons") == 0 || strncmp(d->name, "PTS.", 4) == 0)
+               return perm | S_IFCHR;
+       if(strcmp(d->name, "zero") == 0 || strcmp(d->name, "null") == 0)
+               return perm | S_IFCHR | 0222;
+
+       /* emulated special files carry their kind in the name */
+       if(strncmp(d->name, ".udir.", 6) == 0){
+               switch(d->name[6]){
+               case 'L':
+                       return perm | S_IFLNK;
+               case 'S':
+                       return perm | S_IFSOCK;
+               case 'F':
+                       return perm | S_IFIFO;
+               case 'C':
+                       return perm | S_IFCHR;
+               case 'B':
+                       return perm | S_IFBLK;
+               }
+               return perm;
+       }
+
+       if(d->type == '|')
+               return perm | S_IFIFO;
+       if(d->type == 'H')
+               return perm | S_IFBLK;
+       return perm | S_IFREG;
+}
+
+/*
+ * Fill *s from the Plan 9 directory entry d.  Ownership is always the
+ * current process; ctime mirrors mtime; ino, dev and rdev are zeroed
+ * (rstat and rfstat set ino themselves afterwards).
+ */
+static void
+dir2ustat(Dir *d, Ustat *s)
+{
+       /* owner */
+       s->uid = current->uid;
+       s->gid = current->gid;
+
+       /* taken from the directory entry */
+       s->mode = dir2statmode(d);
+       s->size = d->length;
+       s->atime = d->atime;
+       s->mtime = d->mtime;
+       s->ctime = d->mtime;
+
+       /* no numbers derived here (use d->qid for ino?) */
+       s->ino = 0;
+       s->dev = 0;
+       s->rdev = 0;
+}
+
+/*
+ * stat by path.  The path is tried directly first; with link set, the
+ * ".udir.L." file for its last component is tried next, so the link
+ * itself is described.  If neither exists, an emulated symlink is
+ * resolved and the request goes to fsstat.  Returns 0 or a negative
+ * error code.
+ */
+static int
+rstat(char *path, int link, Ustat *s)
+{
+       Dir *d;
+       int err;
+       char *t, *base, *name;
+
+       d = dirstat(shortpath(current->kcwd, path));
+       if(d == nil && link){
+               base = basepath(path, &name);
+               if(base != nil){
+                       t = udirpath(base, name, 'L');
+                       free(name);
+                       free(base);
+                       d = dirstat(shortpath(current->kcwd, t));
+                       free(t);
+               }
+       }
+
+       if(d != nil){
+               dir2ustat(d, s);
+               s->ino = hashpath(path);
+               free(d);
+               return 0;
+       }
+
+       err = mkerror();
+       t = resolvepath1(path, 0);
+       if(t != nil){
+               err = fsstat(t, link, s);
+               free(t);
+       }
+       return err;
+}
+
+/*
+ * fstat for an open root-device file: describes the descriptor via
+ * dirfstat and uses the hash of the file's path as inode number.
+ * Returns 0, or mkerror() if dirfstat fails.
+ */
+static int
+rfstat(Ufile *f, Ustat *s)
+{
+       Dir *d;
+
+       d = dirfstat(f->fd);
+       if(d != nil){
+               dir2ustat(d, s);
+               s->ino = hashpath(f->path);
+               free(d);
+               return 0;
+       }
+       return mkerror();
+}
+
+/*
+ * Hide the ".udir.X." prefix (X being any single non-NUL character)
+ * from a directory entry name.  The result points into name; nil is
+ * passed through.
+ */
+static char*
+fixname(char *name)
+{
+       if(name != nil && strncmp(name, ".udir.", 6) == 0 && name[6] != 0 && name[7] == '.')
+               return name + 8;
+       return name;
+}
+
+/*
+ * Read the whole directory behind f from its start and chain one
+ * Udirent per entry onto *pd, with ".udir.X." prefixes hidden and
+ * modes translated by dir2statmode.  Returns the number of entries
+ * chained (fewer than read if newdirent returns nil), or mkerror() if
+ * the directory cannot be read.
+ */
+static int
+rreaddir(Ufile *f, Udirent **pd)
+{
+       Dir *dirs;
+       Udirent *ent;
+       int count, total;
+
+       seek(f->fd, 0, 0);
+       total = dirreadall(f->fd, &dirs);
+       if(total < 0)
+               return mkerror();
+
+       count = 0;
+       while(count < total){
+               ent = newdirent(f->path, fixname(dirs[count].name), dir2statmode(&dirs[count]));
+               *pd = ent;
+               if(ent == nil)
+                       break;
+               pd = &ent->next;
+               count++;
+       }
+       free(dirs);
+       return count;
+}
+
+/*
+ * Read the target of the emulated symlink at path into buf (at most
+ * len bytes; this function adds no terminating NUL).
+ *
+ * If the base directory cannot be opened, a symlink in it is resolved
+ * and the request goes to fsreadlink.  A path that opens directly is
+ * not a symlink and yields -EINVAL.  Otherwise the ".udir.L." file for
+ * the last component is read.
+ *
+ * Returns the number of bytes read or a negative error code.
+ */
+static int
+rreadlink(char *path, char *buf, int len)
+{
+       int err;
+       int fd;
+
+       char *t;
+       char *name;
+       char *base;
+
+       trace("rreadlink(%s)", path);
+
+       if((base = basepath(path, &name)) == nil)
+               return -EINVAL;
+
+       /* resolve base path */
+       if((fd = open(shortpath(current->kcwd, base), OREAD)) < 0){
+               err = mkerror();
+               if(t = resolvepath1(base, 0)){
+                       free(base); base = t;
+                       t = allocpath(base, nil, name);
+                       err = fsreadlink(t, buf, len);
+                       free(t);
+               }
+               goto out;
+       }
+       close(fd);
+
+       /* check if path is regular file */
+       if((fd = open(shortpath(current->kcwd, path), OREAD)) >= 0){
+               close(fd);
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* the link text lives in the ".udir.L." file next to where path would be */
+       t = udirpath(base, name, 'L');
+       if((fd = open(shortpath(current->kcwd, t), OREAD)) < 0){
+               err = mkerror();
+               free(t);
+               goto out;
+       }
+       free(t);
+       if((err = read(fd, buf, len)) < 0)
+               err = mkerror();
+       close(fd);
+out:
+       free(base);
+       free(name);
+       return err;
+}
+
+/* size in bytes of the transfer buffer that copyfile allocates */
+enum {
+       COPYSIZE = 8*1024,
+};
+
+/*
+ * Copy the file or directory tree at from to the new path to.
+ *
+ * The destination must not exist (-EEXIST otherwise).  Directories are
+ * created with the source's mode and their entries copied recursively;
+ * plain files are copied COPYSIZE bytes at a time.  A failure while
+ * listing a source directory is reported as an error, in the same way
+ * removefile reports it, rather than being taken for end of directory.
+ *
+ * Returns 0 on success or a negative error code.  Whatever was already
+ * copied is left in place on failure.
+ */
+static int
+copyfile(char *from, char *to)
+{
+       int err, fromfd, tofd;
+       char *buf, *s;
+       Dir *ent;
+       Dir *dir;
+
+       dir = nil;
+       buf = nil;
+       ent = nil;
+
+       tofd = -1;
+
+       trace("copyfile(%s, %s)", from, to);
+
+       if((fromfd = open(shortpath(current->kcwd, from), OREAD)) < 0){
+               err = mkerror();
+               goto out;
+       }
+       if((dir = dirfstat(fromfd)) == nil){
+               err = mkerror();
+               goto out;
+       }
+       /* refuse to overwrite: the destination must not be openable */
+       s = shortpath(current->kcwd, to);
+       if((err = open(s, OREAD)) >= 0){
+               close(err);
+               err = -EEXIST;
+               goto out;
+       }
+       if(dir->mode & DMDIR){
+               int n;
+
+               if((tofd = create(s, OREAD, dir->mode)) < 0){
+                       err = mkerror();
+                       goto out;
+               }
+               close(tofd);
+               tofd = -1;
+               while((n = dirread(fromfd, &ent)) > 0){
+                       int i;
+
+                       for(i=0; i<n; i++){
+                               char *froment, *toent;
+
+                               froment = allocpath(from, nil, ent[i].name);
+                               toent = allocpath(to, nil, ent[i].name);
+                               err = copyfile(froment, toent);
+                               free(froment);
+                               free(toent);
+
+                               if(err < 0)
+                                       goto out;
+                       }
+                       free(ent); ent = nil;
+               }
+               /* a negative count is a read error, not the end of the directory */
+               if(n < 0){
+                       err = mkerror();
+                       goto out;
+               }
+       } else {
+               if((tofd = create(s, OWRITE, dir->mode)) < 0){
+                       err = mkerror();
+                       goto out;
+               }
+               buf = kmalloc(COPYSIZE);
+               for(;;){
+                       err = read(fromfd, buf, COPYSIZE);
+                       if(err == 0)
+                               break;
+                       if(err < 0){
+                               err = mkerror();
+                               goto out;
+                       }
+                       if(write(tofd, buf, err) != err){
+                               err = mkerror();
+                               goto out;
+                       }
+               }
+       }
+
+       err = 0;
+out:
+       free(ent);
+       free(dir);
+       free(buf);
+       /* either descriptor may never have been opened */
+       if(fromfd >= 0)
+               close(fromfd);
+       if(tofd >= 0)
+               close(tofd);
+       return err;
+}
+
+/*
+ * Remove `path'.  When a plain remove() fails and the entry is a
+ * directory, its entries are removed recursively and the remove() is
+ * retried.  Returns 0 on success or a negative error code.
+ */
+static int
+removefile(char *path)
+{
+       char *spath, *child;
+       Dir *st, *ents;
+       int dfd, cnt, k, rc, isdir;
+
+       trace("removefile(%s)", path);
+
+       spath = shortpath(current->kcwd, path);
+
+       st = dirstat(spath);
+       if(st == nil)
+               return mkerror();
+       if(remove(spath) == 0){
+               free(st);
+               return 0;
+       }
+       isdir = (st->mode & DMDIR) != 0;
+       free(st);
+       /* a non-directory that cannot be removed is a plain error */
+       if(!isdir)
+               return mkerror();
+
+       dfd = open(spath, OREAD);
+       if(dfd < 0)
+               return mkerror();
+
+       rc = 0;
+       ents = nil;
+       for(;;){
+               cnt = dirread(dfd, &ents);
+               if(cnt <= 0)
+                       break;
+               /* stop at the first entry that fails to go away */
+               for(k = 0; k < cnt && rc >= 0; k++){
+                       child = allocpath(path, nil, ents[k].name);
+                       rc = removefile(child);
+                       free(child);
+               }
+               free(ents);
+               ents = nil;
+               if(rc < 0)
+                       break;
+       }
+       close(dfd);
+
+       if(rc < 0)
+               return rc;
+       if(cnt < 0)
+               return mkerror();
+       if(remove(spath) < 0)
+               return mkerror();
+       return 0;
+}
+
+/*
+ * Replace *from and *to with the results of resolvepath(..., 1).
+ *
+ * On success both are strings the caller has to free().  On failure
+ * both are set to nil and -ELOOP is returned.
+ *
+ * When the resolved source contains ".udir.L" (presumably a link file
+ * as created by rlink()) and the last element of the target is not a
+ * ".udir." name yet, the target is rewritten with udirpath(..., 'L').
+ */
+static int
+resolvefromtopath(char **from, char **to)
+{
+       char *slash, *t;
+
+       trace("resolvefromtopath(%s, %s)", *from, *to);
+
+       if((*from = resolvepath(*from, 1)) == nil){
+               *to = nil;
+               return -ELOOP;
+       }
+       if((*to = resolvepath(*to, 1)) == nil){
+               free(*from);
+               *from = nil;
+               return -ELOOP;
+       }
+       if(strstr(*from, ".udir.L")){
+               /*
+                * Split the target at its last '/'.  A target without any
+                * '/' has no directory part to split off; it is left alone
+                * instead of dereferencing a nil pointer.
+                */
+               slash = strrchr(*to, '/');
+               if(slash != nil && strncmp(slash+1, ".udir.", 6) != 0){
+                       *slash = 0;
+                       t = udirpath(*to, slash+1, 'L');
+                       free(*to); *to = t;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Rename `from' to `to'.
+ *
+ * Both paths are first run through resolvefromtopath().  If they turn
+ * out identical nothing is done.  If they live in the same directory,
+ * the target is removed and the source is renamed in place with
+ * dirwstat().  Otherwise the source is copied to a temporary name next
+ * to the target, the temporary is renamed to the final name and the
+ * source tree is removed; the temporary is cleaned up on failure.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+rrename(char *from, char *to)
+{
+       int err;
+       char *x, *y, *t, *newname;
+       Dir d;
+
+       trace("rrename(%s, %s)", from, to);
+
+       if((err = resolvefromtopath(&from, &to)) < 0)
+               goto out;
+       if(strcmp(from, to) == 0)
+               goto out;
+
+       /* last '/' of each path, nil when there is none */
+       x = strrchr(from, '/');
+       y = strrchr(to, '/');
+
+       /*
+        * Final name element of the target.  Without a '/' the whole
+        * string is the name; the old code dereferenced nil here.
+        */
+       newname = (y != nil) ? y+1 : to;
+
+       if(x && y){
+               int samedir;
+
+               /* compare the directory parts by cutting at the last '/' */
+               *x = 0; *y = 0;
+               samedir = (strcmp(from, to) == 0);
+               *x = '/'; *y = '/';
+
+               if(samedir){
+                       nulldir(&d);
+                       d.name = newname;
+
+                       remove(to);
+                       if(dirwstat(shortpath(current->kcwd, from), &d) < 0)
+                               err = mkerror();
+                       goto out;
+               }
+       }
+
+       /* different directories: copy, rename the copy, drop the source */
+       t = ksmprint("%s%d%d.tmp", to, current->pid, current->tid);
+       if((err = copyfile(from, t)) == 0){
+               nulldir(&d);
+               d.name = newname;
+
+               remove(shortpath(current->kcwd, to));
+               if(dirwstat(shortpath(current->kcwd, t), &d) < 0) {
+                       err = mkerror();
+               } else {
+                       removefile(from);
+               }
+       }
+       if(err != 0)
+               removefile(t);
+       free(t);
+out:
+       free(from);
+       free(to);
+
+       return err;
+}
+
+/*
+ * Create the directory `path' with Linux permission bits `mode'
+ * (filtered through current->umask).
+ *
+ * If the parent cannot be stat'ed here, the request is forwarded with
+ * fsmkdir() on the path resolvepath1() yields for the parent, if any.
+ * Fails with -ENOTDIR when the parent is not a directory and with
+ * -EEXIST when the 'L' udirpath() name for the new entry exists.
+ */
+static int
+rmkdir(char *path, int mode)
+{
+       char *parent, *leaf, *alt;
+       Dir *st;
+       int rc, dfd, perm;
+
+       trace("rmkdir(%s, %#o)", path, mode);
+
+       parent = basepath(path, &leaf);
+       if(parent == nil)
+               return -EINVAL;
+
+       st = dirstat(shortpath(current->kcwd, parent));
+       if(st == nil){
+               rc = mkerror();
+               alt = resolvepath1(parent, 0);
+               if(alt != nil){
+                       free(parent);
+                       parent = alt;
+                       alt = allocpath(parent, nil, leaf);
+                       rc = fsmkdir(alt, mode);
+                       free(alt);
+               }
+               goto done;
+       }
+       if((st->mode & DMDIR) == 0){
+               free(st);
+               rc = -ENOTDIR;
+               goto done;
+       }
+       free(st);
+
+       /* a link file of the same name is in the way */
+       alt = udirpath(parent, leaf, 'L');
+       st = dirstat(shortpath(current->kcwd, alt));
+       if(st != nil){
+               free(st);
+               free(alt);
+               rc = -EEXIST;
+               goto done;
+       }
+       free(alt);
+
+       perm = DMDIR | ((mode & ~current->umask) & 0777);
+       dfd = create(shortpath(current->kcwd, path), OREAD|OEXCL, perm);
+       if(dfd < 0){
+               rc = mkerror();
+               goto done;
+       }
+       close(dfd);
+       rc = 0;
+
+done:
+       free(leaf);
+       free(parent);
+       return rc;
+}
+
+/*
+ * Prepare the mode of a wstat request: unless ndir->mode is ~0, keep
+ * only its low nine permission bits and take every other mode bit
+ * from the existing entry odir.
+ */
+static void
+combinedir(Dir *ndir, Dir *odir)
+{
+       if(ndir->mode == ~0)
+               return;
+       ndir->mode = (ndir->mode & 0777) | (odir->mode & ~0777);
+}
+
+/*
+ * Apply the wstat request ndir to `path'.
+ *
+ * If `path' cannot be stat'ed and `link' is set, the request is retried
+ * once on the 'L' udirpath() name derived from it.  Returns 0 on
+ * success or a negative error code.
+ */
+static int
+uwstat(char *path, Dir *ndir, int link)
+{
+       char *spath, *parent, *leaf, *lpath;
+       Dir *cur;
+       int rc;
+
+       trace("uwstat(%s, ..., %d)", path, link);
+
+       spath = shortpath(current->kcwd, path);
+       cur = dirstat(spath);
+       if(cur != nil){
+               combinedir(ndir, cur);
+               rc = 0;
+               if(dirwstat(spath, ndir) < 0)
+                       rc = mkerror();
+               free(cur);
+               return rc;
+       }
+
+       /* stat failed: remember why, then maybe try the link file */
+       rc = mkerror();
+       if(!link)
+               return rc;
+       parent = basepath(path, &leaf);
+       if(parent == nil)
+               return rc;
+
+       lpath = udirpath(parent, leaf, 'L');
+       free(parent);
+       free(leaf);
+
+       rc = uwstat(lpath, ndir, 0);
+       free(lpath);
+       return rc;
+}
+
+/*
+ * Apply the wstat request ndir to the open file f (via f->fd).
+ * Returns 0 on success or a negative error code.
+ */
+static int
+uwfstat(Ufile *f, Dir *ndir)
+{
+       Dir *cur;
+       int rc;
+
+       cur = dirfstat(f->fd);
+       if(cur == nil)
+               return mkerror();
+
+       combinedir(ndir, cur);
+       rc = 0;
+       if(dirfwstat(f->fd, ndir) < 0)
+               rc = mkerror();
+       free(cur);
+       return rc;
+}
+
+/*
+ * Set access and modification time of `path'.  If the local wstat
+ * fails, fsutime() is tried on the path resolvepath1() returns, if any.
+ */
+static int
+rutime(char *path, long atime, long mtime)
+{
+       Dir want;
+       char *real;
+       int rc;
+
+       trace("rutime(%s, %ld, %ld)", path, atime, mtime);
+
+       nulldir(&want);
+       want.atime = atime;
+       want.mtime = mtime;
+
+       rc = uwstat(path, &want, 1);
+       if(rc >= 0)
+               return rc;
+
+       real = resolvepath1(path, 0);
+       if(real != nil){
+               rc = fsutime(real, atime, mtime);
+               free(real);
+       }
+       return rc;
+}
+
+/*
+ * Change the permission bits of `path'.  If the local wstat fails,
+ * fschmod() is tried on the path resolvepath1() returns, if any.
+ */
+static int
+rchmod(char *path, int mode)
+{
+       Dir want;
+       char *real;
+       int rc;
+
+       trace("rchmod(%s, %#o)", path, mode);
+
+       nulldir(&want);
+       want.mode = mode;
+
+       rc = uwstat(path, &want, 1);
+       if(rc >= 0)
+               return rc;
+
+       real = resolvepath1(path, 0);
+       if(real != nil){
+               rc = fschmod(real, mode);
+               free(real);
+       }
+       return rc;
+}
+
+/*
+ * chown is not implemented: uid and gid are ignored and the result of
+ * fsstat() on the path is returned.
+ */
+static int
+rchown(char *path, int uid, int gid, int link)
+{
+       Ustat st;
+       int rc;
+
+       USED(gid);
+       USED(uid);
+
+       /* FIXME, just return the right errorcode for now */
+       rc = fsstat(path, link, &st);
+       return rc;
+}
+
+/*
+ * Set the length of `path' to `size'.  If the local wstat fails,
+ * fstruncate() is tried on the path resolvepath1() returns, if any.
+ */
+static int
+rtruncate(char *path, vlong size)
+{
+       Dir want;
+       char *real;
+       int rc;
+
+       trace("rtruncate(%s, %lld)", path, size);
+
+       nulldir(&want);
+       want.length = size;
+
+       rc = uwstat(path, &want, 0);
+       if(rc >= 0)
+               return rc;
+
+       real = resolvepath1(path, 0);
+       if(real != nil){
+               rc = fstruncate(real, size);
+               free(real);
+       }
+       return rc;
+}
+
+/* Change the permission bits of the open file f. */
+static int
+rfchmod(Ufile *f, int mode)
+{
+       Dir want;
+       int rc;
+
+       nulldir(&want);
+       want.mode = mode;
+       rc = uwfstat(f, &want);
+       return rc;
+}
+
+/* Ownership changes on an open file are accepted and ignored. */
+static int
+rfchown(Ufile *f, int uid, int gid)
+{
+       USED(gid);
+       USED(uid);
+       USED(f);
+
+       return 0;
+}
+
+/* Set the length of the open file f to `size'. */
+static int
+rftruncate(Ufile *f, vlong size)
+{
+       Dir want;
+       int rc;
+
+       nulldir(&want);
+       want.length = size;
+       rc = uwfstat(f, &want);
+       return rc;
+}
+
+/*
+ * Remove the entry at `path': a directory when rmdir is set, anything
+ * but a directory otherwise (-ENOTDIR / -EISDIR on mismatch).
+ *
+ * The entry is looked up under `path' itself and, failing that, under
+ * its 'L' udirpath() name.  If neither exists here the request is
+ * forwarded with fsunlink() on the path resolvepath1() returns, if any.
+ *
+ * An entry that still has an rpath table record is not removed but
+ * renamed to ".<name>.<kpid>.deleted" and marked deleted.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+runlink(char *path, int rmdir)
+{
+       int err;
+       Dir *dir;
+       char *t, *s;
+       char *base;
+       char *name;
+       char *rpath;
+       Rpath **prp;
+
+       trace("runlink(%s, %d)", path, rmdir);
+
+       rpath = nil;
+       dir = nil;
+       err = -EINVAL;
+       if((base = basepath(path, &name)) == nil)
+               goto out;
+       /* rpath becomes whichever of the two candidate names exists */
+       if(dir = dirstat(shortpath(current->kcwd, path))){
+               rpath = kstrdup(path);
+       } else {
+               rpath = udirpath(base, name, 'L');
+               dir = dirstat(shortpath(current->kcwd, rpath));
+       }
+       if(dir == nil){
+               err = mkerror();
+               if(t = resolvepath1(path, 0)){
+                       err = fsunlink(t, rmdir);
+                       free(t);
+               }               
+               goto out;
+       }
+       /* the entry type has to match the kind of removal requested */
+       if(rmdir){
+               if((dir->mode & DMDIR) == 0){
+                       err = -ENOTDIR;
+                       goto out;
+               }
+       } else {
+               if(dir->mode & DMDIR){
+                       err = -EISDIR;
+                       goto out;
+               }
+       }
+
+       s = shortpath(current->kcwd, rpath);
+
+       qlock(&rpathtablk);
+       prp = rpathent(path);
+       if(*prp){
+               Dir ndir;
+
+               /* still in use: rename out of the way instead of removing */
+               t = ksmprint(".%s.%d.deleted", name, current->kpid);
+               nulldir(&ndir);
+               ndir.name = t;
+               trace("runlink: file %s still in use renaming to -> %s", path, t);
+               if(dirwstat(s, &ndir) < 0){
+                       qunlock(&rpathtablk);
+                       err = mkerror();
+                       free(t);
+                       goto out;
+               }
+               free(t);
+               (*prp)->deleted = 1;
+               qunlock(&rpathtablk);
+
+       } else {
+               int x;
+               qunlock(&rpathtablk);
+
+               /* retry remove(); give up after the ninth failed attempt */
+               x = 0;
+               while(remove(s) < 0){
+                       err = mkerror();
+                       if(++x > 8){
+                               /* old debian bug clashes with mntgen */
+                               if(strcmp(base, "/")==0 && strstr(path, ".dpkg-"))
+                                       err = -ENOENT;
+                               goto out;
+                       }
+               }
+       }
+       err = 0;
+out:
+       free(dir);
+       free(name);
+       free(base);
+       free(rpath);
+
+       return err;
+}
+
+/*
+ * Create `new' as a link to `old'.
+ *
+ * sym == 0: the link is emulated by a copy; both paths go through
+ *           resolvefromtopath() and then copyfile().
+ * sym != 0: the string `old' is written into a file created
+ *           exclusively under the 'L' udirpath() name of `new'.
+ *
+ * If the directory of `new' cannot be opened here, the request is
+ * forwarded with fslink() on the path resolvepath1() returns for that
+ * directory, if any.  Returns 0 on success or a negative error code.
+ */
+static int
+rlink(char *old, char *new, int sym)
+{
+       int err;
+       int fd;
+       long len;
+       char *base;
+       char *name;
+       char *t;
+
+       trace("rlink(%s, %s, %d)", old, new, sym);
+
+       if((base = basepath(new, &name)) == nil)
+               return -EINVAL;
+
+       /* resolve base directory */
+       if((fd = open(shortpath(current->kcwd, base), OREAD)) < 0){
+               err = mkerror();
+               if(t = resolvepath1(base, 0)){
+                       free(base); base = t;
+                       t = allocpath(base, nil, name);
+                       err = fslink(old, t, sym);
+                       free(t);
+               }
+               goto out;
+       }
+       close(fd);
+
+       if(sym == 0){
+               /* old and new are replaced by allocated copies here */
+               if((err = resolvefromtopath(&old, &new)) == 0)
+                       err = copyfile(old, new);
+               free(old);
+               free(new);
+               goto out;
+       }
+
+       /* check if regular file is in the way */
+       err = -EEXIST;
+       if((fd = open(shortpath(current->kcwd, new), OREAD)) >= 0){
+               close(fd);
+               goto out;
+       }
+
+       /* try to create the link, will fail if it already exists */
+       t = udirpath(base, name, 'L');
+       if((fd = create(shortpath(current->kcwd, t), OWRITE|OEXCL, 0777)) < 0){
+               err = mkerror();
+               free(t);
+               goto out;
+       }
+       free(t);
+
+       /*
+        * The whole target has to make it into the link file; a short
+        * write is treated as an error, as copyfile() does.
+        */
+       len = strlen(old);
+       if(write(fd, old, len) != len){
+               err = mkerror();
+               close(fd);
+               goto out;
+       }
+       close(fd);
+       err = 0;
+out:
+       free(base);
+       free(name);
+       return err;
+}
+
+/*
+ * Operation table of the root device: maps each Udev operation to the
+ * r* function of this file implementing it.  Installed by rootdevinit().
+ */
+static Udev rootdev = 
+{
+       /* operations on paths */
+       .open = ropen,
+       .access = raccess,
+       .stat = rstat,
+       .link = rlink,
+       .unlink = runlink,
+       .rename = rrename,
+       .mkdir = rmkdir,
+       .utime = rutime,
+       .chmod = rchmod,
+       .chown = rchown,
+       .truncate = rtruncate,
+
+       .read = rread,
+       .write = rwrite,
+       .size = rsize,
+       .close = rclose,
+
+       .fstat = rfstat,
+       .readdir = rreaddir,
+       .readlink = rreadlink,
+
+       /* operations on an open Ufile */
+       .fchmod = rfchmod,
+       .fchown = rfchown,
+       .ftruncate = rftruncate,
+};
+
+/*
+ * Register the root device: store it in the devtab[ROOTDEV] slot and
+ * mount it with fsmount() under the empty path "".
+ */
+void rootdevinit(void)
+{
+       devtab[ROOTDEV] = &rootdev;
+
+       fsmount(&rootdev, "");
+}
diff --git a/linux_emul_base/signal.c b/linux_emul_base/signal.c
new file mode 100644 (file)
index 0000000..50687a1
--- /dev/null
@@ -0,0 +1,1387 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* short type names for the file-local structures defined below */
+typedef struct Signal Signal;
+typedef struct Action Action;
+typedef struct Queue Queue;
+typedef struct Timers Timers;
+typedef struct Handlers Handlers;
+typedef struct Private Private;
+
+/*
+ * One queued signal: the embedded (unnamed) Usiginfo supplies signo,
+ * group, topid and the per-signal payload; next links the entry into
+ * either the pending list or the free list of a Queue.
+ */
+struct Signal
+{
+       Usiginfo;
+       Signal  *next;
+};
+
+/*
+ * Disposition of one signal.
+ *   handler  SIG_DFL, SIG_IGN or a handler address
+ *   flags    SA_* bits
+ *   block    presumably the mask of signals blocked while the handler
+ *            runs; not used in this part of the file, TODO confirm
+ */
+struct Action
+{
+       void    *handler;
+       int             flags;
+       uvlong  block;
+};
+
+/*
+ * Reference counted, lockable queue of pending signals.
+ *   head/tailp  singly linked list of pending entries; tailp points at
+ *               the next field (or head) where a new entry is appended
+ *   free        list of unused entries
+ *   a           fixed pool of 64 entries that both lists draw from
+ *   tty         file set by settty() and handed out by gettty()
+ */
+struct Queue
+{
+       Ref;
+       QLock;
+
+       Signal  *head;
+       Signal  **tailp;
+       Signal  *free;
+       Signal  a[64];
+
+       Ufile           *tty;
+};
+
+/*
+ * Reference counted timer state with two interval/expire pairs.
+ * NOTE(review): not referenced in this part of the file; presumably
+ * backs the Linux interval timers, confirm units and slot meaning
+ * against the code that uses it.
+ */
+struct Timers
+{
+       Ref;
+       struct {
+               vlong   interval;
+               vlong   expire;
+       }               itimer[2];
+};
+
+/*
+ * Reference counted, lockable table of signal dispositions.
+ * The Action for signal number sig is stored at a[sig-1].
+ */
+struct Handlers
+{
+       Ref;
+       QLock;
+       Action  a[SIGMAX-1];
+};
+
+/*
+ * Signal state hung off Uproc.signal.
+ *   h, q, t      handler table, signal queue and timers; each is either
+ *                shared with or separate from the parent, as decided in
+ *                clonesignal()
+ *   altstack     base address and size of the alternate signal stack
+ *                (both zero means none, see linuxstackflags())
+ *   block        mask of currently blocked signals, tested with MASK()
+ *   freerestart  list of recycled Urestart records, freed in
+ *                exitsignal()
+ */
+struct Private
+{
+       Handlers        *h;
+       Queue           *q;
+       Timers          *t;
+
+       struct {
+               ulong   sp;
+               ulong   size;
+       }                       altstack;
+       
+       uvlong          block;
+
+       Urestart                *freerestart;
+};
+
+/*
+ * Special values stored in Action.handler in place of a handler
+ * address (SIG_DFL and SIG_IGN are the ones tested in this file).
+ */
+enum
+{
+       SIG_ERR         = -1,
+       SIG_DFL         = 0,
+       SIG_IGN         = 1,
+       SIG_HOLD        = 2,
+};
+
+/*
+ * Bits of Action.flags (the sigaction SA_* flags; the values look like
+ * the Linux/i386 ones, verify against the Linux headers).
+ */
+enum
+{
+       SA_NOCLDSTOP    = 1,
+       SA_NOCLDWAIT    = 2,
+       SA_SIGINFO              = 4,
+       SA_ONSTACK              = 0x08000000,
+       SA_RESTART              = 0x10000000,
+       SA_NODEFER              = 0x40000000,
+       SA_RESETHAND    = 0x80000000,
+};
+
+/* alternate signal stack states, as returned by linuxstackflags() */
+enum
+{
+       SS_ONSTACK              = 1,
+       SS_DISABLE              = 2,
+};
+
+/*
+ * Bit for signal number sig (counted from 1) in a uvlong signal mask.
+ * The shifted constant is unsigned, like the masks it is combined
+ * with, so that signal 64 (bit 63) does not shift into the sign bit of
+ * a signed vlong.
+ */
+#define MASK(sig)      ((uvlong)1 << ((sig)-1))
+
+/* forward declarations for functions defined further down in this file */
+static void
+nextsignal(uvlong rblock, int wait);
+
+static int
+getsignal(Private *p, Usiginfo *pinfo, int wait);
+
+/*
+ * Reset the restart stack of proc to its single built-in record
+ * (proc->restart0) with no system call and no link.
+ */
+static void
+initrestart(Uproc *proc)
+{
+       proc->restart0.syscall = nil;
+       proc->restart0.link = nil;
+       proc->restart = &proc->restart0;
+}
+
+/*
+ * Pop records without a system call off current->restart, moving them
+ * to p->freerestart, until a record with a system call or the last
+ * record of the chain is on top.  If the top record has a system call
+ * it becomes current->syscall.
+ */
+static void
+poprestart(Private *p)
+{
+       Urestart *top;
+
+       top = current->restart;
+       while(top->link != nil && !top->syscall){
+               current->restart = top->link;
+
+               top->link = p->freerestart;
+               p->freerestart = top;
+
+               top = current->restart;
+       }
+       if(top->syscall)
+               current->syscall = top->syscall;
+}
+
+/*
+ * Allocate an empty signal queue holding one reference, with every
+ * preallocated entry on the free list.
+ */
+static Queue*
+mkqueue(void)
+{
+       Queue *nq;
+       Signal *slot, *last;
+
+       nq = kmallocz(sizeof(Queue), 1);
+       nq->ref = 1;
+       nq->head = nil;
+       nq->tailp = &nq->head;
+
+       /* chain the pool in array order; the last entry ends the list */
+       last = &nq->a[nelem(nq->a)-1];
+       for(slot = nq->a; slot < last; slot++)
+               slot->next = slot+1;
+       last->next = nil;
+       nq->free = nq->a;
+
+       return nq;
+}
+
+/*
+ * Allocate a handler table holding one reference, with every signal
+ * set to SIG_DFL.
+ */
+static Handlers*
+mkhandlers(void)
+{
+       Handlers *tab;
+       Action *act;
+
+       tab = kmallocz(sizeof(Handlers), 1);
+       tab->ref = 1;
+       for(act = tab->a; act < tab->a + nelem(tab->a); act++)
+               act->handler = (void*)SIG_DFL;
+       return tab;
+}
+
+/* Allocate zeroed timer state holding one reference. */
+static Timers*
+mktimers(void)
+{
+       Timers *tm;
+
+       tm = kmallocz(sizeof(*tm), 1);
+       tm->ref = 1;
+       return tm;
+}
+
+/* bits.s */
+extern int get_ds(void);
+extern int get_cs(void);
+/* values of get_cs()/get_ds(), cached by initsignal() */
+static ulong user_cs, user_ds;
+
+/*
+ * Give the current Uproc fresh signal state: an empty queue, default
+ * handlers, new timers, nothing blocked and a reset restart stack.
+ */
+void initsignal(void)
+{
+       Private *priv;
+
+       /* fetch the selectors while both cached values are still zero */
+       if(user_ds == 0 && user_cs == 0){
+               user_ds = get_ds();
+               user_cs = get_cs();
+       }
+
+       priv = kmallocz(sizeof(Private), 1);
+       priv->block = 0;
+
+       priv->q = mkqueue();
+       priv->h = mkhandlers();
+       priv->t = mktimers();
+
+       current->signal = priv;
+       initrestart(current);
+}
+
+/*
+ * Tear down the signal state of the current Uproc: pending signals
+ * addressed to this thread are returned to the free list, the
+ * references on queue, handlers and timers are dropped (freeing each
+ * when the count reaches zero) and all restart records are released.
+ */
+void exitsignal(void)
+{
+       Private *p;
+       Queue *q;
+       Timers *t;
+       Signal **i;
+       Handlers *h;
+       Urestart *r;
+
+       if((p = current->signal) == nil)
+               return;
+       current->signal = nil;
+       q = p->q;
+       qlock(q);
+again:
+       for(i=&q->head; *i; i=&((*i)->next)){
+               /* note: shadows the Urestart *r of the function scope */
+               Signal *r;
+               r = *i;
+               if(!r->group && (r->topid == current->tid)){
+                       /* unlink; fix tailp when the last entry goes away */
+                       if((*i = r->next) == nil)
+                               q->tailp = i;
+                       r->next = q->free;
+                       q->free = r;
+                       /* the list changed under the iterator: rescan */
+                       goto again;
+               }
+       }
+       qunlock(q);
+       if(!decref(q)){
+               putfile(q->tty);
+               q->tty = nil;
+               free(q);
+       }
+       h =  p->h;
+       if(!decref(h))
+               free(h);
+       t = p->t;
+       if(!decref(t))
+               free(t);
+       /* move every record but the last of the chain to the free list */
+       while(r = current->restart){
+               if(r->link == nil)
+                       break;
+               current->restart = r->link;
+               r->link = p->freerestart;
+               p->freerestart = r;
+       }
+       current->restart = nil;
+       while(r = p->freerestart){
+               p->freerestart = r->link;
+               free(r);
+       }
+       free(p);
+}
+
+/*
+ * Set up the signal state of the freshly cloned Uproc `new' from that
+ * of the current one.
+ *
+ * copyhand  non-zero: `new' gets a private copy of the handler table;
+ *           zero: the table is shared (one more reference).
+ * newproc   non-zero: `new' gets its own queue (with the same tty),
+ *           its own timers and a copy of the altstack settings;
+ *           zero: queue and timers are shared.
+ *
+ * The blocked mask is inherited in both cases.
+ */
+void clonesignal(Uproc *new, int copyhand, int newproc)
+{
+       Private *cur, *np;
+
+       cur = current->signal;
+       if(cur == nil)
+               return;
+
+       np = kmallocz(sizeof(Private), 1);
+       if(!copyhand){
+               incref(cur->h);
+               np->h = cur->h;
+       } else {
+               np->h = mkhandlers();
+
+               qlock(cur->h);
+               memmove(np->h->a, cur->h->a, sizeof(np->h->a));
+               qunlock(cur->h);
+       }
+
+       qlock(cur->q);
+       if(!newproc){
+               incref(cur->q);
+               np->q = cur->q;
+               incref(cur->t);
+               np->t = cur->t;
+       } else {
+               np->q = mkqueue();
+               np->q->tty = getfile(cur->q->tty);
+               np->t = mktimers();
+               np->altstack = cur->altstack;
+       }
+       qunlock(cur->q);
+
+       np->block = cur->block;
+       new->signal = np;
+
+       initrestart(new);
+}
+
+/*
+ * Make `tty' the tty of the current signal queue.  A new reference is
+ * taken with getfile() and the previous tty is released with putfile().
+ */
+void
+settty(Ufile *tty)
+{
+       Private *priv;
+       Ufile *prev;
+
+       priv = current->signal;
+       if(priv == nil)
+               return;
+
+       tty = getfile(tty);
+
+       qlock(priv->q);
+       prev = priv->q->tty;
+       priv->q->tty = tty;
+       qunlock(priv->q);
+
+       putfile(prev);
+}
+
+/*
+ * Return the tty of the current signal queue as obtained from
+ * getfile(), or nil when the current Uproc has no signal state.
+ */
+Ufile*
+gettty(void)
+{
+       Private *priv;
+       Ufile *res;
+
+       priv = current->signal;
+       if(priv == nil)
+               return nil;
+
+       qlock(priv->q);
+       res = getfile(priv->q->tty);
+       qunlock(priv->q);
+
+       return res;
+}
+
+/*
+ * Return non-zero when delivering signal `sig' to proc would have no
+ * effect:
+ *   - proc has no signal state,
+ *   - SIGCHLD with SA_NOCLDWAIT set,
+ *   - SIGWINCH or SIGURG with the default disposition,
+ *   - the handler is SIG_IGN.
+ * SIGKILL and SIGSTOP are never ignored.
+ */
+int ignoressignal(Uproc *proc, int sig)
+{
+       Private *priv;
+       int disp, flags;
+
+       priv = proc->signal;
+       if(priv == nil)
+               return 1;
+
+       /* snapshot the disposition under the table lock */
+       qlock(priv->h);
+       disp = (int)priv->h->a[sig-1].handler;
+       flags = priv->h->a[sig-1].flags;
+       qunlock(priv->h);
+
+       if(sig == SIGKILL || sig == SIGSTOP)
+               return 0;
+       if(sig == SIGCHLD && (flags & SA_NOCLDWAIT))
+               return 1;
+       if((sig == SIGWINCH || sig == SIGURG) && disp == SIG_DFL)
+               return 1;
+       return (disp == SIG_IGN);
+}
+
+/*
+ * Return non-zero when proc has signal state, does not block `sig'
+ * and does not ignore it.
+ */
+int wantssignal(Uproc *proc, int sig)
+{
+       Private *priv;
+
+       priv = proc->signal;
+       if(priv == nil)
+               return 0;
+       if(priv->block & MASK(sig))
+               return 0;
+       return !ignoressignal(proc, sig);
+}
+
+/*
+ * Queue the signal described by `info' for proc; `group' selects
+ * whether it is addressed to proc->pid (non-zero) or to the single
+ * thread proc->tid (zero).
+ *
+ * Returns 1 when the signal was queued, 0 when it was ignored or
+ * dropped as a duplicate, and -EAGAIN when the queue has no free
+ * entry left.
+ */
+int sendsignal(Uproc *proc, Usiginfo *info, int group)
+{
+       Private *p;
+       Signal *s;
+
+       trace("sendsignal(%S) to %d from %d",
+               info->signo, proc->tid, (current != nil) ? current->tid : 0);
+
+       /* also covers proc->signal == nil, so p below is never nil */
+       if(ignoressignal(proc, info->signo)){
+               trace("sendsignal(): ignored signal %S", info->signo);
+               return  0;
+       }
+
+       p = proc->signal;
+       qlock(p->q);
+       /* signals below SIGRT1 are queued at most once */
+       if(info->signo < SIGRT1){
+               for(s=p->q->head; s; s=s->next){
+                       if(!s->group && (s->topid != proc->tid))
+                               continue;
+                       if(s->signo == info->signo){
+                               qunlock(p->q);
+                               trace("sendsignal(): droping follow up signal %S", info->signo);
+                               return 0;
+                       }
+               }
+       }
+       if((s = p->q->free) == nil){
+               qunlock(p->q);
+               trace("sendsignal(): out of signal buffers");
+               return -EAGAIN;
+       }
+       /* take an entry off the free list, fill it, append it at the tail */
+       p->q->free = s->next;
+       s->next = nil;
+       memmove(s, info, sizeof(*info));
+       s->group = group;
+       s->topid = group ? proc->pid : proc->tid;
+       *p->q->tailp = s;
+       p->q->tailp = &s->next;
+       qunlock(p->q);
+       return 1;
+}
+
+/*
+ * Return 1 when the queue of proc holds at least one signal that is
+ * addressed to proc and not blocked by it, 0 otherwise (also when proc
+ * has no signal state).
+ */
+int
+signalspending(Uproc *proc)
+{
+       Private *p;
+       Signal *s;
+       int ret;
+
+       p = proc->signal;
+       /* cheap unlocked test for the common case of an empty queue */
+       if(p == nil || p->q->head == nil)
+               return 0;
+
+       ret = 0;
+       qlock(p->q);
+       for(s=p->q->head; s; s=s->next){
+               /*
+                * Thread directed signals only count for their target.
+                * The target to compare with is the proc being asked
+                * about, as in sendsignal(), not whoever is calling.
+                */
+               if(!s->group && (s->topid != proc->tid))
+                       continue;
+               if(MASK(s->signo) & p->block)
+                       continue;
+               ret = 1;
+               break;
+       }
+       qunlock(p->q);
+
+       return ret;
+}
+
+/*
+ * Dequeue the first pending signal that is addressed to the current
+ * thread and not blocked in p->block, copying it to *pinfo.
+ *
+ * Returns the signal number (> 0) of the dequeued signal, the negated
+ * number of the first blocked candidate when only blocked ones were
+ * found, or 0 when nothing is pending for this thread.  With `wait'
+ * set, the function sleeps on the queue with sleepproc() and rescans
+ * as long as nothing was dequeued and sleepproc() returns 0.
+ */
+static int
+getsignal(Private *p, Usiginfo *pinfo, int wait)
+{
+       Signal *r;
+       Signal **i;
+       int sig;
+
+       /* unlocked shortcut: nothing queued and not asked to wait */
+       if(!wait && p->q->head == nil)
+               return 0;
+
+       sig = 0;
+       qlock(p->q);
+       for(;;){
+               for(i=&p->q->head; *i; i=&((*i)->next)){
+                       r = *i;
+
+                       if(!r->group && (r->topid != current->tid))
+                               continue;
+
+                       if(p->block & MASK(r->signo)){
+                               /* remember the first blocked one, negated */
+                               if(sig == 0)
+                                       sig = -r->signo;
+                               continue;
+                       }
+                       sig = r->signo;
+
+                       /* dequeue nonblocked signal */
+                       memmove(pinfo, r, sizeof(*pinfo));
+                       if((*i = r->next) == nil)
+                               p->q->tailp = i;
+                       r->next = p->q->free;
+                       p->q->free = r;
+                       break;
+               }
+               if(wait && sig <= 0){
+                       if(sleepproc(p->q, 0) == 0)
+                               continue;
+               }
+               break;
+       }
+       qunlock(p->q);
+
+       return sig;
+}
+
+/*
+ * Assemble a signal mask from the first setsize bytes of `set'
+ * (at most sizeof(uvlong) of them), byte 0 being the least
+ * significant one.
+ */
+static uvlong
+sigset2uvlong(uchar *set, int setsize)
+{
+       uvlong bits;
+       int k, nbytes;
+
+       nbytes = setsize;
+       if(nbytes > sizeof(uvlong))
+               nbytes = sizeof(uvlong);
+
+       /* walk from the most significant byte down, shifting as we go */
+       bits = 0;
+       for(k = nbytes-1; k >= 0; k--)
+               bits = (bits << 8) | set[k];
+       return bits;
+}
+
+/*
+ * Store `mask' into the setsize bytes at `set', least significant byte
+ * first; bytes beyond sizeof(uvlong) are cleared.
+ */
+static void
+uvlong2sigset(uchar *set, int setsize, uvlong mask)
+{
+       int k;
+
+       for(k = 0; k < setsize; k++)
+               set[k] = (k < sizeof(uvlong)) ? ((mask >> (k*8)) & 0xff) : 0;
+}
+
+/*
+ * Signal information record in the layout exchanged with Linux
+ * programs (filled by siginfo2linux(), read by linux2siginfo()).
+ * Three common ints are followed by a union whose active member
+ * depends on the signal; _pad fixes the union at 29 ints.
+ */
+struct linux_siginfo {
+       int     signo;
+       int     errno;
+       int     code;
+
+       union {
+               int _pad[29];
+
+               /* kill() */
+               struct {
+                       int     pid;                    /* sender's pid */
+                       int     uid;                    /* sender's uid */
+               } kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       int     tid;                    /* timer id */
+                       int     overrun;                /* overrun count */
+                       int     val;                    /* same as below */
+               } timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       int     pid;                    /* sender's pid */
+                       int     uid;                    /* sender's uid */
+                       int     val;
+               } rt;
+
+               /* SIGCHLD */
+               struct {
+                       int     pid;                    /* which child */
+                       int     uid;                    /* sender's uid */
+                       int     status;                 /* exit code */
+                       long    utime;
+                       long    stime;
+               } chld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       void    *addr;  /* faulting insn/memory ref. */
+                       int             trapno; /* TRAP # which caused the signal */
+               } fault;
+
+               /* SIGPOLL */
+               struct {
+                       long    band;   /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int     fd;
+               } poll;
+       };
+};
+
+/*
+ * Convert the internal Usiginfo `info' into the struct linux_siginfo
+ * that `p' points to.  Only the union member matching the signal
+ * number is written; everything else in *p is left untouched.
+ */
+void
+siginfo2linux(Usiginfo *info, void *p)
+{
+       struct linux_siginfo *out;
+       int signo;
+
+       out = p;
+       signo = info->signo;
+
+       out->signo = signo;
+       out->errno = info->errno;
+       out->code = info->code;
+
+       switch(signo){
+       case SIGRT1:
+       case SIGRT2:
+       case SIGRT3:
+       case SIGRT4:
+       case SIGRT5:
+       case SIGRT6:
+       case SIGRT7:
+       case SIGRT8:
+               out->rt.pid = info->rt.pid;
+               out->rt.uid = info->rt.uid;
+               out->rt.val = info->rt.val;
+               break;
+       case SIGPOLL:
+               out->poll.fd = info->poll.fd;
+               out->poll.band = info->poll.band;
+               break;
+       case SIGILL:
+       case SIGBUS:
+       case SIGFPE:
+       case SIGSEGV:
+               out->fault.addr = info->fault.addr;
+               out->fault.trapno = info->fault.trapno;
+               break;
+       case SIGCHLD:
+               out->chld.pid = info->chld.pid;
+               out->chld.uid = info->chld.uid;
+               out->chld.status = info->chld.status;
+               out->chld.utime = info->chld.utime;
+               out->chld.stime = info->chld.stime;
+               break;
+       case SIGALRM:
+               out->timer.tid = info->timer.tid;
+               out->timer.overrun = info->timer.overrun;
+               out->timer.val = info->timer.val;
+               break;
+       default:
+               /* everything else carries the sender like kill() */
+               out->kill.pid = info->kill.pid;
+               out->kill.uid = info->kill.uid;
+               break;
+       }
+}
+
+/*
+ * Convert the struct linux_siginfo that `p' points to into the
+ * internal Usiginfo `info'.  Only the union member matching the signal
+ * number is copied; everything else in *info is left untouched.
+ */
+void
+linux2siginfo(void *p, Usiginfo *info)
+{
+       struct linux_siginfo *in;
+       int signo;
+
+       in = p;
+       signo = in->signo;
+
+       info->signo = signo;
+       info->errno = in->errno;
+       info->code = in->code;
+
+       switch(signo){
+       case SIGRT1:
+       case SIGRT2:
+       case SIGRT3:
+       case SIGRT4:
+       case SIGRT5:
+       case SIGRT6:
+       case SIGRT7:
+       case SIGRT8:
+               info->rt.pid = in->rt.pid;
+               info->rt.uid = in->rt.uid;
+               info->rt.val = in->rt.val;
+               break;
+       case SIGPOLL:
+               info->poll.fd = in->poll.fd;
+               info->poll.band = in->poll.band;
+               break;
+       case SIGILL:
+       case SIGBUS:
+       case SIGFPE:
+       case SIGSEGV:
+               info->fault.addr = in->fault.addr;
+               info->fault.trapno = in->fault.trapno;
+               break;
+       case SIGCHLD:
+               info->chld.pid = in->chld.pid;
+               info->chld.uid = in->chld.uid;
+               info->chld.status = in->chld.status;
+               info->chld.utime = in->chld.utime;
+               info->chld.stime = in->chld.stime;
+               break;
+       case SIGALRM:
+               info->timer.tid = in->timer.tid;
+               info->timer.overrun = in->timer.overrun;
+               info->timer.val = in->timer.val;
+               break;
+       default:
+               /* everything else carries the sender like kill() */
+               info->kill.pid = in->kill.pid;
+               info->kill.uid = in->kill.uid;
+               break;
+       }
+}
+
+/*
+ * Saved register set as placed in a signal frame.  Converted from and
+ * to a Plan 9 Ureg by ureg2linuxsigcontext() and
+ * linuxsigcontext2ureg().  The field order is part of the layout the
+ * Linux program sees; presumably the Linux/i386 struct sigcontext,
+ * verify against the Linux headers before changing it.
+ */
+struct linux_sigcontext {
+       ulong   gs;
+       ulong   fs;
+       ulong   es;
+       ulong   ds;
+       ulong   di;
+       ulong   si;
+       ulong   bp;
+       ulong   sp;
+       ulong   bx;
+       ulong   dx;
+       ulong   cx;
+       ulong   ax;
+       ulong   trapno;
+       ulong   err;
+       ulong   ip;
+       ulong   cs;
+       ulong   flags;
+       ulong   sp_at_signal;
+       ulong   ss;
+       void*   fpstate;
+       ulong   oldmask;
+       ulong   cr2;
+};
+
+/*
+ * Fill the Linux signal context *sc from the Plan 9 register set *u.
+ * fpstate and oldmask are not written here; cr2 is always set to 0.
+ */
+static void
+ureg2linuxsigcontext(Ureg *u, struct linux_sigcontext *sc)
+{
+       /* general purpose registers */
+       sc->ax = u->ax;
+       sc->bx = u->bx;
+       sc->cx = u->cx;
+       sc->dx = u->dx;
+       sc->si = u->si;
+       sc->di = u->di;
+       sc->bp = u->bp;
+
+       /* stack pointer, recorded twice */
+       sc->sp = u->sp;
+       sc->sp_at_signal = u->sp;
+
+       /* program counter and flags */
+       sc->ip = u->pc;
+       sc->flags = u->flags;
+
+       /* segment registers */
+       sc->cs = u->cs;
+       sc->ss = u->ss;
+       sc->ds = u->ds;
+       sc->es = u->es;
+       sc->fs = u->fs;
+       sc->gs = u->gs;
+
+       /* trap information */
+       sc->trapno = u->trap;
+       sc->err = u->ecode;
+       sc->cr2 = 0;
+}
+
+/* signal mask as stored in a linux_ucontext: two ulong words */
+struct linux_sigset {
+       ulong   sig[2];
+};
+
+/*
+ * Signal stack description as stored in a linux_ucontext: address,
+ * flags (presumably SS_* values, TODO confirm) and size.
+ */
+struct linux_signalstack {
+       ulong   sp;
+       int             flags;
+       ulong   size;
+};
+
+/*
+ * User context embedded in the rt variant of linux_sigframe: flags, a
+ * link to another context, the signal stack, the saved registers and
+ * a signal mask.
+ */
+struct linux_ucontext {
+       ulong   flags;
+       struct linux_ucontext   *link;
+       struct linux_signalstack        stack;
+       struct linux_sigcontext context;
+       struct linux_sigset     sigmask;
+};
+
+/*
+ * Load the registers saved in the Linux signal context *sc back into
+ * *u.  Only pc, sp, the general purpose registers and the segment
+ * registers are restored; every other Ureg field keeps its value.
+ */
+static void
+linuxsigcontext2ureg(struct linux_sigcontext *sc, Ureg *u)
+{
+       /* segment registers */
+       u->gs = sc->gs;
+       u->fs = sc->fs;
+       u->es = sc->es;
+       u->ds = sc->ds;
+       u->ss = sc->ss;
+       u->cs = sc->cs;
+
+       /* general purpose registers */
+       u->bp = sc->bp;
+       u->si = sc->si;
+       u->di = sc->di;
+       u->dx = sc->dx;
+       u->cx = sc->cx;
+       u->bx = sc->bx;
+       u->ax = sc->ax;
+
+       /* where and on which stack execution resumes */
+       u->sp = sc->sp;
+       u->pc = sc->ip;
+}
+/*
+ * Signal frame that linuxsignal() builds and points the stack pointer
+ * at before entering a handler.  The union holds either the plain
+ * context (handlers without SA_SIGINFO) or the rt variant carrying a
+ * siginfo and a ucontext (handlers with SA_SIGINFO).
+ */
+struct linux_sigframe {
+       void    *ret;   /* set to linux_sigreturn or linux_rtsigreturn */
+       int             sig;    /* signal number */
+
+       union {
+               struct linux_sigcontext         sc;
+
+               struct {
+                       struct linux_siginfo    *pinfo; /* points at info below */
+                       struct linux_ucontext   *puc;   /* points at uc below */
+
+                       struct linux_siginfo    info;
+                       struct linux_ucontext   uc;
+               } rt;
+       };
+};
+
+#pragma profile off
+
+/*
+ * Classify sp relative to the alternate signal stack of p:
+ * SS_DISABLE when no alternate stack is configured, SS_ONSTACK when sp
+ * lies inside [altstack.sp, altstack.sp + altstack.size), 0 otherwise.
+ */
+static int
+linuxstackflags(Private *p, ulong sp)
+{
+       if(p->altstack.sp != 0 && p->altstack.size != 0){
+               /* unsigned difference: wraps to a huge value when sp is below the base */
+               if(sp - p->altstack.sp < p->altstack.size)
+                       return SS_ONSTACK;
+               return 0;
+       }
+       return SS_DISABLE;
+}
+/*
+ * Enter the handler of action a for the signal described by i.
+ * A linux_sigframe is built (on the alternate stack when SA_ONSTACK is
+ * set and linuxstackflags() returned 0, otherwise in the local _frame),
+ * the current registers and the mask rblock are saved into it, and the
+ * registers are rewritten so that execution continues in the handler
+ * with sp pointing at the frame.  a->block is added to the blocked set.
+ * NOTE(review): retuser() presumably does not return - confirm.
+ */
+static void
+linuxsignal(Private *p, Action *a, Usiginfo *i, uvlong rblock)
+{
+       struct linux_sigframe _frame;
+       struct linux_sigframe *f;
+       Ureg *u;
+       int stackflags;
+
+       u = current->ureg;
+
+       stackflags = linuxstackflags(p, u->sp);
+       if((a->flags & SA_ONSTACK) && (stackflags == 0)){
+               trace("linuxsignal: altstack %lux %lux", p->altstack.sp, p->altstack.size);
+               f = (struct linux_sigframe*)(p->altstack.sp + p->altstack.size);        /* end of the alternate stack area */
+               f--;    /* step back by one frame so it fits below that end */
+       } else {
+               f = &_frame;    /* frame lives in this function's own stack frame */
+       }
+
+       trace("linuxsignal(): frame %p", f);
+       memset(f, 0, sizeof(*f));
+
+       f->sig = i->signo;
+
+       if(a->flags & SA_SIGINFO){
+               f->ret = linux_rtsigreturn;
+               siginfo2linux(i, &f->rt.info);
+               f->rt.pinfo = &f->rt.info;
+
+               f->rt.uc.stack.sp = p->altstack.sp;
+               f->rt.uc.stack.size = p->altstack.size;
+               f->rt.uc.stack.flags = stackflags;
+
+               ureg2linuxsigcontext(u, &f->rt.uc.context);
+               f->rt.uc.context.oldmask = rblock & 0xFFFFFFFF;
+               f->rt.uc.sigmask.sig[0] = rblock & 0xFFFFFFFF;  /* low half of the mask to restore */
+               f->rt.uc.sigmask.sig[1] = (rblock >> 32) & 0xFFFFFFFF;  /* high half */
+               f->rt.puc = &f->rt.uc;
+               u->cx = (ulong)f->rt.puc;       /* ucontext pointer also passed in cx */
+               u->dx = (ulong)f->rt.pinfo;     /* siginfo pointer also passed in dx */
+       } else {
+               f->ret = linux_sigreturn;
+               ureg2linuxsigcontext(u, &f->sc);
+               f->sc.oldmask = rblock & 0xFFFFFFFF;    /* only the low 32 bits fit here */
+               u->cx = 0;
+               u->dx = 0;
+       }
+
+       u->di = 0;
+       u->si = 0;
+       u->bp = 0;
+       u->bx = 0;
+
+       u->ax = (ulong)i->signo;        /* signal number also passed in ax */
+
+       u->sp = (ulong)f;       /* handler sees ret at the top of its stack */
+       u->pc = (ulong)a->handler;
+
+       u->cs = user_cs;
+       u->ss = user_ds;
+       u->ds = user_ds;
+       u->es = user_ds;
+
+       p->block |= a->block;   /* add the action's mask to the blocked set */
+
+       trace("linuxsignal(): retuser pc=%lux sp=%lux", u->pc, u->sp);
+       retuser();
+}
+
+/*
+ * sigreturn for frames built without SA_SIGINFO.
+ * Locates the signal frame just below the current stack pointer,
+ * restores the registers saved in it, restores the low 32 bits of the
+ * blocked mask from oldmask, lets nextsignal() look for another pending
+ * signal and finally resumes the interrupted code through retuser().
+ * The trailing return -1 is only reached if retuser() returns.
+ */
+int
+sys_sigreturn(void)
+{
+       struct linux_sigframe *f;
+       Private *p;
+       Ureg *u;
+
+       trace("sys_sigreturn()");
+
+       p = current->signal;
+       u = current->ureg;
+
+       /* the frame starts 4 bytes below sp (the ret slot has been popped) */
+       f = (struct linux_sigframe*)(u->sp - 4);
+
+       trace("sys_sigreturn(): frame %p", f);
+
+       linuxsigcontext2ureg(&f->sc, u);
+
+       /*
+        * only the low 32 bits of the mask were saved in oldmask, so
+        * replace just those and keep the upper half.  the cast is
+        * required: ~0xFFFFFFFF evaluated in a 32 bit type is 0, which
+        * would clear the upper 32 bits of the mask as well.
+        */
+       p->block &= ~(uvlong)0xFFFFFFFF;
+       p->block |= f->sc.oldmask;
+       nextsignal(p->block, 0);
+       poprestart(p);
+
+       trace("sys_sigreturn(): retuser pc=%lux sp=%lux", u->pc, u->sp);
+       retuser();
+
+       return -1;
+}
+/*
+ * sigreturn for frames built with SA_SIGINFO (rt frames).
+ * Restores the registers from the ucontext in the frame, rebuilds the
+ * full 64 bit blocked mask from sigmask.sig[0] and sig[1], gives
+ * nextsignal() a chance to deliver another signal and resumes through
+ * retuser().  The return -1 is only reached if retuser() returns.
+ */
+int
+sys_rt_sigreturn(void)
+{
+       struct linux_sigframe *f;
+       Private *p;
+       Ureg *u;
+
+       trace("sys_rt_sigreturn()");
+
+       p = current->signal;
+       u = current->ureg;
+
+       f = (struct linux_sigframe*)(u->sp - 4);        /* frame starts 4 bytes below sp */
+       trace("sys_rt_sigreturn(): frame %p", f);
+
+       linuxsigcontext2ureg(&f->rt.uc.context, u);
+       p->block = (uvlong)f->rt.uc.sigmask.sig[0] | (uvlong)f->rt.uc.sigmask.sig[1]<<32;
+       nextsignal(p->block, 0);
+       poprestart(p);
+
+       trace("sys_rt_sigreturn(): pc=%lux sp=%lux", u->pc, u->sp);
+       retuser();
+
+       return -1;
+}
+
+/*
+ * nextsignal transfers execution to the handler of the next
+ * pending signal or just returns. after the signal has been
+ * handled, the block mask is restored to rblock. if there is
+ * no pending signal and wait is non-zero, the current process
+ * is suspended until a signal becomes available.
+ */
+
+static void
+nextsignal(uvlong rblock, int wait)
+{
+       Private *p;
+       int sig;
+       Usiginfo info;
+       Action a;
+       Urestart *r;
+
+       for(;;){
+               if((p = current->signal) == nil)
+                       return;
+
+               if(current->wstate & WSTOPPED){
+                       /* while stopped only SIGCONT and SIGKILL may get through */
+                       p->block = ~(MASK(SIGCONT) | MASK(SIGKILL));
+                       sig = getsignal(p, &info, 1);
+                       p->block = rblock;
+                       if(sig <= 0)
+                               return;
+                       if(sig == SIGCONT){
+                               contproc(current, sig, info.group);
+                               continue;
+                       }
+               } else {
+                       if((sig = getsignal(p, &info, wait)) <= 0)
+                               return;
+                       if(sig == SIGCONT)
+                               continue;
+                       if(sig == SIGSTOP){
+                               stopproc(current, sig, info.group);
+                               continue;
+                       }
+               }
+               break;
+       }
+
+       trace("nextsignal(): signal %S", sig);
+
+       /* work on a private copy of the action so the lock can be dropped */
+       qlock(p->h);
+       a = p->h->a[sig-1];
+       if(a.flags & SA_RESETHAND)
+               p->h->a[sig-1].handler = (void*)SIG_DFL;
+       /*
+        * unless SA_NODEFER is set, block the signal while its handler
+        * runs.  the parentheses are required: == binds tighter than &,
+        * so "a.flags & SA_NODEFER == 0" was always false.
+        */
+       if((a.flags & SA_NODEFER) == 0)
+               a.block |= MASK(sig);
+       qunlock(p->h);
+
+       switch((int)a.handler){
+       case SIG_DFL:
+               switch(sig){
+               case SIGCHLD:
+               case SIGWINCH:
+               case SIGURG:
+                       goto Ignored;
+               }
+               /* no break */
+       case SIG_ERR:
+               trace("nextsignal(): signal %S causes exit", sig);
+               exitproc(current, sig, 1);
+Ignored:
+       case SIG_IGN:
+       case SIG_HOLD:
+               trace("nextsignal(): signal %S ignored", sig);
+               return;
+       }
+
+       if(current->restart->syscall){
+               if(a.flags & SA_RESTART){
+                       /* push a fresh restart record, reusing a free one if available */
+                       if(r = p->freerestart)
+                               p->freerestart = r->link;
+                       if(r == nil)
+                               r = kmalloc(sizeof(*r));
+                       r->syscall = nil;
+                       r->link = current->restart;
+                       current->restart = r;
+               } else {
+                       trace("nextsignal(): interrupting syscall %s", current->syscall);
+                       current->sysret(-EINTR);
+               }
+       }
+
+       linuxsignal(p, &a, &info, rblock);
+}
+
+/*
+ * Deliver a pending signal to the current process, if it has signal
+ * state at all.  Does not wait and keeps the current block mask.
+ */
+void handlesignals(void)
+{
+       Private *p;
+
+       p = current->signal;
+       if(p == nil)
+               return;
+       nextsignal(p->block, 0);
+}
+
+/*
+ * rt_sigsuspend: install the mask given by set/setsize (SIGKILL and
+ * SIGSTOP can never be blocked) and wait for signals, handing the
+ * previous mask to nextsignal() as the mask to restore afterwards.
+ * The loop never falls through; the syscall result is preset to -EINTR.
+ */
+int
+sys_rt_sigsuspend(uchar *set, int setsize)
+{
+       Private *p;
+       uvlong tmpmask, saved;
+
+       trace("sys_rt_sigsuspend(%p, %d)", set, setsize);
+
+       p = current->signal;
+       tmpmask = sigset2uvlong(set, setsize) & ~(MASK(SIGKILL) | MASK(SIGSTOP));
+
+       saved = p->block;
+       p->block = tmpmask;
+
+       /*
+        * once a handler has run, the process continues after the
+        * sigsuspend syscall and sees -EINTR as its return value
+        */
+       current->sysret(-EINTR);
+
+       for(;;)
+               nextsignal(saved, 1);
+}
+
+#pragma profile on
+/*
+ * Argument layout read and written by sys_sigaltstack().
+ */
+struct linux_altstack
+{
+       ulong   sp;     /* base address of the stack area */
+       int             flags;  /* SS_DISABLE on input turns the alternate stack off */
+       ulong   size;   /* size of the stack area */
+};
+
+/*
+ * sigaltstack: optionally install the alternate signal stack described
+ * by *stk and optionally report the previous settings through *ostk.
+ * Returns -EPERM when a new stack is supplied while the process is
+ * running on the current alternate stack, 0 otherwise.
+ */
+int sys_sigaltstack(void *stk, void *ostk)
+{
+       struct linux_altstack *ns, *os;
+       Private *p;
+       ulong prevsp, prevsize;
+       int prevflags;
+
+       trace("sys_sigaltstack(%lux, %lux)", (ulong)stk, (ulong)ostk);
+
+       ns = stk;
+       os = ostk;
+       p = current->signal;
+
+       /* snapshot the old settings before anything gets overwritten */
+       prevsp = p->altstack.sp;
+       prevsize = p->altstack.size;
+       prevflags = linuxstackflags(p, current->ureg->sp);
+
+       if(ns != nil){
+               if(prevflags == SS_ONSTACK)
+                       return -EPERM;
+
+               if(ns->flags != SS_DISABLE){
+                       p->altstack.sp = ns->sp;
+                       p->altstack.size = ns->size;
+               } else {
+                       p->altstack.sp = 0;
+                       p->altstack.size = 0;
+               }
+
+               trace("sys_signalstack(): new altstack %lux-%lux",
+                       p->altstack.sp, p->altstack.sp + p->altstack.size);
+       }
+       if(os != nil){
+               os->sp = prevsp;
+               os->size = prevsize;
+               os->flags = prevflags;
+       }
+
+       return 0;
+}
+/*
+ * Argument layout read and written by sys_rt_sigaction().  The mask is
+ * a trailing byte array whose length is given by the syscall's setsize.
+ */
+struct linux_sigaction
+{
+       void *handler;
+       ulong flags;
+       void *restorer; /* ignored on input, reported as 0 on output */
+       uchar mask[];
+};
+
+/*
+ * rt_sigaction: optionally install a new action for sig from *pact and
+ * optionally report the previous action through *poact.
+ * Returns -EINVAL for a signal number outside 1..SIGMAX-1, else 0.
+ */
+int sys_rt_sigaction(int sig, void *pact, void *poact, int setsize)
+{
+       struct linux_sigaction *new, *old;
+       Private *p;
+       Action *slot;
+       void *oldhandler;
+       int oldflags;
+       uvlong oldblock;
+
+       trace("sys_rt_sigaction(%S, %p, %p, %d)", sig, pact, poact, setsize);
+
+       p = current->signal;
+       new = (struct linux_sigaction*)pact;
+       old = (struct linux_sigaction*)poact;
+
+       if(sig < 1 || sig >= SIGMAX)
+               return -EINVAL;
+
+       qlock(p->h);
+       slot = &p->h->a[sig-1];
+
+       /* remember the previous action before it is replaced */
+       oldhandler = slot->handler;
+       oldflags = slot->flags;
+       oldblock = slot->block;
+
+       if(new != nil){
+               /* note: this traces the flags that are about to be replaced */
+               trace("flags = %x", slot->flags);
+               slot->handler = new->handler;
+               slot->flags = new->flags;
+               slot->block = sigset2uvlong(new->mask, setsize);
+       }
+       if(old != nil){
+               old->handler = oldhandler;
+               old->flags = oldflags;
+               old->restorer = 0;
+               uvlong2sigset(old->mask, setsize, oldblock);
+       }
+       qunlock(p->h);
+
+       return 0;
+}
+
+/*
+ * rt_sigpending: store into set the mask of all queued signals that
+ * are either addressed to the whole group or to the current thread.
+ * Always returns 0.
+ */
+int sys_rt_sigpending(uchar *set, int setsize)
+{
+       Private *p;
+       Signal *s;
+       uvlong pending;
+
+       trace("sys_rt_sigpending(%p, %d)", set, setsize);
+
+       p = current->signal;
+       pending = 0LL;
+
+       qlock(p->q);
+       s = p->q->head;
+       while(s){
+               /* skip signals directed at some other thread */
+               if(s->group || s->topid == current->tid)
+                       pending |= MASK(s->signo);
+               s = s->next;
+       }
+       qunlock(p->q);
+
+       uvlong2sigset(set, setsize, pending);
+       return 0;
+}
+/*
+ * Values of the "how" argument accepted by sys_rt_sigprocmask().
+ */
+enum
+{
+       SIG_BLOCK       = 0,    /* add the given signals to the blocked set */
+       SIG_UNBLOCK     = 1,    /* remove the given signals from the blocked set */
+       SIG_SETMASK     = 2,    /* replace the blocked set */
+};
+
+/*
+ * rt_sigprocmask: change the blocked signal mask according to how and
+ * the set in act (if given), and report the previous mask through oact
+ * (if given).  SIGKILL and SIGSTOP are silently removed from act.
+ * Returns -EINVAL for an unknown how when act is given (oact is not
+ * written in that case), 0 otherwise.
+ */
+int sys_rt_sigprocmask(int how, uchar *act, uchar *oact, int setsize)
+{
+       Private *p;
+       uvlong arg, previous;
+
+       trace("sys_rt_sigprocmask(%d, %p, %p, %d)", how, act, oact, setsize);
+
+       p = current->signal;
+       previous = p->block;
+
+       if(act != nil){
+               arg = sigset2uvlong(act, setsize);
+               arg &= ~(MASK(SIGKILL) | MASK(SIGSTOP));
+
+               if(how == SIG_BLOCK)
+                       p->block |= arg;
+               else if(how == SIG_UNBLOCK)
+                       p->block &= ~arg;
+               else if(how == SIG_SETMASK)
+                       p->block = arg;
+               else
+                       return -EINVAL;
+       }
+
+       if(oact != nil)
+               uvlong2sigset(oact, setsize, previous);
+       return 0;
+}
+/*
+ * Argument layout used by sys_setitimer() and sys_getitimer().
+ */
+struct linux_itimer
+{
+       struct linux_timeval    it_interval;    /* reload period */
+       struct linux_timeval    it_value;       /* time until the next expiry */
+};
+
+/*
+ * Divide t by the length of one clock tick (1000000000/HZ), rounding
+ * up.  sys_setitimer() passes a duration in nanoseconds.
+ */
+static vlong
+hzround(vlong t)
+{
+       vlong tick;
+
+       tick = 1000000000LL/HZ;
+       return (t + tick-1) / tick;
+}
+
+/*
+ * setitimer: optionally program interval timer "which" from *value and
+ * optionally report its previous interval and remaining time through
+ * *ovalue.
+ * A new it_value of zero disarms the timer (expire = 0, the same
+ * convention sys_alarm() uses) instead of arming it to fire at once.
+ * Returns -EINVAL for an invalid timer index, 0 otherwise.
+ */
+int sys_setitimer(int which, void *value, void *ovalue)
+{
+       Private *p;
+       Timers *t;
+       vlong now, rem, delta, val;
+       struct linux_itimer *nv = value, *ov = ovalue;
+
+       trace("sys_setitimer(%d, %p, %p)", which, value, ovalue);
+
+       p = current->signal;
+       t = p->t;
+
+       if(which < 0 || which >= nelem(t->itimer))
+               return -EINVAL;
+
+       /* capture the old state first; value and ovalue may point to the same memory */
+       now = nsec();
+       delta = t->itimer[which].interval;
+       rem = t->itimer[which].expire - now;
+       if(rem < 0)
+               rem = 0;
+       if(nv != nil){
+               trace("nv->{interval->{%ld, %ld}, value->{%ld, %ld}}",
+                       nv->it_interval.tv_sec, nv->it_interval.tv_usec,
+                       nv->it_value.tv_sec, nv->it_value.tv_usec);
+               t->itimer[which].interval = hzround(nv->it_interval.tv_sec*1000000000LL +
+                       nv->it_interval.tv_usec*1000);
+               val = nv->it_value.tv_sec*1000000000LL + nv->it_value.tv_usec*1000;
+               if(val > 0){
+                       t->itimer[which].expire = now + val;
+                       setalarm(t->itimer[which].expire);
+               } else {
+                       /* alarmtimer() skips timers whose expire is <= 0 */
+                       t->itimer[which].expire = 0;
+               }
+       }
+
+       if(ov != nil){
+               ov->it_interval.tv_sec =  delta / 1000000000LL;
+               ov->it_interval.tv_usec = (delta % 1000000000LL)/1000;
+               ov->it_value.tv_sec = rem / 1000000000LL;
+               ov->it_value.tv_usec = (rem % 1000000000LL)/1000;
+               trace("ov->{interval->{%ld, %ld}, value->{%ld, %ld}}",
+                       ov->it_interval.tv_sec, ov->it_interval.tv_usec,
+                       ov->it_value.tv_sec, ov->it_value.tv_usec);
+       }
+
+       return 0;
+}
+
+/*
+ * getitimer: report the interval and the remaining time of interval
+ * timer "which" through *value.
+ * Returns -EINVAL for a nil value or an invalid timer index, else 0.
+ */
+int sys_getitimer(int which, void *value)
+{
+       struct linux_itimer *out;
+       Timers *t;
+       vlong left, period;
+
+       trace("sys_getitimer(%d, %p)", which, value);
+
+       out = value;
+       t = current->signal->t;
+
+       if(value == nil || which < 0 || which >= nelem(t->itimer))
+               return -EINVAL;
+
+       period = t->itimer[which].interval;
+       left = t->itimer[which].expire - nsec();
+       if(left < 0)
+               left = 0;       /* already expired or not armed */
+
+       out->it_interval.tv_sec = period / 1000000000LL;
+       out->it_interval.tv_usec = (period % 1000000000LL)/1000;
+       out->it_value.tv_sec = left / 1000000000LL;
+       out->it_value.tv_usec = (left % 1000000000LL)/1000;
+
+       return 0;
+}
+
+/*
+ * alarm: re-arm timer 0 as a one-shot that expires after the given
+ * number of seconds, or disarm it when seconds is not positive.
+ * Returns the whole seconds that were left on the previous setting.
+ */
+int sys_alarm(long seconds)
+{
+       Timers *t;
+       vlong now, left;
+
+       trace("sys_alarm(%ld)", seconds);
+       t = current->signal->t;
+       now = nsec();
+
+       left = t->itimer[0].expire - now;
+       if(left < 0)
+               left = 0;
+
+       t->itimer[0].interval = 0;
+       if(seconds <= 0){
+               t->itimer[0].expire = 0;
+       } else {
+               t->itimer[0].expire = now + (vlong)seconds * 1000000000LL;
+               setalarm(t->itimer[0].expire);
+       }
+       return left / 1000000000LL;
+}
+
+/*
+ * Format verb for signal numbers: prints "N [NAME]" for a signal that
+ * has an entry in the name table and plain "N" for everything else.
+ * The table lookup is guarded by the table's own size and by a nil
+ * check, so a signal number below SIGMAX without a name can neither
+ * index past the table nor hand a nil string to %s.
+ */
+int
+Sfmt(Fmt *f)
+{
+       static char *t[] = {
+       [SIGHUP]        = "SIGHUP",
+       [SIGINT]        = "SIGINT",
+       [SIGQUIT]       = "SIGQUIT",
+       [SIGILL]        = "SIGILL",
+       [SIGTRAP]       = "SIGTRAP",
+       [SIGABRT]       = "SIGABRT",
+       [SIGBUS]        = "SIGBUS",
+       [SIGFPE]        = "SIGFPE",
+       [SIGKILL]       = "SIGKILL",
+       [SIGUSR1]       = "SIGUSR1",
+       [SIGSEGV]       = "SIGSEGV",
+       [SIGUSR2]       = "SIGUSR2",
+       [SIGPIPE]       = "SIGPIPE",
+       [SIGALRM]       = "SIGALRM",
+       [SIGTERM]       = "SIGTERM",
+       [SIGSTKFLT]     = "SIGSTKFLT",
+       [SIGCHLD]       = "SIGCHLD",
+       [SIGCONT]       = "SIGCONT",
+       [SIGSTOP]       = "SIGSTOP",
+       [SIGTSTP]       = "SIGTSTP",
+       [SIGTTIN]       = "SIGTTIN",
+       [SIGTTOU]       = "SIGTTOU",
+       [SIGURG]        = "SIGURG",
+       [SIGXCPU]       = "SIGXCPU",
+       [SIGXFSZ]       = "SIGXFSZ",
+       [SIGVTALRM]     = "SIGVTALRM",
+       [SIGPROF]       = "SIGPROF",
+       [SIGWINCH]      = "SIGWINCH",
+       [SIGIO]         = "SIGIO",
+       [SIGPWR]        = "SIGPWR",
+       [SIGSYS]        = "SIGSYS",
+       [SIGRT1]        = "SIGRT1",
+       [SIGRT2]        = "SIGRT2",
+       [SIGRT3]        = "SIGRT3",
+       [SIGRT4]        = "SIGRT4",
+       [SIGRT5]        = "SIGRT5",
+       [SIGRT6]        = "SIGRT6",
+       [SIGRT7]        = "SIGRT7",
+       [SIGRT8]        = "SIGRT8",
+       };
+
+       int sig;
+
+       sig = va_arg(f->args, int);
+       if(sig < 1 || sig >= SIGMAX || sig >= nelem(t) || t[sig] == nil)
+               return fmtprint(f, "%d", sig);
+       return fmtprint(f, "%d [%s]", sig, t[sig]);
+}
+
+/* proc.c */
+extern int procsetalarm(Uproc *proc, vlong t);
+
+/*
+ * Run the interval timers of proc against the time now.
+ * Timers that are not due yet only get their alarm re-registered.  A
+ * due timer is either advanced past now by whole intervals (counting
+ * the extra periods as overrun) or disarmed when it has no interval,
+ * and a SIGALRM with code SI_TIMER is sent to proc.
+ */
+void
+alarmtimer(Uproc *proc, vlong now)
+{
+       Private *p;
+       Timers *t;
+       Usiginfo info;
+       vlong when, period;
+       int i, missed;
+
+       p = proc->signal;
+       if(p == nil)
+               return;
+       t = p->t;
+
+       for(i=0; i < nelem(t->itimer); i++){
+               when = t->itimer[i].expire;
+               if(when <= 0)
+                       continue;       /* not armed */
+               if(now < when){
+                       procsetalarm(proc, when);
+                       continue;
+               }
+
+               missed = 0;
+               period = t->itimer[i].interval;
+               if(period <= 0){
+                       /* one-shot timer: disarm */
+                       when = 0;
+               } else {
+                       /* skip ahead to the first expiry after now */
+                       for(when += period; when <= now; when += period)
+                               missed++;
+                       procsetalarm(proc, when);
+               }
+               t->itimer[i].expire = when;
+
+               memset(&info, 0, sizeof(info));
+               info.signo = SIGALRM;
+               info.code = SI_TIMER;
+               info.timer.tid = i;
+               info.timer.overrun = missed;
+               killproc(proc, &info, 1);
+       }
+}
diff --git a/linux_emul_base/sockdev.c b/linux_emul_base/sockdev.c
new file mode 100644 (file)
index 0000000..2197327
--- /dev/null
@@ -0,0 +1,1163 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Socket Socket;
+typedef struct Connectproc Connectproc;
+typedef struct Listenproc Listenproc;
+/*
+ * Size of the buffers used for path names, control strings and
+ * address strings throughout this file.
+ */
+enum {
+       Ctlsize = 128,
+};
+/*
+ * Per-socket state.  The embedded Ufile supplies the generic file
+ * fields (fd, ref, dev, mode) that allocsock() initialises.
+ */
+struct Socket
+{
+       Ufile;
+
+       int                             family; /* AF_* */
+       int                             stype;  /* SOCK_* */
+       int                             protocol;
+
+       int                             other;  /* AF_UNIX: second pipe end, else -1 */
+       char                            net[40];        /* "/net/tcp" or "/net/udp" */
+       char                            name[Ctlsize];  /* connection directory "<net>/<n>" */
+
+       int                             naddr;  /* number of valid bytes in addr */
+       uchar                   addr[40];       /* sockaddr saved by bind/connect */
+
+       void                            *bufproc;
+       Connectproc             *connectproc;   /* connect in progress, if any */
+       Listenproc              *listenproc;
+
+       int                             connected;      /* set by connectproc() on success */
+       int                             error;  /* result of the last connect attempt */
+
+       Socket                  *next;  /* link in a Listenproc queue */
+};
+/*
+ * State shared between connectsock() and the helper process running
+ * connectproc().  Reference counted and protected by its own QLock;
+ * released through freeconnectproc().
+ */
+struct Connectproc
+{
+       Ref;
+       QLock;
+       Socket  *sock;  /* owning socket; set to nil by freeconnectproc() */
+       int             notefd; /* /proc/<pid>/note of the helper, used to interrupt it */
+       Uwaitq  wq;     /* woken when the connect attempt has finished */
+       char            str[Ctlsize];   /* address string to connect to */
+};
+/*
+ * State of a listening helper, released through freelistenproc().
+ * NOTE(review): the code that fills this in is outside this chunk;
+ * the field notes below are based on freelistenproc() and on the
+ * parallel Connectproc - confirm against the listen code.
+ */
+struct Listenproc
+{
+       Ref;
+       QLock;
+       Socket  *sock;  /* owning socket; set to nil by freelistenproc() */
+       int             notefd; /* written with "interrupt" to stop the helper */
+       Uwaitq  wq;
+       Socket  *q;     /* list of sockets linked through Socket.next */
+       char            str[Ctlsize];
+};
+/*
+ * Address families understood by this file.
+ */
+enum
+{
+       AF_UNIX                 =1,
+       AF_INET                 =2,
+       AF_INET6                        =10,
+};
+/*
+ * Socket types; newsock() maps SOCK_STREAM to tcp and SOCK_DGRAM to
+ * udp for the inet families and rejects any other type there.
+ */
+enum
+{
+       SOCK_STREAM             =1,
+       SOCK_DGRAM              =2,
+       SOCK_RAW                =3,
+};
+
+/*
+ * Build the /srv name used for a unix socket path: "/srv/UD." followed
+ * by the last path element of path.  The result is written into npath
+ * (at most len bytes) and npath is returned.
+ */
+static char*
+srvname(char *npath, char *path, int len)
+{
+       char *base;
+
+       base = strrchr(path, '/');
+       base = (base != 0) ? base+1 : path;
+       snprint(npath, len, "/srv/UD.%s", base);
+       return npath;
+}
+
+/*
+ * Post file descriptor fd under the /srv name derived from path,
+ * replacing any existing entry of that name.
+ * Returns 0 on success and -1 on failure (including fd < 0).
+ */
+static int
+srvunixsock(int fd, char *path)
+{
+       char buf[8+Ctlsize+1];
+       int sfd, failed;
+
+       if(fd < 0)
+               return -1;
+
+       srvname(buf, path, sizeof(buf));
+       remove(buf);
+       sfd = create(buf, OWRITE, 0666);
+       if(sfd < 0)
+               return -1;
+
+       /* the fd number is written to the new file as decimal text */
+       sprint(buf, "%d", fd);
+       failed = write(sfd, buf, strlen(buf)) < 0;
+       close(sfd);
+
+       return failed ? -1 : 0;
+}
+
+/*
+ * Remove the /srv entry that srvunixsock() creates for path.
+ */
+static void
+unsrvunixsock(char *path)
+{
+       char name[8+Ctlsize+1];
+
+       /* srvname() returns the buffer it was given */
+       remove(srvname(name, path, sizeof(name)));
+}
+
+/*
+ * Allocate a zeroed Socket with one reference, no file descriptors
+ * attached yet, marked as belonging to SOCKDEV and opened O_RDWR.
+ */
+static Socket*
+allocsock(int family, int stype, int protocol)
+{
+       Socket *s;
+
+       s = kmallocz(sizeof(*s), 1);
+
+       /* generic file part */
+       s->ref = 1;
+       s->dev = SOCKDEV;
+       s->mode = O_RDWR;
+       s->fd = -1;
+
+       /* socket part */
+       s->other = -1;
+       s->family = family;
+       s->stype = stype;
+       s->protocol = protocol;
+
+       return s;
+}
+
+/*
+ * Create a socket of the given family and type and register it with
+ * newfd(sock, FD_CLOEXEC).
+ * AF_UNIX sockets are backed by a pipe(); AF_INET and AF_INET6 sockets
+ * by a fresh /net/tcp or /net/udp conversation obtained from the clone
+ * file.  Returns the result of newfd() or a negative error code.
+ */
+static int
+newsock(int family, int stype, int protocol)
+{
+       Socket *s;
+       char *proto;
+       char path[Ctlsize];
+       int pipefd[2];
+       int ctl, data, conv, nr, err;
+
+       trace("newsock(%d, %d, %d)", family, stype, protocol);
+
+       /* proto stays nil for AF_UNIX */
+       proto = nil;
+       if(family == AF_INET || family == AF_INET6){
+               if(stype == SOCK_DGRAM)
+                       proto = "udp";
+               else if(stype == SOCK_STREAM)
+                       proto = "tcp";
+               else {
+                       trace("newsock() unknown socket type %d/%d", family, stype);
+                       return -EINVAL;
+               }
+       } else if(family != AF_UNIX){
+               trace("newsock() unknown network family %d", family);
+               return -EINVAL;
+       }
+
+       /* err stays -EINVAL when the clone file yields an empty read */
+       err = -EINVAL;
+       ctl = -1;
+       s = allocsock(family, stype, protocol);
+
+       if(proto == nil){
+               if(pipe(pipefd) < 0){
+                       err = mkerror();
+                       goto errout;
+               }
+               s->other = pipefd[1];
+               s->fd = pipefd[0];
+               return newfd(s, FD_CLOEXEC);
+       }
+
+       /* reading the clone file yields the number of the new conversation */
+       snprint(path, sizeof(path), "/net/%s/clone", proto);
+       ctl = open(path, ORDWR);
+       if(ctl < 0){
+               err = mkerror();
+               goto errout;
+       }
+       nr = read(ctl, path, sizeof(path)-1);
+       if(nr < 0)
+               err = mkerror();
+       if(nr <= 0)
+               goto errout;
+       path[nr] = 0;
+       conv = atoi(path);
+
+       snprint(path, sizeof(path), "/net/%s/%d/data", proto, conv);
+       data = open(path, ORDWR);
+       if(data < 0){
+               err = mkerror();
+               goto errout;
+       }
+       close(ctl);
+
+       s->fd = data;
+       snprint(s->net, sizeof(s->net), "/net/%s", proto);
+       snprint(s->name, sizeof(s->name), "%s/%d", s->net, conv);
+       return newfd(s, FD_CLOEXEC);
+
+errout:
+       close(ctl);
+       free(s);
+       return err;
+}
+/*
+ * Drop one reference to cp and detach it from its socket.
+ * If decref() reports a non-zero result, "interrupt" is written to the
+ * helper's note file and cp is left alone; otherwise the note file is
+ * closed and cp is freed.  A nil cp is ignored.
+ */
+static void
+freeconnectproc(Connectproc *cp)
+{
+       if(cp == nil)
+               return;
+       qlock(cp);
+       cp->sock = nil; /* connectproc() treats a nil sock as "give up" */
+       if(decref(cp)){
+               write(cp->notefd, "interrupt", 9);
+               qunlock(cp);
+               return;
+       }
+       qunlock(cp);
+       close(cp->notefd);
+       free(cp);
+}
+/*
+ * Drop one reference to lp and detach it from its socket.
+ * If decref() reports a non-zero result, "interrupt" is written to the
+ * helper's note file and lp is left alone; otherwise every socket still
+ * queued on lp->q is released with putfile(), the note file is closed
+ * and lp is freed.  A nil lp is ignored.
+ */
+static void
+freelistenproc(Listenproc *lp)
+{
+       Socket *q;
+
+       if(lp == nil)
+               return;
+       qlock(lp);
+       lp->sock = nil;
+       if(decref(lp)){
+               write(lp->notefd, "interrupt", 9);
+               qunlock(lp);
+               return;
+       }
+       while(q = lp->q){
+               lp->q = q->next;        /* unlink before releasing */
+               putfile(q);
+       }
+       qunlock(lp);
+       close(lp->notefd);
+       free(lp);
+}
+
+/*
+ * Close hook for sockets: closes both file descriptors and releases
+ * the buffer, connect and listen helpers.  Always returns 0.
+ */
+static int
+closesock(Ufile *file)
+{
+       Socket *s;
+
+       s = (Socket*)file;
+
+       close(s->fd);
+       close(s->other);
+
+       freebufproc(s->bufproc);
+       freeconnectproc(s->connectproc);
+       freelistenproc(s->listenproc);
+
+       return 0;
+}
+
+
+/*
+ * Body of the helper process started by connectsock().
+ * aux is the Connectproc shared with the caller.  The connect itself
+ * is performed without holding the lock; afterwards the result is
+ * stored in the socket (unless it has been detached in the meantime),
+ * all waiters are woken and the helper drops its reference to cp.
+ *
+ * AF_UNIX: the peer's /srv file is opened, our own pipe end is posted
+ * under a unique /srv name, that name is sent to the peer and the
+ * connect succeeds once the peer echoes the name back on our pipe.
+ * Other families: "connect <addr>" is written to the conversation's
+ * ctl file.
+ */
+static void
+connectproc(void *aux)
+{
+       int fd, cfd, other;
+       char buf[Ctlsize], tmp[8+Ctlsize+1];
+       Connectproc *cp;
+       Socket *sock;
+       int err;
+
+       cp =  (Connectproc*)aux;
+       qlock(cp);
+       if((sock = cp->sock) == nil)
+               goto out;
+
+       snprint(buf, sizeof(buf), "connectproc() %s", cp->str);
+       setprocname(buf);
+
+       err = 0;
+       switch(sock->family){
+       case AF_UNIX:
+               fd = sock->fd;
+               other = sock->other;
+               qunlock(cp);
+
+               err = -ECONNREFUSED;
+               /*
+                * tmp, not buf, is the destination here, so its size is
+                * the one to pass; sizeof(buf) would cut long names
+                * short compared to srvunixsock()/unsrvunixsock().
+                */
+               srvname(tmp, cp->str, sizeof(tmp));
+               if((cfd = open(tmp, ORDWR)) < 0)
+                       break;
+
+               memset(buf, 0, sizeof(buf));
+               snprint(buf, sizeof(buf), "linuxemu.%d.%lux", getpid(), (ulong)sock);
+               if(srvunixsock(other, buf) < 0){
+                       close(cfd);
+                       break;
+               }
+
+               /*
+                * write Ctlsize-1 bytes so concurrent writes will not be merged together as
+                * Ctlsize-1 is the size used in read(). see /sys/src/ape/lib/bsd/accept.c:87
+                * this should be fixed in ape's connect() as well.
+                */
+               if(write(cfd, buf, sizeof(buf)-1) != sizeof(buf)-1){
+                       close(cfd);
+                       unsrvunixsock(buf);
+                       break;
+               }
+               close(cfd);
+               /* the peer acknowledges by echoing our name back */
+               if((read(fd, tmp, strlen(buf)) != strlen(buf)) || memcmp(buf, tmp, strlen(buf))){
+                       unsrvunixsock(buf);
+                       break;
+               }
+               unsrvunixsock(buf);
+               err = 0;
+               break;
+
+       default:
+               snprint(buf, sizeof(buf), "%s/ctl", sock->name);
+               qunlock(cp);
+               if((cfd = open(buf, ORDWR)) < 0){
+                       err = mkerror();
+                       break;
+               }
+               if(fprint(cfd, "connect %s", cp->str) < 0)
+                       err = mkerror();
+               close(cfd);
+       }
+
+       /* the socket may have been closed while we were connecting */
+       qlock(cp);
+       if((sock = cp->sock) == nil)
+               goto out;
+       if(err == 0){
+               close(sock->other);
+               sock->other = -1;
+               sock->connected = 1;
+       }
+       sock->error = err;
+out:
+       wakeq(&cp->wq, MAXPROC);
+       qunlock(cp);
+       freeconnectproc(cp);
+}
+
+/*
+ * Convert the Linux sockaddr bytes in addr into the string form used
+ * for connecting: "a.b.c.d!port" for AF_INET, the raw path for AF_UNIX
+ * (truncated to fit nbuf).  Returns the snprint() result respectively
+ * the path length, or -EINVAL when the address is too short or the
+ * family is not supported (AF_INET6 is still to be done).
+ */
+static int
+sockaddr2str(Socket *sock, uchar *addr, int addrlen, char *buf, int nbuf)
+{
+       int port, n;
+
+       if(sock->family == AF_INET){
+               if(addrlen < 8)
+                       return -EINVAL;
+               /* port is stored big endian in bytes 2 and 3, the address follows */
+               port = (int)(((ulong)addr[2]<<8)|(ulong)addr[3]);
+               return snprint(buf, nbuf, "%d.%d.%d.%d!%d",
+                       (int)(addr[4]),
+                       (int)(addr[5]),
+                       (int)(addr[6]),
+                       (int)(addr[7]),
+                       port);
+       }
+
+       if(sock->family == AF_UNIX){
+               if(addrlen <= 2)
+                       return -EINVAL;
+               /* skip the two leading family bytes */
+               n = addrlen - 2;
+               if(n >= nbuf)
+                       n = nbuf-1;
+               memmove(buf, addr+2, n);
+               buf[n] = 0;
+               return n;
+       }
+
+       /* AF_INET6 (TODO) and anything else */
+       return -EINVAL;
+}
+/*
+ * Connect sock to the address in addr/addrlen by forking a helper
+ * process that runs connectproc().
+ * Returns -EISCONN if already connected, -EALREADY if a connect is in
+ * progress, an error from sockaddr2str() or procfork(), -EINPROGRESS
+ * for O_NONBLOCK sockets, and otherwise the result of waiting for the
+ * helper (sock->error when the wait itself succeeded).
+ */
+static int
+connectsock(Socket *sock, uchar *addr, int addrlen)
+{
+       Connectproc *cp;
+       int err;
+       char buf[Ctlsize];
+       int pid;
+
+       if(sock->connected)
+               return -EISCONN;
+       if(sock->connectproc)
+               return -EALREADY;
+
+       if((err = sockaddr2str(sock, addr, addrlen, buf, sizeof(buf))) < 0)
+               return err;
+
+       cp = kmallocz(sizeof(*cp), 1);
+       cp->ref = 2;    /* presumably one for the socket, one for the helper - confirm */
+       cp->sock = sock;
+       strncpy(cp->str, buf, sizeof(cp->str));
+
+       qlock(cp);      /* keeps the helper from starting its work until setup is done */
+       sock->error = 0;
+       if((pid = procfork(connectproc, cp, 0)) < 0){
+               qunlock(cp);
+               free(cp);
+               return mkerror();
+       }
+       snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+       cp->notefd = open(buf, OWRITE);
+
+       if(addrlen > sizeof(sock->addr))
+               addrlen = sizeof(sock->addr);
+       sock->naddr = addrlen;
+       memmove(sock->addr, addr, addrlen);     /* remember the peer address */
+
+       sock->connectproc = cp;
+       if(sock->mode & O_NONBLOCK){
+               qunlock(cp);
+               return -EINPROGRESS;
+       }
+       if((err = sleepq(&cp->wq, cp, 1)) == 0)
+               err = sock->error;
+       qunlock(cp);
+
+       /*
+        * an interrupted connect keeps running in the background, so the
+        * helper is only released when the wait was not interrupted.
+        * see: http://www.madore.org/~david/computers/connect-intr.html
+       */
+       if(err != -EINTR && err != -ERESTART){
+               sock->connectproc = nil;
+               freeconnectproc(cp);
+       }
+       return err;
+}
+/*
+ * shutdown: the how argument is ignored; the socket is always torn
+ * down completely.  All helpers are released, the data fd is closed
+ * and the socket is marked as not connected.  Always returns 0.
+ */
+static int
+shutdownsock(Socket *sock, int how)
+{
+       USED(how);
+
+       freebufproc(sock->bufproc);
+       sock->bufproc = nil;
+       freeconnectproc(sock->connectproc);
+       sock->connectproc = nil;
+       freelistenproc(sock->listenproc);
+       sock->listenproc = nil;
+       close(sock->fd);
+       sock->fd = -1;  /* a later closesock() then closes -1 instead of a reused fd */
+       sock->connected = 0;
+
+       return 0;
+}
+
+/*
+ * bind: for AF_INET the port from the sockaddr is announced and bound
+ * on the conversation's ctl file; for AF_UNIX nothing is done beyond
+ * remembering the address.  In both cases the raw sockaddr is saved in
+ * the socket (truncated to its addr buffer).
+ * Returns 0, -EINVAL for a short address or an unsupported family
+ * (AF_INET6 is still to be done), or the error from the ctl file.
+ */
+static int
+bindsock(Socket *sock, uchar *addr, int addrlen)
+{
+       char ctlpath[Ctlsize];
+       int ctl, port;
+
+       if(sock->family == AF_INET){
+               if(addrlen < 4)
+                       return -EINVAL;
+               /* port is stored big endian in bytes 2 and 3 */
+               port = (int)(((ulong)addr[2]<<8)|(ulong)addr[3]);
+
+               snprint(ctlpath, sizeof(ctlpath), "%s/ctl", sock->name);
+               ctl = open(ctlpath, ORDWR);
+               if(ctl < 0)
+                       return mkerror();
+               if((fprint(ctl, "announce %d", port) < 0) || (fprint(ctl, "bind %d", port) < 0)){
+                       close(ctl);
+                       return mkerror();
+               }
+               close(ctl);
+       } else if(sock->family != AF_UNIX){
+               /* AF_INET6 (TODO) and unknown families */
+               return -EINVAL;
+       }
+
+       if(addrlen > sizeof(sock->addr))
+               addrlen = sizeof(sock->addr);
+       sock->naddr = addrlen;
+       memmove(sock->addr, addr, addrlen);
+
+       return 0;
+}
+/*
+ * Parse the textual address str into the byte array ip of iplen bytes
+ * (zeroed first).  Groups terminated by '.' are read as decimal bytes,
+ * groups terminated by ':' as 16 bit hexadecimal values stored high
+ * byte first; "::" repositions the write index relative to the end of
+ * ip based on the separators that still follow.
+ * Returns the index just past the last group written, or -1 for an
+ * invalid character or a group that would not fit into ip.
+ */
+static int
+strtoip(char *str, uchar *ip, int iplen)
+{
+       int i, d, v6;
+       char *p, *k;
+
+       i = 0;  /* write index into ip */
+       v6 = 1; /* 1: current group is a 2 byte hex group, 0: a decimal byte */
+       memset(ip, 0, iplen);
+       for(p = str; *p; p++){
+               if(*p == ':'){
+                       if(p[1] == ':'){
+                               p++;
+                               i = iplen;      /* count back from the end for each group that follows */
+                               for(k = p+1; *k; k++){
+                                       if(*k == ':'){
+                                               v6 = 1;
+                                               i -= 2;
+                                       }
+                                       if(*k == '.'){
+                                               v6 = 0;
+                                               i -= 1;
+                                       }
+                               }
+                               i -= v6+1;      /* room for the last group */
+                       } else {
+                               i += 2;
+                       }
+                       continue;
+               } else if(*p == '.'){
+                       i++;
+                       continue;
+               }
+
+               for(k = p; *k && *k != '.' && *k != ':'; k++)
+                       ;
+               if(*k == '.'){  /* the separator ending this group decides its kind */
+                       v6 = 0;
+               } else if(*k == ':'){
+                       v6 = 1;
+               }
+
+               if(i < 0 || i + v6+1 > iplen)
+                       return -1;
+
+               if(*p >= '0' && *p <= '9'){
+                       d = *p - '0';
+               } else if(v6 && (*p >= 'a' && *p <= 'f')){
+                       d = 0x0A + *p - 'a';
+               } else if(v6 && (*p >= 'A' && *p <= 'F')){
+                       d = 0x0A + *p - 'A';
+               } else {
+                       return -1;
+               }
+
+               if(v6){
+                       d |= ((int)ip[i]<<12 | (int)ip[i+1]<<4);        /* shift the 16 bit group left by one hex digit */
+                       ip[i] = (d>>8) & 0xFF;
+                       ip[i+1] = d & 0xFF;
+               } else {
+                       ip[i] = ip[i]*10 + d;
+               }
+       }
+
+       return i + v6+1;
+}
+
+/*
+ * Store the Linux sockaddr of the local (remote == 0) or remote
+ * (remote != 0) end of sock into addr, which has room for len bytes.
+ *
+ * AF_UNIX: copies the address remembered in sock->addr.
+ * AF_INET: reads "<sock->name>/local" or ".../remote", expects the
+ * form "ip!port" and builds family (bytes 0-1, little endian), port
+ * (bytes 2-3, big endian) and the IPv4 address (bytes 4-7); the
+ * result is zero padded up to min(len, 16) bytes.
+ *
+ * Returns the number of address bytes or a negative Linux errno.
+ */
+static int
+getsockaddr(Socket *sock, int remote, uchar *addr, int len)
+{
+       char buf[Ctlsize];
+       char *p;
+       uchar *a;
+       int fd;
+       int n, port;
+
+       a = addr;
+       switch(sock->family){
+       case AF_UNIX:
+               /*
+                * buffer too small: fail here instead of falling out of the
+                * switch and parsing the still uninitialised buf below
+                */
+               if(len < sock->naddr)
+                       return -EINVAL;
+               memmove(a, sock->addr, sock->naddr);
+               return sock->naddr;
+       case AF_INET:
+       case AF_INET6:
+               snprint(buf, sizeof(buf), "%s/%s", sock->name, remote?"remote":"local");
+               if((fd = open(buf, OREAD)) < 0)
+                       return mkerror();
+               if((n = read(fd, buf, sizeof(buf)-1)) < 0){
+                       close(fd);
+                       return mkerror();
+               }
+               close(fd);
+               if(n > 0 && buf[n-1] == '\n')
+                       n--;
+               buf[n] = 0;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* split "ip!port" at the last '!' */
+       if((p = strrchr(buf, '!')) == nil)
+               return -EINVAL;
+       *p++ = 0;
+       port = atoi(p);
+
+       trace("getsockaddr(): ip=%s port=%d", buf, port);
+
+       switch(sock->family){
+       case AF_INET:
+               if(len < 8)
+                       break;
+               if(len > 16)
+                       len = 16;
+               memset(a, 0, len);
+               a[0] = sock->family & 0xFF;
+               a[1] = (sock->family>>8) & 0xFF;
+               a[2] = (port >> 8) & 0xFF;
+               a[3] = port & 0xFF;
+               if(strtoip(buf, &a[4], 4) < 0)
+                       break;
+               return len;
+
+       case AF_INET6:
+               /* TODO */
+               break;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Body of the helper process started by listensock().  aux is the
+ * Listenproc shared with the listening socket.  The process loops
+ * waiting for incoming connections, wraps each one in a new Socket,
+ * pushes it on lp->q and wakes everybody sleeping on lp->wq.
+ * lp is kept locked except while blocked in open()/read().
+ * The loop ends when the wait fails or lp->sock has been cleared;
+ * the process then drops its reference with freelistenproc().
+ */
+static void
+listenproc(void *aux)
+{
+       Listenproc *lp;
+       Socket *sock, *q;
+       char buf[Ctlsize], tmp[8+Ctlsize+1];
+       int cfd, fd, n;
+
+       lp = (Listenproc*)aux;
+       qlock(lp);
+       if((sock = lp->sock) == nil)
+               goto out;
+
+       snprint(buf, sizeof(buf), "listenproc() %s", lp->str);
+       setprocname(buf);
+
+       for(;;){
+               n = 0;
+               cfd = -1;
+               switch(sock->family){
+               case AF_UNIX:
+                       srvunixsock(sock->other, lp->str);
+                       close(sock->other);
+                       sock->other = -1;
+                       fd = sock->fd;
+                       qunlock(lp);
+                       n = read(fd, buf, sizeof(buf)-1);
+                       qlock(lp);
+                       break;
+
+               default:
+                       snprint(buf, sizeof(buf), "%s/listen", sock->name);
+                       qunlock(lp);
+                       if((cfd = open(buf, ORDWR)) >= 0)
+                               n = read(cfd, buf, sizeof(buf)-1);
+                       qlock(lp);
+                       if(n <= 0)
+                               close(cfd);
+               }
+               if(n <= 0)
+                       break;
+               buf[n] = 0;
+
+               /* the lock was dropped above: reload the socket pointer */
+               if((sock = lp->sock) == nil){
+                       close(cfd);
+                       break;
+               }
+
+               switch(sock->family){
+               case AF_UNIX:
+                       srvname(tmp, buf, sizeof(tmp));
+                       if((fd = open(tmp, ORDWR)) < 0)
+                               break;
+                       unsrvunixsock(buf);
+                       if(write(fd, buf, strlen(buf)) != strlen(buf)){
+                               close(fd);
+                               fd = -1;
+                       }
+                       buf[0] = 0;
+                       break;
+
+               default:
+                       n = atoi(buf);
+                       snprint(buf, sizeof(buf), "%s/%d", sock->net, n);
+                       snprint(tmp, sizeof(tmp), "%s/data", buf);
+                       fd = open(tmp, ORDWR);
+                       close(cfd);
+                       break;
+               }
+
+               if(fd < 0)
+                       continue;
+
+               q = allocsock(sock->family, sock->stype, sock->protocol);
+               strncpy(q->net, sock->net, sizeof(q->net));
+               strncpy(q->name, buf, sizeof(q->name));
+
+               if(sock->family == AF_UNIX){
+                       memmove(q->addr, sock->addr, q->naddr = sock->naddr);
+               } else {
+                       q->naddr = getsockaddr(q, 0, q->addr, sizeof(q->addr));
+                       /* a failed lookup must not leave a negative length behind */
+                       if(q->naddr < 0)
+                               q->naddr = 0;
+               }
+
+               q->fd = fd;
+               q->connected = 1;
+               q->next = lp->q;
+               lp->q = q;
+               wakeq(&lp->wq, MAXPROC);
+       }
+
+       /* sock is nil when the loop ended because lp->sock was cleared */
+       if(sock != nil && sock->family == AF_UNIX)
+               unsrvunixsock(lp->str);
+out:
+       wakeq(&lp->wq, MAXPROC);
+       qunlock(lp);
+       freelistenproc(lp);
+}
+
+
+/*
+ * Put sock into listening state by forking a listenproc() for it.
+ * A socket that already has a listenproc is left alone.  The new
+ * Listenproc starts with two references (this socket and the forked
+ * process) and keeps a descriptor on the process's note file.
+ * Returns 0 on success or a negative Linux errno.
+ */
+static int
+listensock(Socket *sock)
+{
+       Listenproc *lp;
+       int pid, err;
+       char buf[Ctlsize];
+
+       trace("listensock()");
+
+       if(sock->listenproc)
+               return 0;
+       if((err = sockaddr2str(sock, sock->addr, sock->naddr, buf, sizeof(buf))) < 0)
+               return err;
+
+       lp = kmallocz(sizeof(*lp), 1);
+       lp->ref = 2;
+       lp->sock = sock;
+       /* snprint always terminates, strncpy does not when buf fills lp->str */
+       snprint(lp->str, sizeof(lp->str), "%s", buf);
+
+       /* hold the lock across the fork so the child waits until setup is done */
+       qlock(lp);
+       if((pid = procfork(listenproc, lp, 0)) < 0){
+               qunlock(lp);
+               free(lp);
+               return mkerror();
+       }
+       snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+       lp->notefd = open(buf, OWRITE);
+       sock->listenproc = lp;
+       qunlock(lp);
+
+       return 0;
+}
+
+/*
+ * Copy the address remembered in sock->addr to addr.
+ * On entry *paddrlen is the capacity of addr; at most that many bytes
+ * are written.  On return *paddrlen holds the full address length,
+ * which is also the return value (kept from the original code).
+ * Returns -EINVAL when either pointer is nil.
+ */
+static int
+getsockname(Socket *sock, uchar *addr, int *paddrlen)
+{
+       int ret, n;
+
+       trace("getsockname(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+       if(addr == nil || paddrlen == nil)
+               return -EINVAL;
+
+       ret = sock->naddr;
+       if(ret < 0)
+               ret = 0;
+
+       /* never write more than the caller said the buffer can hold */
+       n = ret;
+       if(n > *paddrlen)
+               n = *paddrlen;
+       if(n > 0)
+               memmove(addr, sock->addr, n);
+       *paddrlen = ret;
+
+       return ret;
+}
+
+/*
+ * Fetch the remote address of sock through getsockaddr().
+ * *paddrlen is passed as the buffer capacity and is overwritten with
+ * the address length only when that length is positive.  The result
+ * of getsockaddr() is returned unchanged.
+ */
+static int
+getpeername(Socket *sock, uchar *addr, int *paddrlen)
+{
+       int n;
+
+       trace("getpeername(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+       if(paddrlen == nil || addr == nil)
+               return -EINVAL;
+
+       n = getsockaddr(sock, 1, addr, *paddrlen);
+       if(n > 0)
+               *paddrlen = n;
+       return n;
+}
+
+/*
+ * Take the next queued connection off the listenproc of sock and
+ * return a new descriptor for it (created with FD_CLOEXEC).
+ * If addr and paddrlen are both given, the peer address is stored
+ * there; *paddrlen becomes 0 when the lookup fails.
+ * With an empty queue a non-blocking socket gets -EAGAIN, otherwise
+ * the caller sleeps on lp->wq; a sleepq() error is returned as is.
+ * Returns -EINVAL if the socket is not listening.
+ */
+static int
+acceptsock(Socket *sock, uchar *addr, int *paddrlen)
+{
+       Listenproc *lp;
+       Socket *nsock;
+       int err;
+
+       trace("acceptsock(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+       lp = sock->listenproc;
+       if(lp == nil)
+               return -EINVAL;
+
+       qlock(lp);
+       while((nsock = lp->q) == nil){
+               if(sock->mode & O_NONBLOCK){
+                       qunlock(lp);
+                       return -EAGAIN;
+               }
+               err = sleepq(&lp->wq, lp, 1);
+               if(err < 0){
+                       qunlock(lp);
+                       return err;
+               }
+       }
+
+       /* unlink the head of the queue while still holding the lock */
+       lp->q = nsock->next;
+       nsock->next = nil;
+       qunlock(lp);
+
+       if(addr != nil && paddrlen != nil){
+               err = getsockaddr(nsock, 1, addr, *paddrlen);
+               *paddrlen = err < 0 ? 0 : err;
+       }
+       return newfd(nsock, FD_CLOEXEC);
+}
+
+/*
+ * Create a connected pair of AF_UNIX sockets on top of pipe(): each
+ * pipe end gets its own Socket and descriptor, stored in sv[0], sv[1].
+ * Returns 0, -EAFNOSUPPORT for other families, or a negative errno.
+ */
+static int
+socketpair(int family, int stype, int protocol, int sv[2])
+{
+       Socket *sock;
+       int p[2];
+       int i, fd;
+
+       trace("socketpair(%d, %d, %d, %p)", family, stype, protocol, sv);
+
+       if(family != AF_UNIX)
+               return -EAFNOSUPPORT;
+       if(pipe(p) < 0)
+               return mkerror();
+
+       i = 0;
+       while(i < 2){
+               sock = allocsock(family, stype, protocol);
+               sock->fd = p[i];
+               sock->connected = 1;
+               fd = newfd(sock, FD_CLOEXEC);
+               if(fd < 0)
+                       goto fail;
+               sv[i] = fd;
+               i++;
+       }
+       return 0;
+
+fail:
+       /*
+        * NOTE(review): if closing sv[0] also closes p[0], the close(p[0])
+        * below is a second close, and the Socket that failed newfd() is
+        * not released here -- confirm against closesock()/newfd().
+        */
+       if(i > 0)
+               sys_close(sv[0]);
+       close(p[0]);
+       close(p[1]);
+       return fd;
+}
+
+/*
+ * Return the bufproc attached to sock, creating it on sock->fd with
+ * newbufproc() the first time it is asked for.
+ */
+static void*
+bufprocsock(Socket *sock)
+{
+       if(sock->bufproc != nil)
+               return sock->bufproc;
+       sock->bufproc = newbufproc(sock->fd);
+       return sock->bufproc;
+}
+
+/*
+ * Poll handler of the socket device.
+ * Connected sockets are delegated to pollbufproc().  Otherwise the
+ * caller is registered on the wait queues of the listenproc and the
+ * connectproc (when present): POLLIN is reported when a connection is
+ * queued, POLLOUT when sock->error is negative, else 0.
+ */
+static int
+pollsock(Ufile *file, void *tab)
+{
+       Socket *sock = (Socket*)file;
+       Listenproc *lp;
+       Connectproc *cp;
+       int hit;
+
+       if(sock->connected)
+               return pollbufproc(bufprocsock(sock), sock, tab);
+
+       lp = sock->listenproc;
+       if(lp != nil){
+               qlock(lp);
+               pollwait(file, &lp->wq, tab);
+               hit = lp->q != nil;
+               qunlock(lp);
+               if(hit)
+                       return POLLIN;
+       }
+
+       cp = sock->connectproc;
+       if(cp != nil){
+               qlock(cp);
+               pollwait(file, &cp->wq, tab);
+               hit = sock->error < 0;
+               qunlock(cp);
+               if(hit)
+                       return POLLOUT;
+       }
+
+       return 0;
+}
+
+/*
+ * Read handler of the socket device; the file offset is ignored.
+ * Non-blocking sockets, and sockets that already have a bufproc, read
+ * through the bufproc.  Everything else does a plain read() bracketed
+ * by notifyme(), returning -ERESTART if notifyme(1) reports non-zero.
+ * Returns the byte count, -ENOTCONN, or a negative errno.
+ */
+static int
+readsock(Ufile *file, void *buf, int len, vlong)
+{
+       Socket *sock = (Socket*)file;
+       int ret;
+
+       if(!sock->connected)
+               return -ENOTCONN;
+
+       if((sock->mode & O_NONBLOCK) || (sock->bufproc != nil))
+               return readbufproc(bufprocsock(sock), buf, len, 0, (sock->mode & O_NONBLOCK));
+
+       if(notifyme(1))
+               return -ERESTART;
+       ret = read(sock->fd, buf, len);
+       notifyme(0);
+       return ret < 0 ? mkerror() : ret;
+}
+
+extern int pipewrite(int fd, void *buf, int len);
+
+/*
+ * Write handler of the socket device; the file offset is ignored.
+ * AF_UNIX sockets go through pipewrite(); the others do a plain
+ * write() bracketed by notifyme().
+ * Returns the byte count, -ENOTCONN, -ERESTART or a negative errno.
+ */
+static int
+writesock(Ufile *file, void *buf, int len, vlong)
+{
+       Socket *sock = (Socket*)file;
+       int n;
+
+       if(!sock->connected)
+               return -ENOTCONN;
+       if(sock->family == AF_UNIX)
+               return pipewrite(sock->fd, buf, len);
+
+       if(notifyme(1))
+               return -ERESTART;
+       n = write(sock->fd, buf, len);
+       notifyme(0);
+       return n < 0 ? mkerror() : n;
+}
+
+/*
+ * Ioctl handler of the socket device.  Only request 0x541B is
+ * handled: it stores the number of bytes readable from the bufproc
+ * into *(int*)arg (0 when the query fails).  Every other request
+ * yields -ENOTTY.
+ */
+static int
+ioctlsock(Ufile *file, int cmd, void *arg)
+{
+       Socket *sock = (Socket*)file;
+       int r;
+
+       if(cmd != 0x541B)
+               return -ENOTTY;
+       if(arg == nil)
+               return -EINVAL;
+
+       r = nreadablebufproc(bufprocsock(sock));
+       if(r < 0){
+               *((int*)arg) = 0;
+               return r;
+       }
+       *((int*)arg) = r;
+       return 0;
+}
+
+/*
+ * send/sendto: the flags and the destination address are ignored;
+ * the data is simply written to the socket with writesock().
+ */
+static int
+sendto(Socket *sock, void *data, int len, int, uchar *, int)
+{
+       int n;
+
+       trace("sendto(%p, %p, %d, ...)", sock, data, len);
+
+       n = writesock(sock, data, len, sock->off);
+       return n;
+}
+
+/*
+ * recv/recvfrom.  With flag bit 2 set the data is fetched from the
+ * bufproc with both trailing readbufproc() arguments set to 1;
+ * otherwise the call is a normal readsock().  When addr is given,
+ * sock->addr is copied into it.
+ * NOTE(review): flag value 2 is presumably MSG_PEEK -- confirm.
+ * NOTE(review): addrlen is never consulted and the copy happens even
+ * when the read failed -- confirm this is intended.
+ */
+static int
+recvfrom(Socket *sock, void *data, int len, int flags, uchar *addr, int addrlen)
+{
+       int n;
+
+       trace("recvfrom(%p, %p, %d, %x, %p, %d)", sock, data, len, flags, addr, addrlen);
+
+       if((flags & 2) == 0){
+               n = readsock(sock, data, len, sock->off);
+       } else {
+               if(!sock->connected)
+                       return -ENOTCONN;
+               n = readbufproc(bufprocsock(sock), data, len, 1, 1);
+       }
+
+       if(addr != nil)
+               memmove(addr, sock->addr, sock->naddr);
+       return n;
+}
+
+/*
+ * Socket option constants used by getoptsock().
+ * SOL_SOCKET is the option level; the SO_* names are option numbers
+ * counting up from 1 (SO_DEBUG=1 ... SO_ERROR=4).
+ */
+enum {
+       SOL_SOCKET = 1,
+
+       SO_DEBUG = 1,
+       SO_REUSEADDR,
+       SO_TYPE,
+       SO_ERROR,
+};
+
+/*
+ * getsockopt: only SOL_SOCKET / SO_ERROR is supported; it stores
+ * sock->error in *(int*)ov and sizeof(int) in *ol.
+ * Returns 0 on success, -EINVAL for any other level or option or
+ * when an output pointer is nil.
+ * NOTE(review): sock->error looks like a negative errno (pollsock
+ * tests it with < 0); confirm which sign the caller expects.
+ */
+static int
+getoptsock(Socket *sock, int lvl, int opt, char *ov, int *ol)
+{
+       trace("getoptsock(%p, %d, %d, %p, %p)", sock, lvl, opt, ov, ol);
+
+       switch(lvl){
+       default:
+       Default:
+               return -EINVAL;
+
+       case SOL_SOCKET:
+               switch(opt){
+               default:
+                       goto Default;
+               case SO_ERROR:
+                       /* both output pointers are supplied by the caller */
+                       if(ov == nil || ol == nil)
+                               return -EINVAL;
+                       *ol = sizeof(int);
+                       *((int*)ov) = sock->error;
+                       break;
+               }
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Sub-call numbers for sys_linux_socketcall(), counting up from
+ * SYS_SOCKET=1 to SYS_RECVMSG=17.
+ */
+enum {
+       SYS_SOCKET=1,
+       SYS_BIND,
+       SYS_CONNECT,
+       SYS_LISTEN,
+       SYS_ACCEPT,
+       SYS_GETSOCKNAME,
+       SYS_GETPEERNAME,
+       SYS_SOCKETPAIR,
+       SYS_SEND,
+       SYS_RECV,
+       SYS_SENDTO,
+       SYS_RECVFROM,
+       SYS_SHUTDOWN,
+       SYS_SETSOCKOPT,
+       SYS_GETSOCKOPT,
+       SYS_SENDMSG,
+       SYS_RECVMSG,
+};
+
+/*
+ * Multiplexed socket system call.  call selects the operation and
+ * arg points at its argument words.  SYS_SOCKET and SYS_SOCKETPAIR
+ * create descriptors; every other call looks up arg[0] as a
+ * descriptor, which must belong to the socket device.
+ * SYS_SETSOCKOPT is accepted and ignored.  Calls that are not
+ * implemented are traced and return -1.
+ */
+int sys_linux_socketcall(int call, int *arg)
+{
+       Socket *sock;
+       int ret;
+
+       trace("sys_linux_socketcall(%d, %p)", call, arg);
+
+       /* the two calls that do not operate on an existing socket */
+       switch(call){
+       case SYS_SOCKET:
+               return newsock(arg[0], arg[1], arg[2]);
+       case SYS_SOCKETPAIR:
+               return socketpair(arg[0], arg[1], arg[2], (int*)arg[3]);
+       }
+
+       sock = (Socket*)fdgetfile(arg[0]);
+       if(sock == nil)
+               return -EBADF;
+       if(sock->dev != SOCKDEV){
+               putfile(sock);
+               return -ENOTSOCK;
+       }
+
+       ret = -1;
+       switch(call){
+       case SYS_BIND:
+               ret = bindsock(sock, (void*)arg[1], arg[2]);
+               break;
+       case SYS_CONNECT:
+               ret = connectsock(sock, (void*)arg[1], arg[2]);
+               break;
+       case SYS_LISTEN:
+               ret = listensock(sock);
+               break;
+       case SYS_ACCEPT:
+               ret = acceptsock(sock, (void*)arg[1], (void*)arg[2]);
+               break;
+       case SYS_GETSOCKNAME:
+               ret = getsockname(sock, (void*)arg[1], (void*)arg[2]);
+               break;
+       case SYS_GETPEERNAME:
+               ret = getpeername(sock, (void*)arg[1], (void*)arg[2]);
+               break;
+       case SYS_SEND:
+               ret = sendto(sock, (void*)arg[1], arg[2], arg[3], nil, 0);
+               break;
+       case SYS_RECV:
+               ret = recvfrom(sock, (void*)arg[1], arg[2], arg[3], nil, 0);
+               break;
+       case SYS_SENDTO:
+               ret = sendto(sock, (void*)arg[1], arg[2], arg[3], (void*)arg[4], arg[5]);
+               break;
+       case SYS_RECVFROM:
+               ret = recvfrom(sock, (void*)arg[1], arg[2], arg[3], (void*)arg[4], arg[5]);
+               break;
+       case SYS_SHUTDOWN:
+               ret = shutdownsock(sock, arg[1]);
+               break;
+       case SYS_SETSOCKOPT:
+               ret = 0;
+               break;
+       case SYS_GETSOCKOPT:
+               ret = getoptsock(sock, (int)arg[1], (int)arg[2], (char*)arg[3], (int*)arg[4]);
+               break;
+       case SYS_SENDMSG:
+       case SYS_RECVMSG:
+       default:
+               trace("socketcall(): call %d not implemented", call);
+       }
+
+       putfile(sock);
+
+       return ret;
+}
+
+/*
+ * Fill the stat fields every socket shares: a 0666 socket owned by
+ * the current process with size 0.  Other fields are left untouched.
+ */
+static void
+fillstat(Ustat *s)
+{
+       s->size = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->mode = S_IFSOCK | 0666;
+}
+
+/* fstat handler of the socket device: same answer for every socket. */
+static int
+fstatsock(Ufile *, Ustat *s)
+{
+       fillstat(s);
+
+       return 0;
+}
+
+/*
+ * Operation table of the socket device; installed into devtab[] by
+ * sockdevinit().  Operations not listed here stay zero.
+ */
+static Udev sockdev = 
+{
+       .read = readsock,
+       .write = writesock,
+       .poll = pollsock,
+       .close = closesock,
+       .ioctl = ioctlsock,
+       .fstat = fstatsock,
+};
+
+/* Register the socket device operations in the device table. */
+void sockdevinit(void)
+{
+       Udev *d;
+
+       d = &sockdev;
+       devtab[SOCKDEV] = d;
+}
diff --git a/linux_emul_base/stat.c b/linux_emul_base/stat.c
new file mode 100644 (file)
index 0000000..0a0aaaa
--- /dev/null
@@ -0,0 +1,437 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Stat the open descriptor fd into *ps using the fstat operation of
+ * its device.  *ps is zeroed before the device fills it in.
+ * Returns the device's result, -EBADF for an unknown descriptor, or
+ * -EPERM when the device has no fstat operation.
+ */
+int
+ufstat(int fd, Ustat *ps)
+{
+       Ufile *f;
+       int err;
+
+       /* nothing to release on a bad descriptor: do not hand nil to putfile() */
+       if((f = fdgetfile(fd)) == nil)
+               return -EBADF;
+
+       if(devtab[f->dev]->fstat == nil){
+               err = -EPERM;
+       } else {
+               memset(ps, 0, sizeof(Ustat));
+               err = devtab[f->dev]->fstat(f, ps);
+       }
+       putfile(f);
+       return err;
+}
+
+/*
+ * Result layout written by sys_linux_stat(), sys_linux_lstat() and
+ * sys_linux_fstat().  The layout is handed to the emulated program
+ * as is, so field order and sizes must not change.
+ */
+struct linux_stat {
+       ushort st_dev;
+       ushort __pad1;
+       ulong st_ino;
+       ushort st_mode;
+       ushort st_nlink;
+       ushort st_uid;
+       ushort st_gid;
+       ushort st_rdev;
+       ushort __pad2;
+       ulong  st_size;
+       ulong  st_blksize;
+       ulong  st_blocks;
+       ulong  st_atime;
+       ulong  __unused1;
+       ulong  st_mtime;
+       ulong  __unused2;
+       ulong  st_ctime;
+       ulong  __unused3;
+       ulong  __unused4;
+       ulong  __unused5;
+};
+
+/*
+ * Convert the emulator's Ustat into the old Linux struct stat.
+ * Fields without a source are zero; st_nlink is always 1 and
+ * st_blksize is a fixed 4096.
+ */
+static void
+ustat2linuxstat(Ustat *x, struct linux_stat *s)
+{
+       memset(s, 0, sizeof(*s));
+       s->st_dev = x->dev;
+       s->st_ino = x->ino;
+       s->st_mode = x->mode;
+       s->st_nlink = 1;
+       s->st_uid = x->uid;
+       s->st_gid = x->gid;
+       s->st_size = x->size;
+       s->st_rdev = x->rdev;
+       s->st_blksize = 4096;
+       /* st_blocks counts 512-byte units, independent of st_blksize */
+       s->st_blocks = (x->size + 511) / 512;
+       s->st_atime = x->atime;
+       s->st_mtime = x->mtime;
+       s->st_ctime = x->ctime;
+}
+
+
+/*
+ * Result layout written by the stat64 family of calls.  The inode
+ * number appears twice: truncated in __lst_ino and in full in
+ * lst_ino.  The layout is handed to the emulated program as is, so
+ * field order and sizes must not change.
+ */
+struct linux_stat64 {
+       uvlong  lst_dev;
+       uint            __pad1;
+       uint            __lst_ino;
+       uint            lst_mode;
+       uint            lst_nlink;
+       uint            lst_uid;
+       uint            lst_gid;
+       uvlong  lst_rdev;
+       uint            __pad2;
+       vlong   lst_size;
+       uint            lst_blksize;
+       uvlong  lst_blocks;
+       uint            lst_atime;
+       uint            lst_atime_nsec;
+       uint            lst_mtime;
+       uint            lst_mtime_nsec;
+       uint            lst_ctime;
+       uint            lst_ctime_nsec;
+       uvlong  lst_ino;
+};
+
+/*
+ * Convert the emulator's Ustat into the Linux struct stat64.
+ * Fields without a source (including the nanosecond parts) are zero;
+ * lst_nlink is always 1 and lst_blksize is a fixed 4096.
+ */
+static void
+ustat2linuxstat64(Ustat *x, struct linux_stat64 *s)
+{
+       memset(s, 0, sizeof(*s));
+       s->lst_dev = x->dev;
+       s->lst_ino = x->ino;
+       s->__lst_ino = x->ino & 0xFFFFFFFF;
+       s->lst_mode = x->mode;
+       s->lst_nlink = 1;
+       s->lst_uid = x->uid;
+       s->lst_gid = x->gid;
+       s->lst_size = x->size;
+       s->lst_rdev = x->rdev;
+       s->lst_blksize = 4096; // good as any
+       /* lst_blocks counts 512-byte units, independent of lst_blksize */
+       s->lst_blocks = (x->size + 511) / 512;
+       s->lst_atime = x->atime;
+       s->lst_mtime = x->mtime;
+       s->lst_ctime = x->ctime;
+}
+
+/*
+ * stat(2): stat path with link argument 0 and store the result in
+ * the old struct stat layout at st.
+ * The path is resolved with fsfullpath() first, exactly as
+ * sys_linux_lstat(), sys_linux_stat64() and sys_linux_lstat64() do.
+ * Returns the result of fsstat(), or -EFAULT if the path cannot be
+ * resolved.
+ */
+int sys_linux_stat(char *path, void *st)
+{
+       int err;
+       Ustat x;
+
+       trace("sys_linux_stat(%s, %p)", path, st);
+
+       if((path = fsfullpath(path)) == nil)
+               return -EFAULT;
+       err = fsstat(path, 0, &x);
+       free(path);
+
+       if(err < 0)
+               return err;
+       ustat2linuxstat(&x, (struct linux_stat*)st);
+       return err;
+}
+
+/*
+ * lstat(2): like stat but calls fsstat() with link argument 1.
+ * Result goes to st in the old struct stat layout.
+ */
+int sys_linux_lstat(char *path, void *st)
+{
+       Ustat x;
+       char *full;
+       int err;
+
+       trace("sys_linux_lstat(%s, %p)", path, st);
+
+       full = fsfullpath(path);
+       if(full == nil)
+               return -EFAULT;
+       err = fsstat(full, 1, &x);
+       free(full);
+
+       if(err >= 0)
+               ustat2linuxstat(&x, (struct linux_stat*)st);
+       return err;
+}
+
+/*
+ * stat64(2): calls fsstat() with link argument 0 and stores the
+ * result at st in the struct stat64 layout.
+ */
+int sys_linux_stat64(char *path, void *st)
+{
+       Ustat x;
+       char *full;
+       int err;
+
+       trace("sys_linux_stat64(%s, %p)", path, st);
+
+       full = fsfullpath(path);
+       if(full == nil)
+               return -EFAULT;
+       err = fsstat(full, 0, &x);
+       free(full);
+
+       if(err >= 0)
+               ustat2linuxstat64(&x, (struct linux_stat64*)st);
+       return err;
+}
+
+/*
+ * lstat64(2): calls fsstat() with link argument 1 and stores the
+ * result at st in the struct stat64 layout.
+ */
+int sys_linux_lstat64(char *path, void *st)
+{
+       Ustat x;
+       char *full;
+       int err;
+
+       trace("sys_linux_lstat64(%s, %p)", path, st);
+
+       full = fsfullpath(path);
+       if(full == nil)
+               return -EFAULT;
+       err = fsstat(full, 1, &x);
+       free(full);
+
+       if(err >= 0)
+               ustat2linuxstat64(&x, (struct linux_stat64*)st);
+       return err;
+}
+
+/*
+ * fstat(2): stat the open descriptor fd via ufstat() and store the
+ * result at st in the old struct stat layout.
+ */
+int sys_linux_fstat(int fd, void *st)
+{
+       Ustat x;
+       int err;
+
+       trace("sys_linux_fstat(%d, %p)", fd, st);
+
+       err = ufstat(fd, &x);
+       if(err >= 0)
+               ustat2linuxstat(&x, (struct linux_stat*)st);
+       return err;
+}
+
+/*
+ * fstat64(2): stat the open descriptor fd via ufstat() and store the
+ * result at st in the struct stat64 layout.
+ */
+int sys_linux_fstat64(int fd, void *st)
+{
+       Ustat x;
+       int err;
+
+       trace("sys_linux_fstat64(%d, %p)", fd, st);
+
+       err = ufstat(fd, &x);
+       if(err >= 0)
+               ustat2linuxstat64(&x, (struct linux_stat64*)st);
+       return err;
+}
+
+/*
+ * Common body of getdents/getdents64.
+ * The directory listing of fd is cached as a linked list in f->rdaux
+ * and (re)read through the device's readdir operation when there is
+ * no cache yet or the file offset is 0.  fconv converts one entry
+ * into the caller's record format; called with a nil buffer it only
+ * reports the record size, and a result <= 0 stops the walk.
+ * Entries whose cumulative offset lies before f->off are skipped.
+ * Returns the number of bytes stored in buf, -EBADF for a bad
+ * descriptor, or the readdir result when that is <= 0.
+ */
+static int
+getdents(int fd, void *buf, int len, int (*fconv)(Udirent *, void *, int, int))
+{
+       Ufile *f;
+       Udirent *t, *x;
+       uchar *p, *e;
+       int o, r, err;
+
+       if((f = fdgetfile(fd)) == nil)
+               return -EBADF;
+       o = 0;
+       p = buf;
+       e = p + len;
+       t = f->rdaux;
+       if(t == nil || f->off == 0){
+               /* drop the old listing (if any) and read a fresh one */
+               f->rdaux = nil;
+               while(x = t){
+                       t = t->next;
+                       free(x);
+               }
+               if((err = devtab[f->dev]->readdir(f, &t)) <= 0){
+                       putfile(f);
+                       return err;
+               }
+               f->rdaux = t;
+       }
+       for(; t; t=t->next){
+               /* just calculate size */
+               r = fconv(t, nil, 0, e - p);
+               if(r <= 0)
+                       break;
+               if(o >= f->off){
+                       /* convert */
+                       f->off = o + r;
+                       /* the last entry is given offset 0 instead of the next offset */
+                       r = fconv(t, p, t->next ? f->off : 0, e - p);
+                       p += r;
+               }
+               o += r;
+       }
+       putfile(f);
+       return p - (uchar*)buf;
+}
+
+/*
+ * Allocate a directory entry for name with the given mode.  The
+ * inode number is the hash of the path built by allocpath() from
+ * path and name.  The name is stored inline after the structure.
+ */
+Udirent*
+newdirent(char *path, char *name, int mode)
+{
+       Udirent *ent;
+       char *full;
+
+       ent = kmallocz(sizeof(*ent) + strlen(name) + 1, 1);
+       strcpy(ent->name, name);
+       ent->mode = mode;
+
+       full = allocpath(path, nil, ent->name);
+       ent->ino = hashpath(full);
+       free(full);
+
+       return ent;
+}
+
+/*
+ * Record produced by udirent2linux() for sys_linux_getdents().
+ * d_reclen is the size of the whole record including the
+ * NUL-terminated name that follows the fixed part.
+ */
+struct linux_dirent {
+       long            d_ino;
+       long            d_off;
+       ushort  d_reclen;
+       char            d_name[];
+};
+
+/*
+ * Convert u into a struct linux_dirent at d.
+ * Returns the record size, or 0 if it does not fit into left bytes.
+ * With d == nil nothing is written and only the size is returned.
+ */
+static int
+udirent2linux(Udirent *u, void *d, int off, int left)
+{
+       struct linux_dirent *ent;
+       int reclen;
+
+       ent = d;
+       reclen = sizeof(struct linux_dirent) + strlen(u->name) + 1;
+       if(reclen > left)
+               return 0;
+       if(ent == nil)
+               return reclen;
+
+       ent->d_ino = u->ino & 0xFFFFFFFF;
+       ent->d_off = off;
+       ent->d_reclen = reclen;
+       strcpy(ent->d_name, u->name);
+       return reclen;
+}
+
+/*
+ * Record produced by udirent2linux64() for sys_linux_getdents64().
+ * d_type is taken from bits 12..15 of the entry's mode; d_reclen is
+ * the size of the whole record including the NUL-terminated name.
+ */
+struct linux_dirent64 {
+       uvlong  d_ino;
+       vlong   d_off;
+       ushort  d_reclen;
+       uchar   d_type;
+       char            d_name[];
+};
+
+/*
+ * Convert u into a struct linux_dirent64 at d.
+ * Returns the record size, or 0 if it does not fit into left bytes.
+ * With d == nil nothing is written and only the size is returned.
+ */
+static int
+udirent2linux64(Udirent *u, void *d, int off, int left)
+{
+       struct linux_dirent64 *ent;
+       int reclen;
+
+       ent = d;
+       reclen = sizeof(struct linux_dirent64) + strlen(u->name) + 1;
+       if(reclen > left)
+               return 0;
+       if(ent == nil)
+               return reclen;
+
+       ent->d_ino = u->ino;
+       ent->d_off = off;
+       ent->d_reclen = reclen;
+       ent->d_type = (u->mode>>12)&15;
+       strcpy(ent->d_name, u->name);
+       return reclen;
+}
+
+/* getdents(2): fill buf with struct linux_dirent records. */
+int sys_linux_getdents(int fd, void *buf, int nbuf)
+{
+       int n;
+
+       trace("sys_linux_getdents(%d, %p, %x)", fd, buf, nbuf);
+
+       n = getdents(fd, buf, nbuf, udirent2linux);
+       return n;
+}
+
+/* getdents64(2): fill buf with struct linux_dirent64 records. */
+int sys_linux_getdents64(int fd, void *buf, int nbuf)
+{
+       int n;
+
+       trace("sys_linux_getdents64(%d, %p, %x)", fd, buf, nbuf);
+
+       n = getdents(fd, buf, nbuf, udirent2linux64);
+       return n;
+}
+
+/*
+ * Result layout filled in by sys_statfs().  The layout is handed to
+ * the emulated program as is, so field order and sizes must not
+ * change.
+ */
+struct  linux_statfs  { 
+       long f_type; 
+       long f_bsize; 
+       long f_blocks; 
+       long f_bfree; 
+       long f_bavail; 
+       long f_files; 
+       long f_ffree;
+       long f_fsid[2]; 
+       long f_namelen; 
+       long f_frsize; 
+       long f_spare[5]; 
+}; 
+
+/*
+ * statfs(2): report fixed, made-up file system figures.  Paths that
+ * start with "/dev/pts" get type 0x1cd1; everything else gets the
+ * bytes "PLN9" as type and "PLAN9_FS" as file system id.
+ * Returns 0, or -EINVAL if either argument is nil.
+ */
+int sys_statfs(char *name, void *pstatfs)
+{
+       struct linux_statfs *s = pstatfs;
+
+       trace("sys_statfs(%s, %p)", name, s);
+
+       if((name == nil) || (s == nil))
+               return -EINVAL;
+
+       memset(s, 0, sizeof(*s));
+
+       s->f_bsize = 4096;
+       s->f_namelen = 256;
+       s->f_blocks = 0x80000000;
+       s->f_bfree = 0x80000000;
+       s->f_bavail = s->f_bfree;
+       s->f_ffree = 0x40000000;
+       s->f_files = s->f_ffree;
+
+       if(strncmp(name, "/dev/pts", 8) != 0){
+               memmove(&s->f_type, "PLN9", 4);
+               memmove(s->f_fsid, "PLAN9_FS", 8);
+       } else {
+               s->f_type = 0x1cd1;
+       }
+
+       return 0;
+}
+
+/* getxattr(2): extended attributes are not supported. */
+int
+sys_getxattr(char *path, char *name, void *value, int size)
+{
+       int err;
+
+       trace("sys_getxattr(%s, %s, %p, %x)", path, name, value, size);
+
+       err = -EOPNOTSUPP;
+       return err;
+}
+
+/* lgetxattr(2): extended attributes are not supported. */
+int
+sys_lgetxattr(char *path, char *name, void *value, int size)
+{
+       int err;
+
+       trace("sys_lgetxattr(%s, %s, %p, %x)", path, name, value, size);
+
+       err = -EOPNOTSUPP;
+       return err;
+}
+
+/*
+ * fgetxattr(2): extended attributes are not supported, but the
+ * descriptor is still validated first so a bad fd yields -EBADF.
+ */
+int
+sys_fgetxattr(int fd, char *name, void *value, int size)
+{
+       Ufile *f;
+
+       trace("sys_fgetxattr(%d, %s, %p, %x)", fd, name, value, size);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       putfile(f);
+
+       return -EOPNOTSUPP;
+}
+
+/* setxattr(2): extended attributes are not supported. */
+int
+sys_setxattr(char *path, char *name, void *value, int flags, int size)
+{
+       int err;
+
+       trace("sys_setxattr(%s, %s, %p, %x, %x)", path, name, value, flags, size);
+
+       err = -EOPNOTSUPP;
+       return err;
+}
+
+/* lsetxattr(2): extended attributes are not supported. */
+int
+sys_lsetxattr(char *path, char *name, void *value, int flags, int size)
+{
+       int err;
+
+       trace("sys_lsetxattr(%s, %s, %p, %x, %x)", path, name, value, flags, size);
+
+       err = -EOPNOTSUPP;
+       return err;
+}
+
+/*
+ * fsetxattr(2): extended attributes are not supported, but the
+ * descriptor is still validated first so a bad fd yields -EBADF.
+ */
+int
+sys_fsetxattr(int fd, char *name, void *value, int size, int flags)
+{
+       Ufile *f;
+       int err;
+
+       /* print the arguments in the order of the signature: size, then flags */
+       trace("sys_fsetxattr(%d, %s, %p, %x, %x)", fd, name, value, size, flags);
+
+       if((f = fdgetfile(fd)) == nil)
+               return -EBADF;
+       err = -EOPNOTSUPP;
+       putfile(f);
+       return err;
+}
diff --git a/linux_emul_base/time.c b/linux_emul_base/time.c
new file mode 100644 (file)
index 0000000..f11a929
--- /dev/null
@@ -0,0 +1,160 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <tos.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Time zone record returned through the second argument of
+ * sys_gettimeofday().
+ */
+struct linux_timezone
+{
+       int             tz_minuteswest;
+       int             tz_dsttime;
+};
+
+/* zone reported to every caller; set up once by inittime() */
+static struct linux_timezone systz;
+
+/*
+ * Record the start time in boottime and derive the time zone that
+ * sys_gettimeofday() reports from the current local time.
+ */
+void
+inittime(void)
+{
+       Tm *t;
+
+       boottime = nsec();
+
+       systz.tz_minuteswest = 0;
+       systz.tz_dsttime = 0;
+
+       /*
+        * Plan 9's tzoff is the offset of local time from GMT in seconds
+        * (negative west of Greenwich), while tz_minuteswest counts
+        * minutes WEST of Greenwich, so the sign has to be flipped.
+        */
+       if(t = localtime(time(nil)))
+               systz.tz_minuteswest = -(t->tzoff / 60);
+}
+
+/* time(2): forwards to time() and returns its result. */
+int sys_time(long *p)
+{
+       int now;
+
+       now = time(p);
+       return now;
+}
+
+/*
+ * clock_gettime(2): every clock id is answered with nsec(), split
+ * into seconds and nanoseconds.
+ * Returns 0, or -EFAULT when t is nil.
+ */
+int sys_clock_gettime(int clock, void *t)
+{
+       struct linux_timespec *ts = t;
+       vlong x;
+
+       trace("sys_clock_gettime(%d, %p)", clock, t);
+
+       /* do not write through a nil result pointer */
+       if(ts == nil)
+               return -EFAULT;
+
+       x = nsec();
+       ts->tv_sec = (long)(x/1000000000LL);
+       ts->tv_nsec = (long)(x%1000000000LL);
+       return 0;
+}
+
+/*
+ * gettimeofday(2): store the current time (from nsec()) in *tvp and
+ * the zone set up by inittime() in *tzp.  Either pointer may be nil,
+ * in which case that part is simply not returned.
+ * Always returns 0.
+ */
+int sys_gettimeofday(void *tvp, void *tzp)
+{
+       struct linux_timeval *tv = tvp;
+       struct linux_timezone *tz = tzp;
+       vlong t;
+
+       trace("sys_gettimeofday(%p, %p)", tvp, tzp);
+
+       /* a nil tv is legal: the caller may only want the time zone */
+       if(tv){
+               t = nsec();
+               tv->tv_sec = (long)(t/1000000000LL);
+               tv->tv_usec = (long)((t%1000000000LL)/1000);
+       }
+
+       if(tz)
+               *tz = systz;
+
+       return 0;
+}
+
+/*
+ * nanosleep(2): sleep until the requested interval has passed.
+ * The wake-up time t is absolute, in nsec() units.  When the call is
+ * being restarted (current->restart->syscall is set) the deadline
+ * saved by the interrupted attempt is reused, so the total sleep
+ * does not grow.  If the sleep ends early with time still left, the
+ * deadline is saved, the remaining time is stored in *rmp (when
+ * given) and -ERESTART is returned.
+ * Returns 0 on completion, -EFAULT for a nil request and -EINVAL for
+ * an out-of-range request.
+ */
+int sys_nanosleep(void *rqp, void *rmp)
+{
+       struct linux_timespec *req = rqp;
+       struct linux_timespec *rem = rmp;
+       vlong t, now;
+       int err;
+
+       trace("sys_nanosleep(%p, %p)", rqp, rmp);
+
+       if(req == nil)
+               return -EFAULT;
+       if(req->tv_sec < 0 || req->tv_nsec < 0 || req->tv_nsec >= 1000000000LL)
+               return -EINVAL;
+
+       now = nsec();
+       if(current->restart->syscall){
+               /* restarted call: keep the deadline of the first attempt */
+               t = current->restart->nanosleep.timeout;
+       } else {
+               t = now + req->tv_sec*1000000000LL + req->tv_nsec;
+       }
+
+       if(now < t){
+               if(notifyme(1))
+                       err = -1;
+               else {
+                       /* sleep() takes milliseconds */
+                       err = sleep((t - now) / 1000000LL);
+                       notifyme(0);
+               }
+               if(err < 0){
+                       now = nsec();
+                       if(now < t){
+                               current->restart->nanosleep.timeout = t;
+                               if(rem != nil){
+                                       /* t becomes the time still left to sleep */
+                                       t -= now;
+                                       rem->tv_sec = (long)(t/1000000000LL);
+                                       rem->tv_nsec = (long)(t%1000000000LL);
+                               }
+                               return -ERESTART;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Read /proc/<p->kpid>/status and convert four of its fields from
+ * milliseconds into HZ ticks: t[0..3] are taken from fields 2..5.
+ * t[] is zeroed first, so it is all zero on failure.
+ * Returns 0, a negative errno when the file cannot be read, or -EIO
+ * when it does not split into 12 fields.
+ * NOTE(review): Plan 9's proc(3) lists name, user and state as the
+ * first three fields, followed by user, sys, real, child-user and
+ * child-sys times -- confirm that indices 2..5 are the intended ones.
+ */
+int proctimes(Uproc *p, ulong *t)
+{
+       char buf[1024], *f[12];
+       int fd, n;
+
+       t[0] = t[1] = t[2] = t[3] = 0;
+       snprint(buf, sizeof(buf), "/proc/%d/status", p->kpid);
+       if((fd = open(buf, OREAD)) < 0)
+               return mkerror();
+       if((n = read(fd, buf, sizeof(buf)-1)) <= 0){
+               close(fd);
+               return mkerror();
+       }
+       close(fd);
+       buf[n] = 0;
+       if(getfields(buf, f, 12, 1, "\t ") != 12)
+               return -EIO;
+       /* multiply in 64 bits: ms*HZ overflows an int after a few hours */
+       t[0] = (vlong)atoi(f[2])*HZ / 1000;
+       t[1] = (vlong)atoi(f[3])*HZ / 1000;
+       t[2] = (vlong)atoi(f[4])*HZ / 1000;
+       t[3] = (vlong)atoi(f[5])*HZ / 1000;
+       return 0;
+}
+
+/*
+ * Result layout of sys_times(); the four values are filled from the
+ * t[0..3] array returned by proctimes(), in this order.
+ */
+struct linux_tms
+{
+       long    tms_utime;
+       long    tms_stime;
+       long    tms_cutime;
+       long    tms_cstime;
+};
+
+/*
+ * times(2): when m is not nil, fill it with the times of the current
+ * process from proctimes().  The return value is the time since
+ * boottime in HZ ticks, or the proctimes() error.
+ */
+int sys_times(void *m)
+{
+       struct linux_tms *tms;
+       ulong t[4];
+       int err;
+
+       tms = m;
+
+       trace("sys_times(%p)", m);
+
+       if(tms == nil)
+               return (HZ*(nsec() - boottime)) / 1000000000LL;
+
+       err = proctimes(current, t);
+       if(err < 0)
+               return err;
+       tms->tms_utime = t[0];
+       tms->tms_stime = t[1];
+       tms->tms_cutime = t[2];
+       tms->tms_cstime = t[3];
+
+       return (HZ*(nsec() - boottime)) / 1000000000LL;
+}
\ No newline at end of file
diff --git a/linux_emul_base/tls.c b/linux_emul_base/tls.c
new file mode 100644 (file)
index 0000000..7e67b7b
--- /dev/null
@@ -0,0 +1,232 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Positions of the space separated fields in one line of /dev/gdt,
+ * as split by getfields.  The order matches the lines that
+ * sys_set_thread_area writes: index, type ("code" or "data"),
+ * flag letters, privilege level, base and limit.
+ */
+enum {
+       Index,
+       Type,
+       Flags,
+       DPL,
+       Base,
+       Limit,
+       Nfields,        /* number of fields per line */
+};
+
+/*
+ * Return 1 if info holds exactly the values cleardesc stores
+ * (entry_number is not looked at), otherwise 0.
+ */
+static int
+descempty(struct linux_user_desc *info)
+{
+       if(info->base_addr != 0 || info->limit != 0)
+               return 0;
+       if(info->contents != 0 || info->read_exec_only != 1)
+               return 0;
+       if(info->seg_32bit != 0 || info->limit_in_pages != 0)
+               return 0;
+       if(info->seg_not_present != 1 || info->useable != 0)
+               return 0;
+       return 1;
+}
+
+/*
+ * set_thread_area(2): install the segment descriptor described by
+ * pinfo (a struct linux_user_desc) by writing one line to /dev/gdt.
+ * An entry_number of -1 selects the first descriptor listed by
+ * /dev/gdt whose bit is clear in current->tlsmask; the index used is
+ * stored back into entry_number on success, and the tlsmask bit is
+ * set or cleared depending on whether the descriptor is empty.
+ * Returns 0, or -ENOSYS if /dev/gdt cannot be opened, -ESRCH if no
+ * free descriptor was found, -EINVAL for a bad index or failed write.
+ */
+int sys_set_thread_area(void *pinfo)
+{
+       struct linux_user_desc *info = pinfo;
+       char buf[1024];
+       char *line, *eol, *w, *fmt, *f[Nfields];
+       int n, fd, idx, err, iscode;
+
+       trace("sys_set_thread_area(%p)", pinfo);
+
+       err = -ENOSYS;
+       fd = open("/dev/gdt", ORDWR);
+       if(fd < 0)
+               goto out;
+
+       idx = info->entry_number;
+       if(idx == -1){
+               /* caller wants us to choose: scan the descriptor list */
+               err = -ESRCH;
+               n = read(fd, buf, sizeof(buf)-1);
+               if(n <= 0)
+                       goto out;
+               buf[n] = 0;
+               for(line = buf; (eol = strchr(line, '\n')) != nil; line = eol+1){
+                       *eol = 0;
+                       if(getfields(line, f, nelem(f), 1, " ") != nelem(f))
+                               goto out;
+                       idx = strtoul(f[Index], nil, 16);
+                       if(idx >= 8*sizeof(current->tlsmask))
+                               break;
+                       if((current->tlsmask & (1<<idx)) == 0)
+                               goto found;
+               }
+               goto out;
+       }
+
+found:
+       err = -EINVAL;
+       if(idx < 0 || idx >= 8*sizeof(current->tlsmask))
+               goto out;
+
+       /* buf is reused for the flag letters; the lines read above are done with */
+       iscode = (info->contents & 2) != 0;
+       w = buf;
+       if(!info->seg_not_present)
+               *w++ = 'P';
+       if(info->limit_in_pages)
+               *w++ = 'G';
+       if(info->useable)
+               *w++ = 'U';
+       if(info->contents & 1)
+               *w++ = iscode ? 'C' : 'E';
+       if(info->seg_32bit)
+               *w++ = iscode ? 'D' : 'B';
+       if(!info->read_exec_only)
+               *w++ = iscode ? 'R' : 'W';
+       if(w == buf)
+               *w++ = '-';     /* no flags at all */
+       *w = 0;
+
+       if(iscode)
+               fmt = "%x code %s 3 %lux %lux\n";
+       else
+               fmt = "%x data %s 3 %lux %lux\n";
+       if(fprint(fd, fmt, idx, buf, (ulong)info->base_addr, (ulong)info->limit) < 0)
+               goto out;
+
+       err = 0;
+       info->entry_number = idx;
+       if(descempty(info))
+               current->tlsmask &= ~(1<<idx);
+       else
+               current->tlsmask |= 1<<idx;
+
+out:
+       if(fd >= 0)
+               close(fd);
+       return err;
+}
+
+/*
+ * get_thread_area(2): find the line of /dev/gdt whose index equals
+ * info->entry_number and decode its type, flag letters, base and
+ * limit into the struct linux_user_desc at pinfo.
+ * Returns 0, or -ENOSYS if /dev/gdt cannot be opened, -EINVAL if it
+ * cannot be read or parsed or lists no such entry.
+ */
+int sys_get_thread_area(void *pinfo)
+{
+       struct linux_user_desc *info = pinfo;
+       char buf[1024];
+       char *line, *eol, *flags, *f[Nfields];
+       int err, n, fd, idx, contents;
+
+       trace("sys_get_thread_area(%p)", pinfo);
+
+       err = -ENOSYS;
+       fd = open("/dev/gdt", OREAD);
+       if(fd < 0)
+               goto out;
+
+       err = -EINVAL;
+       n = read(fd, buf, sizeof(buf)-1);
+       if(n <= 0)
+               goto out;
+       buf[n] = 0;
+       for(line = buf; (eol = strchr(line, '\n')) != nil; line = eol+1){
+               *eol = 0;
+               if(getfields(line, f, nelem(f), 1, " ") != nelem(f))
+                       goto out;
+               idx = strtoul(f[Index], nil, 16);
+               if(idx >= 8*sizeof(current->tlsmask))
+                       break;
+               if(idx == info->entry_number)
+                       goto found;
+       }
+       goto out;
+
+found:
+       flags = f[Flags];
+
+       /* bit 1 of contents: code segment; bit 0: 'E' or 'C' flag present */
+       contents = 0;
+       if(strcmp(f[Type], "code") == 0)
+               contents |= 2;
+       if(strchr(flags, 'E') != nil || strchr(flags, 'C') != nil)
+               contents |= 1;
+       info->contents = contents;
+
+       /* each member keeps its "empty" value unless the matching letter is present */
+       info->seg_not_present = strchr(flags, 'P') == nil;
+       info->limit_in_pages = strchr(flags, 'G') != nil;
+       info->seg_32bit = strchr(flags, 'B') != nil || strchr(flags, 'D') != nil;
+       info->read_exec_only = strchr(flags, 'W') == nil && strchr(flags, 'R') == nil;
+       info->useable = strchr(flags, 'U') != nil;
+
+       info->base_addr = strtoul(f[Base], nil, 16);
+       info->limit = strtoul(f[Limit], nil, 16);
+
+       err = 0;
+
+out:
+       if(fd >= 0)
+               close(fd);
+       return err;
+}
+
+/*
+ * Reset info to the "empty descriptor" values that descempty tests
+ * for.  entry_number is left untouched.
+ */
+static void
+cleardesc(struct linux_user_desc *info)
+{
+       /* members that are 1 in an empty descriptor */
+       info->read_exec_only = 1;
+       info->seg_not_present = 1;
+
+       /* everything else is zero */
+       info->base_addr = 0;
+       info->limit = 0;
+       info->contents = 0;
+       info->seg_32bit = 0;
+       info->limit_in_pages = 0;
+       info->useable = 0;
+}
+
+/*
+ * Write an empty descriptor over every entry whose bit is set in
+ * current->tlsmask, then clear the mask.  Errors from
+ * sys_set_thread_area are ignored.
+ */
+void inittls(void)
+{
+       struct linux_user_desc empty;
+       int slot;
+
+       for(slot=0; slot<8*sizeof(current->tlsmask); slot++){
+               if((current->tlsmask & (1 << slot)) != 0){
+                       cleardesc(&empty);
+                       empty.entry_number = slot;
+                       sys_set_thread_area(&empty);
+               }
+       }
+       current->tlsmask = 0;
+}
+
+/* Give the new process the same tlsmask as the current one. */
+void clonetls(Uproc *new)
+{
+       Uproc *parent;
+
+       parent = current;
+       new->tlsmask = parent->tlsmask;
+}
+
+/* modify_ldt(2) is not implemented: trace the call and fail with -ENOSYS. */
+int sys_modify_ldt(int func, void *data, int count)
+{
+       int err;
+
+       trace("sys_modify_ldt(%d, %p, %x)", func, data, count);
+
+       err = -ENOSYS;
+       return err;
+}
diff --git a/linux_emul_base/trace.c b/linux_emul_base/trace.c
new file mode 100644 (file)
index 0000000..2207786
--- /dev/null
@@ -0,0 +1,107 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+
+#undef trace
+
+/* guard pattern copied into every Tracebuf and verified by checktrace */
+static char magic[] = "TRACEBUF";
+
+/* Per-process buffer holding the most recent trace lines. */
+typedef struct Tracebuf Tracebuf;
+struct Tracebuf
+{
+       char            magic[8];       /* first 8 bytes of magic[], no terminating NUL */
+       int             wp;             /* lines written so far; slot is wp % nelem(lines) */
+       char            lines[256][80]; /* formatted lines, oldest overwritten first */
+};
+
+/* Allocate a Tracebuf with kmallocz and stamp it with the magic guard. */
+static void*
+alloctrace(void)
+{
+       Tracebuf *tb;
+
+       tb = kmallocz(sizeof(Tracebuf), 1);
+       memmove(tb->magic, magic, sizeof(tb->magic));
+       return tb;
+}
+
+/* Panic if the magic guard at the start of t has been overwritten. */
+static void
+checktrace(Tracebuf *t)
+{
+       if(memcmp(t->magic, magic, sizeof(t->magic)) == 0)
+               return;
+       panic("tracebuffer corrupted");
+}
+
+/*
+ * Release a trace buffer; nil is accepted and ignored.  The buffer is
+ * verified and then zeroed (which also wipes the magic) before free.
+ */
+static void
+freetrace(Tracebuf *t)
+{
+       if(t != nil){
+               checktrace(t);
+               memset(t, 0, sizeof(*t));
+               free(t);
+       }
+}
+
+/*
+ * Format one message into the next slot of t, overwriting the oldest
+ * line once the buffer has wrapped.  With debug > 1 the line is also
+ * printed on fd 2, prefixed by the current tid (0 if there is no
+ * current process).
+ */
+static void
+vputtrace(Tracebuf *t, char *fmt, va_list a)
+{
+       char *slot;
+
+       checktrace(t);
+       slot = t->lines[t->wp++ %  nelem(t->lines)];
+       vsnprint(slot, sizeof(t->lines[0]), fmt, a);
+       if(debug <= 1)
+               return;
+       fprint(2, "%d\t%s\n", (current != nil) ? current->tid : 0, slot);
+}
+
+/* Give the current process a trace buffer, but only when debug > 0. */
+void inittrace(void)
+{
+       if(debug <= 0)
+               return;
+       current->trace = alloctrace();
+}
+
+/* Detach the trace buffer from proc, if it has one, and free it. */
+void exittrace(Uproc *proc)
+{
+       Tracebuf *old;
+
+       old = proc->trace;
+       if(old == nil)
+               return;
+       /* unhook first so proc never points at freed memory */
+       proc->trace = nil;
+       freetrace(old);
+}
+
+/*
+ * Set up the trace buffer of a newly created process.  If the current
+ * process has none, neither does the new one.  Otherwise the new
+ * process gets a byte-for-byte copy of the current buffer when copy
+ * is non-zero, or a fresh empty buffer when it is zero.
+ */
+void clonetrace(Uproc *new, int copy)
+{
+       Tracebuf *src, *dup;
+
+       src = current->trace;
+       if(src == nil){
+               new->trace = nil;
+               return;
+       }
+
+       if(!copy){
+               new->trace = alloctrace();
+               return;
+       }
+
+       dup = kmalloc(sizeof(*src));
+       memmove(dup, src, sizeof(*src));
+       new->trace = dup;
+}
+
+/*
+ * printf-style entry point for tracing: append a formatted line to
+ * the current process's trace buffer.  Does nothing when there is no
+ * current process or it has no trace buffer.
+ */
+void tprint(char *fmt, ...)
+{
+       va_list args;
+       Uproc *self;
+
+       self = current;
+       if(self == nil || self->trace == nil)
+               return;
+
+       va_start(args, fmt);
+       vputtrace((Tracebuf*)self->trace, fmt, args);
+       va_end(args);
+}
diff --git a/linux_emul_base/trap.c b/linux_emul_base/trap.c
new file mode 100644 (file)
index 0000000..eda75a9
--- /dev/null
@@ -0,0 +1,110 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+#pragma profile off
+
+/*
+ * Resume the emulated program using the register set saved in
+ * current->ureg, which is cleared here.  Outside of a note handler
+ * (innote == 0) this is done with jumpureg; inside one, innote is
+ * reset and the note is finished with noted(NCONT).
+ */
+void
+retuser(void)
+{
+       Uproc *p;
+       Ureg *u;
+
+       p = current;
+       u = p->ureg;
+       p->ureg = nil;
+       /* NOTE(review): jumpureg presumably does not return - confirm */
+       if(p->innote == 0)
+               jumpureg(u);
+       p->innote = 0;
+       noted(NCONT);
+}
+
+/*
+ * Note handler registered by inittrap.  v is the register set at the
+ * time of the note (saved in current->ureg), m the note string.  The
+ * note is classified by its prefix and either turned into a signal
+ * for the current process, passed to linuxcall, or treated as fatal.
+ * Handled notes end with handlesignals() and retuser().
+ */
+static void
+handletrap(void *v, char *m)
+{
+       Uproc *p;
+       Usiginfo si;
+
+       p = current;
+       p->innote = 1;
+       p->ureg = v;
+
+       /*
+        * An interrupt note either acknowledges an earlier notification
+        * (notified set: just clear it) or is delivered as SIGINT.
+        */
+       if(strncmp(m, "interrupt", 9) == 0){
+               if(p->notified){
+                       p->notified = 0;
+               } else {
+                       memset(&si, 0, sizeof(si));
+                       si.signo = SIGINT;
+                       sendsignal(p, &si, 0);
+               }
+               goto handled;
+       }
+
+       /* with a trace hook installed, every other note goes straight to it */
+       if(p->traceproc)
+               goto traced;
+
+       /* let linuxcall look at the fault; a zero return means it dealt with it */
+       if(strncmp(m, "sys: trap: general protection violation", 39) == 0)
+               if(linuxcall() == 0)
+                       goto handled;
+
+       /* nothing to do for this note here */
+       if(strncmp(m, "sys: write on closed pipe", 25) == 0)
+               goto handled;
+
+       /* the remaining traps become signals, with the faulting pc as address */
+       if(strncmp(m, "sys: trap: invalid opcode", 25) == 0){
+               memset(&si, 0, sizeof(si));
+               si.signo = SIGILL;
+               si.code = ILL_ILLOPC;
+               si.fault.addr = (void*)p->ureg->pc;
+               sendsignal(p, &si, 0);
+               goto handled;
+       }
+
+       if(strncmp(m, "sys: trap: divide error", 23) == 0){
+               memset(&si, 0, sizeof(si));
+               si.signo = SIGFPE;
+               si.code = FPE_INTDIV;
+               si.fault.addr = (void*)p->ureg->pc;
+               sendsignal(p, &si, 0);
+               goto handled;
+       }
+
+       if(strncmp(m, "sys: trap: overflow", 19) == 0){
+               memset(&si, 0, sizeof(si));
+               si.signo = SIGFPE;
+               si.code = FPE_INTOVF;
+               si.fault.addr = (void*)p->ureg->pc;
+               sendsignal(p, &si, 0);
+               goto handled;
+       }
+
+       /*
+        * Unrecognised note: log it, then either take the system default
+        * action (when debugging) or terminate the process with SIGKILL.
+        */
+       trace("handletrap: %s", m);
+       if(debug)
+               noted(NDFLT);
+
+       exitproc(p, SIGKILL, 1);
+
+       /*
+        * The traced label sits inside the if so that the early
+        * "goto traced" above skips the repeated traceproc test.
+        */
+handled:
+       if(p->traceproc)
+traced:        p->traceproc(p->tracearg);
+
+       handlesignals();
+       retuser();
+}
+
+#pragma profile on
+
+
+/*
+ * Mask the floating point exceptions in the FPU control register and
+ * install handletrap as the note handler.
+ */
+void inittrap(void)
+{
+       ulong fcr;
+
+       /* disable FPU faults */
+       fcr = getfcr();
+       fcr = fcr & ~(FPINEX|FPOVFL|FPUNFL|FPZDIV|FPINVAL);
+       setfcr(fcr);
+
+       notify(handletrap);
+}
similarity index 100%
rename from bsd_man2_all
rename to ref/bsd_man2_all