add linux_emul base, reorganize docs master
authorian <ian@kremlin.cc>
Wed, 18 Feb 2015 21:43:27 +0000 (16:43 -0500)
committerian <ian@kremlin.cc>
Wed, 18 Feb 2015 21:43:27 +0000 (16:43 -0500)
39 files changed:
linux_emul_base/CHANGES [new file with mode: 0644]
linux_emul_base/README [new file with mode: 0644]
linux_emul_base/bits.s [new file with mode: 0644]
linux_emul_base/bootstrap/tar [new file with mode: 0755]
linux_emul_base/bufproc.c [new file with mode: 0644]
linux_emul_base/consdev.c [new file with mode: 0644]
linux_emul_base/dat.h [new file with mode: 0644]
linux_emul_base/doc/ioctl_list.txt [new file with mode: 0644]
linux_emul_base/doc/linuxemu.txt [new file with mode: 0644]
linux_emul_base/doc/todo.txt [new file with mode: 0644]
linux_emul_base/dspdev.c [new file with mode: 0644]
linux_emul_base/error.c [new file with mode: 0644]
linux_emul_base/exec.c [new file with mode: 0644]
linux_emul_base/file.c [new file with mode: 0644]
linux_emul_base/fns.h [new file with mode: 0644]
linux_emul_base/fs.c [new file with mode: 0644]
linux_emul_base/linux [new file with mode: 0755]
linux_emul_base/linux.h [new file with mode: 0644]
linux_emul_base/linuxcall.c [new file with mode: 0644]
linux_emul_base/linuxcalltab [new file with mode: 0644]
linux_emul_base/linuxcalltab.awk [new file with mode: 0755]
linux_emul_base/main.c [new file with mode: 0644]
linux_emul_base/mem.c [new file with mode: 0644]
linux_emul_base/miscdev.c [new file with mode: 0644]
linux_emul_base/mkfile [new file with mode: 0644]
linux_emul_base/pipedev.c [new file with mode: 0644]
linux_emul_base/poll.c [new file with mode: 0644]
linux_emul_base/proc.c [new file with mode: 0644]
linux_emul_base/procdev.c [new file with mode: 0644]
linux_emul_base/ptydev.c [new file with mode: 0644]
linux_emul_base/rootdev.c [new file with mode: 0644]
linux_emul_base/signal.c [new file with mode: 0644]
linux_emul_base/sockdev.c [new file with mode: 0644]
linux_emul_base/stat.c [new file with mode: 0644]
linux_emul_base/time.c [new file with mode: 0644]
linux_emul_base/tls.c [new file with mode: 0644]
linux_emul_base/trace.c [new file with mode: 0644]
linux_emul_base/trap.c [new file with mode: 0644]
ref/bsd_man2_all [moved from bsd_man2_all with 100% similarity]

diff --git a/linux_emul_base/CHANGES b/linux_emul_base/CHANGES
new file mode 100644 (file)
index 0000000..138a094
--- /dev/null
@@ -0,0 +1,623 @@
+2008-08-16
+Creation of a CHANGES file
+
+An entry starts with the date followed by a newline and then
+the content follows. Usually, the first line after the date
+is a short description and then a longer one follows.
+To terminate the entry, insert two newlines at the end.
+
+So this entry serves as an example. Hope this is simple enough :-)
+
+
+2008-08-16
+New debugging implemented
+
+Debug code removed from mem.c and trap.c and rewritten in
+acid. See the DEBUGGING section in the HOWTO file for further
+information.
+
+
+2008-08-17
+Fontconfig crash fixed, Debug code fixes
+
+libfontconfig mapped some config files with len == 0, this
+was not handled correctly so it crashed.
+
+the umem() acid function didn't check for zero segment
+pointers so it showed invalid data for the mostly unused
+SEGSHARED segment.
+
+
+2008-08-18
+Some minor fixes
+
+more checking in memory manager
+sys_[gs]etpgrp implemented for pid != current->pid
+renamed emu.c to main.c
+set UID/GID/EUID/EGID in AUXVEC on exec()
+
+
+
+2008-08-21
+Making prof(1) work (at least dont let it crash)
+
+mem.c: convertseg():
+
+Replaced read() calls with pread() to prevent a profiling related crash.
+The problem was that convertseg() detaches the DATA segment, reattaches
+a new one and used read() to get the contents back.  read() was a
+profiled function and the profiler finds its structures cleared to
+zero and crashes.  We now use pread(), which is an unprofiled assembly
+syscall stub.
+
+Still, child processes and kprocs are not currently profiled.
+
+
+2008-08-22
+AF_UNIX client sockets implemented
+
+Its a little bit of a hack. We do the AF_UNIX handling like APE
+does so we can interact with the ported Xservers Xbr and equis.
+
+
+2008-08-23
+Workaround for mozilla GPFAULT bug
+
+If a process is notified that it has pending signals with the
+"sig" message and if the note interrupts the execution of an
+INT 0x80 instruction, a syscall in the handler causes mysterious
+crashes I don't understand.
+
+The workaround detects the condition and delays the
+handling of the signals returning back to userspace.
+
+I could reproduce the condition and this hack seems
+to work. I should write some testcase to analyze this
+condition further. Maybe its some kernel bug.
+
+
+2008-08-23
+Minor file related fixes/cleanups
+
+- Implemented in miscdev.c for writable /dev/zero
+- sys_umask() now returns the previous umask
+- default umask set to 022
+- sys_umask() sys_cwd() moved from proc.c to file.c
+
+
+2008-08-24
+linuxemu.rc script updated
+
+- resolve relative rootpath
+- generate /etc files for hostname and resolv.conf if not readable
+- removed environment user->USER home->HOME conversion in main.c
+
+
+2008-09-10
+Better workaround for mozilla GPFAULT bug
+
+The problem arises because of the handling of notes in Plan9 kernel.
+
+It happens if linux code does a syscall while there are user notes (like
+the "sig" ones) queued in the process note[] array.  Then the trap()
+function in the kernel will enqueue the trap in the queue and the user
+note gets handled first.
+
+Now, after we have done a syscall in the note handler of the user
+signal, notify() gets called in the kernel that detects the next note
+to be a trap and thinks the note handler itself caused it and kills
+the process.
+
+I suggested a patch that makes sure traps get enqueued at the head of
+the note[] array so that they get handled before a user note, but it's
+not decided whether it will be applied yet, so here is the workaround.
+
+The workaround avoids posting notes to a process that could possibly
+cause a trap in the future before the user note gets handled.  (this
+excludes all linux code because it can issue a syscall anytime) The
+only time it is safe to post notes is if we are in the linuxemu syscall
+handler.
+
+This is mostly the case anyway when notes are used to interrupt
+blocking syscalls (waking a process sleeping in a sigsuspend for
+example)
+
+Linux code that would spin in an endless loop will not be interrupted by
+notes/signals anymore.
+
+
+2008-09-20
+Minor stuff
+
+Masking more fp-errors in trap.c. (To get some dos game running in
+bochs)
+
+prboom and zsnes were bumpy sometimes because our select() didn't set
+the struct timeval *tvp to the time the select call did not sleep.
+
+The stat function pointer was removed from the Ufile struct and all the *dev.c
+files. I have no need to return custom stats from the devices right now
+(except for /dev/zero (mode & 0222)) so I moved the only case where it's
+needed into the hackish dir2statmode function in p9dev.c. There is also
+an fd field added to the generic Ufile struct because most devices
+use plan9 filedescriptors at some point and we use it in fstat(). Saves some
+casting and fstat can use the fd (when it's available) to make a dirstat() and
+convert to linux format.
+
+For the postnote stuff mentioned in the last changes entry, there is a global
+flag in main.c (notehack = 1) to enable/disable the workaround.
+
+
+2008-09-21
+atime/mtime, AT_CLKTCK
+
+new syscalls
+- sys_utime
+- sys_utimes
+- sys_times
+
+the AT_CLKTCK entry in exec is set to _tos->cyclefreq
+now.
+
+
+2008-10-05
+faster death proc detection
+
+Using linuxemu from terminal caused huge network load
+because we did a lookup of /proc/# on every timer round.
+
+now we keep open filedescriptors for /proc/#/args and
+/proc/#/note in the proc structure and only check for
+dead procs every second.
+
+
+2008-10-05
+sound (/dev/dsp)
+
+OSS sound implemented, its only tested with some SDL
+apps (zsnes, prboom).
+
+The output frequency is hardcoded at 44100 Hz and 2
+channels stereo at the moment.
+
+
+2008-10-20
+dsp, stat, mkfile, -d, dns, fs
+
+dspdev detects output frequency now and implement more
+ioctls.
+
+there are universal Ustat and Udirent structs to separate
+the linux formats from driver code.
+
+big change in all devices, added indirection layer fs.c that
+has some kind of mapping from path -> device and forwards
+filesystem calls to device drivers. now drivers can return
+correct stat information.
+
+this is needed for implementing /dev/pts later.
+
+removed the /etc/(hosts resolv.conf) generation code from
+linuxemu.rc because they don't work in some cases and
+cause maintenance nightmares.
+
+use the -d switch for turning on trace to stderr. so we dont
+need to always change the if(0) from trace.c and recompile.
+
+force compiler flags in mkfile, the -T from new mkone breaks
+build on current distribution.
+
+
+2008-10-27
+lots of fixes
+
+- off by one error basepath()
+- chmod used access which resolves always to link target
+- runlink
+- the note hack broke sigprocmask, and sometimes failed
+   to interrupt a sleepproc(). this is now handled in
+   interruptproc(). (this makes drawterm work on linuxemu)
+- write() to pty was not synchronized/not blocking that
+   caused some apps to spin in write() or others to drop
+   characters (curses).
+
+
+2008-10-28
+grow filedescriptor table in dup2
+
+- if the new fd supplied is out of range, don't return error but
+   grow the fdtab so it fits in. (this makes configure work)
+
+2008-11-15
+lots of changes
+
+- Updated HOWTO
+
+- included <ureg.h> in some modules. this fixed the type
+   signature errors of 8l when compiling with -T flag.
+
+- fixed bug in exec()
+   exec needs to run outside the note context, because it
+   is deleting memory segments and that can cause the removal
+   of the stack segment as well. we have to set current->syscall
+   to nil to avoid getting a note posted that could interrupt us in
+   the middle of some malloc() or something.
+
+- sys_pipe() FD_CLOEXEC
+   sys_pipe() create the filedescriptors with FD_CLOEXEC bit
+   set as open() does. this is wrong and caused gimp plugins
+   to fail.
+
+- new signal handling code
+   signal.c has changed a bit. now CLONE_THREAD procs
+   share a signal queue and proc() uses wantsignal() to
+   figure out what process to interrupt.
+
+- restartable syscalls
+   we process the SA_RESTART flag now and are able to restart syscalls
+   that got interrupted by such signals. sleepproc() returns -ERESTART
+   by default now. sys_poll() and sys_sleep() will return -EINTR in
+   any case.
+
+- fs reorganized, [sg]etxattr added, p9cwd added as an optimization
+   to avoid walks().
+
+- ptydev emits SIGINTR to process group, added tty to Uproc.
+
+- added fddev (/dev/fd)
+
+- exitproc() now emits SIGCHLD.
+
+- mem.c: addrok() now takes a prot flags parameter so we can test
+   for expected memory protection too.
+
+- profme(): profiling support for child processes added
+
+- initialization completely moved into main()
+
+- fchmod, fchown, ftruncate moved to file.c
+
+
+2008-10-19
+just code dressing...
+
+renamed some files, added typedef for Ureg, abstracted syscall specific
+code in linuxcall.c, more tracing...
+
+
+2009-02-06
+Fixed the opera fork() no more threads bug
+
+There was a problem of dns resolver zombie processes created by
+opera that used up all the process table due to an incomplete
+implementation of clone().  Linux specifies an exit signal in the lower
+byte of the clone-flags parameter to tell if the process should hang
+around as a zombie so that the parent can wait for it.  If no signal is
+specified here, then the process should exit without becoming a
+zombie.  There is also the case that the parent ignores the SIGCHLD
+signal or has SA_NOCHLDWAIT flags on the SIGCHLD set.  In that case
+the child should also purge itself.  I implemented reparenting,
+because I suspected the bug there but this was not the case.
+
+Here is a new uprocs() acid function that dumps the proctab.
+
+
+2009-02-18
+Minor changes
+
+Added anonymous area merging to reduce the area count and removed
+redundant clearmem calls. Changed Uwaitq lock from QLock to normal
+spinlock. Removed some trace() calls.
+
+The biggest change is that linuxemu.rc now is able to start equis. This
+simplifies writing wrapper scripts to start a browser or other X11 apps.
+
+
+2009-03-25
+Simplified area merging in memory manager
+
+Areas are doubly linked now so its easier to get the previous area for
+mergearea().
+
+
+2009-03-30
+Fixed man-bug. (Restarting syscalls)
+
+Restarting syscalls failed if the signal that was sent to the interrupted
+process was blocked. This is fixed now.
+
+
+2009-04-01
+fixed awd-bug (use builtin cd), make errors more verbose in exec.c
+
+bla
+
+2009-05-11
+fix pipeseek, added pread64, pwrite64 syscalls.
+
+forgot to return -ESPIPE on seekpipe.
+implemented pread64/pwrite64 (needed by git).
+
+
+2009-07-25
+random stuff
+
+- increased bufproc read and queuesize to 4K/64K (fixes links2 -g hang)
+- reuse buffers in bufproc
+- cleanup timer stuff, introduce 5ms min sleep time, avoid interrupt note
+- fix sys_select() to always modify tv
+- fix format mismatch in nextsignal
+- dont combine in sys_readv/sys_writev
+- fix open in devdsp.c
+- s/memcpy/memmove/g
+- dont reset segment registers for signal handlers
+- possibly more that i forgot... use history(1)
+
+
+2009-07-27
+fixed audio delay
+
+keeping track of how many samples (time) have been submitted
+to /dev/audio already and wait when the buffer grows over some
+threshold. this removes the audio delay from games :)
+
+
+2009-07-29
+doc
+
+put documentation in doc subdirectory.
+
+
+2009-07-31
+mremap, segbrk shrinking, pagesize, doc
+
+rewrote mremap implementation to correctly clear area for shrinking.
+handles more error cases and checks for overlap. this fixes the gimp
+invalid pointer bug.
+
+removed segment shrinking with segbrk as this feature may be removed
+in newer kernel versions as it introduced a bug where it is possible to
+unmap pages while the kernel touches them and cause a panic.
+
+removed the ROUNDPAGE() macro from dat.h. heres a pagealign()
+function in mem.c now and the global variable pagesize that is initialized
+in main.
+
+some documentation cleanups.
+
+
+2009-08-24
+dspdev improvements.
+
+do some linear interpolation in audio resampling to get better sound quality.
+avoid copy when no resampling is required.  reflect queue full in
+GETOSPACE ioctl. cleanup code to avoid vlong calculations.
+
+
+2009-08-26
+bugs
+
+fixed uninitialized values in stat which caused -EOVERFLOW on linux
+kernel build.  removed wakeableproc() (changes in signal.c, ptydev.c,
+bufproc.c, poll.c).  fixed sigsuspend race.  simpler waitq code (uses
+lesser memory too).  fixed waitpid race.  added /dev/dsp0 to dspdev
+(makes mikmod work).  fixed rfork/notify crash.
+
+
+2009-08-30
+mplayer, bb, audacity play cursor, bugs
+
+refactored timers, alarm and deadproccheck into one timerproc and
+removed timer.c. every Uproc has a timeout field now that is the
+time in nanoseconds when the timeout expires. on expiration, the
+timerproc sets the value to zero and does a wakeup on the
+timed out process. a process sets/resets its timeout with
+settimeout(delta). the remaining time in ms can be queried with
+timeoutremain().
+
+fixed missing protection flags in setupstack.
+
+more accurate GETOSPACE (mplayer, bb) and new GETOPTR
+ioctl (needed for audacity play cursor) in dspdev.c.
+
+handle kill note as SIGKILL in trap.c.
+
+handle illegal instruction as SIGILL as pass/restore sigcontext
+(needed for mplayer runtime SSE check).
+
+sys_sigreturn now uses current->ureg->sp to find the restore
+information.
+
+preallocate all Uprocs.
+
+
+2009-09-06
+cleanup
+
+removed dev argument from fdgetfile()
+
+return correct -ENOSOCK in socketcall()
+
+fixed pread/pwrite, dev->read/dev->write now take an offset
+argument.
+
+fixed time diff overflow in dspdev
+
+
+2009-09-08
+fuckup, O_TRUNC, restarting syscalls, rc, getdents
+
+fucked up:
+- seek didnt work for whence == 1 as the plan9 seek pointer was
+   never moved in read due to change to pread. this caused cp to
+   corrupt the output file when it skipped null blocks.
+- basepath in fs.c was broken
+- readv/writev didnt increment the file offset
+
+what we have now is that file.c does all the offset tracking, and
+devices provide a size() function that returns the actual file size.
+
+added O_TRUNC for open.
+
+signal restarting sometimes resulted in returning -ERESTART to
+userspace. this could happen when another thread had stolen
+our signal. we now restart the syscall in nextsignal() even if
+there was no signal pending for us.
+
+removed the exitsig function from linuxemu.rc as we can use
+the -terminate option of the xserver to get it shutdown.
+
+read the whole directory, then calculate file offset for directory
+entries. 
+
+
+2009-09-12
+signal handling changes, acid, rc
+
+
+2009-09-20
+sockets, basepath, alarm
+
+fixed error in basepath (*ps vs ps) and implemented nonblocking connect,
+server sockets, socketpair, sys_alarm
+
+
+2009-10-13
+lots of changes
+
+simplified locking by making process wakeup non blocking.
+to not miss wakeups, the to be suspended proc should call wakeme(1)
+before it goes to sleep.
+
+timers for alarm/itimer have been moved to the per "process" signal
+data. current->timeout is still local to the current "thread".
+
+sys_kill() now makes sure we only send one signal per "process".
+
+syscall restarting now can use the Urestart (current->restart)
+structure to remember state.  (implemented for nanosleep, poll and
+select)
+
+changed default to non tracing.
+
+pty now handles winsize changes. fixing current tty changing. (ssh bug,
+rxvt bug)
+
+added /dev/random and /dev/urandom to miscdev.
+
+more ioctls for dspdev.
+
+enforce non reentrancy for traps.
+
+
+2009-10-15
+fixed sys_brk()
+
+we now use a separate segment for the BSS and dont intermix mmap and brk.
+thanks jibanes for reporting!
+
+
+2010-02-27
+futex, TLS, mprotect
+
+implemented sys_futex() finally
+
+changed tls to use the new /dev/gdt interface to change its
+process segment descriptors
+
+fixed mprotect
+
+
+2010-04-30
+linuxemu.rc gone, documentation
+
+removed linuxemu.rc and replaced it with linux.
+
+usage: linux [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...]
+
+linuxroot is now an optional parameter (-r). it will default to /sys/lib/linux.
+
+dont hide /lib/tls anymore and bind devarch. if you dont want to
+patch your kernel with the segdescr patch and use mroot[-linuxemu].tbz
+you can rename /lib/tls to /lib/_tls_disabled_.
+
+
+2010-05-02
+exit_group, exec, futex, waitpid, quoted arguments
+
+properly implement exit_group and zap all threads. notify
+all parent threads.
+
+zap threads in exec.
+
+implement FUTEX_REQUEUE and FUTEX_CMP_REQUEUE.
+
+handle WALL, WCLONE and WNOHANG in waitpid.
+
+preserve quoted arguments to linux.
+
+
+2010-05-11
+select/poll and EBADF, execve malloc, set_thread_area, initproc, SIGSTOP/SIGCONT, 
+tty, getsid, getpeeraddr, /proc
+
+select and poll never return -EBADF but ignore the offending
+filedescriptor. this is wrong in the manpage! (this was needed
+to survive the python configure script)
+
+handle malloc errors in execve and dont panic when elf
+loading fails but kill the process.
+
+detect empty descriptors in set_thread_area so descriptors
+can be freed.
+
+move some of the initialization from main to initproc.
+
+SIGSTOP/SIGCONT handling now works for thread groups. for this
+we now have stopproc() and contproc() that are called from the
+signal code when SIGSTOP or SIGCONT signal is received. each Uproc
+now has a traceproc callback that is called when we enter or
+exit the kernel. zapthreads() and stopproc() use this to get all threads
+in the wanted state. for stopped procs, waiting happens in
+the signal code so calling handlesignals() of a stopped proc will
+block until it gets killed or continued.
+
+new fields in Uproc:
+traceproc, tracearg - called when entering or exiting the kernel
+wstate - current wait state of this process. WEXITED, WSTOPPED, WCONTINUED.
+wevent - like wstate, but reset by waitpid
+comm - double null terminated string array. first entry is the full exe name
+followed by the execve arguments.
+
+heres a new format %S for signal numbers.
+
+the per thread tty is gone. the tty is now in the per process signal queue.
+gettty() and settty() can be used to modify it. ptydev now allows opening
+the slave tty multiple times. (fixes midnight commander error)
+
+implemented sys_getsid().
+
+fix AF_INET padding and byte order for getpeername socketcall.
+
+implemented /proc (procdev). fddev is gone. /dev/tty handled by
+ptydev now. this makes pkill, ps, top and inkscape work!
+
+
+2010-05-28
+fixed pipe filedescriptor leak in AF_UNIX
+
+we leaked the sock->other descriptor when failing to connect
+a AF_UNIX socket. thanks yarikos for reporting!
+
+
+2011-08-05
+rename to existing symlink target bug, profine -> profile
+
+renaming a symlink to an existing symlink would cause the
+file to be renamed to .udir.L.udir.L....
+
+fix profine/profile typo
+
+2014-11-20
+change uname release to 3.2.1 to make debian 7.0 not complain
+(thanks henesy)
diff --git a/linux_emul_base/README b/linux_emul_base/README
new file mode 100644 (file)
index 0000000..677085d
--- /dev/null
@@ -0,0 +1,138 @@
+INTRO
+
+Linuxemu is a program that can execute Linux/i386 ELF binaries on
+Plan9.  It was started by Russ Cox and development was continued by
+me.  It's open source, I don't care what you are doing with it, but maybe
+Russ does, I don't know :-)
+
+If you found some bugs or have some other improvements/ideas send an
+email to:
+
+cinap_lenrek AT gmx DOT de
+
+
+SOURCE
+
+linuxemu is available on sources. On Plan9 do:
+
+% 9fs sources
+% cp /n/sources/contrib/cinap_lenrek/linuxemu3.tgz .
+
+Another source is my server on the web:
+
+% hget http://9hal.ath.cx/usr/cinap_lenrek/linuxemu3.tgz >linuxemu3.tgz
+
+
+DOCUMENTATION
+
+documentation is provided in the doc directory:
+
+doc/linuxemu.txt
+doc/todo.txt
+
+
+COMPILE
+
+% tar xzf linuxemu3.tgz
+% cd linuxemu3
+% mk
+
+
+INSTALL
+
+% mk install
+
+
+BOOTSTRAP
+
+You need a linux rootfilesystem packed in a tarball. Go!
+get some linux rootfs:
+
+http://9hal.ath.cx/usr/cinap_lenrek/mroot.tbz
+http://9hal.ath.cx/usr/cinap_lenrek/mroot-linuxemu.tbz
+
+the -linuxemu version contains no symlinks and can be extracted with
+plain plan9 tools bunzip/tar so you can skip the BOOTSTRAP section.
+:-)
+
+You can create your own with debootstrap on debian linux...  or help
+me write an installer that unpacks and installs slackware on plan9...
+In any case, linuxemu is not hardwired to any linux distribution!
+
+Extract your linux rootfilesystem with the static linked gnutar from
+the bootstrap directory.  (This will create all the fake symlinks for
+you)
+
+% 8.out bootstrap/tar xf /tmp/mroot.tar 
+
+
+RUNNING
+
+Then you can use the linux script to "chroot" into your linux
+rootfs. the linux script is necessary because for linux programs
+to run, shared libraries from your linux root have to appear at /lib
+and /usr/lib and configuration files are expected to be in /etc.
+the script will build a private namespace and bind the linuxroot
+over the plan9 root. the original plan9 namespace is mounted to /9.
+
+% linux -r ./mroot /bin/bash -i
+
+if you omit the -r option, the linuxroot defaults to /sys/lib/linux. you
+may put your linux root there or add a bind to your $home/lib/profile.
+
+You should change /etc/resolv.conf to match your network nameserver
+setup.  Also, you may want to edit /etc/apt/sources.list to change the
+debian mirror.
+
+
+DEBUGGING
+
+If linuxemu crashes, use acid to figure out whats going on:
+
+% mk acid
+% acid -l linuxemu.acid <pid>
+
+then you can issue the following commands:
+
+ustk()                         dump a (userspace) stacktrace for the current thread
+umem(Current())                dump the memory mappings
+ufds(Current())                        dump the filedescriptor table
+utrace(Current())              dump the internal tracebuffer (enabled by -d option)
+
+use xasm()/xcasm() for disassembly for linux code.
+
+You can also enable full trace logging:
+
+% linux -r ./mroot -dd /bin/bash -i >[2]/tmp/linuxemu.log
+
+This slows linuxemu down.  In case of race conditions, it often
+happens that the bug disappears when doing full trace logging!
+
+
+NPTL/thread-local storage
+
+If you get one of these errors:
+
+"cannot set up thread-local storage: cannot set up LDT for thread-local storage"
+
+this is glibc/libpthread complaining!  the problem is the following:
+glibc on i386 decided at some point to use the extra segment registers
+GS and FS as an indirection pointer for thread local storage.  the
+operating system kernel therefore must have a mechanism to let userspace
+change descriptor table entries and swap them in/out on context
+switch.
+
+to make it work, there are several options:
+
+1) recompile and link the program with a pre NPTL version of glibc.
+
+2) on some distributions, a non-tls version of libc/libpthread is available.
+in my debian mroot, the NPTL version is in /lib/tls, the older version
+is in /lib. by renaming /lib/tls to /lib/_tls_disabled_ the loader will
+use the non-tls version.
+
+3) i made a kernel patch that adds support for per process descriptors to
+plan9:
+/n/sources/contrib/cinap_lenrek/segdescpatch
+http://9hal.ath.cx/usr/cinap_lenrek/segdescpatch.tgz
+it will add the files gdt and ldt to devarch (#P).
diff --git a/linux_emul_base/bits.s b/linux_emul_base/bits.s
new file mode 100644 (file)
index 0000000..5a6c86e
--- /dev/null
@@ -0,0 +1,53 @@
+TEXT   incref(SB),$0           /* incref(l): atomically add 1 to the 32-bit word at l */
+       MOVL    l+0(FP),AX      /* AX = l, the first argument */
+       LOCK                    /* lock prefix applies to the INCL that follows */
+       INCL    0(AX)
+       RET                     /* AX still holds l; no result is computed */
+
+TEXT   decref(SB),$0           /* decref(l): atomically subtract 1 from the 32-bit word at l; returns 0 when it hits zero, else 1 */
+       MOVL    l+0(FP),AX      /* AX = l, the first argument */
+       LOCK                    /* lock prefix applies to the DECL that follows */
+       DECL    0(AX)
+       JZ      iszero          /* zero flag from DECL: the word just became zero */
+       MOVL    $1, AX          /* word is still nonzero: return 1 */
+       RET
+iszero:
+       MOVL    $0, AX          /* word reached zero: return 0 */
+       RET
+
+TEXT jumpureg(SB), 1, $0       /* jumpureg(ureg): load registers from *ureg and jump to its saved PC */
+       MOVL ureg+0(FP), AX     /* ureg in AX */
+       MOVL 68(AX), SP         /* restore SP */
+       SUBL $12, SP            /* scratch area just below the restored stack pointer */
+       MOVL 28(AX), BX         /* put AX on 4(SP) */
+       MOVL BX, 4(SP)          /* parked here because AX is still needed as the ureg pointer */
+       MOVL 56(AX), BX         /* put PC on 8(SP) */
+       MOVL BX, 8(SP)          /* this is the slot the final RET pops */
+       MOVL 0(AX), DI          /* restore registers */
+       MOVL 4(AX), SI          /* field offsets presumably match Ureg in <ureg.h> -- TODO confirm */
+       MOVL 8(AX), BP
+       MOVL 16(AX), BX
+       MOVL 20(AX), DX
+       MOVL 24(AX), CX
+       MOVL 4(SP), AX          /* restore AX */
+       ADDL $8, SP             /* SP now points at the saved PC */
+       RET                     /* pops the saved PC; SP ends at the value loaded from 68(AX) */
+
+TEXT linux_sigreturn(SB), 1, $0        /* issue system call number 119 through INT 0x80 */
+       MOVL $119, AX           /* sys_sigreturn */
+       INT $0x80               /* presumably caught by the emulator's trap handling -- confirm in trap.c */
+       RET
+
+TEXT linux_rtsigreturn(SB), 1, $0      /* issue system call number 173 through INT 0x80 */
+       MOVL $173, AX           /* sys_rt_sigreturn */
+       INT $0x80               /* presumably caught by the emulator's trap handling -- confirm in trap.c */
+       RET
+
+TEXT get_ds(SB), 1, $0         /* get_ds(): return the current DS segment register value */
+       PUSHL DS                /* segment register is moved to AX by way of the stack */
+       POPL AX
+       RET
+TEXT get_cs(SB), 1, $0         /* get_cs(): return the current CS segment register value */
+       PUSHL CS                /* segment register is moved to AX by way of the stack */
+       POPL AX
+       RET
diff --git a/linux_emul_base/bootstrap/tar b/linux_emul_base/bootstrap/tar
new file mode 100755 (executable)
index 0000000..a403a72
Binary files /dev/null and b/linux_emul_base/bootstrap/tar differ
diff --git a/linux_emul_base/bufproc.c b/linux_emul_base/bufproc.c
new file mode 100644 (file)
index 0000000..ceae26b
--- /dev/null
@@ -0,0 +1,263 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Bufproc Bufproc;
+typedef struct Bufq Bufq;
+
+struct Bufq                    /* one fixed-size buffer of data read from the source fd */
+{
+       Bufq            *next;  /* next buffer in the filled queue or on the free list */
+
+       uchar   *start;         /* first unread byte; unread data is [start, end) */
+       uchar   *end;           /* one past the last byte stored by read() */
+
+       uchar   data[8*1024];   /* storage that start and end point into */
+};
+
+struct Bufproc                 /* state shared between a reader proc (bufproc()) and its consumers */
+{
+       Ref;                    /* reference count; set to 2 in newbufproc(), dropped in freebufproc() */
+       QLock;                  /* taken with qlock(b) around accesses to the fields below */
+
+       int             fd;     /* source descriptor; set to -1 by freebufproc() or after a read error */
+       int             error;  /* mkerror() result stored when read() fails; 0 if none */
+       int             notefd; /* open /proc/<pid>/note of the reader proc */
+
+       Bufq            *qf;    /* free list of buffers kept for reuse */
+       Bufq            *qh;    /* head of the queue of filled buffers */
+       Bufq            **qt;   /* link where the next filled buffer is appended (&qh when empty) */
+
+       int             wr;     /* 1 while the reader proc waits in rendezvous(&wr) for the queue to drain */
+       Uwaitq  wq;             /* wait queue used with sleepq()/pollwait()/wakeq() */
+};
+
+static int
+queuesize(Bufq *q)             /* returns the number of unread bytes in the list starting at q */
+{
+       int n;
+
+       n = 0;
+       while(q){
+               n += (q->end - q->start);       /* unread bytes held by this buffer */
+               q = q->next;
+       }
+       return n;
+}
+
+void
+freebufproc(void *bp)          /* drop one reference to the Bufproc bp (nil is ignored); the last drop frees it */
+{
+       Bufproc *b = bp;
+       Bufq *q;
+
+       if(b == nil)
+               return;
+       qlock(b);
+       b->fd = -1;             /* makes the loop in bufproc() stop */
+       if(decref(b)){          /* nonzero: references remain, so nothing is freed here */
+               if(b->wr){
+                       b->wr = 0;
+                       while(rendezvous(&b->wr, 0) == (void*)~0)       /* release the parked reader proc; retried on ~0 (presumably interrupted -- confirm) */
+                               ;
+               } else {
+                       write(b->notefd, "interrupt", 9);       /* post a note to the reader proc; the result is ignored */
+               }
+               qunlock(b);
+               return;
+       }
+       qunlock(b);
+
+       *b->qt = b->qf;         /* chain the free list behind the queue so one walk frees both */
+       while(q = b->qh){
+               b->qh = q->next;
+               free(q);
+       }
+       close(b->notefd);
+       free(b);
+}
+
+static void
+bufproc(void *aux)             /* reader proc body: fill the queue of the Bufproc aux from its fd until closed or read() fails */
+{
+       Bufproc *b = aux;
+       Bufq *q;
+       int ret;
+       int fd;
+
+       setprocname("bufproc()");
+
+       q = nil;                /* buffer held by this proc but not yet queued */
+       qlock(b);
+       for(;;){
+               while((b->fd >= 0) && (queuesize(b->qh) >= 64*1024)){   /* queue is full: wait until woken */
+                       b->wr = 1;
+                       qunlock(b);
+                       while(rendezvous(&b->wr, 0) == (void*)~0)       /* partner is readbufproc() or freebufproc() */
+                               ;
+                       qlock(b);
+               }
+               if((fd = b->fd) < 0)    /* fd was invalidated, e.g. by freebufproc() */
+                       break;
+               if((q == nil) && (q = b->qf))   /* reuse a buffer from the free list if there is one */
+                       b->qf = q->next;
+               qunlock(b);
+
+               if(q == nil)
+                       q = kmalloc(sizeof(*q));        /* used unchecked; presumably kmalloc never returns nil -- confirm */
+               q->next = nil;
+               q->end = q->start = &q->data[0];
+               ret = read(fd, q->start, sizeof(q->data));      /* done without holding the lock */
+
+               qlock(b);
+               if(ret < 0){
+                       ret = mkerror();
+                       if(ret == -EINTR || ret == -ERESTART)
+                               continue;       /* loop again, keeping q for the retry */
+                       b->error = ret;
+                       b->fd = -1;
+                       break;
+               }
+               q->end = q->start + ret;
+               *b->qt = q;             /* append at the tail; NOTE(review): a 0-byte read queues an empty buffer and the loop keeps reading */
+               b->qt = &q->next;
+               q = nil;
+               wakeq(&b->wq, MAXPROC);
+       }
+       if(q){                  /* unused buffer goes back on the free list */
+               q->next = b->qf;
+               b->qf = q;
+       }
+       wakeq(&b->wq, MAXPROC); /* let waiters see that fd is now < 0 */
+       qunlock(b);
+       freebufproc(b);         /* drop the reader proc's own reference */
+}
+
+void*
+newbufproc(int fd)             /* allocate a Bufproc for fd, fork its reader proc and return the Bufproc */
+{
+       char buf[80];
+       Bufproc *b;
+       int pid;
+
+       b = kmallocz(sizeof(*b), 1);
+       b->ref = 2;             /* presumably one for the caller and one for the reader proc, which ends with freebufproc(b) */
+       b->fd = fd;
+       b->qt = &b->qh;         /* empty queue: the tail link is the head pointer */
+       if((pid = procfork(bufproc, b, 0)) < 0)
+               panic("unable to fork bufproc: %r");
+       snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+       b->notefd = open(buf, OWRITE);  /* NOTE(review): open() result is not checked */
+
+       return b;
+}
+
+int readbufproc(void *bp, void *data, int len, int peek, int noblock)  /* copy up to len queued bytes into data; returns the count or a negative error; peek leaves the queue untouched */
+{
+       Bufproc *b = bp;
+       uchar *p;
+       Bufq *q;
+       int ret;
+
+       qlock(b);
+       while((q = b->qh) == nil){      /* nothing queued */
+               if(noblock){
+                       ret = -EAGAIN;
+                       goto out;
+               }
+               if(peek){
+                       ret = 0;
+                       goto out;
+               }
+               if(b->fd < 0){          /* source is gone: report the stored error, or -EIO if none */
+                       if((ret = b->error) == 0)
+                               ret = -EIO;
+                       goto out;
+               }
+               if((ret = sleepq(&b->wq, b, 1)) < 0){   /* presumably returns with b locked, given the qunlock below -- confirm */
+                       qunlock(b);
+                       return ret;
+               }
+       }
+
+       p = data;
+       ret = 0;
+       while(q != nil){
+               int n;
+
+               n = q->end - q->start;
+               if(n == 0)              /* empty buffer (queued after a 0-byte read) ends the copy and stays queued */
+                       break;
+               if(n > len - ret)       /* clamp to the space left in data */
+                       n = len - ret;
+               memmove(p, q->start, n);
+               p += n;
+               ret += n;
+               if(q->start+n >= q->end){       /* this buffer was copied completely */
+                       if(!peek){
+                               Bufq *t;
+
+                               t = q->next;
+                               if((b->qh = q->next) == nil)    /* queue became empty: reset the tail link */
+                                       b->qt = &b->qh;
+                               q->next = b->qf;        /* recycle the buffer onto the free list */
+                               b->qf = q;
+                               q = t;
+                       } else {
+                               q = q->next;
+                       }
+               } else {
+                       if(!peek)
+                               q->start += n;  /* consume only the copied prefix */
+                       break;
+               }
+       }
+
+       if(b->wr && !peek){     /* reader proc is parked in rendezvous: release it */
+               b->wr = 0;
+               while(rendezvous(&b->wr, 0) == (void*)~0)
+                       ;
+               qunlock(b);
+
+               return ret;
+       }
+out:
+       qunlock(b);
+
+       return ret;
+}
+
+int pollbufproc(void *bp, Ufile *file, void *tab)      /* returns the POLL* bits that currently apply to the Bufproc bp */
+{
+       Bufproc *b = bp;
+       int ret;
+
+       ret = 0;
+
+       qlock(b);
+       pollwait(file, &b->wq, tab);    /* presumably registers file on wq for later wakeups -- confirm in poll.c */
+       if(b->fd >= 0){
+               ret |= POLLOUT;         /* reported whenever the source fd is still valid */
+       } else if(b->error < 0)
+               ret |= POLLERR;
+       if(b->qh)
+               ret |= POLLIN;          /* at least one buffer is queued */
+       qunlock(b);
+
+       return ret;
+}
+
+int nreadablebufproc(void *bp) /* returns the number of queued, unread bytes in the Bufproc bp */
+{
+       Bufproc *b = bp;
+       int ret;
+
+       qlock(b);
+       ret = queuesize(b->qh); /* sum is taken while holding the lock */
+       qunlock(b);
+
+       return ret;
+}
diff --git a/linux_emul_base/consdev.c b/linux_emul_base/consdev.c
new file mode 100644 (file)
index 0000000..ace58d0
--- /dev/null
@@ -0,0 +1,157 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Cons Cons;
+
+/*
+ * Open-file state for /dev/cons: the generic Ufile, embedded as an
+ * unnamed first member (the hooks below cast Ufile* to Cons*), plus an
+ * opaque buffering helper.
+ */
+struct Cons
+{
+       Ufile;
+       void    *bufproc;       /* nil until bufproccons() creates it */
+};
+
+/*
+ * Close hook: passes whatever cons->bufproc holds (possibly nil) to
+ * freebufproc().  Always returns 0.
+ */
+static int
+closecons(Ufile *file)
+{
+       freebufproc(((Cons*)file)->bufproc);
+       return 0;
+}
+
+/* Returns the console's bufproc, creating it with newbufproc(0) on first use. */
+static void*
+bufproccons(Cons *cons)
+{
+       void *bp;
+
+       bp = cons->bufproc;
+       if(bp != nil)
+               return bp;
+
+       bp = newbufproc(0);
+       cons->bufproc = bp;
+       return bp;
+}
+
+/* Poll hook: delegates to pollbufproc() on the (lazily created) bufproc. */
+static int
+pollcons(Ufile *file, void *tab)
+{
+       Cons *cons;
+       void *bp;
+
+       cons = (Cons*)file;
+       bp = bufproccons(cons);
+       return pollbufproc(bp, cons, tab);
+}
+
+/*
+ * Read hook.  Goes through the bufproc when the file is O_NONBLOCK or a
+ * bufproc already exists; otherwise reads fd 0 directly, bracketed by
+ * notifyme(1)/notifyme(0).  Returns the count read, -ERESTART when
+ * notifyme(1) reports non-zero, or mkerror() when read() fails.
+ */
+static int
+readcons(Ufile *file, void *buf, int len, vlong)
+{
+       Cons *cons;
+       int nonblock, n;
+
+       cons = (Cons*)file;
+       nonblock = cons->mode & O_NONBLOCK;
+
+       /* buffered path */
+       if(nonblock || cons->bufproc != nil)
+               return readbufproc(bufproccons(cons), buf, len, 0, nonblock);
+
+       /* direct path */
+       if(notifyme(1))
+               return -ERESTART;
+       n = read(0, buf, len);
+       notifyme(0);
+       if(n < 0)
+               n = mkerror();
+       return n;
+}
+
+/*
+ * Write hook: writes buf to fd 1 between notifyme(1) and notifyme(0).
+ * Returns the count written, -ERESTART when notifyme(1) reports
+ * non-zero, or mkerror() when write() fails.
+ */
+static int
+writecons(Ufile *, void *buf, int len, vlong)
+{
+       int n;
+
+       if(notifyme(1) != 0)
+               return -ERESTART;
+       n = write(1, buf, len);
+       notifyme(0);
+       return n < 0 ? mkerror() : n;
+}
+
+/*
+ * Ioctl hook.  Only request 0x541B (FIONREAD/TIOCINQ) is handled: the
+ * value of nreadablebufproc() is stored through arg as an int.  Any
+ * other request yields -ENOTTY; a nil arg yields -EINVAL; a negative
+ * nreadablebufproc() result is returned as-is after storing 0.
+ */
+static int
+ioctlcons(Ufile *file, int cmd, void *arg)
+{
+       Cons *cons;
+       int *out, avail;
+
+       if(cmd != 0x541B)
+               return -ENOTTY;
+       if(arg == nil)
+               return -EINVAL;
+
+       cons = (Cons*)file;
+       out = arg;
+       avail = nreadablebufproc(bufproccons(cons));
+       if(avail < 0){
+               *out = 0;
+               return avail;
+       }
+       *out = avail;
+       return 0;
+}
+
+/*
+ * Open hook: accepts only the exact path "/dev/cons" (-ENOENT
+ * otherwise), allocates a zeroed Cons with one reference and fd 0,
+ * and hands it back through *pf.  Returns 0 on success.
+ */
+static int
+opencons(char *path, int mode, int, Ufile **pf)
+{
+       Cons *cons;
+
+       if(strcmp(path, "/dev/cons") != 0)
+               return -ENOENT;
+
+       /* NOTE(review): the mallocz() result is used without a nil check */
+       cons = mallocz(sizeof(Cons), 1);
+       cons->path = kstrdup(path);
+       cons->fd = 0;
+       cons->dev = CONSDEV;
+       cons->mode = mode;
+       cons->ref = 1;
+       *pf = cons;
+       return 0;
+}
+
+/*
+ * Stat hook: for the exact path "/dev/cons" reports a 0666 character
+ * device owned by the current process's uid/gid, size 0, with the inode
+ * number taken from hashpath().  atime/mtime/ctime are not written.
+ * Any other path yields -ENOENT.
+ */
+static int
+statcons(char *path, int, Ustat *s)
+{
+       if(strcmp(path, "/dev/cons") != 0)
+               return -ENOENT;
+
+       s->dev = 0;
+       s->rdev = 0;
+       s->ino = hashpath(path);
+       s->size = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->mode = S_IFCHR | 0666;
+       return 0;
+}
+
+/*
+ * fstat hook: stats an open console by passing the path stored in the
+ * Ufile to fsstat() (second argument 0) and returns fsstat()'s result.
+ */
+static int
+fstatcons(Ufile *f, Ustat *s)
+{
+       return fsstat(f->path, 0, s);
+}
+
+/* Operation table for /dev/cons; Udev hooks not named here remain nil. */
+static Udev consdev =
+{
+       /* path-based hooks */
+       .open = opencons,
+       .stat = statcons,
+
+       /* open-file hooks */
+       .read = readcons,
+       .write = writecons,
+       .poll = pollcons,
+       .ioctl = ioctlcons,
+       .close = closecons,
+       .fstat = fstatcons,
+};
+
+/*
+ * Registers the console device: stores its operation table in the
+ * CONSDEV slot of devtab and passes it to fsmount() for "/dev/cons".
+ */
+void consdevinit(void)
+{
+       devtab[CONSDEV] = &consdev;
+
+       fsmount(&consdev, "/dev/cons");
+}
diff --git a/linux_emul_base/dat.h b/linux_emul_base/dat.h
new file mode 100644 (file)
index 0000000..f3cf4e0
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Forward typedefs so the declarations below can refer to each other
+ * by bare name.  Ureg is only named here; this header does not define it.
+ */
+typedef struct Ref Ref;
+typedef struct Urestart Urestart;
+typedef struct Uproc Uproc;
+typedef struct Uproctab Uproctab;
+typedef struct Uwaitq Uwaitq;
+typedef struct Uwait Uwait;
+
+typedef struct Udev Udev;
+typedef struct Ufile Ufile;
+typedef struct Ustat Ustat;
+typedef struct Udirent Udirent;
+
+typedef struct Ureg Ureg;
+typedef struct Usiginfo Usiginfo;
+
+/* Global sizing constants. */
+enum {
+       HZ = 100,               /* presumably clock ticks per second -- TODO confirm */
+       PAGESIZE = 0x1000,      /* 4096 */
+
+       MAXPROC = 128,          /* length of Uproctab.proc[] */
+       MAXFD   = 256,          /* presumably the per-process descriptor limit -- TODO confirm */
+
+       USTACK  = 8*1024*1024,  /* 8 MiB; presumably the user stack size -- TODO confirm */
+       KSTACK  = 8*1024,       /* 8 KiB; presumably the emulator-side stack size -- TODO confirm */
+};
+
+/* Reference counter; embedded as an unnamed member of Ufile. */
+struct Ref
+{
+       long    ref;
+};
+
+/*
+ * Per-syscall restart record.  The anonymous union has one arm per
+ * system call name, each holding a single vlong timeout.
+ * NOTE(review): presumably used to resume an interrupted call (see the
+ * -ERESTART returns in the device code) -- confirm against the callers.
+ */
+struct Urestart
+{
+       Urestart                *link;          /* another Urestart; presumably a chain -- TODO confirm */
+       char                    *syscall;       /* presumably the syscall's name -- TODO confirm */
+
+       union {
+               struct {
+                       vlong   timeout;
+               }                       nanosleep;
+               struct {
+                       vlong   timeout;
+               }                       poll;
+               struct {
+                       vlong   timeout;
+               }                       select;
+               struct {
+                       vlong   timeout;
+               }                       futex;
+       };
+};
+
+/*
+ * State of one emulated task.  Uproctab holds MAXPROC of these, and the
+ * `current` macro dereferences a pointer to one of them.  The unnamed
+ * QLock makes the structure itself lockable.
+ * NOTE(review): the group labels below are inferred from field names
+ * only; confirm against the code that reads and writes them.
+ */
+struct Uproc
+{
+       QLock;
+
+       /* ids, credentials and masks (uid/gid are what statcons reports) */
+       int             tid;
+       int             pid;
+       int             ppid;
+       int             pgid;
+       int             psid;
+       int             uid;
+       int             gid;
+       int             umask;
+       int             tlsmask;
+
+       /* presumably the host-side process id and descriptors */
+       int             kpid;
+       int             notefd;
+       int             argsfd;
+
+       /* presumably wait/exit status reporting */
+       int             wstate;
+       int             wevent;
+       int             exitcode;
+       int             exitsignal;
+
+       int             *cleartidptr;
+
+       vlong   timeout;
+
+       vlong   alarm;
+       Uproc   *alarmq;
+
+       /* presumably the state of the system call in progress */
+       char    *state;
+       char    *xstate;
+       int             innote;
+       int             notified;
+       Ureg            *ureg;
+       char            *syscall;
+       void            (*sysret)(int errno);
+       Urestart        *restart;
+       Urestart        restart0;
+       Uwait   *freewait;
+
+       void            (*traceproc)(void *arg);
+       void            *tracearg;
+
+       /* presumably path-resolution context */
+       int             linkloop;
+       char            *root;
+       char            *cwd;
+       char            *kcwd;
+
+       /* opaque per-subsystem state */
+       void            *fdtab;
+       void            *mem;
+       void            *trace;
+       void            *signal;
+
+       /* presumably command name and image layout */
+       char            *comm;
+       int             ncomm;
+       ulong   codestart;
+       ulong   codeend;
+       ulong   stackstart;
+       vlong   starttime;
+};
+
+/* Lockable, fixed-size table of all Uproc slots. */
+struct Uproctab
+{
+       QLock;
+       int             nextpid;        /* presumably the next pid to hand out -- TODO confirm */
+       int             alloc;          /* presumably the number of slots in use -- TODO confirm */
+       Uproc   proc[MAXPROC];
+};
+
+/*
+ * Lockable wait queue holding a pointer to a Uwait.  pollbufproc()
+ * passes the address of such a queue to pollwait().
+ */
+struct Uwaitq
+{
+       QLock;
+       Uwait   *w;             /* presumably the head of the waiter list -- TODO confirm */
+};
+
+/*
+ * One waiter record tying a process and a file to a wait queue.
+ * NOTE(review): `next` and `nextq` look like links in two separate
+ * lists -- confirm which list is which against the wait code.
+ */
+struct Uwait
+{
+       Uwait   *next;
+       Uwaitq  *q;
+       Uwait   *nextq;
+       Uproc   *proc;
+       Ufile   *file;
+};
+
+/*
+ * Device type ids.  Each value is an index into devtab[] and is what a
+ * device stores in Ufile.dev; MAXDEV is the number of ids and the size
+ * of devtab[].
+ */
+enum {
+       ROOTDEV,
+       SOCKDEV,
+       PIPEDEV,
+       CONSDEV,
+       MISCDEV,
+       DSPDEV,
+       PTYDEV,
+       PROCDEV,
+       MAXDEV,
+};
+
+/*
+ * device: table of hooks a device implements.  A device fills in only
+ * the hooks it supports; the others are left unset.
+ */
+struct Udev
+{
+       /* hooks that take a path */
+       int             (*open)(char *path, int mode, int perm, Ufile **pf);
+       int             (*access)(char *path, int perm);
+       int             (*stat)(char *path, int link, Ustat *ps);
+
+       int             (*link)(char *old, char *new, int sym);
+       int             (*unlink)(char *path, int rmdir);
+       int             (*readlink)(char *path, char *buf, int len);
+       int             (*rename)(char *old, char *new);
+       int             (*mkdir)(char *path, int mode);
+       int             (*utime)(char *path, long atime, long mtime);
+       int             (*chmod)(char *path, int mode);
+       int             (*chown)(char *path, int uid, int gid, int link);
+       int             (*truncate)(char *path, vlong size);
+
+       /* hooks that take an open file */
+       int             (*read)(Ufile *file, void *buf, int len, vlong off);
+       int             (*write)(Ufile *file, void *buf, int len, vlong off);
+
+       vlong   (*size)(Ufile *file);
+       int             (*poll)(Ufile *file, void *tab);
+       int             (*ioctl)(Ufile *file, int cmd, void *arg);
+       int             (*close)(Ufile *file);
+
+       int             (*fstat)(Ufile *file, Ustat *ps);
+       int             (*readdir)(Ufile *file, Udirent **pd);
+       
+       int             (*fchmod)(Ufile *file, int mode);
+       int             (*fchown)(Ufile *file, int uid, int gid);
+       int             (*ftruncate)(Ufile *file, vlong size);
+};
+
+/*
+ * Generic open-file object.  Devices embed it as the first member of
+ * their own file structure (as Cons does) and hand it out as Ufile*.
+ */
+struct Ufile
+{
+       Ref;
+
+       int             mode;           /* open mode as passed to the open hook (tested for O_NONBLOCK) */
+       int             dev;            /* device type id, e.g. CONSDEV */
+       char            *path;          /* path the file was opened with */
+       int             fd;
+       vlong   off;
+
+       Udirent *rdaux; /* aux pointer to hold Udirent* chains */
+};
+
+/* Result record filled in by the stat and fstat hooks of a device. */
+struct Ustat
+{
+       int             mode;           /* permission bits or'ed with a file type such as S_IFCHR */
+       int             uid;
+       int             gid;
+       int             dev;
+       int             rdev;
+       vlong   size;
+       ulong   atime;
+       ulong   mtime;
+       ulong   ctime;
+       uvlong  ino;
+};
+
+/*
+ * One directory entry in a singly linked chain.  `name` is a flexible
+ * array member, so storage for the name follows the fixed fields.
+ */
+struct Udirent
+{
+       Udirent *next;
+
+       uvlong  ino;
+       int             mode;
+       char    name[];
+};
+
+/*
+ * Signal information record: three common ints, then an anonymous union
+ * whose arm depends on the kind of signal, then two trailing ints.
+ * NOTE(review): topid/group presumably describe the delivery target
+ * rather than data shown to the receiver -- confirm in the signal code.
+ */
+struct Usiginfo
+{
+       int             signo;
+       int             errno;
+       int             code;
+
+       union {
+               /* kill() */
+               struct {
+                       int     pid;            /* sender's pid */
+                       int     uid;            /* sender's uid */
+               } kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       int     tid;                    /* timer id */
+                       int     overrun;                /* overrun count */
+                       int     val;                    /* same as below */
+                       int     sys_private;    /* not to be passed to user */
+               } timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       int     pid;                    /* sender's pid */
+                       int     uid;                    /* sender's uid */
+                       int     val;
+               } rt;
+
+               /* SIGCHLD */
+               struct {
+                       int     pid;                    /* which child */
+                       int     uid;                    /* sender's uid */
+                       int     status;                 /* exit code */
+                       long    utime;
+                       long    stime;
+               } chld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       void    *addr;          /* faulting insn/memory ref. */
+                       int     trapno;         /* TRAP # which caused the signal */
+               } fault;
+
+               /* SIGPOLL */
+               struct {
+                       long    band;           /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int     fd;
+               } poll;
+       };
+
+       int             topid;
+       int             group;
+};
+
+/*
+ * Emulator-wide globals.
+ * NOTE(review): these are declared without `extern` and without
+ * initializers, so every file that includes this header carries its own
+ * tentative definition; the build relies on the toolchain merging them.
+ */
+int debug;
+long *kstack;
+long *exitjmp;
+Uproc **pcurrent;
+/* `current` reads through pcurrent to reach a Uproc* */
+#define current (*pcurrent)
+vlong boottime;
+
+Udev *devtab[MAXDEV];   /* indexed by device type id; each device's init routine fills its slot */
+Uproctab proctab;
diff --git a/linux_emul_base/doc/ioctl_list.txt b/linux_emul_base/doc/ioctl_list.txt
new file mode 100644 (file)
index 0000000..5caa948
--- /dev/null
@@ -0,0 +1,612 @@
+Ubuntu Manpage Repository
+
+Provided by: manpages-de-dev_0.5-2ubuntu1_all
+
+BEZEICHNUNG
+
+        ioctl_list - Liste der ioctl-Aufrufe im Linux/i386-Kernel
+
+BESCHREIBUNG
+
+        Dies  ist  die  Ioctl-Liste  1.3.27,  eine  Liste von ioctl-Aufrufen im
+        Linux/i386-Kernel    1.3.27.     Sie    enthält    421    ioctls    aus
+        /usr/include/{asm,linux}/*.h.   Für  jeden  ioctl  wird  der numerische
+        Wert, der Name und der Argumenttyp aufgelistet.
+
+        Ein Argumenttyp const struct foo * bedeutet, dass das Argument  Eingabe
+        für  den  Kernel ist.  struct foo * bedeutet, der Kernel gibt das Argu‐
+        ment aus.  Wenn der Kernel das Argument für Ein- und  Ausgabe  benutzt,
+        wird dies durch // I-O markiert.
+
+        Einige ioctls benötigen mehr Argumente oder geben mehr Werte zurück als
+        eine einzige Struktur.  Diese werden durch // MORE markiert und  weiter
+        dokumentiert in einem separaten Abschnitt.
+
+        Diese Liste ist nicht vollständig.  Sie enthält nicht:
+
+                 Ioctls, die intern im Kernel definiert sind (scsi_ioctl.h).
+
+                 Ioctls,  die  in  Modulen definiert sind, die separat vom Kernel
+               verbreitet werden.
+
+        Und natürlich hat die Liste Fehler und Auslassungen.
+
+        Bitte  wenden  Sie   sich   wegen   Änderungen   und   Kommentaren   an
+        <mec@duracef.shout.net>.   Ich  bin  besonders interessiert an Modulen,
+        die ihre eigenen ioctls definieren.  Wenn Sie solch ein  Modul  kennen,
+        teilen  Sie  es  mir bitte mit, damit ich es mir per ftp besorgen kann,
+        und ich berücksichtige seine ioctls  in  der  nächsten  Ausgabe  dieser
+        Liste.
+
+        Bitte  wenden  Sie  sich  wegen  der  Übersetzung ins Deutsche nicht an
+        <krd@burn.rhein-ruhr.de>. ;-)
+
+Haupttabelle
+
+    // <include/asm-i386/socket.h>
+        0x00008901  FIOSETOWN                   const int *
+        0x00008902  SIOCSPGRP                   const int *
+        0x00008903  FIOGETOWN                   int *
+        0x00008904  SIOCGPGRP                   int *
+        0x00008905  SIOCATMARK                  int *
+        0x00008906  SIOCGSTAMP                  timeval *
+
+    // <include/asm-i386/termios.h>
+        0x00005401  TCGETS                      struct termios *
+        0x00005402  TCSETS                      const struct termios *
+        0x00005403  TCSETSW                     const struct termios *
+        0x00005404  TCSETSF                     const struct termios *
+        0x00005405  TCGETA                      struct termio *
+        0x00005406  TCSETA                      const struct termio *
+        0x00005407  TCSETAW                     const struct termio *
+        0x00005408  TCSETAF                     const struct termio *
+        0x00005409  TCSBRK                      int
+        0x0000540A  TCXONC                      int
+        0x0000540B  TCFLSH                      int
+        0x0000540C  TIOCEXCL                    void
+        0x0000540D  TIOCNXCL                    void
+        0x0000540E  TIOCSCTTY                   int
+        0x0000540F  TIOCGPGRP                   pid_t *
+        0x00005410  TIOCSPGRP                   const pid_t *
+        0x00005411  TIOCOUTQ                    int *
+        0x00005412  TIOCSTI                     const char *
+        0x00005413  TIOCGWINSZ                  struct winsize *
+        0x00005414  TIOCSWINSZ                  const struct winsize *
+        0x00005415  TIOCMGET                    int *
+        0x00005416  TIOCMBIS                    const int *
+        0x00005417  TIOCMBIC                    const int *
+        0x00005418  TIOCMSET                    const int *
+        0x00005419  TIOCGSOFTCAR                int *
+        0x0000541A  TIOCSSOFTCAR                const int *
+        0x0000541B  FIONREAD                    int *
+        0x0000541B  TIOCINQ                     int *
+        0x0000541C  TIOCLINUX                   const char *                    // MORE
+        0x0000541D  TIOCCONS                    void
+        0x0000541E  TIOCGSERIAL                 struct serial_struct *
+        0x0000541F  TIOCSSERIAL                 const struct serial_struct *
+        0x00005420  TIOCPKT                     const int *
+        0x00005421  FIONBIO                     const int *
+        0x00005422  TIOCNOTTY                   void
+        0x00005423  TIOCSETD                    const int *
+        0x00005424  TIOCGETD                    int *
+        0x00005425  TCSBRKP                     int
+        0x00005426  TIOCTTYGSTRUCT              struct tty_struct *
+        0x00005450  FIONCLEX                    void
+        0x00005451  FIOCLEX                     void
+        0x00005452  FIOASYNC                    const int *
+        0x00005453  TIOCSERCONFIG               void
+        0x00005454  TIOCSERGWILD                int *
+        0x00005455  TIOCSERSWILD                const int *
+        0x00005456  TIOCGLCKTRMIOS              struct termios *
+        0x00005457  TIOCSLCKTRMIOS              const struct termios *
+        0x00005458  TIOCSERGSTRUCT              struct async_struct *
+        0x00005459  TIOCSERGETLSR               int *
+        0x0000545A  TIOCSERGETMULTI             struct serial_multiport_struct *
+        0x0000545B  TIOCSERSETMULTI             const struct serial_multiport_struct *
+
+    // <include/linux/ax25.h>
+        0x000089E0  SIOCAX25GETUID              const struct sockaddr_ax25 *
+        0x000089E1  SIOCAX25ADDUID              const struct sockaddr_ax25 *
+        0x000089E2  SIOCAX25DELUID              const struct sockaddr_ax25 *
+        0x000089E3  SIOCAX25NOUID               const int *
+        0x000089E4  SIOCAX25DIGCTL              const int *
+        0x000089E5  SIOCAX25GETPARMS            struct ax25_parms_struct *      // I-O
+        0x000089E6  SIOCAX25SETPARMS            const struct ax25_parms_struct *
+
+    // <include/linux/cdk.h>
+        0x00007314  STL_BINTR                   void
+        0x00007315  STL_BSTART                  void
+        0x00007316  STL_BSTOP                   void
+        0x00007317  STL_BRESET                  void
+
+    // <include/linux/cdrom.h>
+        0x00005301  CDROMPAUSE                  void
+        0x00005302  CDROMRESUME                 void
+        0x00005303  CDROMPLAYMSF                const struct cdrom_msf *
+        0x00005304  CDROMPLAYTRKIND             const struct cdrom_ti *
+        0x00005305  CDROMREADTOCHDR             struct cdrom_tochdr *
+        0x00005306  CDROMREADTOCENTRY           struct cdrom_tocentry *         // I-O
+        0x00005307  CDROMSTOP                   void
+        0x00005308  CDROMSTART                  void
+        0x00005309  CDROMEJECT                  void
+        0x0000530A  CDROMVOLCTRL                const struct cdrom_volctrl *
+        0x0000530B  CDROMSUBCHNL                struct cdrom_subchnl *          // I-O
+        0x0000530C  CDROMREADMODE2              const struct cdrom_msf *        // MORE
+        0x0000530D  CDROMREADMODE1              const struct cdrom_msf *        // MORE
+        0x0000530E  CDROMREADAUDIO              const struct cdrom_read_audio * // MORE
+        0x0000530F  CDROMEJECT_SW               int
+        0x00005310  CDROMMULTISESSION           struct cdrom_multisession *     // I-O
+        0x00005311  CDROM_GET_UPC               struct { char [8]; } *
+        0x00005312  CDROMRESET                  void
+        0x00005313  CDROMVOLREAD                struct cdrom_volctrl *
+        0x00005314  CDROMREADRAW                const struct cdrom_msf *        // MORE
+        0x00005315  CDROMREADCOOKED             const struct cdrom_msf *        // MORE
+        0x00005316  CDROMSEEK                   const struct cdrom_msf *
+
+    // <include/linux/cm206.h>
+        0x00002000  CM206CTL_GET_STAT           int
+        0x00002001  CM206CTL_GET_LAST_STAT      int
+
+    // <include/linux/cyclades.h>
+        0x00435901  CYGETMON                    struct cyclades_monitor *
+        0x00435902  CYGETTHRESH                 int *
+        0x00435903  CYSETTHRESH                 int
+        0x00435904  CYGETDEFTHRESH              int *
+        0x00435905  CYSETDEFTHRESH              int
+        0x00435906  CYGETTIMEOUT                int *
+        0x00435907  CYSETTIMEOUT                int
+        0x00435908  CYGETDEFTIMEOUT             int *
+        0x00435909  CYSETDEFTIMEOUT             int
+
+    // <include/linux/ext2_fs.h>
+        0x80046601  EXT2_IOC_GETFLAGS           int *
+        0x40046602  EXT2_IOC_SETFLAGS           const int *
+        0x80047601  EXT2_IOC_GETVERSION         int *
+        0x40047602  EXT2_IOC_SETVERSION         const int *
+
+    // <include/linux/fd.h>
+        0x00000000  FDCLRPRM                    void
+        0x00000001  FDSETPRM                    const struct floppy_struct *
+        0x00000002  FDDEFPRM                    const struct floppy_struct *
+        0x00000003  FDGETPRM                    struct floppy_struct *
+        0x00000004  FDMSGON                     void
+        0x00000005  FDMSGOFF                    void
+        0x00000006  FDFMTBEG                    void
+        0x00000007  FDFMTTRK                    const struct format_descr *
+        0x00000008  FDFMTEND                    void
+        0x0000000A  FDSETEMSGTRESH              int
+        0x0000000B  FDFLUSH                     void
+        0x0000000C  FDSETMAXERRS                const struct floppy_max_errors *
+        0x0000000E  FDGETMAXERRS                struct floppy_max_errors *
+        0x00000010  FDGETDRVTYP                 struct { char [16]; } *
+        0x00000014  FDSETDRVPRM                 const struct floppy_drive_params *
+        0x00000015  FDGETDRVPRM                 struct floppy_drive_params *
+        0x00000016  FDGETDRVSTAT                struct floppy_drive_struct *
+        0x00000017  FDPOLLDRVSTAT               struct floppy_drive_struct *
+        0x00000018  FDRESET                     int
+        0x00000019  FDGETFDCSTAT                struct floppy_fdc_state *
+        0x0000001B  FDWERRORCLR                 void
+        0x0000001C  FDWERRORGET                 struct floppy_write_errors *
+        0x0000001E  FDRAWCMD                    struct floppy_raw_cmd * // MORE // I-O
+        0x00000028  FDTWADDLE                   void
+
+    // <include/linux/fs.h>
+        0x0000125D  BLKROSET                    const int *
+        0x0000125E  BLKROGET                    int *
+        0x0000125F  BLKRRPART                   void
+        0x00001260  BLKGETSIZE                  int *
+        0x00001261  BLKFLSBUF                   void
+        0x00001262  BLKRASET                    int
+        0x00001263  BLKRAGET                    int *
+        0x00000001  FIBMAP                      int *                           // I-O
+        0x00000002  FIGETBSZ                    int *
+
+    // <include/linux/hdreg.h>
+        0x00000301  HDIO_GETGEO                 struct hd_geometry *
+        0x00000302  HDIO_GET_UNMASKINTR         int *
+        0x00000304  HDIO_GET_MULTCOUNT          int *
+        0x00000307  HDIO_GET_IDENTITY           struct hd_driveid *
+        0x00000308  HDIO_GET_KEEPSETTINGS       int *
+        0x00000309  HDIO_GET_CHIPSET            int *
+        0x0000030A  HDIO_GET_NOWERR             int *
+        0x0000030B  HDIO_GET_DMA                int *
+        0x0000031F  HDIO_DRIVE_CMD              int *                           // I-O
+        0x00000321  HDIO_SET_MULTCOUNT          int
+        0x00000322  HDIO_SET_UNMASKINTR         int
+        0x00000323  HDIO_SET_KEEPSETTINGS       int
+        0x00000324  HDIO_SET_CHIPSET            int
+        0x00000325  HDIO_SET_NOWERR             int
+        0x00000326  HDIO_SET_DMA                int
+
+    // <include/linux/if_eql.h>
+        0x000089F0  EQL_ENSLAVE                 struct ifreq *          // MORE // I-O
+        0x000089F1  EQL_EMANCIPATE              struct ifreq *          // MORE // I-O
+        0x000089F2  EQL_GETSLAVECFG             struct ifreq *          // MORE // I-O
+        0x000089F3  EQL_SETSLAVECFG             struct ifreq *          // MORE // I-O
+        0x000089F4  EQL_GETMASTRCFG             struct ifreq *          // MORE // I-O
+        0x000089F5  EQL_SETMASTRCFG             struct ifreq *          // MORE // I-O
+
+    // <include/linux/if_plip.h>
+        0x000089F0  SIOCDEVPLIP                 struct ifreq *                  // I-O
+
+    // <include/linux/if_ppp.h>
+        0x00005490  PPPIOCGFLAGS                int *
+        0x00005491  PPPIOCSFLAGS                const int *
+        0x00005492  PPPIOCGASYNCMAP             int *
+        0x00005493  PPPIOCSASYNCMAP             const int *
+        0x00005494  PPPIOCGUNIT                 int *
+        0x00005495  PPPIOCSINPSIG               const int *
+        0x00005497  PPPIOCSDEBUG                const int *
+        0x00005498  PPPIOCGDEBUG                int *
+        0x00005499  PPPIOCGSTAT                 struct ppp_stats *
+        0x0000549A  PPPIOCGTIME                 struct ppp_ddinfo *
+        0x0000549B  PPPIOCGXASYNCMAP            struct { int [8]; } *
+        0x0000549C  PPPIOCSXASYNCMAP            const struct { int [8]; } *
+        0x0000549D  PPPIOCSMRU                  const int *
+        0x0000549E  PPPIOCRASYNCMAP             const int *
+        0x0000549F  PPPIOCSMAXCID               const int *
+
+    // <include/linux/ipx.h>
+        0x000089E0  SIOCAIPXITFCRT              const char *
+        0x000089E1  SIOCAIPXPRISLT              const char *
+        0x000089E2  SIOCIPXCFGDATA              struct ipx_config_data *
+
+    // <include/linux/kd.h>
+        0x00004B60  GIO_FONT                    struct { char [8192]; } *
+        0x00004B61  PIO_FONT                    const struct { char [8192]; } *
+        0x00004B6B  GIO_FONTX                   struct console_font_desc *  // MORE I-O
+        0x00004B6C  PIO_FONTX                   const struct console_font_desc * //MORE
+        0x00004B70  GIO_CMAP                    struct { char [48]; } *
+        0x00004B71  PIO_CMAP                    const struct { char [48]; } *
+        0x00004B2F  KIOCSOUND                   int
+        0x00004B30  KDMKTONE                    int
+        0x00004B31  KDGETLED                    char *
+        0x00004B32  KDSETLED                    int
+        0x00004B33  KDGKBTYPE                   char *
+        0x00004B34  KDADDIO                     int                             // MORE
+        0x00004B35  KDDELIO                     int                             // MORE
+        0x00004B36  KDENABIO                    void                            // MORE
+        0x00004B37  KDDISABIO                   void                            // MORE
+        0x00004B3A  KDSETMODE                   int
+        0x00004B3B  KDGETMODE                   int *
+        0x00004B3C  KDMAPDISP                   void                            // MORE
+        0x00004B3D  KDUNMAPDISP                 void                            // MORE
+        0x00004B40  GIO_SCRNMAP                 struct { char [E_TABSZ]; } *
+        0x00004B41  PIO_SCRNMAP                 const struct { char [E_TABSZ]; } *
+        0x00004B69  GIO_UNISCRNMAP              struct { short [E_TABSZ]; } *
+        0x00004B6A  PIO_UNISCRNMAP              const struct { short [E_TABSZ]; } *
+        0x00004B66  GIO_UNIMAP                  struct unimapdesc *     // MORE // I-O
+        0x00004B67  PIO_UNIMAP                  const struct unimapdesc *       // MORE
+        0x00004B68  PIO_UNIMAPCLR               const struct unimapinit *
+        0x00004B44  KDGKBMODE                   int *
+        0x00004B45  KDSKBMODE                   int
+        0x00004B62  KDGKBMETA                   int *
+        0x00004B63  KDSKBMETA                   int
+        0x00004B64  KDGKBLED                    int *
+        0x00004B65  KDSKBLED                    int
+        0x00004B46  KDGKBENT                    struct kbentry *                // I-O
+        0x00004B47  KDSKBENT                    const struct kbentry *
+        0x00004B48  KDGKBSENT                   struct kbsentry *               // I-O
+        0x00004B49  KDSKBSENT                   const struct kbsentry *
+        0x00004B4A  KDGKBDIACR                  struct kbdiacrs *
+        0x00004B4B  KDSKBDIACR                  const struct kbdiacrs *
+        0x00004B4C  KDGETKEYCODE                struct kbkeycode *              // I-O
+        0x00004B4D  KDSETKEYCODE                const struct kbkeycode *
+        0x00004B4E  KDSIGACCEPT                 int
+
+    // <include/linux/lp.h>
+        0x00000601  LPCHAR                      int
+        0x00000602  LPTIME                      int
+        0x00000604  LPABORT                     int
+        0x00000605  LPSETIRQ                    int
+        0x00000606  LPGETIRQ                    int *
+        0x00000608  LPWAIT                      int
+        0x00000609  LPCAREFUL                   int
+        0x0000060A  LPABORTOPEN                 int
+        0x0000060B  LPGETSTATUS                 int *
+        0x0000060C  LPRESET                     void
+        0x0000060D  LPGETSTATS                  struct lp_stats *
+
+    // <include/linux/mroute.h>
+        0x000089E0  SIOCGETVIFCNT               struct sioc_vif_req *           // I-O
+        0x000089E1  SIOCGETSGCNT                struct sioc_sg_req *            // I-O
+
+    // <include/linux/mtio.h>
+        0x40086D01  MTIOCTOP                    const struct mtop *
+        0x801C6D02  MTIOCGET                    struct mtget *
+        0x80046D03  MTIOCPOS                    struct mtpos *
+        0x80206D04  MTIOCGETCONFIG              struct mtconfiginfo *
+        0x40206D05  MTIOCSETCONFIG              const struct mtconfiginfo *
+
+    // <include/linux/netrom.h>
+        0x000089E0  SIOCNRGETPARMS              struct nr_parms_struct *        // I-O
+        0x000089E1  SIOCNRSETPARMS              const struct nr_parms_struct *
+        0x000089E2  SIOCNRDECOBS                void
+        0x000089E3  SIOCNRRTCTL                 const int *
+
+    // <include/linux/sbpcd.h>
+        0x00009000  DDIOCSDBG                   const int *
+        0x00005382  CDROMAUDIOBUFSIZ            int
+
+    // <include/linux/scc.h>
+        0x00005470  TIOCSCCINI                  void
+        0x00005471  TIOCCHANINI                 const struct scc_modem *
+        0x00005472  TIOCGKISS                   struct ioctl_command *          // I-O
+        0x00005473  TIOCSKISS                   const struct ioctl_command *
+        0x00005474  TIOCSCCSTAT                 struct scc_stat *
+
+    // <include/linux/scsi.h>
+        0x00005382  SCSI_IOCTL_GET_IDLUN        struct { int [2]; } *
+        0x00005383  SCSI_IOCTL_TAGGED_ENABLE    void
+        0x00005384  SCSI_IOCTL_TAGGED_DISABLE   void
+        0x00005385  SCSI_IOCTL_PROBE_HOST       const int *                     // MORE
+
+    // <include/linux/smb_fs.h>
+        0x80027501  SMB_IOC_GETMOUNTUID         uid_t *
+
+    // <include/linux/sockios.h>
+        0x0000890B  SIOCADDRT                   const struct rtentry *          // MORE
+        0x0000890C  SIOCDELRT                   const struct rtentry *          // MORE
+        0x00008910  SIOCGIFNAME                 char []
+        0x00008911  SIOCSIFLINK                 void
+        0x00008912  SIOCGIFCONF                 struct ifconf *         // MORE // I-O
+        0x00008913  SIOCGIFFLAGS                struct ifreq *                  // I-O
+        0x00008914  SIOCSIFFLAGS                const struct ifreq *
+        0x00008915  SIOCGIFADDR                 struct ifreq *                  // I-O
+        0x00008916  SIOCSIFADDR                 const struct ifreq *
+        0x00008917  SIOCGIFDSTADDR              struct ifreq *                  // I-O
+        0x00008918  SIOCSIFDSTADDR              const struct ifreq *
+        0x00008919  SIOCGIFBRDADDR              struct ifreq *                  // I-O
+        0x0000891A  SIOCSIFBRDADDR              const struct ifreq *
+        0x0000891B  SIOCGIFNETMASK              struct ifreq *                  // I-O
+        0x0000891C  SIOCSIFNETMASK              const struct ifreq *
+        0x0000891D  SIOCGIFMETRIC               struct ifreq *                  // I-O
+        0x0000891E  SIOCSIFMETRIC               const struct ifreq *
+        0x0000891F  SIOCGIFMEM                  struct ifreq *                  // I-O
+        0x00008920  SIOCSIFMEM                  const struct ifreq *
+        0x00008921  SIOCGIFMTU                  struct ifreq *                  // I-O
+        0x00008922  SIOCSIFMTU                  const struct ifreq *
+        0x00008923  OLD_SIOCGIFHWADDR           struct ifreq *                  // I-O
+        0x00008924  SIOCSIFHWADDR               const struct ifreq *            // MORE
+        0x00008925  SIOCGIFENCAP                int *
+        0x00008926  SIOCSIFENCAP                const int *
+        0x00008927  SIOCGIFHWADDR               struct ifreq *                  // I-O
+        0x00008929  SIOCGIFSLAVE                void
+        0x00008930  SIOCSIFSLAVE                void
+        0x00008931  SIOCADDMULTI                const struct ifreq *
+        0x00008932  SIOCDELMULTI                const struct ifreq *
+        0x00008940  SIOCADDRTOLD                void
+        0x00008941  SIOCDELRTOLD                void
+        0x00008950  SIOCDARP                    const struct arpreq *
+        0x00008951  SIOCGARP                    struct arpreq *                 // I-O
+        0x00008952  SIOCSARP                    const struct arpreq *
+        0x00008960  SIOCDRARP                   const struct arpreq *
+        0x00008961  SIOCGRARP                   struct arpreq *                 // I-O
+        0x00008962  SIOCSRARP                   const struct arpreq *
+        0x00008970  SIOCGIFMAP                  struct ifreq *                  // I-O
+        0x00008971  SIOCSIFMAP                  const struct ifreq *
+
+    // <include/linux/soundcard.h>
+        0x00005100  SNDCTL_SEQ_RESET            void
+        0x00005101  SNDCTL_SEQ_SYNC             void
+        0xC08C5102  SNDCTL_SYNTH_INFO           struct synth_info *             // I-O
+        0xC0045103  SNDCTL_SEQ_CTRLRATE         int *                           // I-O
+        0x80045104  SNDCTL_SEQ_GETOUTCOUNT      int *
+        0x80045105  SNDCTL_SEQ_GETINCOUNT       int *
+        0x40045106  SNDCTL_SEQ_PERCMODE         void
+        0x40285107  SNDCTL_FM_LOAD_INSTR        const struct sbi_instrument *
+        0x40045108  SNDCTL_SEQ_TESTMIDI         const int *
+        0x40045109  SNDCTL_SEQ_RESETSAMPLES     const int *
+        0x8004510A  SNDCTL_SEQ_NRSYNTHS         int *
+        0x8004510B  SNDCTL_SEQ_NRMIDIS          int *
+        0xC074510C  SNDCTL_MIDI_INFO            struct midi_info *              // I-O
+        0x4004510D  SNDCTL_SEQ_THRESHOLD        const int *
+        0xC004510E  SNDCTL_SYNTH_MEMAVL         int *                           // I-O
+        0x4004510F  SNDCTL_FM_4OP_ENABLE        const int *
+        0xCFB85110  SNDCTL_PMGR_ACCESS          struct patmgr_info *            // I-O
+        0x00005111  SNDCTL_SEQ_PANIC            void
+        0x40085112  SNDCTL_SEQ_OUTOFBAND        const struct seq_event_rec *
+        0xC0045401  SNDCTL_TMR_TIMEBASE         int *                           // I-O
+        0x00005402  SNDCTL_TMR_START            void
+        0x00005403  SNDCTL_TMR_STOP             void
+        0x00005404  SNDCTL_TMR_CONTINUE         void
+        0xC0045405  SNDCTL_TMR_TEMPO            int *                           // I-O
+        0xC0045406  SNDCTL_TMR_SOURCE           int *                           // I-O
+        0x40045407  SNDCTL_TMR_METRONOME        const int *
+        0x40045408  SNDCTL_TMR_SELECT           int *                           // I-O
+        0xCFB85001  SNDCTL_PMGR_IFACE           struct patmgr_info *            // I-O
+        0xC0046D00  SNDCTL_MIDI_PRETIME         int *                           // I-O
+        0xC0046D01  SNDCTL_MIDI_MPUMODE         const int *
+        0xC0216D02  SNDCTL_MIDI_MPUCMD          struct mpu_command_rec *        // I-O
+        0x00005000  SNDCTL_DSP_RESET            void
+        0x00005001  SNDCTL_DSP_SYNC             void
+        0xC0045002  SNDCTL_DSP_SPEED            int *                           // I-O
+        0xC0045003  SNDCTL_DSP_STEREO           int *                           // I-O
+        0xC0045004  SNDCTL_DSP_GETBLKSIZE       int *                           // I-O
+        0xC0045006  SOUND_PCM_WRITE_CHANNELS    int *                           // I-O
+        0xC0045007  SOUND_PCM_WRITE_FILTER      int *                           // I-O
+        0x00005008  SNDCTL_DSP_POST             void
+        0xC0045009  SNDCTL_DSP_SUBDIVIDE        int *                           // I-O
+        0xC004500A  SNDCTL_DSP_SETFRAGMENT      int *                           // I-O
+        0x8004500B  SNDCTL_DSP_GETFMTS          int *
+        0xC0045005  SNDCTL_DSP_SETFMT           int *                           // I-O
+        0x800C500C  SNDCTL_DSP_GETOSPACE        struct audio_buf_info *
+        0x800C500D  SNDCTL_DSP_GETISPACE        struct audio_buf_info *
+        0x0000500E  SNDCTL_DSP_NONBLOCK         void
+        0x80045002  SOUND_PCM_READ_RATE         int *
+        0x80045006  SOUND_PCM_READ_CHANNELS     int *
+        0x80045005  SOUND_PCM_READ_BITS         int *
+        0x80045007  SOUND_PCM_READ_FILTER       int *
+        0x00004300  SNDCTL_COPR_RESET           void
+        0xCFB04301  SNDCTL_COPR_LOAD            const struct copr_buffer *
+        0xC0144302  SNDCTL_COPR_RDATA           struct copr_debug_buf *         // I-O
+        0xC0144303  SNDCTL_COPR_RCODE           struct copr_debug_buf *         // I-O
+        0x40144304  SNDCTL_COPR_WDATA           const struct copr_debug_buf *
+        0x40144305  SNDCTL_COPR_WCODE           const struct copr_debug_buf *
+        0xC0144306  SNDCTL_COPR_RUN             struct copr_debug_buf *         // I-O
+        0xC0144307  SNDCTL_COPR_HALT            struct copr_debug_buf *         // I-O
+        0x4FA44308  SNDCTL_COPR_SENDMSG         const struct copr_msg *
+        0x8FA44309  SNDCTL_COPR_RCVMSG          struct copr_msg *
+        0x80044D00  SOUND_MIXER_READ_VOLUME     int *
+        0x80044D01  SOUND_MIXER_READ_BASS       int *
+        0x80044D02  SOUND_MIXER_READ_TREBLE     int *
+        0x80044D03  SOUND_MIXER_READ_SYNTH      int *
+        0x80044D04  SOUND_MIXER_READ_PCM        int *
+        0x80044D05  SOUND_MIXER_READ_SPEAKER    int *
+        0x80044D06  SOUND_MIXER_READ_LINE       int *
+        0x80044D07  SOUND_MIXER_READ_MIC        int *
+        0x80044D08  SOUND_MIXER_READ_CD         int *
+        0x80044D09  SOUND_MIXER_READ_IMIX       int *
+        0x80044D0A  SOUND_MIXER_READ_ALTPCM     int *
+        0x80044D0B  SOUND_MIXER_READ_RECLEV     int *
+        0x80044D0C  SOUND_MIXER_READ_IGAIN      int *
+        0x80044D0D  SOUND_MIXER_READ_OGAIN      int *
+        0x80044D0E  SOUND_MIXER_READ_LINE1      int *
+        0x80044D0F  SOUND_MIXER_READ_LINE2      int *
+        0x80044D10  SOUND_MIXER_READ_LINE3      int *
+        0x80044D1C  SOUND_MIXER_READ_MUTE       int *
+        0x80044D1D  SOUND_MIXER_READ_ENHANCE    int *
+        0x80044D1E  SOUND_MIXER_READ_LOUD       int *
+        0x80044DFF  SOUND_MIXER_READ_RECSRC     int *
+        0x80044DFE  SOUND_MIXER_READ_DEVMASK    int *
+        0x80044DFD  SOUND_MIXER_READ_RECMASK    int *
+        0x80044DFB  SOUND_MIXER_READ_STEREODEVS int *
+        0x80044DFC  SOUND_MIXER_READ_CAPS       int *
+        0xC0044D00  SOUND_MIXER_WRITE_VOLUME    int *                           // I-O
+        0xC0044D01  SOUND_MIXER_WRITE_BASS      int *                           // I-O
+        0xC0044D02  SOUND_MIXER_WRITE_TREBLE    int *                           // I-O
+        0xC0044D03  SOUND_MIXER_WRITE_SYNTH     int *                           // I-O
+        0xC0044D04  SOUND_MIXER_WRITE_PCM       int *                           // I-O
+        0xC0044D05  SOUND_MIXER_WRITE_SPEAKER   int *                           // I-O
+        0xC0044D06  SOUND_MIXER_WRITE_LINE      int *                           // I-O
+        0xC0044D07  SOUND_MIXER_WRITE_MIC       int *                           // I-O
+        0xC0044D08  SOUND_MIXER_WRITE_CD        int *                           // I-O
+        0xC0044D09  SOUND_MIXER_WRITE_IMIX      int *                           // I-O
+        0xC0044D0A  SOUND_MIXER_WRITE_ALTPCM    int *                           // I-O
+        0xC0044D0B  SOUND_MIXER_WRITE_RECLEV    int *                           // I-O
+        0xC0044D0C  SOUND_MIXER_WRITE_IGAIN     int *                           // I-O
+        0xC0044D0D  SOUND_MIXER_WRITE_OGAIN     int *                           // I-O
+        0xC0044D0E  SOUND_MIXER_WRITE_LINE1     int *                           // I-O
+        0xC0044D0F  SOUND_MIXER_WRITE_LINE2     int *                           // I-O
+        0xC0044D10  SOUND_MIXER_WRITE_LINE3     int *                           // I-O
+        0xC0044D1C  SOUND_MIXER_WRITE_MUTE      int *                           // I-O
+        0xC0044D1D  SOUND_MIXER_WRITE_ENHANCE   int *                           // I-O
+        0xC0044D1E  SOUND_MIXER_WRITE_LOUD      int *                           // I-O
+        0xC0044DFF  SOUND_MIXER_WRITE_RECSRC    int *                           // I-O
+
+    // <include/linux/umsdos_fs.h>
+        0x000004D2  UMSDOS_READDIR_DOS          struct umsdos_ioctl *           // I-O
+        0x000004D3  UMSDOS_UNLINK_DOS           const struct umsdos_ioctl *
+        0x000004D4  UMSDOS_RMDIR_DOS            const struct umsdos_ioctl *
+        0x000004D5  UMSDOS_STAT_DOS             struct umsdos_ioctl *           // I-O
+        0x000004D6  UMSDOS_CREAT_EMD            const struct umsdos_ioctl *
+        0x000004D7  UMSDOS_UNLINK_EMD           const struct umsdos_ioctl *
+        0x000004D8  UMSDOS_READDIR_EMD          struct umsdos_ioctl *           // I-O
+        0x000004D9  UMSDOS_GETVERSION           struct umsdos_ioctl *
+        0x000004DA  UMSDOS_INIT_EMD             void
+        0x000004DB  UMSDOS_DOS_SETUP            const struct umsdos_ioctl *
+        0x000004DC  UMSDOS_RENAME_DOS           const struct umsdos_ioctl *
+
+    // <include/linux/vt.h>
+        0x00005600  VT_OPENQRY                  int *
+        0x00005601  VT_GETMODE                  struct vt_mode *
+        0x00005602  VT_SETMODE                  const struct vt_mode *
+        0x00005603  VT_GETSTATE                 struct vt_stat *
+        0x00005604  VT_SENDSIG                  void
+        0x00005605  VT_RELDISP                  int
+        0x00005606  VT_ACTIVATE                 int
+        0x00005607  VT_WAITACTIVE               int
+        0x00005608  VT_DISALLOCATE              int
+        0x00005609  VT_RESIZE                   const struct vt_sizes *
+        0x0000560A  VT_RESIZEX                  const struct vt_consize *
+        Einige  ioctls  benötigen  einen  Pointer  auf   eine   Struktur,   die
+        zusätzliche Pointer enthält.  Diese sind hier in alphabetischer Reihen‐
+        folge dokumentiert.
+
+        CDROMREADAUDIO   benötigt    eine    Eingabe-Pointer    const    struct
+        cdrom_read_audio  *.   Das  Feld  buf zeigt auf einen Ausgabepuffer der
+        Länge nframes * CD_FRAMESIZE_RAW.
+
+        CDROMREADCOOKED,  CDROMREADMODE1,   CDROMREADMODE2   und   CDROMREADRAW
+        benötigen einen Eingabe-Pointer const struct cdrom_msf *.  Sie benutzen
+        denselben Pointer als Ausgabe-Pointer auf char [].   Die  Länge  ändert
+        sich  durch  Anforderung.   Bei  CDROMREADMODE1  benutzen  die  meisten
+        Treiber  CD_FRAMESIZE,  jedoch  benutzt  der   Optics   Storage-Treiber
+        stattdessen OPT_BLOCKSIZE (beide haben den numerischen Wert 2048).
+            CDROMREADCOOKED             char [CD_FRAMESIZE]
+            CDROMREADMODE1              char [CD_FRAMESIZE oder OPT_BLOCKSIZE]
+            CDROMREADMODE2              char [CD_FRAMESIZE_RAW0]
+            CDROMREADRAW                char [CD_FRAMESIZE_RAW]
+        EQL_ENSLAVE, EQL_EMANCIPATE, EQL_GETSLAVECFG, EQL_SETSLAVECFG,
+        EQL_GETMASTERCFG und EQL_SETMASTERCFG benötigen eine struct ifreq *.
+        Das Feld ifr_data ist ein Pointer auf eine weitere Struktur wie folgt:
+            EQL_ENSLAVE                 const struct slaving_request *
+            EQL_EMANCIPATE              const struct slaving_request *
+            EQL_GETSLAVECFG             struct slave_config *           // I-O
+            EQL_SETSLAVECFG             const struct slave_config *
+            EQL_GETMASTERCFG            struct master_config *
+            EQL_SETMASTERCFG            const struct master_config *
+        FDRAWCMD  benötigt  eine  struct  floppy_raw_cmd  *.   Wenn  flags  &
+        FD_RAW_WRITE nicht Null ist, dann zeigt data  auf  einen  Eingabepuffer
+        der  Länge length.  Wenn flags & FD_RAW_READ nicht Null ist, dann zeigt
+        data auf einen Ausgabepuffer der Länge length.
+
+        GIO_FONTX und  PIO_FONTX  benötigen  eine  struct  console_font_desc  *
+        beziehungsweise  eine const struct console_font_desc *.  chardata zeigt
+        auf einen Puffer von char [charcount].  Dies ist ein Ausgabepuffer  für
+        GIO_FONTX und ein Eingabepuffer für PIO_FONTX.
+
+        GIO_UNIMAP   und   PIO_UNIMAP   benötigen   eine  struct  unimapdesc  *
+        beziehungsweise eine const struct  unimapdesc  *.   entries  zeigt  auf
+        einen Puffer von struct unipair [entry_ct].  Dies ist ein Ausgabepuffer
+        für GIO_UNIMAP und ein Eingabepuffer für PIO_UNIMAP.
+
+        KDADDIO, KDDELIO, KDDISABIO und KDENABIO geben Zugriff frei oder  sper‐
+        ren Zugriff auf I/O-Ports.  Sie sind nötige Alternativen zu ioperm.
+
+        KDMAPDISP  und KDUNMAPDISP geben frei oder sperren Memory-Mappings oder
+        Zugriff auf I/O-Ports.  Sie sind nicht im Kernel implementiert.
+
+        SCSI_IOCTL_PROBE_HOST benötigt einen Eingabe-Pointer const int  *,  der
+        eine  Länge ist.  Es benutzt den selben Pointer als Ausgabe-Pointer auf
+        einen Puffer char [] dieser Länge.
+
+        SIOCADDRT und SIOCDELRT benötigen einen Eingabe-Pointer, dessen Typ vom
+        Protokoll abhängt:
+            Die meisten Protokolle      const struct rtentry *
+            AX.25                       const struct ax25_route *
+            NET/ROM                     const struct nr_route_struct *
+        SIOCGIFCONF  benötigt eine struct ifconf *.  Das Feld ifc_buf zeigt auf
+        einen Puffer der Länge ifc_len Byte, wohinein der Kernel eine Liste des
+        Typs struct ifreq [] schreibt.
+
+        SIOCSIFHWADDR  benötigt einen Eingabe-Pointer, dessen Typ vom Protokoll
+        abhängt:
+            Die meisten Protokolle      const struct ifreq *
+            AX.25                       const char [AX25_ADDR_LEN]
+        TIOCLINUX benötigt eine const char *.  Es  benutzt  dies,  um  zwischen
+        diversen  unabhängigen  Fällen  zu unterscheiden.  In der Tabelle unten
+        bedeutet »N + foo« so viel wie »foo« nach einem  N-byte-Block.   struct
+        selection ist definiert in drivers/char/selection.c.
+            TIOCLINUX-2                 1 + const struct selection *
+            TIOCLINUX-3                 void
+            TIOCLINUX-4                 void
+            TIOCLINUX-5                 4 + const struct { long [8]; } *
+            TIOCLINUX-6                 char *
+            TIOCLINUX-7                 char *
+            TIOCLINUX-10                1 + const char *
+
+    Doppelte ioctls
+        Diese  Liste  enthält  keine  ioctls  der  Gruppen  SIOCDEVPRIVATE  und
+        SIOCPROTOPRIVATE.
+        0x00000001  FDSETPRM                    FIBMAP
+        0x00000002  FDDEFPRM                    FIGETBSZ
+        0x00005382  CDROMAUDIOBUFSIZ            SCSI_IOCTL_GET_IDLUN
+        0x00005402  SNDCTL_TMR_START            TCSETS
+        0x00005403  SNDCTL_TMR_STOP             TCSETSW
+        0x00005404  SNDCTL_TMR_CONTINUE         TCSETSF
+
+=======
+
+Powered by the Ubuntu Manpage Repository generator
+Maintained by Dustin Kirkland
diff --git a/linux_emul_base/doc/linuxemu.txt b/linux_emul_base/doc/linuxemu.txt
new file mode 100644 (file)
index 0000000..0c587ee
--- /dev/null
@@ -0,0 +1,117 @@
+SYSCALLS
+
+on linux/i386, the machine code puts the arguments of a syscall in the
+registers AX, BX, CX, DX, DI, SI and makes a soft interrupt 0x80.
+
+as the plan9 kernel doesnt care about the interrupt vector 0x80 it
+sends a note to the process that trapped and if not handled kills it.
+in a note handler, it is possible to access the machine state of the
+process at the time the trap/interrupt happened through the ureg argument.
+
+in linuxemu, we install a note handler that checks if the trap was a
+linux syscall and call our handler function from our systab. 
+
+after our syscall handler returned, we move the program counter
+in the machine state structure after the int 0x80 instruction and
+continue execution by accepting the note as handled with a call to
+noted(NCONT).
+
+to do automatic conversion to a plan9 function call, the number of
+arguments and the function name of the handler must be known.  this
+information is provided by the linuxcalltab input file that is fed through
+linuxcalltab.awk to build the necessary tables.
+
+the linux specific syscall handling and argument conversion is done in
+linuxcall.c only.  the idea is to later add support for other syscall
+personalities like bsd without having to change the handler code.
+
+
+MEMORY
+
+unlike shared libraries which are position independent, binaries have to be
+loaded to a fixed address location. (elf supports position independent
+programs that can be loaded everywhere, but it is not used on i386)
+
+the emulator doesnt need to load and relocate shared libraries itself. this is
+done by the runtime linker (/lib/ld-linux.so). it just needs to load
+the binary and the runtime linker to their preferred location and jump into
+the entry point. then the runtime linker will parse the elf sections of the
+binary and call mmap to load further shared libraries.
+
+the first thing we need is an implementation of mmap that allows us
+to copy files to fixed addresses into memory. to do that on plan9,
+segments are used.
+
+it is not possible to create a segment for every memory mapping
+because plan9 limits the number of segments per process to a small
+number.  instead we create a fixed number of segments and
+expand/shrink them on demand.  the linux stack area is fixed size and
+uses the fact that plan9 doesnt allocate physical memory until pages
+are touched.
+
+here are 3 segments created for a linux process:
+
+"private" is used for all MAP_PRIVATE mappings and can be shared if
+processes run in the same address space. code, data and files are mapped there.
+
+"shared" for shared memory mappings.
+
+"stack" is like "private", but lives just below the plan9 stack segment.
+this is needed because glibc expands the stack down by mmap() pages
+below the current stack area. we cannot use the plan9 stack segment
+because that segment is copied on rfork and is never shared between
+processes.
+
+the data structures of the emulator itself ("kernel memory") need to
+be shared for all processes even if the linux process runs in its own
+private address space, so the plan9 Bss and Data segments are made
+shared on startup: the contents of the original segment are copied into a
+temporary file, the segment is removed with segdetach(), a new shared segment
+is created at the same place with segattach() and the data is copied back in
+from the file.
+
+with this memory layout, it is possible for the linux process to damage
+data structures in the emulator. but we seem to be lucky for now :)
+
+
+USER PROCESSES (UPROCS)
+
+linuxemu does not switch and schedule linux processes itself. every user
+process has its own plan9 process. memory sharing semantics are translated
+to rfork flags on fork/clone.
+
+we have a global process table of Uproc structures to track states and
+resources for all user processes:
+
+fs: filesystem mount table
+fdtab: the filedescriptor table
+mem: memory mappings
+signal: signal handler and queue
+trace: debug trace buffer
+
+resources that can be shared are reference counted and get freed when
+the last process referencing them exits.
+
+
+KERNEL PROCESSES (KPROCS)
+
+if linuxemu needs to defer work or do asynchronous i/o it can spawn a
+kernel process with kprocfork. kernel processes dont have a Uproc
+structure associated and have the userspace memory segments detached,
+therefore they cant access userspace memory.
+
+bufprocs and timers are implemented with kernel processes.
+
+
+DEVICES
+
+earlier versions mapped linux files directly to plan9 files.  this made
+the implementation of ioctls, symlinks, remove on close, and
+select/poll hard and also had problems with implementing fork sharing
+semantics.
+
+current linuxemu does it all by itself.  there is a global device table
+of Udev structures.  devices can implement all i/o related syscalls by
+providing a function pointer in their Udev.  when a device has to deal
+with asynchronous io on real plan9 files it uses bufprocs.
+
+
diff --git a/linux_emul_base/doc/todo.txt b/linux_emul_base/doc/todo.txt
new file mode 100644 (file)
index 0000000..a24cb3b
--- /dev/null
@@ -0,0 +1,14 @@
+- AF_INET6
+       i dont need that too yet
+
+- VDSO
+       we could gain quite a performance boost when we can avoid
+       the trapping overhead and let linux-code directly jump
+       into the linuxemu handler.
+
+- dsp / mixer
+       implement mixer ioctls in devdsp
+       mmap and trigger caps for quake
+
+- ptrace
+       implement ptrace support so we can use native debugger
diff --git a/linux_emul_base/dspdev.c b/linux_emul_base/dspdev.c
new file mode 100644 (file)
index 0000000..822ce51
--- /dev/null
@@ -0,0 +1,377 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* fixed parameters of the emulated dsp device */
+enum {
+       FREQUENCY = 44100,      /* sample rate assumed when /dev/volume does not report one */
+       CHANNELS = 2,   /* channel count of the stream handed to the audio file */
+       DELAY = 100,    /* ms of queued audio tolerated before writedsp sleeps; also sizes GETOSPACE */
+       FRAGSIZE = 4096,        /* fragment size in bytes reported by the ioctls */
+};
+
+typedef struct Chan Chan;
+typedef struct DSP DSP;
+
+/* resampler state of one output channel, carried from one resample() call to the next */
+struct Chan
+{
+       ulong   phase;  /* input position: sample index in the bits above 16, fraction in the low 16 bits */
+       int             last;   /* last input sample read; interpolation partner for input position 0 */
+};
+
+/* per open-file state of the emulated dsp device */
+struct DSP
+{
+       Ufile;  /* unnamed member: its fields (fd, path, ref, mode, dev) are accessed directly through the DSP */
+
+       int             channels;               /* number of channels (2 for stereo) */
+       int             freq;                   /* frequency of sound stream */
+
+       int             rfreq;          /* frequency of /dev/audio */
+
+       uchar   *buf;                   /* resampling */
+       ulong   nbuf;                   /* allocated size of buf in bytes */
+       Chan    chan[CHANNELS]; 
+
+       vlong   time;                   /* time point of the last sample in device buffer */
+
+       ulong   written;                /* number of bytes written to dsp */
+       ulong   written2;               /* same as written, will be reset on every GETOPTR ioctl */
+};
+
+/*
+ * close hook: releases the resampling buffer and closes the
+ * descriptor of the underlying audio file.  always returns 0.
+ */
+static int
+closedsp(Ufile *file)
+{
+       DSP *d;
+
+       d = (DSP*)file;
+       trace("dsp: closedsp");
+
+       free(d->buf);
+       close(d->fd);
+       return 0;
+}
+
+/* poll hook: the device is always reported as ready for output */
+static int
+polldsp(Ufile *, void *)
+{
+       int events;
+
+       events = POLLOUT;
+       return events;
+}
+
+/* read hook: reading is not implemented, every read reports 0 bytes */
+static int
+readdsp(Ufile *, void *, int, vlong)
+{
+       int nread;
+
+       nread = 0;              /* not implemented */
+       return nread;
+}
+
+/*
+ * resample one channel of signed 16 bit little endian samples using
+ * linear interpolation between neighbouring input samples.
+ *
+ *     c       channel state carried over between calls (phase, last sample)
+ *     src     first input sample of the channel; samples are sstep bytes apart
+ *     dst     first output sample of the channel; samples are dstep bytes apart
+ *     delta   input samples consumed per output sample, scaled by 1<<16
+ *     count   number of input samples available at src
+ *
+ * returns the number of output samples stored at dst.
+ *
+ * NOTE(review): pos is phase>>16, so if ulong is 32 bits wide pos can
+ * never reach a count of 0x10000 or more -- confirm callers keep count
+ * below that.
+ */
+static int
+resample(Chan *c, uchar *src, uchar *dst, int sstep, int dstep, ulong delta, ulong count)
+{
+       int last, val, out;
+       ulong phase, pos;
+       uchar *dp, *sp;
+
+       dp = dst;
+       last = val = c->last;
+       phase = c->phase;
+       /* upper bits of phase: index of the current input sample */
+       pos = phase >> 16;
+       while(pos < count){
+               sp = src + sstep*pos;
+               /* assemble the little endian sample and sign extend it from 16 bits */
+               val = sp[0] | (sp[1] << 8);
+               val = (val & 0x7FFF) - (val & 0x8000);
+               if(pos){
+                       /* previous sample comes from this buffer; at pos 0 the saved c->last is used */
+                       sp -= sstep;
+                       last = sp[0] | (sp[1] << 8);
+                       last = (last & 0x7FFF) - (last & 0x8000);
+               }
+               /*
+                * interpolate with the low 16 bits of phase as weight.  phase is
+                * ulong, so the product is computed unsigned; only the low 16 bits
+                * of out are stored below.
+                */
+               out = last + (((val - last) * (phase & 0xFFFF)) >> 16);
+               dp[0] = out;
+               dp[1] = out >> 8;
+               dp += dstep;
+               phase += delta;
+               pos = phase >> 16;
+       }
+       /* remember the last input sample read for the next call */
+       c->last = val;
+       if(delta < 0x10000){
+               /* step below one input sample: keep only the fractional part */
+               c->phase = phase & 0xFFFF;
+       } else {
+               /* rebase the position onto the start of the next buffer */
+               c->phase = phase - (count << 16);
+       }
+       return (dp - dst) / dstep;
+}
+
+/*
+ * convert len bytes of client audio at buf (dsp->channels channels of
+ * 16 bit samples at dsp->freq Hz) into the CHANNELS channel,
+ * dsp->rfreq Hz stream that is written to the audio file.
+ *
+ * on success *out points to the converted data and its length in bytes
+ * is returned.  when no conversion is needed *out is buf itself,
+ * otherwise it is dsp->buf, which is grown on demand.  returns -EINVAL
+ * when the configured rates or channel count cannot be converted.
+ */
+static int
+convertout(DSP *dsp, uchar *buf, int len, uchar **out)
+{
+       int ret, ch;
+       ulong count, delta;
+       vlong step;
+
+       /* no conversion required? */
+       if(dsp->freq == dsp->rfreq && dsp->channels == CHANNELS){
+               *out = buf;
+               return len;
+       }
+
+       /* each of these ends up as a divisor below */
+       if(dsp->freq <= 0 || dsp->rfreq <= 0 || dsp->channels <= 0)
+               return -EINVAL;
+
+       /*
+        * delta is the number of input samples to
+        * produce one output sample. scaled by 16 bit to
+        * get fractional part.  the shift is done in vlong so that
+        * rates of 65536 Hz and above cannot wrap around when ulong
+        * is 32 bits wide.
+        */
+       step = ((vlong)dsp->freq << 16) / dsp->rfreq;
+       if(step <= 0 || step > 0x7FFFFFFF)
+               return -EINVAL;
+       delta = step;
+       count = len / (2 * dsp->channels);
+
+       /*
+        * get maximum required size of output bufer. this is not exact!
+        * number of output samples depends on phase!
+        */
+       ret = (((count << 16) + delta-1) / delta) * 2*CHANNELS;
+       if(ret > dsp->nbuf){
+               free(dsp->buf);
+               dsp->buf = kmalloc(ret);
+               dsp->nbuf = ret;
+       }
+
+       /*
+        * every output channel is fed from input channel ch % dsp->channels,
+        * so a mono stream is duplicated onto both outputs.
+        */
+       for(ch=0; ch < CHANNELS; ch++)
+               ret = resample(dsp->chan + ch,
+                       buf + 2*(ch % dsp->channels),
+                       dsp->buf + 2*ch,
+                       2*dsp->channels,
+                       2*CHANNELS,
+                       delta,
+                       count);
+
+       *out = dsp->buf;
+       return ret * 2*CHANNELS;
+}
+
+/*
+ * write hook: converts the client data to the format of the audio
+ * file, writes it out and paces the caller.
+ *
+ * dsp->time tracks the time point of the last sample queued in the
+ * device.  when it lies more than DELAY ms in the future the caller is
+ * put to sleep for the excess; when it lies in the past the clock and
+ * the byte counters are restarted.
+ *
+ * returns len on success, the result of convertout when that is <= 0,
+ * or mkerror() when the write to the audio file fails.
+ */
+static int
+writedsp(Ufile *file, void *buf, int len, vlong)
+{
+       DSP *dsp;
+       uchar *out;
+       vlong now;
+       int n, ahead;
+
+       dsp = (DSP*)file;
+
+       n = convertout(dsp, buf, len, &out);
+       if(n <= 0)
+               return n;
+
+       n = write(dsp->fd, out, n);
+       if(n < 0)
+               return mkerror();
+
+       now = nsec();
+       if(dsp->time >= now){
+               /* milliseconds of audio still queued ahead of us */
+               ahead = (dsp->time - now) / 1000000;
+               if(ahead > DELAY)
+                       sleep(ahead - DELAY);
+       } else {
+               /* device ran dry: restart the clock and the counters */
+               dsp->time = now;
+               dsp->written = 0;
+               dsp->written2 = 0;
+       }
+
+       /* advance the clock by the play time of the bytes just written */
+       dsp->time += ((1000000000LL) * n / (dsp->rfreq * 2*CHANNELS));
+       dsp->written += len;
+       dsp->written2 += len;
+
+       return len;
+}
+
+/* sample format code handed back by the GETFMTS and SETFMT ioctls */
+enum
+{
+       AFMT_S16_LE = 0x10,
+};
+
+/*
+ * ioctl hook emulating a subset of the OSS dsp requests.
+ *
+ * cmd is the raw linux request number and arg the pointer that comes
+ * with it.  returns 0 on success or a negative linux errno; requests
+ * that are not handled yield -ENOTTY.  sample rates and channel counts
+ * below 1 are rejected with -EINVAL because they are later used as
+ * divisors when converting written data.
+ */
+static int
+ioctldsp(Ufile *file, int cmd, void *arg)
+{
+       DSP *dsp = (DSP*)file;
+       int ret, i, val;
+       vlong now;
+
+       ret = 0;
+       switch(cmd){
+       default:
+               trace("dsp: unknown ioctl %lux %p", (ulong)cmd, arg);
+               ret = -ENOTTY;
+               break;
+
+       case 0xC004500A:
+               /* accepted but ignored: the fragment size stays FRAGSIZE */
+               trace("dsp: SNDCTL_DSP_SETFRAGMENT(%lux)", *(ulong*)arg);
+               break;
+
+       case 0xC0045004:
+               trace("dsp: SNDCTL_DSP_GETBLKSIZE");
+               *((int*)arg) = FRAGSIZE;
+               break;
+
+       case 0x800c5011:
+               trace("dsp: SNDCTL_DSP_GETIPTR");
+               ret = -EPERM;
+               break;
+
+       case 0x800c5012:
+               trace("dsp: SNDCTL_DSP_GETOPTR");
+               ((int*)arg)[0] = dsp->written;                  // Total # of bytes processed
+               ((int*)arg)[1] = dsp->written2 / FRAGSIZE;      // # of fragment transitions since last time
+               dsp->written2 = 0;
+               ((int*)arg)[2] = 0;                             // Current DMA pointer value
+               break;
+
+       case 0x8010500D:
+               trace("dsp: SNDCTL_DSP_GETISPACE");
+               ret = -EPERM;
+               break;
+
+       case 0x8010500C:
+               trace("dsp: SNDCTL_DSP_GETOSPACE");
+               /* pretend to have a buffer holding DELAY ms of client data */
+               i = (2 * dsp->channels) * ((dsp->freq*DELAY)/1000);
+               ((int*)arg)[1] = i / FRAGSIZE;                  // fragstot
+               ((int*)arg)[2] = FRAGSIZE;                      // fragsize
+               now = nsec();
+               if(now < dsp->time){
+                       /* subtract what is still queued in the device */
+                       i -= ((2 * dsp->channels) * (((dsp->time - now) * (vlong)dsp->freq) / 1000000000));
+                       if(i < 0)
+                               i = 0;
+               }
+               ((int*)arg)[0] = i / FRAGSIZE;                  // available fragment count
+               ((int*)arg)[3] = i;                             // available space in bytes
+               break;
+
+       case 0x8004500B:
+               trace("dsp: SNDCTL_DSP_GETFMTS(%d)", *(int*)arg);
+               *(int*)arg = AFMT_S16_LE;
+               break;
+
+       case 0x8004500F:
+               trace("dsp: SNDCTL_DSP_GETCAPS");
+               *(int*)arg = 0x400;
+               break;
+
+       case 0xC0045005:
+               /* whatever was asked for, the format stays AFMT_S16_LE */
+               trace("dsp: SNDCTL_DSP_SETFMT(%d)", *(int*)arg);
+               *(int*)arg = AFMT_S16_LE;
+               break;
+
+       case 0xC0045006:
+               val = *(int*)arg;
+               trace("dsp: SOUND_PCM_WRITE_CHANNELS(%d)", val);
+               /* the channel count is a divisor in convertout and GETOSPACE */
+               if(val < 1){
+                       ret = -EINVAL;
+                       break;
+               }
+               dsp->channels = val;
+               break;
+
+       case 0xC0045003:
+               /* stereo is always selected and reported back */
+               trace("dsp: SNDCTL_DSP_STEREO(%d)", *(int*)arg);
+               dsp->channels = 2;
+               *(int*)arg = 1;
+               break;
+
+       case 0xC0045002:
+               val = *(int*)arg;
+               trace("dsp: SNDCTL_DSP_SPEED(%d)", val);
+               /* a rate below 1 would give a zero resampling step */
+               if(val < 1){
+                       ret = -EINVAL;
+                       break;
+               }
+               dsp->freq = val;
+               /* restart the resampler for the new rate */
+               for(i=0; i<CHANNELS; i++){
+                       dsp->chan[i].phase = 0;
+                       dsp->chan[i].last = 0;
+               }
+               break;
+
+       case 0x00005000:
+               trace("dsp: SNDCTL_DSP_RESET");
+               break;
+
+       case 0x00005001:
+               trace("dsp: SNDCTL_DSP_SYNC");
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * determine the output sample rate of the audio device by looking for
+ * a "speed out " entry in /dev/volume.  returns FREQUENCY when the
+ * file cannot be opened or read, has no such entry, or the entry is not
+ * a positive number (the rate is used as a divisor by the callers).
+ */
+static int
+getaudiofreq(void)
+{
+       int ret, n, fd, speed;
+       char buf[1024];
+       char *p;
+
+       ret = FREQUENCY;
+       if((fd = open("/dev/volume", OREAD)) < 0)
+               return ret;
+       if((n = read(fd, buf, sizeof(buf)-1)) > 0){
+               buf[n] = 0;
+               if((p = strstr(buf, "speed out ")) != nil){
+                       /* skip the 10 characters of the "speed out " label */
+                       speed = atoi(p + 10);
+                       if(speed > 0)
+                               ret = speed;
+               }
+       }
+       close(fd);
+       return ret;
+}
+
+/*
+ * open hook: for the paths /dev/dsp and /dev/dsp0 opens /dev/audio for
+ * writing and hands back a new DSP through *pf.  the stream starts
+ * out with CHANNELS channels at the rate reported by getaudiofreq(),
+ * so no conversion takes place until the client changes the settings.
+ *
+ * returns 0 on success, -ENOENT for any other path, mkerror() when
+ * /dev/audio cannot be opened and -ENOMEM when the DSP cannot be
+ * allocated.
+ */
+int opendsp(char *path, int mode, int, Ufile **pf)
+{
+       DSP *dsp;
+       int freq;
+       int fd;
+
+       if(strcmp(path, "/dev/dsp")==0 || strcmp(path, "/dev/dsp0")==0){
+               if((fd = open("/dev/audio", OWRITE)) < 0)
+                       return mkerror();
+
+               freq = getaudiofreq();
+               dsp = mallocz(sizeof(DSP), 1);
+               if(dsp == nil){
+                       /* dont leak the descriptor of the audio file */
+                       close(fd);
+                       return -ENOMEM;
+               }
+               dsp->ref = 1;
+               dsp->mode = mode;
+               dsp->dev = DSPDEV;
+               dsp->fd = fd;
+               dsp->path = kstrdup(path);
+               dsp->rfreq = freq;
+               dsp->freq = freq;
+               dsp->channels = CHANNELS;
+
+               *pf = dsp;
+               return 0;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * fstat hook: describes the open dsp as a character device with mode
+ * 0666, owned by the current process, of size 0, with an inode number
+ * hashed from its path.  always returns 0.
+ */
+static int
+fstatdsp(Ufile *f, Ustat *s)
+{
+       s->mode = 0666 | S_IFCHR;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->ino = hashpath(f->path);
+       s->size = 0;
+       return 0;
+}
+
+/*
+ * stat hook: fills *s for the paths /dev/dsp and /dev/dsp0 (character
+ * device, mode 0666, owned by the current process, size 0, inode
+ * number hashed from the path) and returns 0.  any other path yields
+ * -ENOENT and leaves *s alone.
+ */
+static int
+statdsp(char *path, int , Ustat *s)
+{
+       if(strcmp(path, "/dev/dsp")!=0 && strcmp(path, "/dev/dsp0")!=0)
+               return -ENOENT;
+
+       s->size = 0;
+       s->ino = hashpath(path);
+       s->gid = current->gid;
+       s->uid = current->uid;
+       s->mode = 0666 | S_IFCHR;
+
+       return 0;
+}
+
+/* operations of the dsp device; installed in devtab and mounted by dspdevinit */
+static Udev dspdev = 
+{
+       .open = opendsp,
+       .read = readdsp,
+       .write = writedsp,
+       .poll = polldsp,
+       .close = closedsp,
+       .ioctl = ioctldsp,
+       .stat = statdsp,
+       .fstat = fstatdsp,
+};
+
+/* enter the dsp device into devtab and mount it on both of its paths */
+void dspdevinit(void)
+{
+       static char *paths[] = {
+               "/dev/dsp",
+               "/dev/dsp0",
+       };
+       int i;
+
+       devtab[DSPDEV] = &dspdev;
+
+       for(i = 0; i < nelem(paths); i++)
+               fsmount(&dspdev, paths[i]);
+}
diff --git a/linux_emul_base/error.c b/linux_emul_base/error.c
new file mode 100644 (file)
index 0000000..fe4cb22
--- /dev/null
@@ -0,0 +1,266 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Format verb for linux error codes (used as %E by callers in this file).
+ *
+ * Takes one int argument.  Negative values whose magnitude names an entry
+ * in the table are printed as "number [NAME]"; everything else (zero,
+ * positive values, out of range values and table slots without a name)
+ * is printed as the bare number.
+ */
+int
+Efmt(Fmt *f)
+{
+       /* indexed by errno; slots not named by an initializer stay nil */
+       static char *t[] = {
+       [EPERM]                         "EPERM",
+       [ENOENT]                        "ENOENT",
+       [ESRCH]                         "ESRCH",
+       [EINTR]                         "EINTR",
+       [EIO]                           "EIO",
+       [ENXIO]                         "ENXIO",
+       [E2BIG]                         "E2BIG",
+       [ENOEXEC]                       "ENOEXEC",
+       [EBADF]                         "EBADF",
+       [ECHILD]                        "ECHILD",
+       [EAGAIN]                        "EAGAIN",
+       [ENOMEM]                        "ENOMEM",
+       [EACCES]                        "EACCES",
+       [EFAULT]                        "EFAULT",
+       [ENOTBLK]                       "ENOTBLK",
+       [EBUSY]                         "EBUSY",
+       [EEXIST]                        "EEXIST",
+       [EXDEV]                         "EXDEV",
+       [ENODEV]                        "ENODEV",
+       [ENOTDIR]                       "ENOTDIR",
+       [EISDIR]                        "EISDIR",
+       [EINVAL]                        "EINVAL",
+       [ENFILE]                        "ENFILE",
+       [EMFILE]                        "EMFILE",
+       [ENOTTY]                        "ENOTTY",
+       [ETXTBSY]                       "ETXTBSY",
+       [EFBIG]                         "EFBIG",
+       [ENOSPC]                        "ENOSPC",
+       [ESPIPE]                        "ESPIPE",
+       [EROFS]                         "EROFS",
+       [EMLINK]                        "EMLINK",
+       [EPIPE]                         "EPIPE",
+       [EDOM]                          "EDOM",
+       [ERANGE]                        "ERANGE",
+       [EDEADLK]                       "EDEADLK",
+       [ENAMETOOLONG]          "ENAMETOOLONG",
+       [ENOLCK]                        "ENOLCK",
+       [ENOSYS]                        "ENOSYS",
+       [ENOTEMPTY]                     "ENOTEMPTY",
+       [ELOOP]                         "ELOOP",
+       [ENOMSG]                        "ENOMSG",
+       [EIDRM]                         "EIDRM",
+       [ECHRNG]                        "ECHRNG",
+       [EL2NSYNC]                      "EL2NSYNC",
+       [EL3HLT]                        "EL3HLT",
+       [EL3RST]                        "EL3RST",
+       [ELNRNG]                        "ELNRNG",
+       [EUNATCH]                       "EUNATCH",
+       [ENOCSI]                        "ENOCSI",
+       [EL2HLT]                        "EL2HLT",
+       [EBADE]                         "EBADE",
+       [EBADR]                         "EBADR",
+       [EXFULL]                        "EXFULL",
+       [ENOANO]                        "ENOANO",
+       [EBADRQC]                       "EBADRQC",
+       [EBADSLT]                       "EBADSLT",
+       [EBFONT]                        "EBFONT",
+       [ENOSTR]                        "ENOSTR",
+       [ENODATA]                       "ENODATA",
+       [ETIME]                         "ETIME",
+       [ENOSR]                         "ENOSR",
+       [ENONET]                        "ENONET",
+       [ENOPKG]                        "ENOPKG",
+       [EREMOTE]                       "EREMOTE",
+       [ENOLINK]                       "ENOLINK",
+       [EADV]                          "EADV",
+       [ESRMNT]                        "ESRMNT",
+       [ECOMM]                         "ECOMM",
+       [EPROTO]                        "EPROTO",
+       [EMULTIHOP]                     "EMULTIHOP",
+       [EDOTDOT]                       "EDOTDOT",
+       [EBADMSG]                       "EBADMSG",
+       [EOVERFLOW]                     "EOVERFLOW",
+       [ENOTUNIQ]                      "ENOTUNIQ",
+       [EBADFD]                        "EBADFD",
+       [EREMCHG]                       "EREMCHG",
+       [ELIBACC]                       "ELIBACC",
+       [ELIBBAD]                       "ELIBBAD",
+       [ELIBSCN]                       "ELIBSCN",
+       [ELIBMAX]                       "ELIBMAX",
+       [ELIBEXEC]                      "ELIBEXEC",
+       [EILSEQ]                        "EILSEQ",
+       [ERESTART]                      "ERESTART",
+       [ESTRPIPE]                      "ESTRPIPE",
+       [EUSERS]                        "EUSERS",
+       [ENOTSOCK]                      "ENOTSOCK",
+       [EDESTADDRREQ]          "EDESTADDRREQ",
+       [EMSGSIZE]                      "EMSGSIZE",
+       [EPROTOTYPE]            "EPROTOTYPE",
+       [ENOPROTOOPT]           "ENOPROTOOPT",
+       [EPROTONOSUPPORT]       "EPROTONOSUPPORT",
+       [ESOCKTNOSUPPORT]       "ESOCKTNOSUPPORT",
+       [EOPNOTSUPP]            "EOPNOTSUPP",
+       [EPFNOSUPPORT]          "EPFNOSUPPORT",
+       [EAFNOSUPPORT]          "EAFNOSUPPORT",
+       [EADDRINUSE]            "EADDRINUSE",
+       [EADDRNOTAVAIL]         "EADDRNOTAVAIL",
+       [ENETDOWN]                      "ENETDOWN",
+       [ENETUNREACH]           "ENETUNREACH",
+       [ENETRESET]                     "ENETRESET",
+       [ECONNABORTED]          "ECONNABORTED",
+       [ECONNRESET]            "ECONNRESET",
+       [ENOBUFS]                       "ENOBUFS",
+       [EISCONN]                       "EISCONN",
+       [ENOTCONN]                      "ENOTCONN",
+       [ESHUTDOWN]                     "ESHUTDOWN",
+       [ETOOMANYREFS]          "ETOOMANYREFS",
+       [ETIMEDOUT]                     "ETIMEDOUT",
+       [ECONNREFUSED]          "ECONNREFUSED",
+       [EHOSTDOWN]                     "EHOSTDOWN",
+       [EHOSTUNREACH]          "EHOSTUNREACH",
+       [EALREADY]                      "EALREADY",
+       [EINPROGRESS]           "EINPROGRESS",
+       [ESTALE]                        "ESTALE",
+       [EUCLEAN]                       "EUCLEAN",
+       [ENOTNAM]                       "ENOTNAM",
+       [ENAVAIL]                       "ENAVAIL",
+       [EISNAM]                        "EISNAM",
+       [EREMOTEIO]                     "EREMOTEIO",
+       [EDQUOT]                        "EDQUOT",
+       [ENOMEDIUM]                     "ENOMEDIUM",
+       [EMEDIUMTYPE]           "EMEDIUMTYPE",
+       };
+
+       int e;
+
+       e = va_arg(f->args, int);
+       /* only negative codes with a known name get the symbolic suffix */
+       if(e >= 0 || -e >= nelem(t) || t[-e] == nil)
+               return fmtprint(f, "%d", e);
+       return fmtprint(f, "%d [%s]", e, t[-e]);
+}
+
+/*
+ * Translate the current Plan 9 error string (as returned by rerrstr)
+ * into a negative linux error number.
+ *
+ * The table is searched in order with strstr(), so the FIRST entry
+ * whose message occurs anywhere inside the error string wins; entries
+ * listed later with the same or a contained message are only reached
+ * when no earlier entry matches.  If nothing matches, -EIO is returned.
+ */
+int
+mkerror(void)
+{
+       static struct {
+               int             num;
+               char    *msg;
+       } t[] = {
+       /* from /sys/src/9/port/errstr.h */
+       {EINVAL,                        "inconsistent mount"},
+       {EINVAL,                        "not mounted"},
+       {EINVAL,                        "not in union"},
+       {EIO,                           "mount rpc error"},
+       {EIO,                           "mounted device shut down"},
+       {EPERM,                         "mounted directory forbids creation"},
+       {ENOENT,                        "does not exist"},
+       {ENXIO,                         "unknown device in # filename"},
+       {ENOTDIR,                       "not a directory"},
+       {EISDIR,                        "file is a directory"},
+       {EINVAL,                        "bad character in file name"},
+       {EINVAL,                        "file name syntax"},
+       {EPERM,                         "permission denied"},
+       {EPERM,                         "inappropriate use of fd"},
+       {EINVAL,                        "bad arg in system call"},
+       {EBUSY,                         "device or object already in use"},
+       {EIO,                           "i/o error"},
+       {EIO,                           "read or write too large"},
+       {EIO,                           "read or write too small"},
+       {EADDRINUSE,            "network port not available"},
+       {ESHUTDOWN,                     "write to hungup stream"},
+       {ESHUTDOWN,                     "i/o on hungup channel"},
+       {EINVAL,                        "bad process or channel control request"},
+       {EBUSY,                         "no free devices"},
+       {ESRCH,                         "process exited"},
+       {ECHILD,                        "no living children"},
+       {EIO,                           "i/o error in demand load"},
+       {ENOMEM,                        "virtual memory allocation failed"},
+       {EBADF,                         "fd out of range or not open"},
+       {EMFILE,                        "no free file descriptors"},
+       {ESPIPE,                        "seek on a stream"},
+       {ENOEXEC,                       "exec header invalid"},
+       {ETIMEDOUT,                     "connection timed out"},
+       {ECONNREFUSED,          "connection refused"},
+       {ECONNREFUSED,          "connection in use"},
+       {ERESTART,                      "interrupted"},
+       {ENOMEM,                        "kernel allocate failed"},
+       {EINVAL,                        "segments overlap"},
+       {EIO,                           "i/o count too small"},
+       {EINVAL,                        "bad attach specifier"},
+
+       /* from exhausted() calls in kernel */
+       {ENFILE,                        "no free file descriptors"},
+       {EBUSY,                         "no free mount devices"},
+       {EBUSY,                         "no free mount rpc buffer"},
+       {EBUSY,                         "no free segments"},
+       {ENOMEM,                        "no free memory"},
+       {ENOBUFS,                       "no free Blocks"},
+       {EBUSY,                         "no free routes"},
+
+       /* from ken */
+       {EINVAL,                        "attach -- bad specifier"},
+       {EBADF,                         "unknown fid"},
+       {EINVAL,                        "bad character in directory name"},
+       {EBADF,                         "read/write -- on non open fid"},
+       {EIO,                           "read/write -- count too big"},
+       {EIO,                           "phase error -- directory entry not allocated"},
+       {EIO,                           "phase error -- qid does not match"},
+       {EACCES,                        "access permission denied"},
+       {ENOENT,                        "directory entry not found"},
+       {EINVAL,                        "open/create -- unknown mode"},
+       {ENOTDIR,                       "walk -- in a non-directory"},
+       {ENOTDIR,                       "create -- in a non-directory"},
+       {EIO,                           "phase error -- cannot happen"},
+       {EEXIST,                        "create -- file exists"},
+       {EINVAL,                        "create -- . and .. illegal names"},
+       {ENOTEMPTY,                     "remove -- directory not empty"},
+       {EINVAL,                        "attach -- privileged user"},
+       {EPERM,                         "wstat -- not owner"},
+       {EPERM,                         "wstat -- not in group"},
+       {EINVAL,                        "create/wstat -- bad character in file name"},
+       {EBUSY,                         "walk -- too many (system wide)"},
+       {EROFS,                         "file system read only"},
+       {ENOSPC,                        "file system full"},
+       {EINVAL,                        "read/write -- offset negative"},
+       {EBUSY,                         "open/create -- file is locked"},
+       {EBUSY,                         "close/read/write -- lock is broken"},
+
+       /* from sockets */
+       {ENOTSOCK,                      "not a socket"},
+       {EPROTONOSUPPORT,       "protocol not supported"},
+       {ECONNREFUSED,          "connection refused"},
+       {EAFNOSUPPORT,          "address family not supported"},
+       {ENOBUFS,                       "insufficient buffer space"},
+       {EOPNOTSUPP,            "operation not supported"},
+       {EADDRINUSE,            "address in use"},
+
+       /* other */
+       {EEXIST,                        "file already exists"},
+       {EEXIST,                        "is a directory"},
+       {ENOTEMPTY,                     "directory not empty"},
+       };
+
+       int r, i;
+       char msg[ERRMAX];
+
+       rerrstr(msg, sizeof(msg));
+
+       /* default when no table entry matches */
+       r = -EIO;
+       for(i=0; i<nelem(t); i++){
+               /* substring match; first hit in table order decides */
+               if(strstr(msg, t[i].msg)){
+                       r = -t[i].num;
+                       break;
+               }
+       }
+
+       trace("mkerror(%s): %E", msg, r);
+       return r;
+}
+
+/*
+ * Handler that reports a system call as unimplemented: traces the name
+ * recorded in current->syscall and returns -ENOSYS.
+ */
+int sys_nosys(void)
+{
+       trace("syscall %s not implemented", current->syscall);
+       return -ENOSYS;
+}
diff --git a/linux_emul_base/exec.c b/linux_emul_base/exec.c
new file mode 100644 (file)
index 0000000..73d4669
--- /dev/null
@@ -0,0 +1,647 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <tos.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Elfhdr          Elfhdr;
+typedef struct Proghdr Proghdr;
+typedef struct ElfEx   ElfEx;
+
+/*
+ * ELF file header as read directly from the start of the file by
+ * loadelf().
+ */
+struct Elfhdr {
+       uchar   ident[16];      /* first 4 bytes are checked against "\x7fELF" */
+       ushort  type;           /* file type, compared against ElfTShared */
+       ushort  machine;
+       ulong   version;
+       ulong   entry;          /* entry point address */
+       ulong   phoff;          /* file offset of the program header table */
+       ulong   shoff;
+       ulong   flags;
+       ushort  ehsize;
+       ushort  phentsize;      /* size of one program header entry */
+       ushort  phnum;          /* number of program header entries */
+       ushort  shentsize;
+       ushort  shnum;
+       ushort  shstrndx;
+};
+
+/* one program header table entry, as read from the file by loadelf() */
+struct Proghdr {
+       ulong   type;           /* segment type (ElfP*) */
+       ulong   offset;         /* file offset of the segment data */
+       ulong   vaddr;          /* virtual address the segment is mapped at */
+       ulong   paddr;
+       ulong   filesz;         /* bytes backed by the file */
+       ulong   memsz;          /* bytes occupied in memory */
+       ulong   flags;          /* ElfPFR / ElfPFW / ElfPFX */
+       ulong   align;  
+};
+
+/* result of loadelf(): what setupstack() and kexecve() need to start the image */
+struct ElfEx
+{
+       ulong   ientry;         /* entry of the interpreter, or same as entry if none */
+       ulong   ibase;          /* load base of the interpreter, 0 if none */
+
+       ulong   entry;          /* entry point of the executable itself */
+       ulong   base;           /* address of the first mapped load segment */
+
+       ulong   phdr;           /* base + hdr.phoff */
+       ulong   phnum;          /* copied from hdr.phnum */
+       ulong   phent;          /* copied from hdr.phentsize */
+};
+
+/*
+ * Zero the bytes from addr up to (but not including) pagealign(addr).
+ * Does nothing when the two are equal.
+ */
+static void
+padzero(ulong addr)
+{
+       ulong gap;
+
+       gap = pagealign(addr) - addr;
+       if(gap != 0)
+               memset((void*)addr, 0, gap);
+}
+
+/* constants for the fields of Elfhdr and Proghdr */
+enum {
+       /* file types */
+       ElfTNone = 0,
+       ElfTReloc = 1,
+       ElfTExec = 2,
+       ElfTShared = 3,
+       ElfTCore = 4,
+       ElfTMax = 5,
+
+       /* machine architectures */
+       ElfMNone = 0,
+       ElfM32 = 1,
+       ElfMSparc = 2,
+       ElfM386 = 3,
+       ElfM68 = 4,
+       ElfM88 = 5,
+       ElfM860 = 7,
+       ElfMMips = 8,
+       ElfMMax = 9,
+
+       /* program segment types */
+       ElfPNull = 0,
+       ElfPLoad = 1,
+       ElfPDynamic = 2,
+       ElfPInterp = 3,
+       ElfPNote = 4,
+       ElfPShlib = 5,
+       ElfPPhdr = 6,
+       ElfPMax = 7,
+
+       /* program segment flags */
+       ElfPFX = 1,
+       ElfPFW = 2,
+       ElfPFR = 4,
+};
+
+/*
+ * Map the ELF image in file into the current address space and fill in *ex.
+ *
+ * Every ElfPLoad segment is mapped with sys_mmap(); a single ElfPInterp
+ * segment names an interpreter that is loaded by a recursive call with
+ * depth+1.  Only the outermost call (depth == 0) calls mapdata(), sets
+ * the break with sys_brk() and records codestart/codeend in current.
+ *
+ * Returns 0 on success.  On failure returns -1 with the error string
+ * set by werrstr(); mappings that were already made are not undone.
+ */
+static int
+loadelf(char *file, ElfEx *ex, int depth)
+{
+       int fd;
+       int i, l;
+       int mapprot;
+       int mapflags;
+       ulong mapbase;
+       ulong loadaddr;
+       ulong bss;
+
+       Elfhdr hdr;
+       Proghdr *phdr;
+       char *interpreter;
+
+       interpreter = nil;
+       phdr = nil;
+
+       if((fd = sys_open(file, O_RDONLY, 0)) < 0){
+               werrstr("cant open %s", file);
+               goto errout;
+       }
+
+       if(sys_read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)){
+               werrstr("cant read elf header");
+               goto errout;
+       }
+
+       if(memcmp(hdr.ident, "\x7fELF", 4)!=0){
+               werrstr("no elf magic");
+               goto errout;
+       }
+
+       /*
+        * The table is read using hdr.phentsize but indexed below as an
+        * array of Proghdr; reject files where the two disagree instead of
+        * parsing misaligned entries or reading past the allocation.
+        */
+       if(hdr.phentsize != sizeof(Proghdr)){
+               werrstr("bad program header size");
+               goto errout;
+       }
+
+       l = hdr.phnum * hdr.phentsize;
+       phdr = kmalloc(l);
+       sys_lseek(fd, hdr.phoff, 0);
+       if(sys_read(fd, phdr, l) != l){
+               werrstr("cant read program headers");
+               goto errout;
+       }
+
+       loadaddr = 0;
+       mapbase = 0;
+       mapflags = MAP_PRIVATE;
+       /* shared objects may be placed anywhere until the first segment fixes the base */
+       if(hdr.type != ElfTShared)
+               mapflags |= MAP_FIXED;
+
+       trace("loadelf(): phnum=%d", hdr.phnum);
+
+       bss = 0;
+       for(i=0; i<hdr.phnum; i++){
+               Proghdr *p;
+
+               p = &phdr[i];
+               if(p->type == ElfPInterp){
+                       if(interpreter){
+                               werrstr("multiple interpeter sections");
+                               goto errout;
+                       }
+                       l = p->filesz;
+
+                       /* one extra byte so the path is always terminated */
+                       interpreter = kmalloc(l+1);
+                       sys_lseek(fd, p->offset, 0);
+                       if(sys_read(fd, interpreter, l)!=l){
+                               werrstr("cant read interpreter section");
+                               goto errout;
+                       }
+                       interpreter[l] = '\0';
+               }
+
+               if(p->type == ElfPLoad){
+                       ulong a;
+                       int diff;
+
+                       trace("loadelf(): phdr %d: vaddr=%lux memsz=%lux filesz=%lux offset=%lux flags=%lux",
+                               i,
+                               p->vaddr,
+                               p->memsz,
+                               p->filesz,
+                               p->offset,
+                               p->flags);
+
+                       mapprot = 0;
+                       if(p->flags & ElfPFR)
+                               mapprot |= PROT_READ;
+                       if(p->flags & ElfPFW)
+                               mapprot |= PROT_WRITE;
+                       if(p->flags & ElfPFX)
+                               mapprot |= PROT_EXEC;
+
+                       /* the segment holding the entry point is always made executable */
+                       if(hdr.entry >= p->vaddr && hdr.entry < p->vaddr + p->memsz)
+                               mapprot |= PROT_EXEC;
+
+                       /* offset of vaddr inside its page */
+                       diff = p->vaddr - (p->vaddr & ~(PAGESIZE-1));
+
+                       /* have to call mapdata() before we do the first mmap */
+                       if(loadaddr == 0 && depth == 0){
+                               if(hdr.type == ElfTShared){
+                                       mapbase = pagealign((ulong)end + 0x4000000);
+                                       mapflags |= MAP_FIXED;
+                               }
+                               mapdata((mapbase + p->vaddr) - diff);
+                       }
+
+                       a = sys_mmap(
+                               (mapbase + p->vaddr) - diff,
+                               p->filesz + diff,
+                               mapprot,
+                               mapflags,
+                               fd,
+                               (p->offset - diff)/PAGESIZE);
+
+                       /* small negative values are error numbers, not addresses */
+                       if(((int)a < 0) && ((int)a > -EMAX)){
+                               werrstr("mmap failed: %E", (int)a);
+                               goto errout;
+                       }
+                       if(loadaddr == 0)
+                               loadaddr = a;
+                       if(hdr.type == ElfTShared && mapbase == 0){
+                               /* first segment picked the base; pin the rest relative to it */
+                               mapbase = a + diff;
+                               mapflags |= MAP_FIXED;
+                       }
+                       if(mapprot & PROT_WRITE)
+                               padzero(mapbase + p->vaddr + p->filesz);
+                       if(depth == 0)
+                               if(mapbase + p->vaddr + p->memsz > bss)
+                                       bss = mapbase + p->vaddr + p->memsz;
+               } else {
+                       trace("loadelf(): phdr %d: type=%lux", i, p->type);
+               }
+       }
+
+       ex->base = loadaddr;
+       ex->entry = hdr.entry + ((hdr.type == ElfTShared) ? loadaddr : 0);
+
+       ex->phdr = loadaddr + hdr.phoff;
+       ex->phent = hdr.phentsize;
+       ex->phnum = hdr.phnum;
+
+       if(depth == 0){
+               sys_brk(pagealign(bss));
+
+               current->codestart = loadaddr;
+               current->codeend = bss;
+       }
+
+       if(interpreter){
+               ElfEx interpex;
+
+               if(loadelf(interpreter, &interpex, depth+1) < 0){
+                       werrstr("cant load interpreter: %r");
+                       goto errout;
+               }
+               free(interpreter);
+
+               ex->ientry = interpex.entry;
+               ex->ibase = interpex.base;
+       } else {
+               ex->ientry = ex->entry;
+               ex->ibase = 0;  /* no interpreter */
+       }
+
+       sys_close(fd);
+       free(phdr);
+       return 0;
+
+errout:
+       if(fd >= 0)
+               sys_close(fd);
+       free(interpreter);
+       free(phdr);
+       return -1;
+}
+
+
+/* keys of the auxiliary vector entries written by setupstack() */
+enum {
+       AT_NULL,
+       AT_IGNORE,
+       AT_EXECFD,
+       AT_PHDR,
+       AT_PHENT,
+       AT_PHNUM,
+       AT_PAGESZ,
+       AT_BASE,
+       AT_FLAGS,
+       AT_ENTRY,
+       AT_NOTELF,
+       AT_UID,
+       AT_EUID,
+       AT_GID,
+       AT_EGID,
+       AT_PLATFORM,
+       AT_HWCAP,
+       AT_CLKTCK,
+       AT_SECURE = 23,
+
+       AT_SYSINFO = 32,
+       AT_SYSINFO_EHDR = 33,
+};
+
+/*
+ * Build the initial user stack for a freshly loaded image.
+ *
+ * Layout, from the returned pointer upwards: argc, the argv pointer
+ * array (nil terminated), the envp pointer array (nil terminated),
+ * 16 auxiliary vector pairs, then the argument and environment strings
+ * themselves.  argv and envp may be nil, which counts as empty.
+ *
+ * Returns the new stack pointer (also stored in current->stackstart),
+ * or nil with the error string set on failure.
+ */
+static void*
+setupstack(ElfEx *ex, char *argv[], char *envp[])
+{
+       int envc;
+       int argc;
+
+       char **dargv;
+       char **denv;
+
+       ulong *stack;
+       ulong *p;
+       char *x;
+       int i, n;
+
+       /*
+        * calculate the size we need on stack
+        */
+       argc=0;
+       while(argv && argv[argc]) argc++;
+
+       envc=0;
+       while(envp && envp[envc]) envc++;
+
+       n = 0;
+       n += sizeof(ulong);                     // argc
+       n += (argc+1)*sizeof(char*);    // argv + nil
+       n += (envc+1)*sizeof(char*);    // envp + nil
+       n += 16*(2*sizeof(ulong));      // aux
+
+       for(i=0; i<argc; i++)
+               n += (strlen(argv[i])+1);
+       for(i=0; i<envc; i++)
+               n += (strlen(envp[i])+1);
+
+       if(USTACK - n < PAGESIZE){
+               werrstr("too many arguments passed on stack");
+               return nil;
+       }
+               
+       stack = mapstack(USTACK);
+
+       /* small negative values are error numbers, not addresses */
+       if(((int)stack < 0) && ((int)stack > -EMAX)){
+               werrstr("mapstack failed: %E", (int)stack);
+               return nil;
+       }
+       /* reserve n bytes below the returned address, rounded down to 8 bytes */
+       stack = (ulong*)(((ulong)stack - n) & ~7);
+
+       current->stackstart = (ulong)stack;
+
+       p = stack;
+
+       *p++ = argc;
+
+       /* pointer arrays are reserved now and filled in while copying the strings */
+       dargv = (char**)p;
+       p += (argc + 1);
+
+       denv = (char**)p;
+       p += (envc + 1);
+
+       /* exactly 16 pairs, matching the 16 reserved in the size calculation */
+#define AUXENT(k, v)  {p[0]=k; p[1]=v; p+=2;}
+       AUXENT(AT_PAGESZ, PAGESIZE);
+       AUXENT(AT_CLKTCK, HZ);
+       AUXENT(AT_PHDR, ex->phdr);
+       AUXENT(AT_PHENT, ex->phent);
+       AUXENT(AT_PHNUM, ex->phnum);
+       AUXENT(AT_BASE, ex->ibase);
+       AUXENT(AT_FLAGS, 0);
+       AUXENT(AT_ENTRY, ex->entry);
+       AUXENT(AT_UID, current->uid);
+       AUXENT(AT_EUID, current->uid);
+       AUXENT(AT_GID, current->gid);
+       AUXENT(AT_EGID, current->gid);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+       AUXENT(AT_NULL, 0);
+#undef AUXENT
+
+       /* strings follow the aux vector; each copy also records its address */
+       x = (char*)p;
+
+       for(i=0; i<argc; i++)
+               x += (strlen(dargv[i] = strcpy(x, argv[i])) + 1);
+       dargv[argc] = 0;
+       for(i=0; i<envc; i++)
+               x += (strlen(denv[i] = strcpy(x, envp[i])) + 1);
+       denv[envc] = 0;
+
+       return stack;
+}
+
+/*
+ * Duplicate a nil terminated string vector into ONE kmalloc'ed block:
+ * the pointer array (including its nil terminator) comes first and the
+ * string bytes follow it, so a single free() of the result releases
+ * everything.  Returns nil when a is nil.
+ */
+static char**
+copystrings(char *a[])
+{
+       char **vec;
+       char *dst;
+       int cnt, size, k;
+
+       if(a == nil)
+               return nil;
+
+       /* one slot for the terminator, then a slot plus the bytes per string */
+       size = sizeof(a[0]);
+       for(cnt = 0; a[cnt]; cnt++)
+               size += sizeof(a[0]) + (strlen(a[cnt]) + 1);
+
+       vec = kmalloc(size);
+
+       /* string storage starts right behind the cnt+1 pointers */
+       dst = (char*)&vec[cnt+1];
+       for(k = 0; k < cnt; k++){
+               vec[k] = strcpy(dst, a[k]);
+               dst += strlen(vec[k]) + 1;
+       }
+       vec[cnt] = 0;
+
+       return vec;
+}
+
+/*
+ * Set the process name and current->comm for a new image.
+ *
+ * The process name is "name arg0 arg1 ..." joined by single spaces.
+ * current->comm becomes exe and every argv string, each followed by a
+ * NUL, with one extra NUL at the very end; current->ncomm is its total
+ * length in bytes.  The previous current->comm is freed.
+ *
+ * argv may be nil (kexecve passes the caller's argv through unchanged
+ * when none was supplied); it is then treated as an empty vector.
+ */
+static void
+setcomm(char *exe, char *name, char *argv[])
+{
+       static char *noargs[] = { nil };
+       char *buf, *p;
+       int i, n;
+
+       if(argv == nil)
+               argv = noargs;
+
+       /* large enough for both layouts built below */
+       n = strlen(exe) + strlen(name) +2;
+       for(i=0; argv[i]; i++)
+               n += strlen(argv[i])+1;
+
+       buf = kmalloc(n);
+
+       /* first use of buf: space separated command line for setprocname() */
+       p = buf;
+       p += strlen(strcpy(p, name));
+       for(i=0; argv[i]; i++){
+               p += strlen(strcpy(p, " "));
+               p += strlen(strcpy(p, argv[i]));
+       }
+       setprocname(buf);
+
+       /* comm contains the full exe name + argv */
+       p = buf;
+       p += strlen(strcpy(p, exe));
+       *p++ = 0;
+       for(i=0; argv[i]; i++){
+               p += strlen(strcpy(p, argv[i]));
+               *p++ = 0;
+       }
+       *p++ = 0;
+
+       free(current->comm);
+       current->comm = buf;
+       current->ncomm = p - buf;
+}
+
+/*
+ * Leave note handling context while keeping the saved user registers.
+ *
+ * pc, sp and ax are copied out of ureg, notejmp() is used to continue
+ * at the setjmp() point below, and the three saved values are then
+ * written back into ureg.
+ * NOTE(review): presumably notejmp() overwrites those ureg fields and
+ * resumes via noted(); confirm against the notejmp manual page.
+ */
+static void
+clinote(struct Ureg *ureg)
+{
+       jmp_buf jmp;
+       ulong pc;
+       ulong sp;
+       ulong ax;
+
+       pc = ureg->pc;
+       sp = ureg->sp;
+       ax = ureg->ax;
+
+       /* first pass: setjmp returns 0 and we jump out of the note */
+       if(!setjmp(jmp))
+               notejmp(ureg, jmp, 1);
+
+       /* put back the registers saved above */
+       ureg->pc = pc;
+       ureg->sp = sp;
+       ureg->ax = ax;
+}
+
+/* argument bundle handed from sys_execve() to kexecve() through onstack() */
+struct kexecveargs
+{
+       char            *name;
+       char            **argv;
+       char            **envp;
+};
+
+#pragma profile off
+
+/*
+ * Body of execve, run through onstack() by sys_execve().
+ *
+ * arg points to a struct kexecveargs.  The file is checked for
+ * read/execute access, opened and its start read into a scratch buffer.
+ * A "#!" line is split into a new argument vector (interpreter words,
+ * then the script name, then the original arguments after argv[0]) and
+ * the whole procedure restarts with the interpreter.  An ELF image is
+ * loaded after the old address space has been torn down.
+ *
+ * phase tracks how far we got so that the cleanup at errout knows what
+ * to free: 0 = only the scratch buffer, 1..3 = name/argv/envp copies
+ * exist, 4 = new image fully set up.  Failures before the point of no
+ * return give back a negative error number; failures after it kill the
+ * process; success enters the new image through retuser().
+ */
+static int
+kexecve(void *arg)
+{
+       struct kexecveargs *args;
+       Ufile *f;
+       ElfEx ex;
+       Ureg u;
+       int r, n;
+       char *b, *p, *e, *x, **a;
+       void *stack;
+       char *name, *exe;
+       char **argv;
+       char **envp;
+       int phase;
+
+       args =  arg;
+       name = args->name;
+       argv = args->argv;
+       envp = args->envp;
+
+       phase = 0;
+       n = 8192;
+       b = kmalloc(n);
+       p = b;
+       e = b + n;
+again:
+       if(r = sys_access(name, 05)){
+               if(r > 0)
+                       r = -EACCES;
+               goto errout;
+       }
+       if((r = sys_open(name, O_RDONLY, 0)) < 0)
+               goto errout;
+       exe = "/dev/null";
+       if(f = fdgetfile(r)){
+               if(f->path != nil){
+                       /*
+                        * Copy the path including its NUL, but only if it fits and
+                        * leaves room behind it; an unterminated copy would make
+                        * the length calculation run off the end of b[].
+                        */
+                       n = strlen(f->path) + 1;
+                       if(n >= e - p){
+                               putfile(f);
+                               sys_close(r);
+                               r = -ENAMETOOLONG;
+                               goto errout;
+                       }
+                       memmove(p, f->path, n);
+                       exe = p;
+                       p += n;
+               }
+               putfile(f);
+       }
+       /* keep one byte spare so the data can be NUL terminated below */
+       n = sys_read(r, p, (e-p)-1);
+       sys_close(r);
+
+       r = -ENOEXEC;
+       if(n < 4)
+               goto errout;
+
+       if(memcmp(p, "#!", 2) == 0){
+               p[n] = 0;
+
+               r = -ENAMETOOLONG;
+               if((x = strchr(p, '\n')) == nil)
+                       goto errout;
+               *x = 0;
+
+               /* the new argument vector is built in b[] right behind the #! line */
+               a = (char**)&x[1];
+               n = (e - (char*)a) / sizeof(a[0]);
+               if(n < 2)
+                       goto errout;
+               n = getfields(&p[2], a, n, 1, "\t\r\n ");
+               if(n < 1)
+                       goto errout;
+               r = -E2BIG;
+               if(&a[n+1] >= (char**)e)
+                       goto errout;
+               a[n++] = name; 
+               if(argv != nil){
+                       /* skip the original argv[0]; the script name replaces it */
+                       argv++;
+                       while(*argv){
+                               if(&a[n+1] >= (char**)e)
+                                       goto errout;
+                               a[n++] = *argv++;
+                       }
+               }
+               a[n++] = 0;
+               p = (char*)&a[n];
+               if(e - p < 4)
+                       goto errout;
+               argv = a;
+               name = argv[0];
+
+               goto again;
+       }
+
+       if(memcmp(p, "\x7fELF", 4)!=0)
+               goto errout;
+
+       /*
+        * the contents of envp[] or argv[] may be stored in b[], or in the
+        * stack or bss of the calling linux process, all of which are
+        * destroyed by free(b) and exitmem()... so we need to copy them
+        * temporarily.
+        */
+       r = -ENOMEM;
+       name = kstrdup(name);
+       phase++;
+       if(argv)
+               argv = copystrings(argv);
+       phase++;
+       if(envp)
+               envp = copystrings(envp);
+       phase++;
+
+       /* get out of the note before we destroy user stack */
+       if(current->innote){
+               clinote(current->ureg);
+               current->innote = 0;
+       }
+
+       /* this is the point of no return! */
+       qlock(&proctab);
+       zapthreads();
+       exitmem();
+       exitsignal();
+
+       initmem();
+       initsignal();
+       inittls();
+       qunlock(&proctab);
+
+       closexfds();
+
+       setcomm(exe, name, argv);
+
+       if(loadelf(name, &ex, 0) < 0){
+               trace("kexecve(): loadelf failed: %r");
+               goto errout;
+       }
+
+       if((stack = setupstack(&ex, argv, envp)) == nil){
+               trace("kexecve(): setupstack failed: %r");
+               goto errout;
+       }
+
+       memset(&u, 0, sizeof(u));
+       u.sp = (ulong)stack;
+       u.pc = (ulong)ex.ientry;
+       current->ureg = &u;
+       current->syscall = nil;
+       phase++;
+
+       trace("kexecve(): startup pc=%lux sp=%lux", current->ureg->pc, current->ureg->sp);
+
+errout:
+       /* free whatever exists for this phase; every case falls through on purpose */
+       switch(phase){
+       default:        free(envp);
+               /* fall through */
+       case 2: free(argv);
+               /* fall through */
+       case 1: free(name);
+               /* fall through */
+       case 0: free(b);
+       }
+       /* 4: enter the new image; 3: old image is gone, so the process dies */
+       switch(phase){
+       case 4: retuser();
+               /* fall through */
+       case 3: exitproc(current, SIGKILL, 1);
+       }
+       return r;
+}
+
+/*
+ * execve system call entry: packs the three arguments into a
+ * struct kexecveargs and runs kexecve() on kstack via onstack(),
+ * returning whatever that call returns.
+ */
+int sys_execve(char *name, char *argv[], char *envp[])
+{
+       struct kexecveargs args;
+
+       trace("sys_execve(%s, %p, %p)", name, argv, envp);
+
+       args.name = name;
+       args.argv = argv;
+       args.envp = envp;
+
+       return onstack(kstack, kexecve, &args);
+}
+
+#pragma profile on
diff --git a/linux_emul_base/file.c b/linux_emul_base/file.c
new file mode 100644 (file)
index 0000000..8c048d5
--- /dev/null
@@ -0,0 +1,760 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Fd Fd;
+typedef struct Fdtab Fdtab;
+
+/* one slot of a descriptor table */
+struct Fd
+{
+       int             flags;  /* per-descriptor flags; FD_CLOEXEC is the bit this file tests */
+       Ufile           *file;  /* open file, nil while the slot is free */
+};
+
+/*
+ * descriptor table of a process.  it is reference counted and
+ * can be shared between processes or duplicated (see getfdtab).
+ */
+struct Fdtab
+{
+       Ref;                    /* reference count (anonymous member) */
+       QLock;                  /* taken with qlock(tab) around slot accesses */
+       int             lastfd; /* descriptor most recently released by sys_close(); -1 initially */
+       int             nfd;    /* number of slots in fd[] */
+       Fd              *fd;    /* slot array, extended CHUNK entries at a time by grow1() */
+};
+
+/* take an extra reference on file and return it; nil is passed through */
+Ufile*
+getfile(Ufile *file)
+{
+       if(file == nil)
+               return nil;
+       incref(file);
+       return file;
+}
+
+/*
+ * drop one reference on file; nil is ignored.  when the count
+ * reaches zero the device's close hook (if it has one) is called
+ * and the path, the rdaux list and the file itself are freed.
+ */
+void
+putfile(Ufile *file)
+{
+       Udirent *ent, *next;
+
+       if(file == nil || decref(file) != 0)
+               return;
+
+       trace("putfile(): closing %p %s", file, file->path);
+       if(devtab[file->dev]->close != nil)
+               devtab[file->dev]->close(file);
+       free(file->path);
+       /* unlink each entry before freeing it */
+       for(ent = file->rdaux; ent != nil; ent = next){
+               next = ent->next;
+               file->rdaux = next;
+               free(ent);
+       }
+       free(file);
+}
+
+/* allocate an empty descriptor table holding a single reference */
+static Fdtab*
+newfdtab(void)
+{
+       Fdtab *t;
+
+       t = kmallocz(sizeof(Fdtab), 1);
+       t->fd = nil;
+       t->nfd = 0;
+       t->lastfd = -1; /* nothing has been closed yet */
+       t->ref = 1;
+       return t;
+}
+
+/* the slot array of an Fdtab is allocated in multiples of CHUNK entries */
+enum {
+       CHUNK   = 64,
+};
+
+/*
+ * append one zeroed slot to tab and return its index, or -EMFILE
+ * once the table already has MAXFD slots.
+ * the caller must hold tab's lock (qlock(tab)).
+ */
+static int
+grow1(Fdtab *tab)
+{
+       int slot;
+
+       slot = tab->nfd;
+       if(slot >= MAXFD)
+               return -EMFILE;
+       /* storage runs out exactly at CHUNK boundaries; add another CHUNK */
+       if(slot % CHUNK == 0)
+               tab->fd = krealloc(tab->fd, (slot + CHUNK) * sizeof(tab->fd[0]));
+       memset(&tab->fd[slot], 0, sizeof(tab->fd[0]));
+       tab->nfd = slot + 1;
+       return slot;
+}
+
+/*
+ * look up descriptor fd in proc's table and return a new reference
+ * to its file.  returns nil when proc has no table, fd is out of
+ * range, or the slot is empty.  release the result with putfile().
+ */
+Ufile*
+procfdgetfile(Uproc *proc, int fd)
+{
+       Fdtab *t;
+       Ufile *f;
+
+       t = proc->fdtab;
+       if(t == nil)
+               return nil;
+
+       f = nil;
+       qlock(t);
+       if(fd >= 0 && fd < t->nfd)
+               f = getfile(t->fd[fd].file);
+       qunlock(t);
+       return f;
+}
+
+/* procfdgetfile() for the current process */
+Ufile*
+fdgetfile(int fd)
+{
+       Ufile *f;
+
+       f = procfdgetfile(current, fd);
+       return f;
+}
+
+/*
+ * install file in the lowest-numbered free descriptor of the
+ * current process, with the given per-descriptor flags, and
+ * return that descriptor.
+ *
+ * the caller's reference to file is consumed: the table keeps it
+ * on success, and it is released here when no slot can be
+ * allocated, in which case grow1()'s negative error is returned.
+ */
+int
+newfd(Ufile *file, int flags)
+{
+       int fd;
+       Fdtab *tab;
+
+       tab = current->fdtab;
+       qlock(tab);
+       /*
+        * always scan from 0: linux programs rely on getting the
+        * lowest unused descriptor.  reusing tab->lastfd as a
+        * shortcut could hand out a higher one, e.g.
+        * close(3); close(5); open() used to return 5.
+        */
+       for(fd=0; fd<tab->nfd; fd++)
+               if(tab->fd[fd].file == nil)
+                       break;
+       if(fd >= tab->nfd)
+               fd = grow1(tab);
+       if(fd >= 0){
+               tab->fd[fd].file = file;
+               tab->fd[fd].flags = flags;
+               file = nil;
+       }
+       qunlock(tab);
+       putfile(file);  /* nil (no-op) unless the allocation failed */
+
+       return fd;
+}
+
+/*
+ * obtain a descriptor table for a new process.
+ * copy == 0: share tab, just bump its reference count.
+ * copy != 0: build a private table with the same slots; every
+ * open file gets an extra reference and keeps its flags.
+ */
+static Fdtab*
+getfdtab(Fdtab *tab, int copy)
+{
+       Fdtab *nt;
+       Ufile *f;
+       int i, nslot;
+
+       if(copy == 0){
+               incref(tab);
+               return tab;
+       }
+
+       qlock(tab);
+       nt = newfdtab();
+       nt->lastfd = tab->lastfd;
+       nt->nfd = tab->nfd;
+       /* round the allocation up to a whole number of CHUNKs, as grow1() expects */
+       nslot = ((tab->nfd+CHUNK-1)/CHUNK)*CHUNK;
+       nt->fd = kmallocz(sizeof(nt->fd[0]) * nslot, 1);
+       for(i = 0; i < nt->nfd; i++){
+               f = tab->fd[i].file;
+               if(f != nil){
+                       incref(f);
+                       nt->fd[i].file = f;
+                       nt->fd[i].flags = tab->fd[i].flags;
+               }
+       }
+       qunlock(tab);
+       return nt;
+}
+
+/*
+ * drop one reference on tab.  the last reference releases every
+ * file still installed and frees the slot array and the table.
+ */
+static void
+putfdtab(Fdtab *tab)
+{
+       Ufile *f;
+       int i;
+
+       if(decref(tab) != 0)
+               return;
+       for(i = 0; i < tab->nfd; i++){
+               f = tab->fd[i].file;
+               if(f != nil){
+                       tab->fd[i].file = nil;
+                       putfile(f);
+               }
+       }
+       free(tab->fd);
+       free(tab);
+}
+
+/*
+ * dup2: make descriptor new refer to the same file as old.
+ *
+ * new < 0 is used internally by sys_dup(): the file is installed
+ * in a free descriptor chosen by newfd().
+ * returns the new descriptor, -EBADF when old is not open or
+ * new >= MAXFD, or the error from grow1().
+ * the duplicate never has FD_CLOEXEC set.
+ */
+int sys_dup2(int old, int new)
+{
+       Ufile *file;
+       Fdtab *tab;
+       int err;
+
+       trace("sys_dup2(%d, %d)", old, new);
+
+       tab = current->fdtab;
+
+       if((file = fdgetfile(old)) == nil)
+               return -EBADF;
+       if(new < 0)
+               return newfd(file, 0);  /* newfd consumes our reference */
+       if(new >= MAXFD){
+               /* release the reference taken by fdgetfile() */
+               putfile(file);
+               return -EBADF;
+       }
+       if(new == old){
+               /* dup2(fd, fd) is a no-op: leave the slot and its flags untouched */
+               putfile(file);
+               return new;
+       }
+       qlock(tab);
+       while(new >= tab->nfd){
+               err = grow1(tab);
+               if(err < 0){
+                       qunlock(tab);
+                       putfile(file);
+                       return err;
+               }
+       }
+       /* whatever was open on new is closed implicitly */
+       if(tab->fd[new].file != nil)
+               putfile(tab->fd[new].file);
+       tab->fd[new].file = file;       /* the table now owns our reference */
+       tab->fd[new].flags &= ~FD_CLOEXEC;
+       qunlock(tab);
+
+       return new;
+}
+
+/* dup: sys_dup2() with a negative target picks a free descriptor */
+int
+sys_dup(int fd)
+{
+       int new;
+
+       new = sys_dup2(fd, -1);
+       return new;
+}
+
+/*
+ * lock description that sys_fcntl() writes through for F_GETLK
+ * (only l_type is touched in this file).
+ */
+struct linux_flock
+{
+       short   l_type;         /* one of F_RDLCK, F_WRLCK, F_UNLCK */
+       short   l_whence;
+       ulong   l_start;
+       ulong   l_len;
+       int             l_pid;
+}; 
+
+/*
+ * variant of linux_flock with uvlong start/length, written
+ * through by sys_fcntl() for F_GETLK64 (only l_type is touched).
+ */
+struct linux_flock64
+{
+       short   l_type;         /* one of F_RDLCK, F_WRLCK, F_UNLCK */
+       short   l_whence;
+       uvlong  l_start;
+       uvlong  l_len;
+       int             l_pid;
+};
+
+/* values for l_type; numbered 0, 1, 2 in this order */
+enum {
+       F_RDLCK,
+       F_WRLCK,
+       F_UNLCK,
+};
+
+/*
+ * fcntl on descriptor fd of the current process.
+ *
+ * returns -EBADF when fd is not open and -EINVAL for commands
+ * that are not handled (or an out of range F_DUPFD argument);
+ * otherwise the command specific result described below.
+ * the reference taken by fdgetfile() is dropped at out:, except
+ * when F_DUPFD hands it over to the table.
+ */
+int sys_fcntl(int fd, int cmd, int arg)
+{
+       int ret;
+       Ufile *file;
+       Fdtab *tab;
+
+       trace("sys_fcntl(%d, %lux, %lux)", fd, (ulong)cmd, (ulong)arg);
+
+       tab = current->fdtab;
+
+       ret = -EBADF;
+       if((file = fdgetfile(fd)) == nil)
+               goto out;
+       ret = -EINVAL;
+       switch(cmd){
+       default:
+               trace("sys_fcntl() cmd %lux not implemented", (ulong)cmd);
+               break;
+
+       case F_DUPFD:
+               /* duplicate fd onto the first free descriptor >= arg */
+               if(arg < 0 || arg >= MAXFD)
+                       break;
+               qlock(tab);
+               for(ret=arg; ret<tab->nfd; ret++)
+                       if(tab->fd[ret].file == nil)
+                               goto found;
+               /* no free slot in range: extend the table until index arg exists */
+               do {
+                       if((ret = grow1(tab)) < 0)
+                               break;
+               } while(ret < arg);
+found:
+               if(ret >= 0){
+                       tab->fd[ret].file = file;
+                       tab->fd[ret].flags = tab->fd[fd].flags & ~FD_CLOEXEC;
+                       file = nil;     /* reference now owned by the table */
+               }
+               qunlock(tab);
+               break;
+
+       case F_GETFD:
+       case F_SETFD:
+               /* only FD_CLOEXEC is reported or stored */
+               qlock(tab);
+               if(cmd == F_GETFD){
+                       ret = tab->fd[fd].flags & FD_CLOEXEC;
+               } else {
+                       tab->fd[fd].flags = (arg & FD_CLOEXEC);
+                       ret = 0;
+               }
+               qunlock(tab);
+               break;
+
+       case F_GETFL:
+               ret = file->mode;
+               break;
+       case F_SETFL:
+               /*
+                * NOTE(review): the whole mode word is replaced by arg,
+                * including any access mode bits it carries; confirm
+                * that this is what the devices expect.
+                */
+               trace("sys_fcntl() changing mode from %o to %o", file->mode, arg);
+               file->mode = arg;
+               ret = 0;
+               break;
+
+       case F_GETLK:
+               /* locks are not tracked: always report the range as unlocked */
+               ((struct linux_flock*)arg)->l_type = F_UNLCK;
+               /* fall through */
+       case F_SETLK:
+       case F_SETLKW:
+               /* lock requests are accepted without doing anything */
+               ret = 0;
+               break;
+
+       case F_GETLK64:
+               ((struct linux_flock64*)arg)->l_type = F_UNLCK;
+               /* fall through */
+       case F_SETLK64:
+               ret = 0;
+               break;
+       }
+out:
+       putfile(file);
+       return ret;
+}
+
+/*
+ * close descriptor fd of the current process.
+ * the slot is emptied and remembered in lastfd while the table is
+ * locked; the file reference is dropped after unlocking.
+ * returns 0, or -EBADF when fd is not an open descriptor.
+ */
+int
+sys_close(int fd)
+{
+       Fdtab *t;
+       Ufile *f;
+
+       trace("sys_close(%d)", fd);
+
+       t = current->fdtab;
+       f = nil;
+       qlock(t);
+       if(fd >= 0 && fd < t->nfd && (f = t->fd[fd].file) != nil){
+               t->fd[fd].file = nil;
+               t->lastfd = fd;
+       }
+       qunlock(t);
+
+       if(f == nil)
+               return -EBADF;
+       putfile(f);
+       return 0;
+}
+
+/*
+ * forward an ioctl to the device behind fd.
+ * returns -EBADF for a bad descriptor and -ENOTTY when the
+ * device has no ioctl hook.
+ */
+int
+sys_ioctl(int fd, int cmd, void *arg)
+{
+       Ufile *f;
+       int r;
+
+       trace("sys_ioctl(%d, %lux, %p)", fd, (ulong)cmd, arg);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       if(devtab[f->dev]->ioctl != nil)
+               r = devtab[f->dev]->ioctl(f, cmd, arg);
+       else
+               r = -ENOTTY;
+       putfile(f);
+       return r;
+}
+
+/*
+ * read up to len bytes at offset off through the device's read
+ * hook, without touching file->off.
+ * for an O_NONBLOCK file whose device has a poll hook, -EAGAIN is
+ * returned when poll does not report POLLIN.
+ * a device without a read hook yields 0.
+ */
+int
+preadfile(Ufile *file, void *buf, int len, vlong off)
+{
+       if((file->mode & O_NONBLOCK) != 0
+       && devtab[file->dev]->poll != nil
+       && (devtab[file->dev]->poll(file, nil) & POLLIN) == 0){
+               trace("readfile(): nonblocking read blocked");
+
+               return -EAGAIN;
+       }
+       if(devtab[file->dev]->read != nil)
+               return devtab[file->dev]->read(file, buf, len, off);
+       return 0;
+}
+
+/* read at the file's current offset and advance it by the bytes read */
+int
+readfile(Ufile *file, void *buf, int len)
+{
+       int n;
+
+       n = preadfile(file, buf, len, file->off);
+       if(n > 0)
+               file->off += n;
+       return n;
+}
+
+/*
+ * write len bytes at offset off through the device's write hook,
+ * without touching file->off.  a device without a write hook
+ * yields 0.  for an O_APPEND file whose device has a size hook,
+ * off is replaced by the current size (a negative size is
+ * returned as the error).
+ */
+int
+pwritefile(Ufile *file, void *buf, int len, vlong off)
+{
+       if(devtab[file->dev]->write == nil)
+               return 0;
+       if((file->mode & O_APPEND) != 0 && devtab[file->dev]->size != nil){
+               off = devtab[file->dev]->size(file);
+               if(off < 0)
+                       return (int)off;
+       }
+       return devtab[file->dev]->write(file, buf, len, off);
+}
+
+/*
+ * write at the file's current offset and advance it by the bytes
+ * written.  a device without a write hook yields 0.
+ * for an O_APPEND file whose device has a size hook, the offset
+ * is first moved to the current size (a negative size is returned
+ * as the error); this happens even when len is 0.
+ */
+int
+writefile(Ufile *file, void *buf, int len)
+{
+       vlong eof;
+       int n;
+
+       if(devtab[file->dev]->write == nil)
+               return 0;
+       if((file->mode & O_APPEND) != 0 && devtab[file->dev]->size != nil){
+               eof = devtab[file->dev]->size(file);
+               if(eof < 0)
+                       return (int)eof;
+               file->off = eof;
+       }
+       if(len == 0)
+               return 0;
+       n = devtab[file->dev]->write(file, buf, len, file->off);
+       if(n > 0)
+               file->off += n;
+       return n;
+}
+
+/* read(2): readfile() on the file behind fd; -EBADF for a bad descriptor */
+int
+sys_read(int fd, void *buf, int len)
+{
+       Ufile *f;
+       int n;
+
+       trace("sys_read(%d, %p, %x)", fd, buf, len);
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       n = readfile(f, buf, len);
+       putfile(f);
+       return n;
+}
+
+/* write(2): writefile() on the file behind fd; -EBADF for a bad descriptor */
+int
+sys_write(int fd, void *buf, int len)
+{
+       Ufile *f;
+       int n;
+
+       trace("sys_write(%d, %p, %x)", fd, buf, len);
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       n = writefile(f, buf, len);
+       putfile(f);
+
+       return n;
+}
+
+/* positional read: preadfile() at off; the file offset is left alone */
+int
+sys_pread64(int fd, void *buf, int len, ulong off)
+{
+       Ufile *f;
+       int n;
+
+       trace("sys_pread(%d, %p, %x, %lux)", fd, buf, len, off);
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       n = preadfile(f, buf, len, off);
+       putfile(f);
+       return n;
+}
+
+/* positional write: pwritefile() at off; the file offset is left alone */
+int
+sys_pwrite64(int fd, void *buf, int len, ulong off)
+{
+       Ufile *f;
+       int n;
+
+       trace("sys_pwrite(%d, %p, %x, %lux)", fd, buf, len, off);
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       n = pwritefile(f, buf, len, off);
+       putfile(f);
+       return n;
+}
+
+/* one element of the vector handed to sys_readv()/sys_writev() */
+struct linux_iovec
+{
+       void            *base;  /* start of the buffer */
+       ulong   len;            /* number of bytes to transfer */
+};
+
+/*
+ * gather write: write the n buffers described by vec in order.
+ * stops at the first short write or error.  returns the total
+ * number of bytes written; an error is only reported when
+ * nothing has been written yet.
+ */
+int
+sys_writev(int fd, void *vec, int n)
+{
+       struct linux_iovec *v = vec;
+       Ufile *f;
+       int total, i, w;
+
+       trace("sys_writev(%d, %p, %d)", fd, vec, n);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       total = 0;
+       for(i = 0; i < n; i++){
+               w = writefile(f, v[i].base, v[i].len);
+               if(w >= 0){
+                       total += w;
+                       if(w < v[i].len)
+                               break;
+                       continue;
+               }
+               if(total == 0)
+                       total = w;
+               break;
+       }
+       putfile(f);
+
+       return total;
+}
+
+/*
+ * scatter read: fill the n buffers described by vec in order.
+ * stops at the first short read or error.  returns the total
+ * number of bytes read; an error is only reported when nothing
+ * has been read yet.
+ */
+int
+sys_readv(int fd, void *vec, int n)
+{
+       struct linux_iovec *v = vec;
+       Ufile *f;
+       int total, i, r;
+
+       trace("sys_readv(%d, %p, %d)", fd, vec, n);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       total = 0;
+       for(i = 0; i < n; i++){
+               r = readfile(f, v[i].base, v[i].len);
+               if(r >= 0){
+                       total += r;
+                       if(r < v[i].len)
+                               break;
+                       continue;
+               }
+               if(total == 0)
+                       total = r;
+               break;
+       }
+       putfile(f);
+
+       return total;
+}
+
+/*
+ * reposition file->off.
+ *
+ * whence 0: absolute, 1: relative to the current offset,
+ * 2: relative to the size reported by the device.
+ * returns 0 on success, -ESPIPE when the device has no size hook,
+ * the (negative) error from the size hook, or -EINVAL for an
+ * unknown whence or when the resulting offset would be negative.
+ * file->off is left unchanged on error.
+ */
+int seekfile(Ufile *file, vlong off, int whence)
+{
+       vlong base;
+
+       if(devtab[file->dev]->size == nil)
+               return -ESPIPE;
+
+       switch(whence){
+       default:
+               return -EINVAL;
+       case 0:
+               base = 0;
+               break;
+       case 1:
+               base = file->off;
+               break;
+       case 2:
+               base = devtab[file->dev]->size(file);
+               if(base < 0)
+                       return (int)base;
+               break;
+       }
+
+       /* never leave a negative offset behind for later reads and writes */
+       if(base + off < 0)
+               return -EINVAL;
+       file->off = base + off;
+       return 0;
+}
+
+/*
+ * lseek with a 32 bit offset.
+ * returns the new file offset, or a negative error code
+ * (-EBADF for a bad descriptor, otherwise seekfile()'s error)
+ * converted to ulong.
+ */
+ulong sys_lseek(int fd, ulong off, int whence)
+{
+       Ufile *file;
+       int ret;
+
+       trace("sys_lseek(%d, %lux, %d)", fd, off, whence);
+
+       if((file = fdgetfile(fd)) == nil)
+               return (ulong)-EBADF;
+       /*
+        * the offset is signed for the caller; go through long so
+        * that it is sign extended to vlong.  passing the ulong
+        * directly turned lseek(fd, -10, SEEK_CUR) into a forward
+        * seek of almost 4GB.
+        */
+       ret = seekfile(file, (long)off, whence);
+       if(ret == 0)
+               ret = file->off;
+       putfile(file);
+
+       return ret;
+}
+
+/*
+ * lseek with a 64 bit offset passed as two 32 bit halves.
+ * on success the new offset is stored through res (when res is
+ * not nil) and 0 is returned; otherwise -EBADF or seekfile()'s error.
+ */
+int
+sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence)
+{
+       Ufile *f;
+       vlong off;
+       int r;
+
+       trace("sys_llseek(%d, %lux, %lux, %p, %d)", fd, hioff, looff, res, whence);
+
+       f = fdgetfile(fd);
+       if(f == nil)
+               return -EBADF;
+       off = ((vlong)hioff<<32) | ((vlong)looff);
+       r = seekfile(f, off, whence);
+       if(r == 0 && res != nil)
+               *res = f->off;
+       putfile(f);
+
+       return r;
+}
+
+/* set the current process' umask (low 9 bits only) and return the old one */
+int
+sys_umask(int umask)
+{
+       int prev;
+
+       trace("sys_umask(%#o)", umask);
+
+       prev = current->umask;
+       current->umask = umask & 0777;
+       return prev;
+}
+
+/*
+ * make the directory open as f the current working directory.
+ * returns -ENOTDIR when f has no path, its device has no fstat
+ * hook, or fstat does not report a directory; fstat errors are
+ * passed through.  on success current->cwd is replaced, and for
+ * files on ROOTDEV the emulator itself chdir()s as well and
+ * records the path in current->kcwd.
+ */
+int
+chdirfile(Ufile *f)
+{
+       Ustat st;
+       int r;
+
+       trace("chdirfile(%s)", f->path);
+
+       if(f->path == nil || devtab[f->dev]->fstat == nil)
+               return -ENOTDIR;
+       r = devtab[f->dev]->fstat(f, &st);
+       if(r < 0)
+               return r;
+       if((st.mode & ~0777) != S_IFDIR)
+               return -ENOTDIR;
+
+       free(current->cwd);
+       current->cwd = kstrdup(fsrootpath(f->path));
+       if(f->dev == ROOTDEV){
+               /* kcwd only changes when the real chdir succeeded */
+               if(chdir(f->path) == 0){
+                       free(current->kcwd);
+                       current->kcwd = kstrdup(f->path);
+               }
+       }
+       return 0;
+}
+
+/* fchdir: chdirfile() on the file behind fd; -EBADF for a bad descriptor */
+int
+sys_fchdir(int fd)
+{
+       Ufile *file;
+       int r;
+
+       trace("sys_fchdir(%d)", fd);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       r = chdirfile(file);
+       putfile(file);
+       return r;
+}
+
+/*
+ * fchown: forwarded to the device's fchown hook.
+ * -EBADF for a bad descriptor, -EPERM when the device has no hook.
+ */
+int
+sys_fchown(int fd, int uid, int gid)
+{
+       Ufile *file;
+       int r;
+
+       trace("sys_fchown(%d, %d, %d)", fd, uid, gid);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       if(devtab[file->dev]->fchown != nil)
+               r = devtab[file->dev]->fchown(file, uid, gid);
+       else
+               r = -EPERM;
+       putfile(file);
+
+       return r;
+}
+
+/*
+ * fchmod: forwarded to the device's fchmod hook.
+ * -EBADF for a bad descriptor, -EPERM when the device has no hook.
+ */
+int
+sys_fchmod(int fd, int mode)
+{
+       Ufile *file;
+       int r;
+
+       trace("sys_fchmod(%d, %#o)", fd, mode);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       if(devtab[file->dev]->fchmod != nil)
+               r = devtab[file->dev]->fchmod(file, mode);
+       else
+               r = -EPERM;
+       putfile(file);
+
+       return r;
+}
+
+/*
+ * ftruncate: forwarded to the device's ftruncate hook.
+ * -EBADF for a bad descriptor, -EPERM when the device has no hook.
+ */
+int
+sys_ftruncate(int fd, ulong size)
+{
+       Ufile *file;
+       int r;
+
+       trace("sys_ftruncate(%d, %lux)", fd, size);
+
+       file = fdgetfile(fd);
+       if(file == nil)
+               return -EBADF;
+       if(devtab[file->dev]->ftruncate != nil)
+               r = devtab[file->dev]->ftruncate(file, (uvlong)size);
+       else
+               r = -EPERM;
+       putfile(file);
+
+       return r;
+}
+
+/* give the current process an empty descriptor table and a umask of 022 */
+void
+initfile(void)
+{
+       current->umask = 022;
+       current->fdtab = newfdtab();
+}
+
+/* detach proc from its descriptor table (if any) and drop the reference */
+void
+exitfile(Uproc *proc)
+{
+       Fdtab *t;
+
+       t = proc->fdtab;
+       if(t == nil)
+               return;
+       proc->fdtab = nil;
+       putfdtab(t);
+}
+
+/*
+ * set up new's descriptor table from the current process':
+ * shared when copy is 0, duplicated otherwise (see getfdtab).
+ * new gets no table when the current process has none.
+ */
+void
+clonefile(Uproc *new, int copy)
+{
+       Fdtab *t;
+
+       t = current->fdtab;
+       if(t != nil)
+               new->fdtab = getfdtab(t, copy);
+       else
+               new->fdtab = nil;
+}
+
+/*
+ * close every descriptor of the current process that has
+ * FD_CLOEXEC set (called from kexecve).  the table stays
+ * locked for the whole sweep.
+ */
+void
+closexfds(void)
+{
+       Fdtab *t;
+       Fd *slot;
+       Ufile *f;
+       int i;
+
+       t = current->fdtab;
+       if(t == nil)
+               return;
+       qlock(t);
+       for(i = 0; i < t->nfd; i++){
+               slot = &t->fd[i];
+               f = slot->file;
+               if(f == nil || (slot->flags & FD_CLOEXEC) == 0)
+                       continue;
+
+               /* empty the slot first, then drop the table's reference */
+               slot->file = nil;
+               slot->flags = 0;
+
+               putfile(f);
+       }
+       qunlock(t);
+}
+
+/* flock stub: only traced, fd and cmd are not examined; always returns 0 */
+int
+sys_flock(int fd, int cmd)
+{
+       trace("sys_flock(%d, %d)", fd, cmd);
+       return 0;
+}
+
+/* fsync stub: only traced, fd is not examined; always returns 0 */
+int
+sys_fsync(int fd)
+{
+       trace("sys_fsync(%d)", fd);
+       return 0;
+}
+
diff --git a/linux_emul_base/fns.h b/linux_emul_base/fns.h
new file mode 100644 (file)
index 0000000..d6005d5
--- /dev/null
@@ -0,0 +1,311 @@
+/* error */
+int mkerror(void);
+#pragma varargck type "E" int
+int Efmt(Fmt *e);
+int sys_nosys(void);
+
+/* linuxcall */
+int linuxcall(void);
+
+/* trap */
+void inittrap(void);
+void retuser(void);
+
+/* bits */
+void incref(Ref *);
+int decref(Ref *);
+void jumpstart(ulong addr, ulong *stack);
+void jumpureg(void *ureg);
+void linux_sigreturn(void);
+void linux_rtsigreturn(void);
+
+/* trace */
+void inittrace(void);
+void exittrace(Uproc *proc);
+void clonetrace(Uproc *new, int copy);
+void tprint(char *fmt, ...);
+#pragma varargck argpos tprint 1
+/*
+ * trace(fmt, ...) expands to an unbraced `if(debug)tprint(fmt, ...)`:
+ * the arguments are only evaluated when debug is set, and an
+ * `else` written directly after a trace() statement would bind
+ * to this hidden if.
+ */
+#define trace if(debug)tprint
+
+/* proc */
+void initproc(void);
+void exitproc(Uproc *proc, int code, int group);
+void stopproc(Uproc *proc, int code, int group);
+void contproc(Uproc *proc, int code, int group);
+int procfork(void (*fproc)(void *aux), void *aux, int flags);
+Uproc* getproc(int tid);
+Uproc* getprocn(int n);
+int threadcount(int pid);
+void zapthreads(void);
+void setprocname(char *s);
+int notifyme(int on);
+void wakeme(int on);
+int sleepproc(QLock *l, int flags);
+Uwait* addwaitq(Uwaitq *q);
+void delwaitq(Uwait *w);
+int sleepq(Uwaitq *q, QLock *l, int flags);
+int wakeq(Uwaitq *q, int nwake);
+int requeue(Uwaitq *q1, Uwaitq *q2, int nrequeue);
+int killproc(Uproc *p, Usiginfo *info, int group);
+void setalarm(vlong t);
+
+int sys_waitpid(int pid, int *pexit, int opt);
+int sys_wait4(int pid, int *pexit, int opt, void *prusage);
+int sys_exit(int code);
+int sys_exit_group(int code);
+int sys_linux_clone(int flags, void *newstack, int *parenttidptr, int *tlsdescr, void *childtidptr);
+int sys_fork(void);
+int sys_vfork(void);
+int sys_getpid(void);
+int sys_getppid(void);
+int sys_gettid(void);
+int sys_setpgid(int pid, int pgid);
+int sys_getpgid(int pid);
+int sys_setpgrp(int pid);
+int sys_getpgrp(void);
+int sys_getuid(void);
+int sys_getgid(void);
+int sys_setgid(int gid);
+int sys_setuid(int uid);
+int sys_setresuid(int ruid, int euid, int suid);
+int sys_getresuid(int *ruid, int *euid, int *suid);
+int sys_setresgid(int rgid, int egid, int sgid);
+int sys_getresgid(int *rgid, int *egid, int *sgid);
+int sys_setreuid(int ruid, int euid);
+int sys_setregid(int rgid, int egid);
+int sys_uname(void *);
+int sys_personality(ulong p);
+int sys_setsid(void);
+int sys_getsid(int pid);
+int sys_getgroups(int size, int *groups);
+int sys_setgroups(int size, int *groups);
+
+int sys_kill(int pid, int sig);
+int sys_tkill(int tid, int sig);
+int sys_tgkill(int pid, int tid, int sig);
+int sys_rt_sigqueueinfo(int pid, int sig, void *info);
+
+int sys_set_tid_address(int *tidptr);
+
+int sys_sched_setscheduler(int pid, int policy, void *param);
+int sys_sched_getscheduler(int pid);
+int sys_sched_setparam(int pid, void *param);
+int sys_sched_getparam(int pid, void *param);
+int sys_sched_yield(void);
+
+int sys_getrlimit(long resource, void *rlim);
+int sys_setrlimit(long resource, void *rlim);
+
+/* signal */
+void initsignal(void);
+void exitsignal(void);
+void clonesignal(Uproc *new, int copyhand, int newproc);
+void settty(Ufile *tty);
+Ufile* gettty(void);
+#pragma varargck type "S" int
+int Sfmt(Fmt *f);
+
+int wantssignal(Uproc *proc, int sig);
+int ignoressignal(Uproc *proc, int sig);
+int signalspending(Uproc *proc);
+
+void handlesignals(void);
+int sendsignal(Uproc *proc, Usiginfo *info, int group);
+
+void siginfo2linux(Usiginfo *, void *);
+void linux2siginfo(void *, Usiginfo *);
+
+int sys_sigaltstack(void *stk, void *ostk);
+int sys_rt_sigaction(int sig, void *pact, void *poact, int setsize);
+int sys_rt_sigpending(uchar *set, int setsize);
+int sys_rt_sigprocmask(int how, uchar *act, uchar *oact, int setsize);
+int sys_rt_sigsuspend(uchar *set, int setsize);
+int sys_sigreturn(void);
+int sys_rt_sigreturn(void);
+
+int sys_setitimer(int which, void *value, void *ovalue);
+int sys_getitimer(int which, void *value);
+int sys_alarm(long seconds);
+
+/* file */
+void initfile(void);
+void exitfile(Uproc *proc);
+void clonefile(Uproc *new, int copy);
+void closexfds(void);
+Ufile *procfdgetfile(Uproc *proc, int fd);
+Ufile* fdgetfile(int fd);
+Ufile* getfile(Ufile *file);
+void putfile(Ufile *file);
+int newfd(Ufile *file, int flags);
+int chdirfile(Ufile *file);
+int readfile(Ufile *file, void *buf, int len);
+int writefile(Ufile *file, void *buf, int len);
+int preadfile(Ufile *file, void *buf, int len, vlong off);
+int pwritefile(Ufile *file, void *buf, int len, vlong off);
+int sys_dup(int fd);
+int sys_dup2(int old, int new);
+int sys_fcntl(int fd, int cmd, int arg);
+int sys_close(int fd);
+int sys_ioctl(int fd, int cmd, void *arg);
+int sys_read(int fd, void *buf, int len);
+int sys_readv(int fd, void *vec, int n);
+int sys_pread64(int fd, void *buf, int len, ulong off);
+int sys_write(int fd, void *buf, int len);
+int sys_pwrite64(int fd, void *buf, int len, ulong off);
+int sys_writev(int fd, void *vec, int n);
+ulong sys_lseek(int fd, ulong off, int whence);
+int sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence);
+int sys_umask(int umask);
+int sys_flock(int fd, int cmd);
+int sys_fsync(int fd);
+int sys_fchdir(int fd);
+int sys_getcwd(char *buf, int len);
+int sys_fchmod(int fd, int mode);
+int sys_fchown(int fd, int uid, int gid);
+int sys_ftruncate(int fd, ulong size);
+
+/* poll */
+void pollwait(Ufile *f, Uwaitq *q, void *t);
+int sys_poll(void *p, int nfd, long timeout);
+int sys_select(int nfd, ulong *rfd, ulong *wfd, ulong *efd, void *ptv);
+
+/* mem */
+void* kmalloc(int size);
+void* kmallocz(int size, int zero);
+void* krealloc(void *ptr, int size);
+char* kstrdup(char *s);
+char* ksmprint(char *fmt, ...);
+#pragma varargck argpos ksmprint 1
+
+ulong pagealign(ulong addr);
+
+void initmem(void);
+void exitmem(void);
+void clonemem(Uproc *new, int copy);
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe);
+void* mapstack(int size);
+void mapdata(ulong base);
+void unmapuserspace(void);
+int okaddr(void *ptr, int len, int write);
+
+ulong sys_linux_mmap(void *a);
+ulong sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff);
+int sys_munmap(ulong addr, ulong len);
+ulong sys_brk(ulong bk);
+int sys_mprotect(ulong addr, ulong len, int prot);
+int sys_msync(ulong addr, ulong len, int flags);
+ulong sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr);
+
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3);
+
+/* exec */
+int sys_execve(char *name, char *argv[], char *envp[]);
+
+/* time */
+void inittime(void);
+int sys_time(long *p);
+int sys_gettimeofday(void *tvp, void *tzp);
+int sys_clock_gettime(int clock, void *t);
+int sys_nanosleep(void *rqp, void *rmp);
+int proctimes(Uproc *p, ulong *t);
+int sys_times(void *times);
+
+/* tls */
+void inittls(void);
+void clonetls(Uproc *new);
+
+int sys_set_thread_area(void *pinfo);
+int sys_get_thread_area(void *pinfo);
+int sys_modify_ldt(int func, void *data, int count);
+
+/* bufproc */
+void *newbufproc(int fd);
+void freebufproc(void *bp);
+int readbufproc(void *bp, void *data, int len, int peek, int noblock);
+int pollbufproc(void *bp, Ufile *file, void *tab);
+int nreadablebufproc(void *bp);
+
+/* main */
+void panic(char *msg, ...);
+int onstack(long *stk, int (*func)(void *arg), void *arg);
+void profme(void);
+
+/* stat */
+int ufstat(int fd, Ustat *ps);
+Udirent *newdirent(char *path, char *name, int mode);
+
+int sys_getxattr(char *path, char *name, void *value, int size);
+int sys_lgetxattr(char *path, char *name, void *value, int size);
+int sys_fgetxattr(int fd, char *name, void *value, int size);
+int sys_setxattr(char *path, char *name, void *value, int flags, int size);
+int sys_lsetxattr(char *path, char *name, void *value, int flags, int size);
+int sys_fsetxattr(int fd, char *name, void *value, int size, int flags);
+
+int sys_linux_fstat(int fd, void *st);
+int sys_linux_fstat64(int fd, void *st);
+int sys_linux_getdents(int fd, void *buf, int nbuf);
+int sys_linux_getdents64(int fd, void *buf, int nbuf);
+int sys_linux_lstat(char *path, void *st);
+int sys_linux_lstat64(char *path, void *st);
+int sys_linux_stat(char *path, void *st);
+int sys_linux_stat64(char *path, void *st);
+
+int sys_statfs(char *name, void *pstatfs);
+
+/* fs */
+void fsmount(Udev *dev, char *path);
+
+char* allocpath(char *base, char *prefix, char *name);
+char* fullpath(char *base, char *name);
+char* shortpath(char *base, char *path);
+char* fsfullpath(char *path);
+char* fsrootpath(char *path);
+char* basepath(char *p, char **ps);
+ulong hashpath(char *s);
+
+int fsaccess(char *path, int mode);
+int fschmod(char *path, int mode);
+int fschown(char *path, int uid, int gid, int link);
+int fslink(char *old, char *new, int sym);
+int fsmkdir(char *path, int mode);
+int fsopen(char *path, int mode, int perm, Ufile **pf);
+int fsreadlink(char *path, char *buf, int len);
+int fsrename(char *old, char *new);
+int fsstat(char *path, int link, Ustat *ps);
+int fstruncate(char *path, vlong size);
+int fsunlink(char *path, int rmdir);
+int fsutime(char *path, int atime, int mtime);
+
+int sys_access(char *name, int mode);
+int sys_chdir(char *name);
+int sys_chroot(char *name);
+int sys_chmod(char *name, int mode);
+int sys_chown(char *name, int uid, int gid);
+int sys_creat(char *name, int perm);
+int sys_lchown(char *name, int uid, int gid);
+int sys_link(char *old, char *new);
+int sys_open(char *name, int mode, int perm);
+int sys_readlink(char *name, char *buf, int len);
+int sys_rename(char *from, char *to);
+int sys_rmdir(char *name);
+int sys_symlink(char *old, char *new);
+int sys_truncate(char *name, ulong size);
+int sys_unlink(char *name);
+int sys_utime(char *name, void *times);
+int sys_utimes(char *name, void *tvp);
+int sys_mkdir(char *name, int mode);
+
+/* drivers */
+void rootdevinit(void);
+void sockdevinit(void);
+int sys_linux_socketcall(int call, int *arg);
+void pipedevinit(void);
+int sys_pipe(int *fds);
+void fddevinit(void);
+void ptsdevinit(void);
+void dspdevinit(void);
+void miscdevinit(void);
+void ptydevinit(void);
+void consdevinit(void);
+void procdevinit(void);
+
diff --git a/linux_emul_base/fs.c b/linux_emul_base/fs.c
new file mode 100644 (file)
index 0000000..4283991
--- /dev/null
@@ -0,0 +1,758 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Mount Mount;
+
+/*
+ * One mount table entry: the device that serves every path equal to
+ * path or below it.  path is stored inline after the struct and
+ * npath caches its length.
+ */
+struct Mount
+{
+       Mount   *next;
+       Udev            *dev;
+       int             npath;
+       char            path[];
+};
+
+/* head of the mount list; fsmount() keeps it ordered longest path first */
+static Mount *mtab;
+
+/*
+ * Add a mount table entry that routes path (and everything below it)
+ * to dev.  A nil dev is ignored.  The list is kept sorted by
+ * descending path length, ties in descending strcmp order, so the
+ * first match found by path2mount() is the longest prefix.
+ */
+void
+fsmount(Udev *dev, char *path)
+{
+       Mount *ent, *cur, **link;
+       int len;
+
+       if(dev == nil)
+               return;
+
+       len = strlen(path);
+       ent = kmalloc(sizeof(*ent) + len + 1);
+       ent->dev = dev;
+       ent->npath = len;
+       strcpy(ent->path, path);
+
+       /* stop in front of the first entry that has to follow the new one */
+       for(link = &mtab; (cur = *link) != nil; link = &cur->next){
+               if(ent->npath > cur->npath)
+                       break;
+               if(ent->npath == cur->npath && strcmp(ent->path, cur->path) >= 0)
+                       break;
+       }
+       ent->next = *link;
+       *link = ent;
+}
+
+/* Hash a NUL terminated string: h = h*13 + (c - 'a') for every byte c. */
+ulong
+hashpath(char *s)
+{
+       ulong hash;
+
+       hash = 0;
+       while(*s != 0){
+               hash = (hash * 13) + (*s - 'a');
+               s++;
+       }
+       return hash;
+}
+
+/*
+ * Split p at its last '/'.  Returns a newly allocated copy of the
+ * directory part ("/" when the slash is the first character) and,
+ * when ps is not nil, stores a newly allocated copy of the final
+ * element in *ps.  If p has no '/' or ends in '/', returns nil and
+ * sets *ps to nil.
+ */
+char*
+basepath(char *p, char **ps)
+{
+       char *slash, *dir;
+       int len;
+
+       slash = strrchr(p, '/');
+       if(slash == nil || slash[1] == 0){
+               if(ps != nil)
+                       *ps = nil;
+               return nil;
+       }
+       if(ps != nil)
+               *ps = kstrdup(slash+1);
+       len = slash - p;
+       if(len == 0)
+               len = 1;        /* keep the leading "/" */
+       dir = kmalloc(len+1);
+       memmove(dir, p, len);
+       dir[len] = 0;
+       return dir;
+}
+
+/*
+ * Return a newly allocated string base + "/" + prefix + name.
+ * prefix may be nil; the "/" is left out when both prefix and name
+ * are empty.
+ */
+char*
+allocpath(char *base, char *prefix, char *name)
+{
+       char *buf, *w;
+       int nbase, nname, npre;
+
+       nbase = strlen(base);
+       nname = strlen(name);
+       npre = 0;
+       if(prefix != nil)
+               npre = strlen(prefix);
+
+       /* room for base, '/', prefix, name and the NUL */
+       buf = kmalloc(nbase+nname+npre+2);
+       w = buf;
+       memmove(w, base, nbase);
+       w += nbase;
+       if(nname != 0 || npre != 0)
+               *w++ = '/';
+       if(npre != 0){
+               memmove(w, prefix, npre);
+               w += npre;
+       }
+       /* nname+1 also copies the terminating NUL */
+       memmove(w, name, nname+1);
+       return buf;
+}
+
+/*
+ * Return a newly allocated, cleanname()d absolute form of name.
+ * Names starting with '/' or '#' are taken as they are, anything
+ * else is appended to base.  Returns nil for a relative name when
+ * base is nil.
+ */
+char*
+fullpath(char *base, char *name)
+{
+       char *res;
+
+       if(*name == '/' || *name == '#')
+               res = kstrdup(name);
+       else if(base != nil)
+               res = allocpath(base, nil, name);
+       else
+               return nil;
+       if(res != nil)
+               cleanname(res);
+       return res;
+}
+
+/*
+ * Return path relative to base.  When base is a leading run of whole
+ * path elements of path, the result points just past base (and past
+ * the '/' that follows it), or to the constant "." when nothing is
+ * left.  Otherwise path is returned unchanged.
+ *
+ * The element boundary check matters: without it base "/a/b" would
+ * also be stripped from the unrelated path "/a/bc/d".
+ */
+char*
+shortpath(char *base, char *path)
+{
+       int n;
+
+       n = strlen(base);
+       if(n > strlen(path) || strncmp(path, base, n) != 0)
+               return path;
+       /* base must end in '/' or be followed by '/' or the end of path */
+       if(n > 0 && base[n-1] != '/' && path[n] != '/' && path[n] != 0)
+               return path;
+       path += n;
+       if(*path == '/')
+               path++;
+       if(*path == 0)
+               path = ".";
+       return path;
+}
+
+/*
+ * Resolve path against the current process: make it absolute
+ * relative to current->cwd and, when current->root is set, place the
+ * result below that root.  Returns a newly allocated string, or nil
+ * when fullpath() fails.
+ */
+char*
+fsfullpath(char *path)
+{
+       char *abs, *rooted;
+
+       abs = fullpath(current->cwd, path);
+       if(abs == nil || current->root == nil)
+               return abs;
+       /* abs+1 skips the first character before appending to the root */
+       rooted = allocpath(current->root, nil, abs+1);
+       free(abs);
+       return rooted;
+}
+
+/*
+ * Translate path into the view below current->root: the root prefix
+ * is dropped so the result starts at the '/' following it, and the
+ * root itself becomes "/".  Without a root, or for a path that is
+ * not below it, path is returned unchanged.  Nothing is allocated;
+ * the result points into path or to a string constant.
+ */
+char*
+fsrootpath(char *path)
+{
+       char *root;
+
+       if(root = current->root){
+               root = shortpath(root, path);
+               /*
+                * Only an exact "." means "the root itself"; testing just
+                * the first character would also turn names such as
+                * ".profile" below the root into "/".
+                */
+               if(root[0] == '.' && root[1] == 0){
+                       path = "/";
+               } else if(root > path && root[-1] == '/'){
+                       /* step back onto the separator; skip it if it is not one */
+                       path = root-1;
+               }
+       }
+       return path;
+}
+
+/*
+ * Find the first mount table entry whose path is path itself or a
+ * parent directory of it (the match has to end at a '/' or at the
+ * end of path).  Returns nil when nothing matches.
+ */
+static Mount*
+path2mount(char *path)
+{
+       Mount *m;
+       char c;
+
+       m = mtab;
+       while(m != nil){
+               if(strncmp(path, m->path, m->npath) == 0){
+                       c = path[m->npath];
+                       if(c == '\0' || c == '/')
+                               return m;
+               }
+               m = m->next;
+       }
+       return nil;
+}
+
+/* Return the device mounted at or above path, nil if there is none. */
+static Udev*
+path2dev(char *path)
+{
+       Mount *m;
+
+       m = path2mount(path);
+       if(m == nil)
+               return nil;
+       return m->dev;
+}
+
+/*
+ * Enter a path operation: bump current->linkloop and return 0.  Once
+ * the counter is above 8 nothing is changed and -ELOOP is returned
+ * (and stored in *perr when perr is not nil).  Every successful call
+ * is paired with fsleave().
+ */
+static int
+fsenter(int *perr)
+{
+       int dummy;
+
+       if(current->linkloop <= 8){
+               current->linkloop++;
+               return 0;
+       }
+       if(perr == nil)
+               perr = &dummy;
+       *perr = -ELOOP;
+       return -ELOOP;
+}
+
+/* Undo one successful fsenter(): decrement current->linkloop. */
+static void
+fsleave(void)
+{
+       current->linkloop--;
+}
+
+/*
+ * Copy current->cwd, including its NUL, into buf.  Returns the number
+ * of bytes copied, or -ERANGE when len is too small.
+ */
+int sys_getcwd(char *buf, int len)
+{
+       char *dir;
+       int need;
+
+       trace("sys_getcwd(%p, %x)", buf, len);
+
+       dir = current->cwd;
+       need = strlen(dir)+1;
+       if(len < need)
+               return -ERANGE;
+       memmove(buf, dir, need);
+       return need;
+}
+
+/*
+ * Open path through the device mounted there.  *pf is set to nil
+ * before anything else; the device's open hook is expected to fill
+ * it in.  Returns the hook's result, -ENOENT when no device or open
+ * hook exists, or -ELOOP from fsenter().
+ */
+int
+fsopen(char *path, int mode, int perm, Ufile **pf)
+{
+       Udev *d;
+       int r;
+
+       trace("fsopen(%s, %#o, %#o)", path, mode, perm);
+
+       *pf = nil;
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->open != nil)
+               r = d->open(path, mode, perm, pf);
+       else
+               r = -ENOENT;
+       fsleave();
+       return r;
+}
+
+/*
+ * Check access to path.  Returns -ENOENT when no device is mounted
+ * there, 0 when the device has no access hook, otherwise the hook's
+ * result.
+ */
+int
+fsaccess(char *path, int mode)
+{
+       Udev *d;
+       int r;
+
+       trace("fsaccess(%s, %#o)", path, mode);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d == nil)
+               r = -ENOENT;
+       else if(d->access == nil)
+               r = 0;
+       else
+               r = d->access(path, mode);
+       fsleave();
+
+       return r;
+}
+
+/* access: resolve name and hand it to fsaccess(); -EFAULT if it cannot be resolved. */
+int sys_access(char *name, int mode)
+{
+       char *full;
+       int r;
+
+       trace("sys_access(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsaccess(full, mode);
+       free(full);
+
+       return r;
+}
+
+/*
+ * open: resolve name, open it with fsopen() and pass the file to
+ * newfd().  Returns newfd()'s result, a non-zero fsopen() result, or
+ * -EFAULT when name cannot be resolved.
+ */
+int sys_open(char *name, int mode, int perm)
+{
+       Ufile *f;
+       char *full;
+       int r;
+
+       trace("sys_open(%s, %#o, %#o)", name, mode, perm);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsopen(full, mode, perm, &f);
+       free(full);
+
+       if(r != 0)
+               return r;
+       return newfd(f, FD_CLOEXEC);
+}
+
+/*
+ * creat: same as open with O_CREAT|O_WRONLY|O_TRUNC.  O_WRONLY has to
+ * be spelled out; O_RDONLY is 0, so leaving it off would hand back a
+ * read-only descriptor for the newly created file.
+ */
+int sys_creat(char *name, int perm)
+{
+       trace("sys_create(%s, %#o)", name, perm);
+
+       return sys_open(name, O_CREAT|O_WRONLY|O_TRUNC, perm);
+}
+
+/*
+ * Stat path through its device.  *ps is zeroed before the device's
+ * stat hook is called; link is passed through to the hook.  Returns
+ * the hook's result, or -EPERM when no device or stat hook exists
+ * (*ps is then left untouched).
+ */
+int
+fsstat(char *path, int link, Ustat *ps)
+{
+       Udev *d;
+       int r;
+
+       trace("fsstat(%s, %d)", path, link);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d == nil || d->stat == nil)
+               r = -EPERM;
+       else{
+               memset(ps, 0, sizeof(Ustat));
+               r = d->stat(path, link, ps);
+       }
+       fsleave();
+       return r;
+}
+
+/*
+ * chdir: open the resolved name read-only and pass the file to
+ * chdirfile(); the file is released again with putfile().
+ */
+int
+sys_chdir(char *name)
+{
+       Ufile *dir;
+       char *full;
+       int r;
+
+       trace("sys_chdir(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsopen(full, O_RDONLY, 0, &dir);
+       free(full);
+       if(r != 0)
+               return r;
+       r = chdirfile(dir);
+       putfile(dir);
+       return r;
+}
+
+/*
+ * chroot: make the directory name the root of the current process.
+ * The file is opened and fstat'ed through its device; anything that
+ * has no path, no fstat hook or is not a directory gives -ENOTDIR.
+ * On success current->root is replaced by a copy of the file's path,
+ * or by nil when that path is "/".
+ *
+ * NOTE(review): name is handed to fsopen() as given; unlike the other
+ * sys_* calls in this file it is not run through fsfullpath() --
+ * confirm this is intended.
+ */
+int sys_chroot(char *name)
+{
+       Ufile *f;
+       Ustat s;
+       int err;
+
+       trace("sys_chroot(%s)", name);
+
+       f = nil;
+       if((err = fsopen(name, O_RDONLY, 0, &f)) < 0)
+               goto out;
+       err = -ENOTDIR;
+       if(f->path == nil)
+               goto out;
+       if(devtab[f->dev]->fstat == nil)
+               goto out;
+       if((err = devtab[f->dev]->fstat(f, &s)) < 0)
+               goto out;
+       err = -ENOTDIR;
+       /*
+        * Look at the file type bits only; masking with ~0777 would also
+        * keep setuid/setgid/sticky and reject e.g. a sticky directory.
+        */
+       if((s.mode & S_IFMT) != S_IFDIR)
+               goto out;
+       err = 0;
+       free(current->root);
+       if(strcmp(f->path, "/") == 0){
+               current->root = nil;
+       } else {
+               current->root = kstrdup(f->path);
+       }
+out:
+       /* f is still nil when fsopen() failed without producing a file */
+       if(f != nil)
+               putfile(f);
+       return err;
+}
+
+/*
+ * Change owner and group of path through its device; link is passed
+ * on to the hook.  -EPERM when no device or chown hook exists.
+ */
+int
+fschown(char *path, int uid, int gid, int link)
+{
+       Udev *d;
+       int r;
+
+       trace("fschown(%s, %d, %d, %d)", path, uid, gid, link);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->chown != nil)
+               r = d->chown(path, uid, gid, link);
+       else
+               r = -EPERM;
+       fsleave();
+       return r;
+}
+
+/* chown: fschown() on the resolved name with link = 0. */
+int sys_chown(char *name, int uid, int gid)
+{
+       char *full;
+       int r;
+
+       trace("sys_chown(%s, %d, %d)", name, uid, gid);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fschown(full, uid, gid, 0);
+       free(full);
+
+       return r;
+}
+
+/* lchown: fschown() on the resolved name with link = 1. */
+int sys_lchown(char *name, int uid, int gid)
+{
+       char *full;
+       int r;
+
+       trace("sys_lchown(%s, %d, %d)", name, uid, gid);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fschown(full, uid, gid, 1);
+       free(full);
+
+       return r;
+}
+
+/*
+ * Read the link at path into buf (at most len bytes, as enforced by
+ * the device hook).  -EPERM when no device or readlink hook exists.
+ */
+int
+fsreadlink(char *path, char *buf, int len)
+{
+       Udev *d;
+       int r;
+
+       trace("fsreadlink(%s)", path);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->readlink != nil)
+               r = d->readlink(path, buf, len);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* readlink: fsreadlink() on the resolved name. */
+int sys_readlink(char *name, char *buf, int len)
+{
+       char *full;
+       int r;
+
+       trace("sys_readlink(%s, %p, %x)", name, buf, len);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsreadlink(full, buf, len);
+       free(full);
+
+       return r;
+}
+
+/*
+ * Rename old to new.  Both have to live on the same device:
+ * -EPERM when old has no device or the device has no rename hook,
+ * -EXDEV when new maps to a different device.
+ */
+int
+fsrename(char *old, char *new)
+{
+       Udev *d;
+       int r;
+
+       trace("fsrename(%s, %s)", old, new);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(old);
+       if(d == nil || d->rename == nil)
+               r = -EPERM;
+       else if(d != path2dev(new))
+               r = -EXDEV;
+       else
+               r = d->rename(old, new);
+       fsleave();
+
+       return r;
+}
+
+
+/* rename: resolve both names, then fsrename(); -EFAULT if either fails to resolve. */
+int sys_rename(char *from, char *to)
+{
+       char *src, *dst;
+       int r;
+
+       trace("sys_rename(%s, %s)", from, to);
+
+       src = fsfullpath(from);
+       if(src == nil)
+               return -EFAULT;
+       dst = fsfullpath(to);
+       if(dst == nil){
+               free(src);
+               return -EFAULT;
+       }
+       r = fsrename(src, dst);
+       free(src);
+       free(dst);
+
+       return r;
+}
+
+/* Create directory path through its device; -EPERM without device or mkdir hook. */
+int
+fsmkdir(char *path, int mode)
+{
+       Udev *d;
+       int r;
+
+       trace("fsmkdir(%s, %#o)", path, mode);
+
+       if(fsenter(&r) < 0)
+               return r;
+
+       d = path2dev(path);
+       if(d != nil && d->mkdir != nil)
+               r = d->mkdir(path, mode);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* mkdir: fsmkdir() on the resolved name. */
+int sys_mkdir(char *name, int mode)
+{
+       char *full;
+       int r;
+
+       trace("sys_mkdir(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsmkdir(full, mode);
+       free(full);
+
+       return r;
+}
+
+/* Set access and modification time of path; -EPERM without device or utime hook. */
+int
+fsutime(char *path, int atime, int mtime)
+{
+       Udev *d;
+       int r;
+
+       trace("fsutime(%s, %d, %d)", path, atime, mtime);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->utime != nil)
+               r = d->utime(path, atime, mtime);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* layout sys_utime() reads through its times argument: access time, then modification time */
+struct linux_utimbuf
+{
+       long    atime;
+       long    mtime;
+};
+
+/*
+ * utime: set the times of name from the linux_utimbuf at times, or
+ * set both to the current time when times is nil.
+ */
+int sys_utime(char *name, void *times)
+{
+       struct linux_utimbuf *ub;
+       char *full;
+       long now;
+       int r;
+
+       trace("sys_utime(%s, %p)", name, times);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       ub = times;
+       if(ub == nil){
+               now = time(0);
+               r = fsutime(full, now, now);
+       }else
+               r = fsutime(full, ub->atime, ub->mtime);
+       free(full);
+
+       return r;
+}
+
+/*
+ * utimes: like sys_utime(), but tvp points at two linux_timeval
+ * (access, modification); only their tv_sec parts are used.  A nil
+ * tvp sets both times to the current time.
+ */
+int sys_utimes(char *name, void *tvp)
+{
+       struct linux_timeval *tv;
+       char *full;
+       long now;
+       int r;
+
+       trace("sys_utimes(%s, %p)", name, tvp);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       tv = tvp;
+       if(tv == nil){
+               now = time(0);
+               r = fsutime(full, now, now);
+       }else
+               r = fsutime(full, tv[0].tv_sec, tv[1].tv_sec);
+       free(full);
+
+       return r;
+}
+
+/* Change the mode of path through its device; -EPERM without device or chmod hook. */
+int
+fschmod(char *path, int mode)
+{
+       Udev *d;
+       int r;
+
+       trace("fschmod(%s, %#o)", path, mode);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->chmod != nil)
+               r = d->chmod(path, mode);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* chmod: fschmod() on the resolved name. */
+int sys_chmod(char *name, int mode)
+{
+       char *full;
+       int r;
+
+       trace("sys_chmod(%s, %#o)", name, mode);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fschmod(full, mode);
+       free(full);
+
+       return r;
+}
+
+/* Truncate path to size through its device; -EPERM without device or truncate hook. */
+int
+fstruncate(char *path, vlong size)
+{
+       Udev *d;
+       int r;
+
+       trace("fstruncate(%s, %llx)", path, size);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->truncate != nil)
+               r = d->truncate(path, size);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* truncate: fstruncate() on the resolved name. */
+int sys_truncate(char *name, ulong size)
+{
+       char *full;
+       int r;
+
+       trace("sys_truncate(%s, %lux)", name, size);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fstruncate(full, size);
+       free(full);
+
+       return r;
+}
+
+/*
+ * Remove path through its device; rmdir is passed on to the hook
+ * (sys_rmdir() passes 1, sys_unlink() passes 0).  -EPERM without
+ * device or unlink hook.
+ */
+int
+fsunlink(char *path, int rmdir)
+{
+       Udev *d;
+       int r;
+
+       trace("fsunlink(%s, %d)", path, rmdir);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(path);
+       if(d != nil && d->unlink != nil)
+               r = d->unlink(path, rmdir);
+       else
+               r = -EPERM;
+       fsleave();
+
+       return r;
+}
+
+/* unlink: fsunlink() on the resolved name with rmdir = 0. */
+int sys_unlink(char *name)
+{
+       char *full;
+       int r;
+
+       trace("sys_unlink(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsunlink(full, 0);
+       free(full);
+
+       return r;
+}
+
+/* rmdir: fsunlink() on the resolved name with rmdir = 1. */
+int sys_rmdir(char *name)
+{
+       char *full;
+       int r;
+
+       trace("sys_rmdir(%s)", name);
+
+       full = fsfullpath(name);
+       if(full == nil)
+               return -EFAULT;
+       r = fsunlink(full, 1);
+       free(full);
+
+       return r;
+}
+
+/*
+ * Create the link new that refers to old, using the device of new.
+ * For a hard link (sym == 0) old has to be on the same device,
+ * otherwise -EXDEV; for a symbolic link old is not looked up.
+ * -EPERM when new has no device or the device has no link hook.
+ */
+int
+fslink(char *old, char *new, int sym)
+{
+       Udev *d;
+       int r;
+
+       trace("fslink(%s, %s, %d)", old, new, sym);
+
+       if(fsenter(&r) < 0)
+               return r;
+       d = path2dev(new);
+       if(d == nil || d->link == nil)
+               r = -EPERM;
+       else if(!sym && d != path2dev(old))
+               r = -EXDEV;
+       else
+               r = d->link(old, new, sym);
+       fsleave();
+
+       return r;
+}
+
+/* link: resolve both names and create a hard link with fslink(..., 0). */
+int sys_link(char *old, char *new)
+{
+       char *src, *dst;
+       int r;
+
+       trace("sys_link(%s, %s)", old, new);
+
+       src = fsfullpath(old);
+       if(src == nil)
+               return -EFAULT;
+       dst = fsfullpath(new);
+       if(dst == nil){
+               free(src);
+               return -EFAULT;
+       }
+       r = fslink(src, dst, 0);
+       free(src);
+       free(dst);
+
+       return r;
+}
+
+/*
+ * symlink: only new is resolved; old is handed to fslink(..., 1)
+ * exactly as the caller gave it.
+ */
+int sys_symlink(char *old, char *new)
+{
+       char *dst;
+       int r;
+
+       trace("sys_symlink(%s, %s)", old, new);
+
+       dst = fsfullpath(new);
+       if(dst == nil)
+               return -EFAULT;
+       r = fslink(old, dst, 1);
+       free(dst);
+
+       return r;
+}
+
diff --git a/linux_emul_base/linux b/linux_emul_base/linux
new file mode 100755 (executable)
index 0000000..e262acf
--- /dev/null
@@ -0,0 +1,114 @@
+#!/bin/rc
+
+# environment handed to the emulated program
+arg0=$0
+DISPLAY=:0
+HOME=/tmp
+PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/X11R6/bin:/usr/games
+
+# defaults: x is set by -startx, e is the emulator binary (-e),
+# r is the linux root directory (-r)
+x=''
+e=/bin/linuxemu
+r=/sys/lib/linux
+
+# print a message, prefixed with the script name, on standard error
+fn eprint {
+       echo $arg0: $* >[1=2]
+}
+
+# print the usage line on standard error and exit with status 'usage'
+fn usage {
+       echo usage: $arg0 [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...] >[1=2]
+       exit usage
+}
+
+# extract options
+# -r, -e and -display take a value and are handled here; -u and -g
+# (with their value) and every other flag are collected in $o, which
+# is later placed on the emulator command line
+o=()
+while(~ $1 -*){
+       switch($1){
+       case -h
+               usage
+       case -r
+               shift
+               r=$1
+       case -e
+               shift
+               e=$1
+       case -startx
+               x=1
+       case -display
+               shift
+               DISPLAY=$1
+       case -[ug]
+               o=($o $1 $2)
+               shift
+       case -*
+               o=($o $1)
+       }
+       shift
+}
+
+# a command to run is mandatory
+switch($#*){
+case 0
+       usage
+}
+
+# -startx: start an X server on a free display, re-run this script
+# against that display, then kill the server's note group
+if(! ~ $x ''){
+       # find free local display
+       # (numbers that show up exactly once in 0..32 plus the suffixes of
+       # the existing /srv/UD.X* files; the first such number is used)
+       d=(`{{seq 0 32; {echo /srv/UD.X* | sed 's!/srv/UD\.X!!g; s!\ !\
+       !g; s!\*!!g;'}} | sort | uniq -c | awk '/^\ *1\ /{print $2}'})
+       d=$d(1)
+       X11/equis -ac :$d &
+       k=/proc/$apid/notepg
+       $arg0 -e $e -r $r -display :$d $o $*
+       {echo kill >$k} >/dev/null >[2=1]
+       exit
+}
+
+# rewrite the path so it will be accessible after binding $r to /:
+# an absolute path that exists now but has no counterpart below $r is
+# printed with a /9 prefix (the original / is bound to $r/9 further
+# down); anything else is printed unchanged
+fn ninepath {
+       if(~ $1 /* && test -e $1 && ! test -e $r/$1){
+               echo /9$1
+       }
+       if not {
+               echo $1
+       }
+}
+
+# make $r and $e absolute (relative to the current directory $w) and
+# check that they are a directory and an executable respectively
+w=`{pwd}
+r=`{cleanname -d $w $r}
+if(! test -d $r){
+       eprint bad rootpath: $r
+       exit rootpath
+}
+e=`{cleanname -d $w $e}
+if(! test -x $e){
+       eprint bad emubin: $e
+       exit emubin
+}
+# build the command line in $a: emulator, passed-through options, then
+# the command and its arguments, each rewritten with ninepath;
+# $p is the directory to change to before exec
+e=`{ninepath $e}
+p=`{ninepath $w}
+a=($e $o)
+while(! ~ $#* 0){
+       x=`{ninepath $1}
+       a=($a $"x)
+       shift
+}
+
+# bind the required plan9 stuff
+# (done after rfork n, so the binds below apply to this script's own
+# name space)
+rfork n
+mntgen $r
+bind -a '#P' /dev
+# make the listed plan9 directories visible below $r: /tmp, /env and
+# /srv with -c, the original / as $r/9, the rest as plain binds
+for(d in /9 /dev /proc /net /env /srv /n /mnt /tmp){
+       t=$r^$d
+       switch($d){
+       case /tmp /env /srv
+               bind -c $d $t
+       case /9
+               bind / $t
+       case *
+               bind $d $t
+       }
+}
+
+# change root and run the emulator
+# ($p is the ninepath-rewritten starting directory)
+builtin cd /
+bind $r /
+builtin cd $p
+exec $a
diff --git a/linux_emul_base/linux.h b/linux_emul_base/linux.h
new file mode 100644 (file)
index 0000000..99bf1de
--- /dev/null
@@ -0,0 +1,352 @@
+/* open flags of the emulated system (octal); O_ACCMODE masks the access mode bits */
+enum {
+       O_ACCMODE       = 0003,
+       O_RDONLY        = 00,
+       O_WRONLY        = 01,
+       O_RDWR          = 02,
+       O_CREAT         = 0100,
+       O_EXCL          = 0200,
+       O_NOCTTY        = 0400,
+       O_TRUNC         = 01000,
+       O_APPEND        = 02000,
+       O_NONBLOCK      = 04000,
+       O_NDELAY        = 04000,
+       O_SYNC          = 010000,
+       FASYNC          = 020000,
+};
+
+/* file descriptor flag; fs.c passes it to newfd() from sys_open() */
+enum {
+       FD_CLOEXEC = 1,
+};
+
+/*
+ * fcntl command numbers.  The entries up to F_GETSIG are numbered
+ * consecutively from 0; F_SETSIG (10) has to be present so that
+ * F_GETSIG gets its Linux value 11 instead of 10.
+ */
+enum {
+       F_DUPFD         = 0,
+       F_GETFD,
+       F_SETFD,
+       F_GETFL,
+       F_SETFL,
+       F_GETLK,
+       F_SETLK,
+       F_SETLKW,
+       F_SETOWN,
+       F_GETOWN,
+       F_SETSIG,
+       F_GETSIG,
+       F_GETLK64       = 12,
+       F_SETLK64       = 13,
+};
+
+/* file mode bits (octal): S_IFMT masks the file type, S_IF* are the types, S_IS* the setuid/setgid/sticky bits */
+enum {
+       S_IFMT                  = 0170000,
+       S_IFSOCK                = 0140000,
+       S_IFLNK                 = 0120000,
+       S_IFREG                 = 0100000,
+       S_IFBLK                 = 0060000,
+       S_IFDIR                 = 0040000,
+       S_IFCHR                 = 0020000,
+       S_IFIFO                 = 0010000,
+       S_ISUID                 = 0004000,
+       S_ISGID                 = 0002000,
+       S_ISVTX                 = 0001000,
+};
+
+/* memory mapping constants: PROT_* protection bits, MAP_* mapping flags (MAP_TYPE masks the sharing type), MREMAP_* remap flags */
+enum {
+       PROT_READ               = 0x01,
+       PROT_WRITE              = 0x02,
+       PROT_EXEC               = 0x04,
+       PROT_SEM                = 0x08,
+       PROT_NONE               = 0x00,
+       PROT_GROWSDOWN  = 0x01000000,
+       PROT_GROWSUP    = 0x02000000,
+       MAP_SHARED              = 0x01,
+       MAP_PRIVATE             = 0x02,
+       MAP_TYPE                = 0x0f,
+       MAP_FIXED               = 0x10,
+       MAP_ANONYMOUS   = 0x20,
+
+       MREMAP_MAYMOVE  = 1,
+       MREMAP_FIXED    = 2,
+};
+
+/* clone flag bits; all start at 0x100, leaving the low byte of the flags word unused here */
+enum {
+       CLONE_VM                                = 0x00000100,
+       CLONE_FS                                = 0x00000200,
+       CLONE_FILES                             = 0x00000400,
+       CLONE_SIGHAND                   = 0x00000800,
+       CLONE_PTRACE                    = 0x00002000,
+       CLONE_VFORK                             = 0x00004000,
+       CLONE_PARENT                    = 0x00008000,
+       CLONE_THREAD                    = 0x00010000,
+       CLONE_NEWNS                             = 0x00020000,
+       CLONE_SYSVSEM                   = 0x00040000,
+       CLONE_SETTLS                    = 0x00080000,
+       CLONE_PARENT_SETTID             = 0x00100000,
+       CLONE_CHILD_CLEARTID    = 0x00200000,
+       CLONE_DETACHED                  = 0x00400000,
+       CLONE_UNTRACED                  = 0x00800000,
+       CLONE_CHILD_SETTID              = 0x01000000,
+       CLONE_STOPPED                   = 0x02000000,
+};
+
+/*
+ * error numbers.  The emulator's functions return them negated
+ * (e.g. -ENOENT, -EFAULT in fs.c).  41 and 58 are not assigned here;
+ * EMAX is one above the last number defined.
+ */
+enum {
+       EPERM                   = 1,
+       ENOENT                  = 2,
+       ESRCH                   = 3,
+       EINTR                   = 4,
+       EIO                             = 5,
+       ENXIO                   = 6,
+       E2BIG                   = 7,
+       ENOEXEC                 = 8,
+       EBADF                   = 9,
+       ECHILD                  = 10,
+       EAGAIN                  = 11,
+       ENOMEM                  = 12,
+       EACCES                  = 13,
+       EFAULT                  = 14,
+       ENOTBLK                 = 15,
+       EBUSY                   = 16,
+       EEXIST                  = 17,
+       EXDEV                   = 18,
+       ENODEV                  = 19,
+       ENOTDIR                 = 20,
+       EISDIR                  = 21,
+       EINVAL                  = 22,
+       ENFILE                  = 23,
+       EMFILE                  = 24,
+       ENOTTY                  = 25,
+       ETXTBSY                 = 26,
+       EFBIG                   = 27,
+       ENOSPC                  = 28,
+       ESPIPE                  = 29,
+       EROFS                   = 30,
+       EMLINK                  = 31,
+       EPIPE                   = 32,
+       EDOM                    = 33,
+       ERANGE                  = 34,
+       EDEADLK                 = 35,
+       ENAMETOOLONG    = 36,
+       ENOLCK                  = 37,
+       ENOSYS                  = 38,
+       ENOTEMPTY               = 39,
+       ELOOP                   = 40,
+       ENOMSG                  = 42,
+       EIDRM                   = 43,
+       ECHRNG                  = 44,
+       EL2NSYNC                = 45,
+       EL3HLT                  = 46,
+       EL3RST                  = 47,
+       ELNRNG                  = 48,
+       EUNATCH                 = 49,
+       ENOCSI                  = 50,
+       EL2HLT                  = 51,
+       EBADE                   = 52,
+       EBADR                   = 53,
+       EXFULL                  = 54,
+       ENOANO                  = 55,
+       EBADRQC                 = 56,
+       EBADSLT                 = 57,
+       EBFONT                  = 59,
+       ENOSTR                  = 60,
+       ENODATA                 = 61,
+       ETIME                   = 62,
+       ENOSR                   = 63,
+       ENONET                  = 64,
+       ENOPKG                  = 65,
+       EREMOTE                 = 66,
+       ENOLINK                 = 67,
+       EADV                    = 68,
+       ESRMNT                  = 69,
+       ECOMM                   = 70,
+       EPROTO                  = 71,
+       EMULTIHOP               = 72,
+       EDOTDOT                 = 73,
+       EBADMSG                 = 74,
+       EOVERFLOW               = 75,
+       ENOTUNIQ                = 76,
+       EBADFD                  = 77,
+       EREMCHG                 = 78,
+       ELIBACC                 = 79,
+       ELIBBAD                 = 80,
+       ELIBSCN                 = 81,
+       ELIBMAX                 = 82,
+       ELIBEXEC                = 83,
+       EILSEQ                  = 84,
+       ERESTART                = 85,
+       ESTRPIPE                = 86,
+       EUSERS                  = 87,
+       ENOTSOCK                = 88,
+       EDESTADDRREQ    = 89,
+       EMSGSIZE                = 90,
+       EPROTOTYPE              = 91,
+       ENOPROTOOPT             = 92,
+       EPROTONOSUPPORT = 93,
+       ESOCKTNOSUPPORT = 94,
+       EOPNOTSUPP              = 95,
+       EPFNOSUPPORT    = 96,
+       EAFNOSUPPORT    = 97,
+       EADDRINUSE              = 98,
+       EADDRNOTAVAIL   = 99,
+       ENETDOWN                = 100,
+       ENETUNREACH             = 101,
+       ENETRESET               = 102,
+       ECONNABORTED    = 103,
+       ECONNRESET              = 104,
+       ENOBUFS                 = 105,
+       EISCONN                 = 106,
+       ENOTCONN                = 107,
+       ESHUTDOWN               = 108,
+       ETOOMANYREFS    = 109,
+       ETIMEDOUT               = 110,
+       ECONNREFUSED    = 111,
+       EHOSTDOWN               = 112,
+       EHOSTUNREACH    = 113,
+       EALREADY                = 114,
+       EINPROGRESS             = 115,
+       ESTALE                  = 116,
+       EUCLEAN                 = 117,
+       ENOTNAM                 = 118,
+       ENAVAIL                 = 119,
+       EISNAM                  = 120,
+       EREMOTEIO               = 121,
+       EDQUOT                  = 122,
+       ENOMEDIUM               = 123,
+       EMEDIUMTYPE             = 124,
+       EMAX                    = 125,
+};
+
+/* alternative names for error numbers defined above */
+#define EWOULDBLOCK    EAGAIN
+#define EDEADLOCK      EDEADLK
+#define ENOATTR                ENODATA
+
+/*
+ * poll/epoll event bits.  POLLREMOVE is 0x1000 (bit 12) on Linux,
+ * directly below POLLRDHUP (0x2000); bit 11 is not a poll event.
+ */
+enum {
+       POLLIN                  = (1<<0),
+       POLLPRI                 = (1<<1),
+       POLLOUT                 = (1<<2),
+       POLLERR                 = (1<<3),
+       POLLHUP                 = (1<<4),
+       POLLNVAL                = (1<<5),
+       POLLRDNORM              = (1<<6),
+       POLLRDBAND              = (1<<7),
+       POLLWRNORM              = (1<<8),
+       POLLWRBAND              = (1<<9),
+       POLLMSG                 = (1<<10),
+       POLLREMOVE              = (1<<12),
+       POLLRDHUP               = 0x2000,
+       EPOLLONESHOT    = (1<<30),
+       EPOLLET                 = (1<<31),
+};
+
+/*
+ * signal numbers.  Several names share a number (SIGABRT/SIGIOT,
+ * SIGIO/SIGPOLL/SIGLOST).  SIGRT1..SIGRT8 follow the numbers up to
+ * SIGSYS, and SIGMAX is one above the last number defined.
+ */
+enum {
+       SIGHUP          = 1,
+       SIGINT          = 2,
+       SIGQUIT         = 3,
+       SIGILL          = 4,
+       SIGTRAP         = 5,
+       SIGABRT         = 6,
+       SIGIOT          = 6,
+       SIGBUS          = 7,
+       SIGFPE          = 8,
+       SIGKILL         = 9,
+       SIGUSR1         = 10,
+       SIGSEGV         = 11,
+       SIGUSR2         = 12,
+       SIGPIPE         = 13,
+       SIGALRM         = 14,
+       SIGTERM         = 15,
+       SIGSTKFLT       = 16,
+       SIGCHLD         = 17,
+       SIGCONT         = 18,
+       SIGSTOP         = 19,
+       SIGTSTP         = 20,
+       SIGTTIN         = 21,
+       SIGTTOU         = 22,
+       SIGURG          = 23,
+       SIGXCPU         = 24,
+       SIGXFSZ         = 25,
+       SIGVTALRM       = 26,
+       SIGPROF         = 27,
+       SIGWINCH        = 28,
+       SIGIO           = 29,
+       SIGPOLL         = 29,
+       SIGLOST         = 29,
+       SIGPWR          = 30,
+       SIGSYS          = 31,
+
+       SIGRT1          = 32,
+       SIGRT2          = 33,
+       SIGRT3          = 34,
+       SIGRT4          = 35,
+       SIGRT5          = 36,
+       SIGRT6          = 37,
+       SIGRT7          = 38,
+       SIGRT8          = 39,
+
+       SIGMAX          = 40,
+};
+
+/* SI_* codes: zero for SI_USER, negative for the others */
+enum {
+       SI_USER                 = 0,
+       SI_QUEUE                = -1,
+       SI_TIMER                = -2,
+       SI_MESGQ                = -3,
+       SI_ASYNCIO              = -4,
+       SI_SIGIO                = -5,
+       SI_TKILL                = -6,
+       SI_DETHREAD             = -7,
+};
+
+/*
+ * ILL_* codes, numbered consecutively from 1.
+ * NOTE(review): Linux spells the fifth one ILL_PRVOPC; ILL_PROVOPC
+ * looks like a typo, but check for users before renaming it.
+ */
+enum {
+       ILL_ILLOPC              = 1,
+       ILL_ILLOPN,
+       ILL_ILLADR,
+       ILL_ILLTRP,
+       ILL_PROVOPC,
+       ILL_PRVREG,
+       ILL_COPROC,
+       ILL_BADSTK,
+};
+
+/* FPE_* codes, numbered consecutively from 1 */
+enum {
+       FPE_INTDIV              = 1,
+       FPE_INTOVF,
+       FPE_FLTDIV,
+       FPE_FLTOVF,
+       FPE_FLTUND,
+       FPE_FLTRES,
+       FPE_FLTINV,
+       FPE_FLTSUB, 
+};
+
+/* wait option bits; WUNTRACED and WSTOPPED are the same bit */
+enum {
+       WNOHANG =0x00000001,
+       WUNTRACED       =0x00000002,
+       WSTOPPED        =0x00000002,
+       WEXITED         =0x00000004,
+       WCONTINUED      =0x00000008,
+       WNOWAIT =0x01000000,
+       WNOTHREAD       =0x20000000,
+       WALL            =0x40000000,
+       WCLONE          =0x80000000,
+};
+
+/* seconds plus microseconds; sys_utimes() in fs.c reads an array of two of these */
+struct linux_timeval
+{
+       long    tv_sec;
+       long    tv_usec;
+};
+
+/* seconds plus nanoseconds */
+struct linux_timespec
+{
+       long    tv_sec;
+       long    tv_nsec;
+};
+
+/*
+ * Segment descriptor request as laid out by Linux's struct user_desc.
+ * All bit-fields are unsigned there; a signed 2-bit contents field
+ * would read the values 2 and 3 back as -2 and -1.
+ */
+struct linux_user_desc {
+       uint  entry_number;
+       ulong base_addr;
+       uint  limit;
+       uint  seg_32bit:1;
+       uint  contents:2;
+       uint  read_exec_only:1;
+       uint  limit_in_pages:1;
+       uint  seg_not_present:1;
+       uint  useable:1;
+};
diff --git a/linux_emul_base/linuxcall.c b/linux_emul_base/linuxcall.c
new file mode 100644 (file)
index 0000000..26e4bbe
--- /dev/null
@@ -0,0 +1,79 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Linuxcall Linuxcall;
+
+/*
+ * One system call table entry: the name used for tracing, the
+ * implementing function, and the stub that fetches the arguments
+ * from the registers and calls func.
+ */
+struct Linuxcall
+{
+       char    *name;
+       void    *func;
+       int     (*stub)(Ureg *, void *);
+};
+
+/*
+ * Argument marshalling stubs: fcallN treats func as an int function
+ * of N int arguments and calls it with the first N of the saved
+ * registers bx, cx, dx, si, di, bp.
+ */
+static int
+fcall0(Ureg *, void *func)
+{
+       int (*fn)(void);
+
+       fn = (int (*)(void))func;
+       return fn();
+}
+
+static int
+fcall1(Ureg *u, void *func)
+{
+       int (*fn)(int);
+
+       fn = (int (*)(int))func;
+       return fn(u->bx);
+}
+
+static int
+fcall2(Ureg *u, void *func)
+{
+       int (*fn)(int, int);
+
+       fn = (int (*)(int, int))func;
+       return fn(u->bx, u->cx);
+}
+
+static int
+fcall3(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int);
+
+       fn = (int (*)(int, int, int))func;
+       return fn(u->bx, u->cx, u->dx);
+}
+
+static int
+fcall4(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int);
+
+       fn = (int (*)(int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si);
+}
+
+static int
+fcall5(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int, int);
+
+       fn = (int (*)(int, int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si, u->di);
+}
+
+static int
+fcall6(Ureg *u, void *func)
+{
+       int (*fn)(int, int, int, int, int, int);
+
+       fn = (int (*)(int, int, int, int, int, int))func;
+       return fn(u->bx, u->cx, u->dx, u->si, u->di, u->bp);
+}
+
+#include "linuxcalltab.out"
+
+/* entry used by linuxcall() for system call numbers beyond the table: calls sys_nosys with no arguments */
+static Linuxcall nocall = {
+       .name = "nosys",
+       .func = sys_nosys,
+       .stub = fcall0,
+};
+
+/*
+ * Finish the current system call with result errno.  Normally the
+ * result goes into ax, pc is moved past the two byte INT 0x80 and
+ * the syscall bookkeeping is cleared.  For -ERESTART the registers
+ * are left alone and the call's name is recorded in
+ * restart->syscall instead.
+ */
+static void
+linuxret(int errno)
+{
+       Uproc *proc;
+       Ureg *regs;
+
+       proc = current;
+       regs = proc->ureg;
+       trace("linuxret(%lux: %s, %lux: %E)", regs->pc, proc->syscall, (ulong)errno, errno);
+       if(errno != -ERESTART){
+               regs->ax = (ulong)errno;
+               regs->pc += 2;
+               proc->restart->syscall = nil;
+               proc->syscall = nil;
+               return;
+       }
+       proc->restart->syscall = proc->syscall;
+}
+
+
+/*
+ * Dispatch the system call the current process is stopped at.
+ * Returns -1 when pc does not point at an INT 0x80 instruction.
+ * Otherwise the call number in ax selects a table entry, the entry's
+ * stub runs the implementation with the register arguments, the
+ * result is delivered through linuxret() and 0 is returned.
+ */
+int
+linuxcall(void)
+{
+       Uproc *p;
+       Ureg *u;
+       Linuxcall *c;
+       uchar *pc;
+
+       p = current;
+       u = p->ureg;
+
+       /* CD 80 = INT 0x80 */
+       pc = (uchar*)u->pc;
+       if(pc[0] != 0xcd || pc[1] != 0x80){
+               trace("linuxcall(): not a syscall pc=%lux sp=%lux", u->pc, u->sp);
+               return -1;
+       }
+       /*
+        * Check the number before indexing: the address of
+        * linuxcalltab[ax] for a huge ax can wrap around and compare as
+        * inside the table.  An entry without a stub also falls back to
+        * nocall rather than being called through a nil pointer.
+        */
+       c = &nocall;
+       if((ulong)u->ax < (ulong)nelem(linuxcalltab) && linuxcalltab[u->ax].stub != nil)
+               c = &linuxcalltab[u->ax];
+       p->syscall = c->name;
+       p->sysret = linuxret;
+       if(p->restart->syscall)
+               trace("linuxcall(): restarting %s", p->syscall);
+       linuxret(c->stub(u, c->func));
+       return 0;
+}
diff --git a/linux_emul_base/linuxcalltab b/linux_emul_base/linuxcalltab
new file mode 100644 (file)
index 0000000..031dbf0
--- /dev/null
@@ -0,0 +1,286 @@
+0      0    restart_syscall                sys_nosys
+1      1    exit                           sys_exit
+2      0    fork                           sys_fork
+3      3    read                           sys_read
+4      3    write                          sys_write
+5      3    open                           sys_open
+6      1    close                          sys_close
+7      3    waitpid                        sys_waitpid
+8      2    creat                          sys_creat
+9      2    link                           sys_link
+10     1    unlink                         sys_unlink
+11     3    execve                         sys_execve
+12     1    chdir                          sys_chdir
+13     1    time                           sys_time
+14     0    mknod                          sys_nosys
+15     2    chmod                          sys_chmod
+16     3    lchown                         sys_lchown
+17     0    break                          sys_nosys
+18     0    oldstat                        sys_nosys
+19     3    lseek                          sys_lseek
+20     0    getpid                         sys_getpid
+21     0    mount                          sys_nosys
+22     0    umount                         sys_nosys
+23     1    setuid                         sys_setuid
+24     0    getuid                         sys_getuid
+25     0    stime                          sys_nosys
+26     0    ptrace                         sys_nosys
+27     1    alarm                          sys_alarm
+28     0    oldfstat                       sys_nosys
+29     0    pause                          sys_nosys
+30     2    utime                          sys_utime
+31     0    stty                           sys_nosys
+32     0    gtty                           sys_nosys
+33     2    access                         sys_access
+34     0    nice                           sys_nosys
+35     0    ftime                          sys_nosys
+36     0    sync                           sys_nosys
+37     2    kill                           sys_kill
+38     2    rename                         sys_rename
+39     2    mkdir                          sys_mkdir
+40     1    rmdir                          sys_rmdir
+41     1    dup                            sys_dup
+42     1    pipe                           sys_pipe
+43     1    times                          sys_times
+44     0    prof                           sys_nosys
+45     1    brk                            sys_brk
+46     1    setgid                         sys_setgid
+47     0    getgid                         sys_getgid
+48     0    signal                         sys_nosys
+49     0    geteuid                        sys_nosys
+50     0    getegid                        sys_nosys
+51     0    acct                           sys_nosys
+52     0    umount2                        sys_nosys
+53     0    lock                           sys_nosys
+54     3    ioctl                          sys_ioctl
+55     3    fcntl                          sys_fcntl
+56     0    mpx                            sys_nosys
+57     2    setpgid                        sys_setpgid
+58     0    ulimit                         sys_nosys
+59     0    oldolduname                    sys_nosys
+60     1    umask                          sys_umask
+61     1    chroot                         sys_chroot
+62     0    ustat                          sys_nosys
+63     2    dup2                           sys_dup2
+64     0    getppid                        sys_getppid
+65     0    getpgrp                        sys_getpgrp
+66     0    setsid                         sys_setsid
+67     0    sigaction                      sys_nosys
+68     0    sgetmask                       sys_nosys
+69     0    ssetmask                       sys_nosys
+70     0    setreuid                       sys_nosys
+71     0    setregid                       sys_nosys
+72     0    sigsuspend                     sys_nosys
+73     0    sigpending                     sys_nosys
+74     0    sethostname                    sys_nosys
+75     2    setrlimit                      sys_setrlimit
+76     2    getrlimit                      sys_getrlimit
+77     0    getrusage                      sys_nosys
+78     2    gettimeofday                   sys_gettimeofday
+79     0    settimeofday                   sys_nosys
+80     0    getgroups                      sys_nosys
+81     0    setgroups                      sys_nosys
+82     0    select                         sys_nosys
+83     2    symlink                        sys_symlink
+84     0    oldlstat                       sys_nosys
+85     3    readlink                       sys_readlink
+86     0    uselib                         sys_nosys
+87     0    swapon                         sys_nosys
+88     0    reboot                         sys_nosys
+89     0    readdir                        sys_nosys
+90     1    mmap                           sys_linux_mmap
+91     2    munmap                         sys_munmap
+92     2    truncate                       sys_truncate
+93     2    ftruncate                      sys_ftruncate
+94     2    fchmod                         sys_fchmod
+95     3    fchown                         sys_fchown
+96     0    getpriority                    sys_nosys
+97     0    setpriority                    sys_nosys
+98     0    profil                         sys_nosys
+99     2    statfs                         sys_statfs
+100    0    fstatfs                        sys_nosys
+101    0    ioperm                         sys_nosys
+102    2    socketcall                     sys_linux_socketcall
+103    0    syslog                         sys_nosys
+104    3    setitimer                      sys_setitimer
+105    2    getitimer                      sys_getitimer
+106    2    stat                           sys_linux_stat
+107    2    lstat                          sys_linux_lstat
+108    2    fstat                          sys_linux_fstat
+109    0    olduname                       sys_nosys
+110    0    iopl                           sys_nosys
+111    0    vhangup                        sys_nosys
+112    0    idle                           sys_nosys
+113    0    vm86old                        sys_nosys
+114    4    wait4                          sys_wait4
+115    0    swapoff                        sys_nosys
+116    0    sysinfo                        sys_nosys
+117    0    ipc                            sys_nosys
+118    1    fsync                          sys_fsync
+119    0    sigreturn                      sys_sigreturn
+120    5    clone                          sys_linux_clone
+121    0    setdomainname                  sys_nosys
+122    1    uname                          sys_uname
+123    3    modify_ldt                     sys_modify_ldt
+124    0    adjtimex                       sys_nosys
+125    3    mprotect                       sys_mprotect
+126    0    sigprocmask                    sys_nosys
+127    0    create_module                  sys_nosys
+128    0    init_module                    sys_nosys
+129    0    delete_module                  sys_nosys
+130    0    get_kernel_syms                sys_nosys
+131    0    quotactl                       sys_nosys
+132    1    getpgid                        sys_getpgid
+133    1    fchdir                         sys_fchdir
+134    0    bdflush                        sys_nosys
+135    0    sysfs                          sys_nosys
+136    1    personality                    sys_personality
+137    0    afs_syscall                    sys_nosys
+138    0    setfsuid                       sys_nosys
+139    0    setfsgid                       sys_nosys
+140    5    _llseek                        sys_llseek
+141    3    getdents                       sys_linux_getdents
+142    5    _newselect                     sys_select
+143    0    flock                          sys_flock
+144    3    msync                          sys_msync
+145    3    readv                          sys_readv
+146    3    writev                         sys_writev
+147    1    getsid                         sys_getsid
+148    0    fdatasync                      sys_nosys
+149    0    _sysctl                        sys_nosys
+150    0    mlock                          sys_nosys
+151    0    munlock                        sys_nosys
+152    0    mlockall                       sys_nosys
+153    0    munlockall                     sys_nosys
+154    2    sched_setparam                 sys_sched_setparam
+155    2    sched_getparam                 sys_sched_getparam
+156    3    sched_setscheduler             sys_sched_setscheduler
+157    1    sched_getscheduler             sys_sched_getscheduler
+158    0    sched_yield                    sys_sched_yield
+159    0    sched_get_priority_max         sys_nosys
+160    0    sched_get_priority_min         sys_nosys
+161    0    sched_rr_get_interval          sys_nosys
+162    2    nanosleep                      sys_nanosleep
+163    5    mremap                         sys_mremap
+164    3    setresuid                      sys_setresuid
+165    3    getresuid                      sys_getresuid
+166    0    vm86                           sys_nosys
+167    0    query_module                   sys_nosys
+168    3    poll                           sys_poll
+169    0    nfsservctl                     sys_nosys
+170    3    setresgid                      sys_setresgid
+171    3    getresgid                      sys_getresgid
+172    0    prctl                          sys_nosys
+173    0    rt_sigreturn                   sys_rt_sigreturn
+174    4    rt_sigaction                   sys_rt_sigaction
+175    4    rt_sigprocmask                 sys_rt_sigprocmask
+176    2    rt_sigpending                  sys_rt_sigpending
+177    0    rt_sigtimedwait                sys_nosys
+178    3    rt_sigqueueinfo                sys_rt_sigqueueinfo
+179    2    rt_sigsuspend                  sys_rt_sigsuspend
+180    4    pread64                        sys_pread64
+181    4    pwrite64                       sys_pwrite64
+182    3    chown                          sys_chown
+183    2    getcwd                         sys_getcwd
+184    0    capget                         sys_nosys
+185    0    capset                         sys_nosys
+186    2    sigaltstack                    sys_sigaltstack
+187    0    sendfile                       sys_nosys
+188    0    getpmsg                        sys_nosys
+189    0    putpmsg                        sys_nosys
+190    0    vfork                          sys_vfork
+191    0    ugetrlimit                     sys_nosys
+192    6    mmap2                          sys_mmap
+193    2    truncate64                     sys_truncate
+194    2    ftruncate64                    sys_ftruncate
+195    2    stat64                         sys_linux_stat64
+196    2    lstat64                        sys_linux_lstat64
+197    2    fstat64                        sys_linux_fstat64
+198    3    lchown32                       sys_lchown
+199    0    getuid32                       sys_getuid
+200    0    getgid32                       sys_getgid
+201    0    geteuid32                      sys_getuid
+202    0    getegid32                      sys_getgid
+203    2    setreuid32                     sys_setreuid
+204    2    setregid32                     sys_setregid
+205    2    getgroups32                    sys_getgroups
+206    2    setgroups32                    sys_setgroups
+207    3    fchown32                       sys_fchown
+208    3    setresuid32                    sys_setresuid
+209    3    getresuid32                    sys_getresuid
+210    3    setresgid32                    sys_setresgid
+211    3    getresgid32                    sys_getresgid
+212    3    chown32                        sys_chown
+213    1    setuid32                       sys_setuid
+214    1    setgid32                       sys_setgid
+215    0    setfsuid32                     sys_nosys
+216    0    setfsgid32                     sys_nosys
+217    0    pivot_root                     sys_nosys
+218    0    mincore                        sys_nosys
+219    0    madvise                        sys_nosys
+220    3    getdents64                     sys_linux_getdents64
+221    3    fcntl64                        sys_fcntl
+224    0    gettid                         sys_gettid
+225    0    readahead                      sys_nosys
+226    5    setxattr                       sys_setxattr
+227    5    lsetxattr                      sys_lsetxattr
+228    5    fsetxattr                      sys_fsetxattr
+229    4    getxattr                       sys_getxattr
+230    4    lgetxattr                      sys_lgetxattr
+231    4    fgetxattr                      sys_fgetxattr
+232    0    listxattr                      sys_nosys
+233    0    llistxattr                     sys_nosys
+234    0    flistxattr                     sys_nosys
+235    0    removexattr                    sys_nosys
+236    0    lremovexattr                   sys_nosys
+237    0    fremovexattr                   sys_nosys
+238    2    tkill                          sys_tkill
+239    0    sendfile64                     sys_nosys
+240    6    futex                          sys_futex
+241    0    sched_setaffinity              sys_nosys
+242    0    sched_getaffinity              sys_nosys
+243    1    set_thread_area                sys_set_thread_area
+244    1    get_thread_area                sys_get_thread_area
+245    0    io_setup                       sys_nosys
+246    0    io_destroy                     sys_nosys
+247    0    io_getevents                   sys_nosys
+248    0    io_submit                      sys_nosys
+249    0    io_cancel                      sys_nosys
+250    0    fadvise64                      sys_nosys
+252    1    exit_group                     sys_exit_group
+253    0    lookup_dcookie                 sys_nosys
+254    0    epoll_create                   sys_nosys
+255    0    epoll_ctl                      sys_nosys
+256    0    epoll_wait                     sys_nosys
+257    0    remap_file_pages               sys_nosys
+258    1    set_tid_address                sys_set_tid_address
+259    0    timer_create                   sys_nosys
+260    0    timer_settime                  sys_nosys
+261    0    timer_gettime                  sys_nosys
+262    0    timer_getoverrun               sys_nosys
+263    0    timer_delete                   sys_nosys
+264    0    clock_settime                  sys_nosys
+265    2    clock_gettime                  sys_clock_gettime
+266    0    clock_getres                   sys_nosys
+267    0    clock_nanosleep                sys_nosys
+268    0    statfs64                       sys_nosys
+269    0    fstatfs64                      sys_nosys
+270    0    tgkill                         sys_tgkill
+271    2    utimes                         sys_utimes
+272    0    fadvise64_64                   sys_nosys
+273    0    vserver                        sys_nosys
+274    0    mbind                          sys_nosys
+275    0    get_mempolicy                  sys_nosys
+276    0    set_mempolicy                  sys_nosys
+277    0    mq_open                        sys_nosys
+278    0    mq_unlink                      sys_nosys
+279    0    mq_timedsend                   sys_nosys
+280    0    mq_timedreceive                sys_nosys
+281    0    mq_notify                      sys_nosys
+282    0    mq_getsetattr                  sys_nosys
+283    0    sys_kexec_load                 sys_nosys
+284    0    waitid                         sys_nosys
+285    0    setaltroot                     sys_nosys
+286    0    add_key                        sys_nosys
+287    0    request_key                    sys_nosys
+288    0    keyctl                         sys_nosys
diff --git a/linux_emul_base/linuxcalltab.awk b/linux_emul_base/linuxcalltab.awk
new file mode 100755 (executable)
index 0000000..d750cdb
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/awk -f
+#
+# Generate the C initializer of linuxcalltab[] from the syscall table.
+# Input lines: <number> <argcount> <name> <function>
+# Lines starting with # are skipped.  Gaps in the numbering are filled
+# with sys_nosys entries named nosys<number>.  Numbers must be ascending;
+# otherwise an error line is printed, no table is emitted and the exit
+# status is 1.
+#
+BEGIN {
+       nsys = 0
+       broken = 0
+}
+
+/^#/ {
+       next
+}
+
+{
+       i=$1
+       if(nsys > i){
+               print "BROKEN TABLE: "nsys" > "i
+               # exit still runs END; remember the failure so END emits nothing
+               broken = 1
+               exit 1
+       }
+       while(nsys < i){
+               sysarg[nsys] = 0
+               sysnam[nsys] = "nosys"nsys
+               sysfun[nsys] = "sys_nosys"
+               nsys++
+       }
+       sysarg[nsys] = $2
+       sysnam[nsys] = $3
+       sysfun[nsys] = $4
+       nsys++
+}
+
+END {
+       if(broken)
+               exit 1
+       print "static Linuxcall linuxcalltab[] = {"
+       for(i=0; i<nsys; i++){
+               print " {       /* "i" */"
+               print "         .name = \""sysnam[i]"\","
+               print "         .func = "sysfun[i]","
+               print "         .stub = fcall"sysarg[i]","
+               print " },"
+       }
+       print "};"
+       print ""
+}
diff --git a/linux_emul_base/main.c b/linux_emul_base/main.c
new file mode 100644 (file)
index 0000000..2472e6b
--- /dev/null
@@ -0,0 +1,259 @@
+#include <u.h>
+#include <libc.h>
+#include <tos.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* terminate the process with a nil exit status */
+static void
+die(void)
+{
+       exits(nil);
+}
+
+/*
+ * Build a nil-terminated "name=value" vector from the entries of /env.
+ * Entries named fn#*, timezone and 0, and entries whose value cannot be
+ * read, are left out.  Returns nil when /env cannot be opened or read.
+ * The vector and its strings are allocated with kmalloc/ksmprint.
+ */
+static char**
+readenv(void)
+{
+       char **env;
+       int fd, n, i, c;
+       Dir *d;
+
+       if((fd = open("/env", OREAD)) < 0)
+               return nil;
+       d = nil;
+       n = dirreadall(fd, &d);
+       close(fd);
+       if(n < 0){
+               /* a negative count must not be used to size or index env */
+               free(d);
+               return nil;
+       }
+       env = kmalloc(sizeof(env[0]) * (n + 1));
+       c = 0;
+       for(i=0; i<n; i++){
+               char *v;
+               char *k;
+
+               k = d[i].name;
+
+               // filter out some stuff...
+               if(strncmp(k, "fn#", 3) == 0)
+                       continue;
+               if(strcmp(k, "timezone") == 0)
+                       continue;
+               if(strcmp(k, "0")==0)
+                       continue;
+
+               if((v = getenv(k)) == nil)
+                       continue;
+               env[c] = ksmprint("%s=%s", k, v);
+               /* release the value before testing the result so it cannot leak */
+               free(v);
+               if(env[c] == nil)
+                       continue;
+
+               c++;
+       }
+       env[c] = 0;
+
+       free(d);
+
+       return env;
+}
+
+/* call record handed from onstack() to its continuation on the target stack */
+struct onstackargs
+{
+       long            *stk;           /* base of the stack to run on */
+       void            *arg;           /* argument passed to func */
+       int             (*func)(void *);        /* function to run */
+       int             ret;            /* result of func, filled in on the target stack */
+       jmp_buf jmp;                    /* continuation of the caller on the original stack */
+};
+
+/*
+ * Run func(arg) on the stack stk (KSTACK bytes) and return its result.
+ * If the caller is already executing within stk, func is called directly.
+ * Otherwise a jmp_buf is captured, its stack pointer slot is redirected
+ * into stk, and longjmp is used to switch stacks and back.
+ */
+int
+onstack(long *stk, int (*func)(void *), void *arg)
+{
+       struct onstackargs a, *args;
+       jmp_buf jmp;
+       long *sp;
+
+       /* the address of a local tells us which stack we are on */
+       sp = (long*)&a;
+       if((long*)sp >= stk && (long*)sp < stk+(KSTACK / sizeof(long)))
+               return func(arg);
+
+       /*
+        * setjmp returns the args pointer when re-entered by the longjmp
+        * below; at that point we run on stk, so the nested onstack()
+        * calls func directly, then we jump back to the original stack.
+        */
+       if(args = (struct onstackargs*)setjmp(jmp)){
+               args->ret = onstack(args->stk, args->func, args->arg);
+               longjmp(args->jmp, 1);
+       }
+
+       /* point the saved stack pointer 16 longs below the top of stk */
+       sp = &stk[(KSTACK / sizeof(long))-16];
+       jmp[JMPBUFSP] = (long)sp;
+
+       memset(stk, 0, KSTACK);
+
+       args = &a;
+       args->stk = stk;
+       args->func = func;
+       args->arg = arg;
+
+       /* save our continuation, then switch to stk passing args as the setjmp value */
+       if(!setjmp(args->jmp))
+               longjmp(jmp, (int)args);
+
+       return args->ret;
+}
+
+#pragma profile off
+
+/*
+ * Call fn(arg), going through prof() with the mode in _tos->prof.what
+ * when that mode is non-zero.
+ */
+static void
+proff(void (*fn)(void*), void *arg)
+{
+       if(_tos->prof.what != 0){
+               prof(fn, arg, 2000, _tos->prof.what);
+               return;
+       }
+       fn(arg);
+}
+
+/*
+ * Callback run by proff(): records the current continuation in exitjmp
+ * and then jumps to the jmp_buf passed as arg.  When exitjmp is later
+ * taken, setjmp returns non-zero and this function simply returns.
+ */
+static void
+profexitjmpfn(void *arg)
+{
+       /*
+        * we are now called by the profiling function on the profstack.
+        * save the current continuation so we can return here on exit.
+        */
+       if(!setjmp(exitjmp))
+               longjmp((long*)arg, 1); /* return from profme() */
+}
+
+/*
+ * Body run by profme() on the profiling stack: runs profexitjmpfn via
+ * proff() and, once that returns, terminates through die().
+ */
+static int
+profmeprofstack(void *arg)
+{
+       proff(profexitjmpfn, arg);
+       for(;;) die();
+}
+
+#pragma profile on
+
+/* stack used by profme(); set in main() to point into its mainstack */
+static long *profstack;
+
+/*
+ * Run profmeprofstack() on profstack, passing it a jmp_buf of this
+ * frame; profexitjmpfn() longjmps to it, which makes the setjmp below
+ * return non-zero so that profme() returns to its caller.
+ */
+void
+profme(void)
+{
+       jmp_buf j;
+
+       if(!setjmp(j))
+               onstack(profstack, profmeprofstack, j);
+}
+
+
+/*
+ * Print "PANIC: " followed by the formatted message on fd 2.
+ * With debug set the process aborts; otherwise "kill" is written to
+ * the note group file of this process (best effort) and the process
+ * exits with status "panic".
+ */
+static void
+vpanic(char *msg, va_list arg)
+{
+       char path[32];
+       int notefd;
+
+       fprint(2, "PANIC: ");
+       vfprint(2, msg, arg);
+       fprint(2, "\n");
+
+       if(debug)
+               abort();
+
+       snprint(path, sizeof(path), "/proc/%d/notepg", getpid());
+       notefd = open(path, OWRITE);
+       if(notefd >= 0){
+               write(notefd, "kill", 4);
+               close(notefd);
+       }
+       exits("panic");
+}
+
+/* printf-style front end to vpanic() */
+void
+panic(char *msg, ...)
+{
+       va_list arg;
+
+       va_start(arg, msg);
+       vpanic(msg, arg);
+       va_end(arg);
+}
+
+/* print the command line synopsis on fd 2 and exit with status "usage" */
+void usage(void)
+{
+       fprint(2, "usage: linuxemu [-d] [-u uid] [-g gid] cmd [args]\n");
+       exits("usage");
+}
+
+/*
+ * Storage that main() keeps in its own frame and publishes through the
+ * globals profstack, kstack, pcurrent and exitjmp.
+ */
+struct mainstack
+{
+       long            profstack[KSTACK / sizeof(long)];
+       long            kstack[KSTACK / sizeof(long)];
+       Uproc   *proc;
+       jmp_buf exitjmp;
+};
+
+/*
+ * Entry point: parse the options, initialize the emulated devices and
+ * the first process, put the emulated console on fds 0-2 and execute
+ * the command given on the command line.
+ */
+void main(int argc, char *argv[])
+{
+       struct mainstack ms;
+       int err;
+       int uid, gid;
+       int fd;
+
+       fmtinstall('E', Efmt);
+       fmtinstall('S', Sfmt);
+
+       uid = 0;
+       gid = 0;
+       debug = 0;
+
+       ARGBEGIN {
+       case 'd':
+               debug++;
+               break;
+       case 'u':
+               uid = atoi(EARGF(usage()));
+               break;
+       case 'g':
+               gid = atoi(EARGF(usage()));
+               break;
+       default:
+               usage();
+       } ARGEND
+
+       /* a command to run is mandatory */
+       if(argc < 1)
+               usage();
+
+       rootdevinit();
+       procdevinit();
+       ptydevinit();
+       consdevinit();
+       dspdevinit();
+       miscdevinit();
+       sockdevinit();
+       pipedevinit();
+
+       /* publish the buffers living in this frame through the globals */
+       kstack = ms.kstack;
+       profstack = ms.profstack;
+       exitjmp = ms.exitjmp;
+       pcurrent = &ms.proc;
+       current = nil;
+
+       /* a later longjmp(exitjmp, ...) lands here and terminates */
+       if(setjmp(exitjmp))
+               die();
+
+       initproc();
+       current->uid = uid;
+       current->gid = gid;
+
+       /* emulated console */
+       sys_close(0);
+       if((fd = sys_open("/dev/cons", O_RDWR, 0)) != 0)
+               fprint(2, "cant open console for stdin\n");
+       sys_close(1);
+       if(sys_dup(fd) != 1)
+               fprint(2, "cant dup stdout\n");
+       sys_close(2);
+       if(sys_dup(fd) != 2)
+               fprint(2, "cant dup stderr\n");
+
+       sys_fcntl(0, F_SETFD, 0);
+       sys_fcntl(1, F_SETFD, 0);
+       sys_fcntl(2, F_SETFD, 0);
+
+       err = sys_execve(*argv, argv, readenv());
+
+       /* sys_execve returned: report its result and leave through exitjmp */
+       fprint(2, "%s: %E\n", *argv, err);
+       longjmp(exitjmp, 1);
+}
diff --git a/linux_emul_base/mem.c b/linux_emul_base/mem.c
new file mode 100644 (file)
index 0000000..996cafb
--- /dev/null
@@ -0,0 +1,1538 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Range Range;
+typedef struct Area Area;
+typedef struct Filemap Filemap;
+typedef struct Futex Futex;
+typedef struct Seg Seg;
+typedef struct Space Space;
+
+/*
+ * Segment types, used as index into Space.seg[] and segname[].
+ * keep in order, lowest base address first
+ */
+enum {
+       SEGDATA,
+       SEGPRIVATE,
+       SEGSHARED,
+       SEGSTACK,
+       SEGMAX,
+};
+
+/* printable segment names for trace and panic messages, indexed by segment type */
+static char *segname[SEGMAX] = { "data", "private", "shared", "stack" };
+
+/* address range from base up to top; its size is computed as top - base */
+struct Range
+{
+       ulong   base;
+       ulong   top;
+};
+
+/*
+ * Reference counted description of a file mapping.  Areas point at it
+ * (duparea() takes an extra reference); syncarea() writes memory in
+ * addr back to file at offset + (address - addr.base).
+ */
+struct Filemap
+{
+       Range   addr;
+
+       Filemap *next;          /* link in Seg.freefilemap */
+
+       char            *path;
+       ulong   offset;
+       int             mode;
+       Ufile           *file;
+
+       Ref;
+};
+
+/*
+ * Futex record chained off an Area.  It embeds a Ref and a Uwaitq;
+ * freearea() wakes the wait queue when the owning area goes away.
+ */
+struct Futex
+{
+       ulong   *addr;
+
+       Futex   *next;          /* next futex in the list */
+       Futex   **link;         /* address of the pointer referencing this futex, nil when unlinked */
+
+       Ref;
+       Uwaitq;
+};
+
+/* one mapped address range inside a segment, kept in a doubly linked list sorted by address */
+struct Area
+{
+       Range   addr;
+
+       Area    *next;          /* next higher area */
+       Area    *prev;          /* previous lower area */
+       Seg             *seg;                   /* segment we belong to */
+
+       int             prot;           /* PROT_* bits; syncarea() tests PROT_WRITE */
+
+       Filemap         *filemap;       /* backing file mapping or nil */
+       Futex   *futex;         /* futexes attached to this area */
+};
+
+/*
+ * Reference counted, lockable segment holding the list of areas plus
+ * free lists of recycled Area, Filemap and Futex structures.
+ */
+struct Seg
+{
+       Ref;
+       QLock;
+
+       Range   addr;
+       ulong   limit;                  /* maximum address this segment can grow */
+
+       Area    *areas;         /* ordered by address */
+
+       int             type;                   /* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */
+
+       Area            *freearea;
+       Filemap *freefilemap;
+       Futex   *freefutex;
+};
+
+/* reference counted, lockable address space: the brk value and one segment slot per segment type */
+struct Space
+{
+       Ref;
+       QLock;
+
+       ulong   brk;
+       Seg             *seg[SEGMAX];
+};
+
+
+/* malloc that panics instead of returning nil; the block is tagged with the caller's pc */
+void*
+kmalloc(int size)
+{
+       void *mem;
+
+       if((mem = malloc(size)) == nil)
+               panic("kmalloc: out of memory");
+       setmalloctag(mem, getcallerpc(&size));
+       return mem;
+}
+/*
+ * realloc that panics when a request with size > 0 fails; the result
+ * is then tagged with the caller's pc.  For size <= 0 the realloc
+ * result is returned unchecked and untagged.
+ */
+void*
+krealloc(void *ptr, int size)
+{
+       void *mem;
+
+       mem = realloc(ptr, size);
+       if(size <= 0)
+               return mem;
+       if(mem == nil)
+               panic("krealloc: out of memory");
+       setmalloctag(mem, getcallerpc(&ptr));
+       return mem;
+}
+
+/* mallocz that panics instead of returning nil; the block is tagged with the caller's pc */
+void*
+kmallocz(int size, int zero)
+{
+       void *mem;
+
+       if((mem = mallocz(size, zero)) == nil)
+               panic("kmallocz: out of memory");
+       setmalloctag(mem, getcallerpc(&size));
+       return mem;
+}
+
+/* duplicate the string s into kmalloc'ed memory tagged with the caller's pc */
+char*
+kstrdup(char *s)
+{
+       char *copy;
+       int len;
+
+       len = strlen(s);
+       copy = kmalloc(len+1);
+       /* len+1 bytes: the terminating 0 of s is copied along */
+       memmove(copy, s, len+1);
+       setmalloctag(copy, getcallerpc(&s));
+       return copy;
+}
+
+/*
+ * Format into freshly allocated memory and return the string; panics
+ * when memory runs out, so the result is never nil.  The block is
+ * tagged with the caller's pc and is released with free().
+ *
+ * vsmprint sizes the buffer to the formatted text, so results longer
+ * than a fixed scratch buffer are not truncated.
+ */
+char*
+ksmprint(char *fmt, ...)
+{
+       va_list args;
+       char *p;
+
+       va_start(args, fmt);
+       p = vsmprint(fmt, args);
+       va_end(args);
+       if(p == nil)
+               panic("ksmprint: out of memory");
+       setmalloctag(p, getcallerpc(&fmt));
+       return p;
+}
+
+/* round addr up to the next multiple of PAGESIZE */
+ulong
+pagealign(ulong addr)
+{
+       ulong mask;
+
+       mask = PAGESIZE-1;
+       addr += mask;
+       return addr & ~mask;
+}
+
+/*
+ * Write the part of range r that lies inside both area a and its file
+ * mapping back to the mapped file.  Nothing happens for areas without
+ * a file mapping, mappings without a file, areas lacking PROT_WRITE,
+ * or when the clamped range is empty.
+ */
+static void
+syncarea(Area *a, Range r)
+{
+       Filemap *f;
+
+       f = a->filemap;
+       if(f == nil || f->file == nil)
+               return;
+       if((a->prot & PROT_WRITE) == 0)
+               return;
+
+       /* clamp r to the area, then to the mapping */
+       if(r.base < a->addr.base)
+               r.base = a->addr.base;
+       if(r.top > a->addr.top)
+               r.top = a->addr.top;
+       if(r.base < f->addr.base)
+               r.base = f->addr.base;
+       if(r.top > f->addr.top)
+               r.top = f->addr.top;
+
+       /* disjoint ranges would make top - base wrap to a huge length */
+       if(r.base >= r.top)
+               return;
+
+       pwritefile(f->file, (void*)r.base, r.top - r.base,
+               (r.base - f->addr.base) + f->offset);
+}
+
+/*
+ * Insert area a into seg's area list, which is kept sorted by base
+ * address, and make a belong to seg.
+ */
+static void
+linkarea(Seg *seg, Area *a)
+{
+       Area *p;
+
+       a->next = nil;
+       a->prev = nil;
+       a->seg = seg;
+
+       /* stop at the first area above a, or at the last area of the list */
+       for(p = seg->areas; p && p->next; p=p->next)
+               if(p->addr.base > a->addr.base)
+                       break;
+       if(p != nil){
+               if(p->addr.base > a->addr.base){
+                       /* insert in front of p */
+                       a->next = p;
+                       if(a->prev = p->prev)
+                               a->prev->next = a;
+                       p->prev = a;
+               } else {
+                       /* p is the last area and lies below a: append */
+                       a->prev = p;
+                       p->next = a;
+               }
+       }
+       /* no predecessor: a is the new list head */
+       if(a->prev == nil)
+               seg->areas = a;
+}
+
+/*
+ * Return an unlinked copy of area a: same address range, protection
+ * and file mapping (on which an extra reference is taken), but with no
+ * segment, no neighbours and no futexes.  The structure is taken from
+ * the free list of a's segment when one is available.
+ */
+static Area *
+duparea(Area *a)
+{
+       Area *copy;
+
+       copy = a->seg->freearea;
+       if(copy != nil)
+               a->seg->freearea = copy->next;
+       else
+               copy = kmalloc(sizeof(Area));
+
+       copy->addr = a->addr;
+       copy->prot = a->prot;
+       copy->next = nil;
+       copy->prev = nil;
+       copy->seg = nil;
+       copy->futex = nil;
+       copy->filemap = a->filemap;
+       if(copy->filemap != nil)
+               incref(copy->filemap);
+       return copy;
+}
+
+/*
+ * Remove area a from its segment: write back and drop its file
+ * mapping, detach and wake all futexes, unlink it from the area list
+ * and put the structure on the segment's free list.
+ */
+static void
+freearea(Area *a)
+{
+       Filemap *f;
+       Futex *x;
+       Seg *seg;
+
+       seg = a->seg;
+       if(f = a->filemap){
+               syncarea(a, a->addr);
+               a->filemap = nil;
+               /* last reference gone: release path and file, recycle the structure */
+               if(!decref(f)){
+                       free(f->path);
+                       putfile(f->file);
+                       f->next = seg->freefilemap;
+                       seg->freefilemap = f;
+               }
+       }
+       /* pop every futex off the area and wake its waiters */
+       while(x = a->futex){
+               if(a->futex = x->next)
+                       x->next->link = &a->futex;
+               x->link = nil;
+               x->next = nil;
+               wakeq(x, MAXPROC);
+       }
+       /* unlink a from the doubly linked area list */
+       if(a->prev == nil){
+               if(seg->areas = a->next)
+                       a->next->prev = nil;
+       } else {
+               if(a->prev->next = a->next)
+                       a->next->prev = a->prev;
+       }
+
+       a->next = seg->freearea;
+       seg->freearea = a;
+}
+
+/*
+ * Allocate a zeroed Seg of the given type covering addr, with growth
+ * limit `limit` and a reference count of 1.  When class is non-nil the
+ * range is first attached with segattach(attr, class, ...); a failure
+ * to attach at exactly addr.base is fatal.
+ */
+static Seg *
+allocseg(int type, Range addr, ulong limit, int attr, char *class)
+{
+       Seg *seg;
+
+       if(class){
+               trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top);
+               if(segattach(attr, class, (void*)addr.base, addr.top - addr.base) != (void*)addr.base)
+                       panic("allocseg: segattach %s segment: %r", segname[type]);
+       }
+
+       seg = kmallocz(sizeof(Seg), 1);
+       seg->addr = addr;
+       seg->limit = limit;
+       seg->type = type;
+       seg->ref = 1;
+
+       return seg;
+}
+
+/*
+ * Duplicate a segment.  Without copy the same Seg is returned with an
+ * extra reference.  With copy a new Seg record is allocated (no
+ * segattach is done here) and the area list is duplicated in order.
+ * Returns nil when old is nil.
+ */
+static Seg *
+dupseg(Seg *old, int copy)
+{
+       Seg *new;
+       Area *a, *p, *x;
+
+       if(old == nil)
+               return nil;
+       if(!copy){
+               incref(old);
+               return old;
+       }
+       new = allocseg(old->type, old->addr, old->limit, 0, nil);
+       /* p tracks the tail of the new list */
+       p = nil;
+       for(a=old->areas; a; a=a->next){
+               x = duparea(a);
+               x->seg = new;
+               if(x->prev = p){
+                       p->next = x;
+               } else {
+                       new->areas = x;
+               }
+               p = x;
+       }
+
+       return new;
+}
+
+/*
+ * Obtain an address space derived from old.  Without copy, old is
+ * returned with an extra reference.  With copy a new Space is built:
+ * every segment is duplicated via dupseg(), except SEGSHARED which is
+ * shared by reference, and brk is carried over.
+ */
+static Space *
+getspace(Space *old, int copy)
+{      
+       Space *new;
+       Seg *seg;
+       int t;
+
+       if(!copy){
+               incref(old);
+               return old;
+       }
+
+       new = kmallocz(sizeof(Space), 1);
+       new->ref = 1;
+
+       qlock(old);
+       for(t=0; t<SEGMAX; t++){
+               if(seg = old->seg[t]){
+                       qlock(seg);
+                       /* copy flag is false only for the shared segment */
+                       new->seg[t] = dupseg(seg, t != SEGSHARED);
+                       qunlock(seg);
+               }
+       }
+       new->brk = old->brk;
+       qunlock(old);
+
+       return new;
+}
+
+/*
+ * Drop a reference to space.  When it was the last one, every segment
+ * is released: a segment whose own reference count reaches zero has
+ * its areas and free lists freed, and in all cases the segment's base
+ * address is passed to segdetach() (failure is fatal).  Finally the
+ * Space itself is freed.
+ */
+static void
+putspace(Space *space)
+{
+       Seg *seg;
+       int t;
+       Area *a;
+       Filemap *f;
+       Futex *x;
+       void *addr;
+
+       if(decref(space))
+               return;
+       for(t=0; t<SEGMAX; t++){
+               if(seg = space->seg[t]){
+                       /* remember the base: seg may be freed below */
+                       addr = (void*)seg->addr.base;
+                       if(!decref(seg)){
+                               qlock(seg);
+                               /* mark all areas as free */
+                               while(a = seg->areas)
+                                       freearea(a);
+
+                               /* clear the free lists */
+                               while(a = seg->freearea){
+                                       seg->freearea = a->next;
+                                       free(a);
+                               }
+                               while(f = seg->freefilemap){
+                                       seg->freefilemap = f->next;
+                                       free(f);
+                               }
+                               while(x = seg->freefutex){
+                                       seg->freefutex = x->next;
+                                       free(x);
+                               }
+                               free(seg);
+                       }
+                       if(segdetach(addr) < 0)
+                               panic("putspace: segdetach %s segment: %r", segname[t]);
+               }
+       }
+       free(space);
+}
+
+/*
+ * Two areas may be merged only when neither has a file mapping or
+ * futexes and both carry the same protection.  Returns 1 or 0.
+ */
+static int
+canmerge(Area *a, Area *b)
+{
+       if(a->filemap != nil || a->futex != nil)
+               return 0;
+       if(b->filemap != nil || b->futex != nil)
+               return 0;
+       return a->prot == b->prot;
+}
+
+/*
+ * Try to fold the neighbours of a into a: a neighbour that touches
+ * a exactly and passes canmerge() gives its address range to a and
+ * is then released with freearea().
+ */
+static void
+mergearea(Area *a)
+{
+       Area *n;
+
+       /* lower neighbour first */
+       n = a->prev;
+       if(n != nil && n->addr.top == a->addr.base && canmerge(n, a)){
+               a->addr.base = n->addr.base;
+               freearea(n);
+       }
+
+       /* then the upper neighbour */
+       n = a->next;
+       if(n != nil && n->addr.base == a->addr.top && canmerge(n, a)){
+               a->addr.top = n->addr.top;
+               freearea(n);
+       }
+}
+
+/*
+ * Look for an unused address range (a hole) in seg. The holes that
+ * are examined are the gaps between seg->addr.base, the areas on
+ * seg->areas and seg->addr.top (assumes the area list is sorted by
+ * ascending address -- confirm).
+ *
+ * fixed != 0: succeed only when [r->base, r->top) fits completely
+ * into one hole; r is left untouched.
+ * fixed == 0: pick the smallest hole that is at least r->top - r->base
+ * bytes large and place r at the upper end of that hole.
+ *
+ * Returns 1 when a hole was found, 0 otherwise.
+ */
+static int
+findhole(Seg *seg, Range *r, int fixed)
+{
+       Range h;
+       Area *a;
+       ulong m;
+       ulong z;
+       ulong hz;
+
+       z = r->top - r->base;   /* requested size */
+       m = ~0;                 /* size of the best hole so far; ~0 means none yet */
+       h.base = seg->addr.base;
+       a = seg->areas;
+       for(;;) {
+               /* the hole ends at the next area, or at the segment top after the last one */
+               if((h.top = a ? a->addr.base : seg->addr.top) > h.base) {
+                       if(fixed){
+                               /* this and all later holes start above the wanted base */
+                               if(h.base > r->base)
+                                       break;
+                               if((r->base >= h.base) && (r->top <= h.top))
+                                       goto found;
+                       } else {
+                               hz = h.top - h.base;
+                               if((hz >= z) && (hz < m)) {
+                                       r->base = h.top - z;
+                                       r->top = h.top;
+                                       /* an exact fit cannot be improved on */
+                                       if((m = hz) == z)
+                                               goto found;
+                               }
+                       }
+               }
+               if(a == nil)
+                       break;
+               h.base = a->addr.top;
+               a = a->next;
+       }
+       if(!fixed && (m != ~0))
+               goto found;
+       return 0;
+
+found:
+       return 1;
+}
+
+/*
+ * wake up all futexes of area a whose address lies in
+ * [addr.base, addr.top) and unlink them from the area's futex list.
+ * wakeq() is called with MAXPROC as its count argument.
+ */
+static void
+wakefutexarea(Area *a, Range addr)
+{
+       Futex *fu, *x;
+
+       for(fu = a->futex; fu; fu = x){
+               /* fetch the successor first, fu->next is cleared below */
+               x = fu->next;
+               if((ulong)fu->addr >= addr.base && (ulong)fu->addr < addr.top){
+                       /* link points at the pointer that refers to fu; splice fu out */
+                       if(*fu->link = x)
+                               x->link = fu->link;
+                       fu->link = nil;
+                       fu->next = nil;
+
+                       trace("wakefutexarea: fu=%p addr=%p", fu, fu->addr);
+                       wakeq(fu, MAXPROC);
+               }
+       }
+}
+
+/*
+ * Remove the range r from every area of seg that overlaps it.
+ * For each overlapping area the futexes inside the overlap are woken,
+ * the area is freed, trimmed or split in two, and the overlap is
+ * handed to segfree(). A segfree() failure is fatal (panic).
+ */
+static void
+makehole(Seg *seg, Range r)
+{
+       Area *a, *b, *x;
+       Range f;
+
+       for(a = seg->areas; a; a = x){
+               /* a may be freed below, so remember its successor now */
+               x = a->next;
+
+               if(a->addr.top <= r.base)
+                       continue;
+               if(a->addr.base >= r.top)
+                       break;
+
+               /* f = the part of r that lies inside a */
+               f = r;
+               if(f.base < a->addr.base)
+                       f.base = a->addr.base;
+               if(f.top > a->addr.top)
+                       f.top = a->addr.top;
+
+               wakefutexarea(a, f);
+               if(f.base == a->addr.base){
+                       if(f.top == a->addr.top){
+                               /* the whole area goes away */
+                               freearea(a);
+                       } else {
+                               /* cut off the lower part */
+                               a->addr.base = f.top;
+                       }
+               } else if(f.top == a->addr.top){
+                       /* cut off the upper part */
+                       a->addr.top = f.base;
+               } else {
+                       /* hole in the middle: a keeps the lower part, b becomes the upper part */
+                       b = duparea(a);
+                       b->addr.base = f.top;
+
+                       a->addr.top = f.base;
+                       linkarea(seg, b);
+               }
+
+               if(segfree((void*)f.base, f.top - f.base) < 0)
+                       panic("makehole: segfree %s segment: %r", segname[seg->type]);
+       }
+}
+
+/*
+ * Find the segment of space whose address range contains addr.
+ * The segment is returned with its lock held; the caller has to
+ * qunlock() it. Returns nil (nothing locked) when no segment matches.
+ */
+static Seg*
+addr2seg(Space *space, ulong addr)
+{
+       Seg *s;
+       int i;
+
+       for(i = 0; i < SEGMAX; i++){
+               s = space->seg[i];
+               if(s == nil)
+                       continue;
+               qlock(s);
+               if(addr < s->addr.base || addr >= s->addr.top){
+                       qunlock(s);
+                       continue;
+               }
+               /* hit: hand the segment back still locked */
+               return s;
+       }
+
+       return nil;
+}
+
+/*
+ * Return the area of seg that contains addr, or nil when addr
+ * is not covered by any area. No locking is done here.
+ */
+static Area*
+addr2area(Seg *seg, ulong addr)
+{
+       Area *cur;
+
+       cur = seg->areas;
+       while(cur != nil){
+               if(cur->addr.base <= addr && addr < cur->addr.top)
+                       break;
+               cur = cur->next;
+       }
+       return cur;
+}
+
+/*
+ * Check whether the len bytes starting at ptr lie in mapped areas of
+ * the current process that all permit the requested access
+ * (PROT_WRITE when write is non-zero, PROT_READ otherwise).
+ * The range may span several areas of one segment as long as they
+ * follow each other without a gap. Addresses inside the first page,
+ * negative lengths and ranges that wrap around the address space
+ * are rejected.
+ *
+ * Returns 1 when the range is usable, 0 otherwise.
+ */
+int
+okaddr(void *ptr, int len, int write)
+{
+       ulong addr, end;
+       Space *space;
+       Seg *seg;
+       Area *a;
+       int ok;
+
+       ok = 0;
+       addr = (ulong)ptr;
+       end = addr + (ulong)len;
+       if(addr < PAGESIZE)
+               goto out;
+       /* a negative len or a wrapped end would compare as "below the area top" */
+       if(len < 0 || end < addr)
+               goto out;
+       if(space = current->mem){
+               qlock(space);
+               /* addr2seg returns the segment locked */
+               if(seg = addr2seg(space, addr)){
+                       /* walk from area to area until the end of the range is covered */
+                       while(a = addr2area(seg, addr)){
+                               if(write){
+                                       if((a->prot & PROT_WRITE) == 0)
+                                               break;
+                               } else {
+                                       if((a->prot & PROT_READ) == 0)
+                                               break;
+                               }
+                               if(end <= a->addr.top){
+                                       ok = 1;
+                                       break;
+                               }
+                               addr = a->addr.top;
+                       }
+                       qunlock(seg);
+               }
+               qunlock(space);
+       }
+out:
+       /* report the range that was asked for, not the walk position */
+       trace("okaddr(%lux-%lux, %d) -> %d", (ulong)ptr, end, write, ok);
+       return ok;
+}
+
+/*
+ * Unmap the range r from all segments of space that overlap it.
+ * The walk stops at the first segment that starts at or above r.top.
+ */
+static void
+unmapspace(Space *space, Range r)
+{
+       Seg *s;
+       int i, past;
+
+       for(i = 0; i < SEGMAX; i++){
+               s = space->seg[i];
+               if(s == nil)
+                       continue;
+               qlock(s);
+               past = (s->addr.base >= r.top);
+               if(!past && s->addr.top > r.base)
+                       makehole(s, r);
+               qunlock(s);
+               if(past)
+                       break;
+       }
+}
+
+/*
+ * Create a new area for the range r in one of the segments of space.
+ *
+ * The segment is chosen from flags and r.base: without MAP_PRIVATE the
+ * shared segment is used; with MAP_PRIVATE the stack segment when
+ * r.base is at or above the stack base, the data segment when r.base
+ * lies between the data base and its limit, the private segment
+ * otherwise.
+ *
+ * With MAP_FIXED the area is placed exactly at r (existing mappings
+ * in the way are removed, except in the shared segment). Without it,
+ * r is used when it is free, else a hole is searched, else the area
+ * is placed at the current top of the segment. The segment is grown
+ * with segbrk() when the area ends above its top.
+ *
+ * On success the new area is returned and its segment stays locked;
+ * the caller has to qunlock() it. On failure nil is returned, the
+ * segment is unlocked and -ENOMEM is stored in *perr if perr is set.
+ */
+static Area*
+mapspace(Space *space, Range r, int flags, int prot, int *perr)
+{
+       Seg *seg;
+       Area *a;
+       Range f;
+       int t;
+
+       /* NOTE(review): seg[SEGSTACK] and seg[SEGDATA] are dereferenced without a nil check */
+       if(flags & MAP_PRIVATE){
+               if(r.base >= space->seg[SEGSTACK]->addr.base){
+                       t = SEGSTACK;
+               } else if(r.base >= space->seg[SEGDATA]->addr.base && 
+                       r.base < space->seg[SEGDATA]->limit){
+                       t = SEGDATA;
+               } else {
+                       t = SEGPRIVATE;
+               }
+       } else {
+               t = SEGSHARED;
+       }
+
+       if((seg = space->seg[t]) == nil)
+               goto nomem;
+
+       qlock(seg);
+       if((r.base >= seg->addr.base) && (r.top <= seg->limit)){
+               /* entirely above the mapped part: nothing can be in the way */
+               if(r.base >= seg->addr.top)
+                       goto addrok;
+
+               /* only the part below the segment top has to be checked for a hole */
+               f = r;
+               if(f.top > seg->addr.top)
+                       f.top = seg->addr.top;
+               if(findhole(seg, &f, 1))
+                       goto addrok;
+               if(flags & MAP_FIXED){
+                       if(seg->type == SEGSHARED){
+                               trace("mapspace(): cant make hole %lux-%lux in shared segment",
+                                       f.base, f.top);
+                               goto nomem;
+                       }
+                       /* a fixed mapping replaces whatever was mapped there */
+                       makehole(seg, f);
+                       goto addrok;
+               }               
+       }
+
+       if(flags & MAP_FIXED){
+               trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment", 
+                       r.base, r.top, segname[seg->type]);
+               goto nomem;
+       }
+
+       /* the requested address is not usable: take any hole that is large enough */
+       if(findhole(seg, &r, 0))
+               goto addrok;
+
+       /* no hole either: move r to the top of the segment, keeping its size */
+       r.top -= r.base;
+       r.base = seg->addr.top;
+       r.top += r.base;
+
+addrok:
+       trace("mapspace(): addr %lux-%lux", r.base, r.top);
+
+       if(r.top > seg->addr.top){
+               if(r.top > seg->limit){
+                       trace("mapspace(): area top %lux over %s segment limit %lux",
+                               r.top, segname[seg->type], seg->limit);
+                       goto nomem;
+               }
+               trace("mapspace(): segbrk %s segment %lux-%lux -> %lux",
+                       segname[seg->type], seg->addr.base, seg->addr.top, r.top);
+               if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){
+                       trace("mapspace(): segbrk failed: %r");
+                       goto nomem;
+               }
+               seg->addr.top = r.top;
+       }
+
+       /* reuse an Area from the segment's free list when there is one */
+       if(a = seg->freearea){
+               seg->freearea = a->next;
+       } else {
+               a = kmalloc(sizeof(Area));
+       }
+       a->addr = r;
+       a->prot = prot;
+       a->filemap = nil;
+       a->futex = nil;
+
+       linkarea(seg, a);
+
+       /* keep seg locked */
+       return a;
+
+nomem:
+       /* seg is nil only when the chosen segment does not exist; otherwise it is locked here */
+       if(seg != nil)
+               qunlock(seg);
+       if(perr) *perr = -ENOMEM;
+       return nil;
+}
+
+/*
+ * Move the program break of space to bk.
+ * Shrinking unmaps the pages between the new and the old break,
+ * growing maps new anonymous read/write/exec pages with mapspace().
+ * The request is ignored when there is no data segment, when bk is
+ * below the data segment base, when another mapping is in the way or
+ * when mapspace() fails.
+ *
+ * Returns space->brk as it is after the call, i.e. the unchanged
+ * break when the request was ignored.
+ */
+static ulong
+brkspace(Space *space, ulong bk)
+{
+       Seg *seg;
+       Area *a;
+       ulong old, new;
+       Range r;
+
+       if((seg = space->seg[SEGDATA]) == nil)
+               goto out;
+
+       qlock(seg);
+       /* a break below the data segment base is replaced by the segment top */
+       if(space->brk < seg->addr.base)
+               space->brk = seg->addr.top;
+
+       if(bk < seg->addr.base)
+               goto out;
+
+       old = pagealign(space->brk);
+       new = pagealign(bk);
+
+       if(old != new){
+               if(bk < space->brk){
+                       r.base = new;
+                       r.top = old;
+                       /* unmapspace() locks the segments itself */
+                       qunlock(seg);
+                       seg = nil;
+
+                       unmapspace(space, r);
+               } else {
+                       r.base = old;
+                       r.top = new;
+
+                       trace("brkspace(): new mapping %lux-%lux", r.base, r.top);
+                       /*
+                        * give up when an area overlaps the new range or begins within
+                        * one page above it; the walk starts at the area that holds
+                        * the page below the old break (no walk at all if there is none)
+                        */
+                       for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){
+                               if(a->addr.top <= r.base)
+                                       continue;
+                               if(a->addr.base > r.top + PAGESIZE)
+                                       break;
+
+                               trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top);
+                               goto out;
+                       }
+                       /* mapspace() takes the segment lock itself */
+                       qunlock(seg);
+                       seg = nil;
+
+                       a = mapspace(space, r,
+                               MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED,
+                               PROT_READ|PROT_WRITE|PROT_EXEC, nil);
+
+                       if(a == nil)
+                               goto out;
+
+                       /* mapspace() returned with a->seg locked; it is unlocked at out */
+                       seg = a->seg;
+                       mergearea(a);
+               }
+       }
+
+       if(space->brk != bk){
+               trace("brkspace: set new brk %lux", bk);
+               space->brk = bk;
+       }
+
+out:
+       if(seg != nil)
+               qunlock(seg);
+
+       return space->brk;
+}
+
+/*
+ * Resize the mapping at addr from oldlen to newlen bytes (both are
+ * rounded with pagealign()).
+ *
+ * Shrinking unmaps the tail of the range. Growing is only done in
+ * place: the range has to end exactly at the top of its area and the
+ * new end must not pass the segment limit or the next area. Moving a
+ * mapping is not implemented, so every case that would need a move
+ * (including MREMAP_FIXED with a different newaddr) fails.
+ *
+ * Returns addr on success. Errors are returned as negative errno
+ * values converted to ulong: -EINVAL for bad arguments, -EFAULT when
+ * addr is not mapped, -ENOMEM when a move would be required.
+ */
+static ulong
+remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags)
+{
+       Area *a;
+       Seg *seg;
+       int move;
+       Range r;
+
+       if(pagealign(addr) != addr)
+               return -EINVAL;
+
+       oldlen = pagealign(oldlen);
+       newlen = pagealign(newlen);
+
+       /* reject wrap around; a new length of zero is rejected as well */
+       if((addr + oldlen) < addr)
+               return -EINVAL;
+       if((addr + newlen) <= addr)
+               return -EINVAL;
+
+       move = 0;
+       if(flags & MREMAP_FIXED){
+               if(pagealign(newaddr) != newaddr)
+                       return -EINVAL;
+               if((flags & MREMAP_MAYMOVE) == 0)
+                       return -EINVAL;
+               /* the old and the new range must not overlap */
+               if((newaddr <= addr) && ((newaddr+newlen)  > addr))
+                       return -EINVAL;
+               if((addr <= newaddr) && ((addr+oldlen) > newaddr))
+                       return -EINVAL;
+               move = (newaddr != addr);
+       }
+
+       /* note: the tail is unmapped before addr is checked to be mapped at all */
+       if(newlen < oldlen){
+               r.base = addr + newlen;
+               r.top = addr + oldlen;
+
+               unmapspace(space, r);
+
+               oldlen = newlen;
+       }
+
+       if((newlen == oldlen) && !move)
+               return addr;
+
+       /* addr2seg returns the segment locked; every path below unlocks it */
+       if((seg = addr2seg(space, addr)) == nil)
+               return -EFAULT;
+
+       if((a = addr2area(seg, addr)) == nil)
+               goto fault;
+       if(a->addr.top < (addr + oldlen))
+               goto fault;
+
+       /* everything that cannot be grown in place ends up at domove */
+       if(move)
+               goto domove;
+       if((addr + oldlen) != a->addr.top)
+               goto domove;
+       if((addr + newlen) > seg->limit)
+               goto domove;
+       if(a->next != nil)
+               if((addr + newlen) > a->next->addr.base)
+                       goto domove;
+
+       if((addr + newlen) > seg->addr.top){
+               trace("remapspace(): segbrk %s segment %lux-%lux -> %lux", 
+                       segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen));
+               if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){
+                       trace("remapspace(): segbrk: %r");
+                       goto domove;
+               }
+
+               seg->addr.top = (addr + newlen);
+       }
+       a->addr.top = (addr + newlen);
+       mergearea(a);
+       qunlock(seg);
+
+       return addr;
+
+domove:
+       trace("remapspace(): domove not implemented");
+       if(seg != nil)
+               qunlock(seg);
+       return -ENOMEM;
+
+fault:
+       if(seg != nil)
+               qunlock(seg);
+       return -EFAULT;
+}
+
+/*
+ * Call syncarea() for every area that overlaps the range r.
+ * Only the segment that contains r.base is looked at, and the walk
+ * starts at the area that contains r.base; nothing happens when
+ * r.base is not mapped.
+ */
+static void
+syncspace(Space *space, Range r)
+{
+       Seg *seg;
+       Area *a;
+
+       /* addr2seg returns the segment locked */
+       if(seg = addr2seg(space, r.base)){
+               for(a = addr2area(seg, r.base); a; a=a->next){
+                       /*
+                        * stop at the first area that begins at or above the end
+                        * of r, so areas outside the range are not synced
+                        */
+                       if(r.top <= a->addr.base)
+                               break;
+                       syncarea(a, r);
+               }
+               qunlock(seg);
+       }
+}
+
+/*
+ * Map size bytes (rounded with pagealign()) of anonymous read/write
+ * memory that end at the top of the stack segment of the current
+ * process.
+ *
+ * Returns the address just past the new mapping, or nil when the
+ * mapping could not be created.
+ */
+void*
+mapstack(int size)
+{
+       Space *space;
+       ulong a, want;
+
+       space = current->mem;
+       a = space->seg[SEGSTACK]->addr.top;
+       size = pagealign(size);
+       want = a - size;
+       a = sys_mmap(want, size,
+               PROT_READ|PROT_WRITE,
+               MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
+       /*
+        * sys_mmap reports errors as (ulong)-errno, not as 0. The mapping
+        * is MAP_FIXED, so success always yields the requested address
+        * and anything else is a failure.
+        */
+       if(a == 0 || a != want)
+               return nil;
+
+       return (void*)(a + size);
+}
+
+/*
+ * Create the segments that are still missing from the space of the
+ * current process, each one page large: the data segment at base
+ * (rounded with pagealign()), the private segment 0x10000000 above
+ * it and the shared segment 0x10000000 below top, where top is one
+ * page below the stack segment base. Every segment then gets its
+ * limit set to one page below the base of the segment after it.
+ */
+void
+mapdata(ulong base)
+{
+       Space *space;
+       Range r;
+       ulong top;
+       int t;
+
+       space = current->mem;
+       base = pagealign(base);
+       /* the stack segment has to exist already, it is dereferenced here */
+       top = space->seg[SEGSTACK]->addr.base - PAGESIZE;
+
+       for(t=0; t<SEGMAX; t++){
+               if(space->seg[t] == nil){
+                       /* no case for SEGSTACK: r.base would be left unset for it */
+                       switch(t){
+                       case SEGDATA:
+                               r.base = base;
+                               break;
+                       case SEGPRIVATE:
+                               r.base = base + 0x10000000;
+                               break;
+                       case SEGSHARED:
+                               r.base = top - 0x10000000;
+                               break;
+                       }
+                       r.top = r.base + PAGESIZE;
+                       space->seg[t] = allocseg(t, r, r.top, 0, (t == SEGSHARED) ? "shared" : "memory");
+               }
+               /* the previous segment may grow up to one page below this one */
+               if(t > 0 && space->seg[t-1])
+                       space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE;
+       }
+}
+
+/*
+ * unmapuserspace is called from kprocfork to get rid of
+ * the linux memory segments used by the calling process
+ * before current is set to zero. We just segdetach() all those
+ * segments but keep the data structures valid for the calling
+ * (linux) process. A segdetach() failure is fatal (panic).
+ */
+void
+unmapuserspace(void)
+{
+       Space *space;
+       Seg *seg;
+       int t;
+
+       space = current->mem;
+       qlock(space);
+       for(t=0; t<SEGMAX; t++){
+               if((seg = space->seg[t]) == nil)
+                       continue;
+               /* only the mapping is removed, the Seg is neither locked nor changed */
+               if(segdetach((void*)seg->addr.base) < 0)
+                       panic("unmapuserspace: segdetach %s segment: %r", segname[seg->type]);
+       }
+       qunlock(space);
+}
+
+/*
+ * hack:
+ * we write the segment r out into a temporary file, detach it,
+ * attach a new segment of the given class (with attributes attr) at
+ * the same address and read the contents back from the file.
+ * i'm surprised that this even works seamlessly with the Plan9 Bss! :-)
+ *
+ * Errors before the detach panic; errors after it crash the process
+ * with a nil store.
+ */
+static void
+convertseg(Range r, ulong attr, char *class)
+{
+       char name[64];
+       ulong p;
+       int n;
+       int fd;
+       ulong len;
+
+       /* ORCLOSE: the file is removed again when fd is closed */
+       snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid());
+       fd = create(name, ORDWR|ORCLOSE, 0600);
+       if(fd < 0)
+               panic("convertseg: cant create %s: %r", name);
+
+       len = r.top - r.base;
+
+       if(len > 0){
+               n = write(fd, (void*)r.base, len);
+               if(n != len)
+                       panic("convertseg: write: %r");
+       }
+
+       /*
+        * copy string to stack because its memory gets detached :-)
+        * NOTE(review): strncpy leaves name unterminated when class has
+        * sizeof(name) or more characters; the callers visible in this
+        * file pass "shared".
+        */
+       strncpy(name, class, sizeof(name));
+
+       trace("detaching %lux-%lux", r.base, r.top);
+
+       /* point of no return */
+       if(segdetach((void*)r.base) < 0)
+               panic("convertseg: segdetach: %r");
+       /* presumably panic() is not usable while the segment is gone, hence the nil store -- confirm */
+       if(segattach(attr, name, (void*)r.base, len) != (void*)r.base)
+               *((int*)0) = 0;
+
+       p = 0;
+       while(p < len) {
+               /*
+                * we use pread directly to avoid hitting profiling code until
+                * data segment is read back again. pread is unprofiled syscall
+                * stub.
+                */
+               n = pread(fd, (void*)(r.base + p), len - p, (vlong)p);
+               if(n <= 0)
+                       *((int*)0) = 0;
+               p += n;
+       }
+
+       /* everything is back to normal again */
+       trace("segment %lux-%lux reattached as %s", r.base, r.top, class);
+
+       close(fd);
+}
+
+/*
+ * Create the memory space of the current process.
+ *
+ * /proc/<pid>/segment is read in fixed-size records to find the
+ * Plan 9 stack segment; the linux stack segment is allocated below
+ * it. On the very first call the Plan 9 Data and Bss segments are
+ * also converted into "shared" segments with convertseg().
+ * Not being able to open the segment file is fatal (panic).
+ */
+void initmem(void)
+{
+       Space *space;
+       Range r, x;
+       char buf[80];
+       int fd;
+       int n;
+
+       /* set until the first call has finished */
+       static int firsttime = 1;
+
+       space = kmallocz(sizeof(Space), 1);
+       space->ref = 1;
+
+       snprint(buf, sizeof(buf), "/proc/%d/segment", getpid());
+       if((fd = open(buf, OREAD)) < 0)
+               panic("initspace: cant open %s: %r", buf);
+
+       /* size of one record of the segment file */
+       n = 10 + 9 + 9 + 4 + 1;
+       x.base = x.top = 0;
+       while(readn(fd, buf, n)==n){
+               char *name;
+
+               /* cut the record into strings: name at 0, base at 9, top at 19 */
+               buf[8] = 0;
+               buf[18] = 0;
+               buf[28] = 0;
+               buf[33] = 0;
+       
+               name = &buf[0];
+               r.base = strtoul(&buf[9], nil, 16);
+               r.top = strtoul(&buf[19], nil, 16);
+
+               trace("initspace(): %s %lux-%lux", name, r.base, r.top);
+
+               if(firsttime){
+                       /*
+                        * convert Plan9 data+bss segments into shared segments so
+                        * that the memory of emulator data structures gets shared across
+                        * all processes. This only happens the first time initmem() is called.
+                        */
+                       if(strstr(name, "Data")==name)
+                               convertseg(r, 0, "shared");
+                       if(strstr(name, "Bss")==name)
+                               convertseg(r, 0, "shared");
+               }
+
+               if(strstr(name, "Stack")==name){
+                       /* the linux stack ends one page below the Plan 9 stack */
+                       x.top = r.base - PAGESIZE;
+                       x.base = x.top - pagealign((MAXPROC / 4) * USTACK);
+
+                       /* later calls only need the stack record */
+                       if(!firsttime)
+                               break;
+               }
+       }
+       close(fd);
+       firsttime = 0;
+
+       /* allocate the linux stack (x is still 0-0 when no Stack record was seen) */
+       space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory");
+
+       current->mem = space;
+}
+
+/*
+ * Detach the current process from its memory space and drop the
+ * reference it held. Does nothing when the process has no space.
+ */
+void exitmem(void)
+{
+       Space *space;
+
+       space = current->mem;
+       if(space == nil)
+               return;
+
+       current->mem = nil;
+       putspace(space);
+}
+
+/*
+ * Give new the memory space that getspace() derives from the space
+ * of the current process (copy is passed through unchanged), or no
+ * space at all when the current process has none.
+ */
+void clonemem(Uproc *new, int copy)
+{
+       Space *space;
+
+       space = current->mem;
+       if(space != nil)
+               new->mem = getspace(space, copy);
+       else
+               new->mem = nil;
+}
+
+/*
+ * Add up the sizes of all areas of proc.
+ *
+ * Each of the output pointers may be nil. They receive the bytes
+ * mapped in the data segment (pdat), the data and private segments
+ * (plib), the shared segment (pshr), the stack segment (pstk) and
+ * in areas with PROT_EXEC set (pexe). All of them are cleared first.
+ *
+ * Returns the total size of all areas, 0 when proc has no space.
+ */
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe)
+{
+       Space *space;
+       ulong size, z;
+       int i;
+
+       if(pdat) *pdat = 0;
+       if(plib) *plib = 0;
+       if(pshr) *pshr = 0;
+       if(pstk) *pstk = 0;
+       if(pexe) *pexe = 0;
+
+       if((space = proc->mem) == nil)
+               return 0;
+
+       size = 0;
+       qlock(space);
+       for(i=0; i<SEGMAX; i++){
+               Area *a;
+               Seg *seg;
+               if((seg = space->seg[i]) == nil)
+                       continue;
+               qlock(seg);
+               for(a = seg->areas; a; a = a->next){
+                       z = a->addr.top - a->addr.base;
+                       switch(i){
+                       case SEGDATA:
+                               if(pdat)
+                                       *pdat += z;
+                               /*
+                                * NOTE(review): no break here, so data areas are
+                                * added to *plib as well -- confirm this is intended
+                                */
+                       case SEGPRIVATE:
+                               if(plib)
+                                       *plib += z;
+                               break;
+                       case SEGSHARED:
+                               if(pshr)
+                                       *pshr += z;
+                               break;
+                       case SEGSTACK:
+                               if(pstk)
+                                       *pstk += z;
+                               break;
+                       }
+                       /* executable areas are counted regardless of their segment */
+                       if(pexe && (a->prot & PROT_EXEC))
+                               *pexe += z;
+                       size += z;
+               }
+               qunlock(seg);
+       }
+       qunlock(space);
+
+       return size;
+}
+
+/*
+ * Argument block read by sys_linux_mmap(); the fields are passed on
+ * to sys_mmap() in this order, with offset converted to a page number.
+ */
+struct linux_mmap_args {
+       ulong addr;
+       int len;
+       int prot;
+       int flags;
+       int fd;
+       ulong offset;   /* in bytes; sys_linux_mmap() requires it to be page aligned */
+};
+
+/*
+ * mmap variant that takes its arguments as a pointer to a
+ * struct linux_mmap_args. The byte offset has to be page aligned
+ * (-EINVAL otherwise) and is handed to sys_mmap() as a page number.
+ */
+ulong
+sys_linux_mmap(void *a)
+{
+       struct linux_mmap_args *args;
+       ulong pgoff;
+
+       args = a;
+       if(args->offset != pagealign(args->offset))
+               return -EINVAL;
+
+       pgoff = args->offset / PAGESIZE;
+       return sys_mmap(args->addr, args->len, args->prot, args->flags, args->fd, pgoff);
+}
+
+/*
+ * mmap system call.
+ *
+ * addr has to be page aligned and len (rounded with pagealign())
+ * must not be zero or wrap around. Anonymous mappings only create
+ * the area. File mappings additionally record a Filemap for the area
+ * and then read the file contents starting at page pgoff into the
+ * new mapping; a short or failed read ends the copy but does not
+ * fail the call.
+ *
+ * Returns the address of the mapping, or a negative errno value
+ * converted to ulong (-EINVAL, -EBADF, or the error from mapspace()).
+ */
+ulong
+sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff)
+{
+       Space *space;
+       Seg *seg;
+       Range r;
+       ulong o;
+       int e, n;
+       Area *a;
+       Filemap *f;
+       Ufile *file;
+
+       trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff);
+
+       if(pagealign(addr) != addr)
+               return (ulong)-EINVAL;
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(r.top <= r.base)
+               return (ulong)-EINVAL;
+
+       file = nil;
+       if((flags & MAP_ANONYMOUS)==0)
+               if((file = fdgetfile(fd))==nil)
+                       return (ulong)-EBADF;
+
+       space = current->mem;
+       qlock(space);
+       if((a = mapspace(space, r, flags, prot, &e)) == nil){
+               qunlock(space);
+               /* NOTE(review): file is nil for anonymous mappings; relies on putfile() accepting nil -- confirm */
+               putfile(file);
+               return (ulong)e;
+       }
+
+       /* mapspace() returned with a->seg locked */
+       seg = a->seg;
+       /* copy the range now: mergearea() below may widen a->addr */
+       r = a->addr;
+
+       if(flags & MAP_ANONYMOUS){
+               mergearea(a);
+               qunlock(seg);
+               qunlock(space);
+
+               return r.base;
+       }
+
+       o = pgoff * PAGESIZE;
+
+       /* reuse a Filemap from the segment's free list when there is one */
+       if(f = seg->freefilemap)
+               seg->freefilemap = f->next;
+       if(f == nil)
+               f = kmalloc(sizeof(Filemap));
+       f->ref = 1;
+       f->addr = r;
+       f->next = nil;
+       f->path = kstrdup(file->path);
+       f->offset = o;
+       /* a reference to the file is only kept when it was not opened read-only */
+       if((f->mode = file->mode) != O_RDONLY){
+               f->file = getfile(file);
+       } else {
+               f->file = nil;
+       }
+       a->filemap = f;
+       qunlock(seg);
+       qunlock(space);
+
+       trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top);
+
+       /* fill the mapping from the file, with no locks held */
+       addr = r.base;
+       while(addr < r.top){
+               n = preadfile(file, (void*)addr, r.top - addr, o);
+               if(n == 0)
+                       break;
+               if(n < 0){
+                       trace("read failed at offset %lux for address %lux failed: %r", o, addr);
+                       break;
+               }
+               addr += n;
+               o += n;
+       }
+
+       putfile(file);
+
+       return r.base;
+}
+
+/*
+ * munmap system call: unmap len bytes (rounded with pagealign())
+ * starting at the page aligned address addr from the space of the
+ * current process. Returns 0, or -EINVAL for a misaligned addr or
+ * an empty or wrapping range.
+ */
+int sys_munmap(ulong addr, ulong len)
+{
+       Space *space;
+       Range r;
+
+       trace("sys_munmap(%lux, %lux)", addr, len);
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(addr != pagealign(addr) || r.top <= r.base)
+               return -EINVAL;
+
+       space = current->mem;
+       qlock(space);
+       unmapspace(space, r);
+       qunlock(space);
+
+       return 0;
+}
+
+/*
+ * brk system call: run brkspace() on the space of the current
+ * process while holding the space lock and return its result.
+ */
+ulong
+sys_brk(ulong bk)
+{
+       Space *space;
+       ulong ret;
+
+       trace("sys_brk(%lux)", bk);
+
+       space = current->mem;
+
+       qlock(space);
+       ret = brkspace(space, bk);
+       qunlock(space);
+
+       return ret;
+}
+
+/*
+ * mprotect system call: set the protection of the areas between addr
+ * and addr + len (len rounded with pagealign()) to prot. Areas that
+ * only partly lie in the range are split first. Only the protection
+ * recorded in the Area is changed here.
+ *
+ * Returns 0 when at least one area was found at addr, -ENOMEM when
+ * addr is not mapped, -EINVAL for a misaligned addr or a zero len.
+ */
+int sys_mprotect(ulong addr, ulong len, int prot)
+{
+       Space *space;
+       Seg *seg;
+       Area *a, *b;
+       int err;
+
+       trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot);
+
+       len = pagealign(len);
+       if(pagealign(addr) != addr)
+               return -EINVAL;
+       if(len == 0)
+               return -EINVAL;
+
+       err = -ENOMEM;
+       space = current->mem;
+       qlock(space);
+       /* addr2seg returns the segment locked */
+       if(seg = addr2seg(space, addr)){
+               for(a = addr2area(seg, addr); a!=nil; a=a->next){
+                       /* past the end of the range */
+                       if(addr + len <= a->addr.base)
+                               break;
+                       err = 0;
+                       if(a->prot == prot)
+                               continue;
+                       /* all futexes of the area are woken before it is split */
+                       wakefutexarea(a, a->addr);
+                       if(a->addr.base < addr){
+                               /* b keeps the part below the range with the old protection */
+                               b = duparea(a);
+                               a->addr.base = addr;
+                               b->addr.top = addr;
+                               linkarea(seg, b);
+                       }
+                       if(a->addr.top > addr + len){
+                               /* b keeps the part above the range with the old protection */
+                               b = duparea(a);
+                               a->addr.top = addr + len;
+                               b->addr.base = addr + len;
+                               linkarea(seg, b);
+                       }
+                       trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot);
+                       a->prot = prot;
+               }
+               qunlock(seg);
+       }
+       qunlock(space);
+
+       return err;
+}
+
+/*
+ * msync system call: run syncspace() over len bytes (rounded with
+ * pagealign()) starting at the page aligned address addr. flags is
+ * only traced. Returns 0, or -EINVAL for a misaligned addr or an
+ * empty or wrapping range.
+ */
+int sys_msync(ulong addr, ulong len, int flags)
+{
+       Space *space;
+       Range r;
+
+       trace("sys_msync(%lux, %lux, %x)", addr, len, flags);
+
+       r.base = addr;
+       r.top = addr + pagealign(len);
+       if(addr != pagealign(addr) || r.top <= r.base)
+               return -EINVAL;
+
+       space = current->mem;
+       qlock(space);
+       syncspace(space, r);
+       qunlock(space);
+
+       return 0;
+}
+
+/*
+ * mremap system call: run remapspace() on the space of the current
+ * process while holding the space lock.
+ *
+ * Returns what remapspace() returns: the address of the mapping or a
+ * negative errno value converted to ulong.
+ */
+ulong
+sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr)
+{
+       Space *space;
+       ulong r;        /* same type as the result, no detour through int */
+
+       trace("sys_mremap(%lux, %lux, %lux, %x, %lux)",
+               addr, oldlen, newlen, flags, newaddr);
+
+       space = current->mem;
+       qlock(space);
+       r = remapspace(space, addr, oldlen, newlen, newaddr, flags);
+       qunlock(space);
+
+       return r;
+}
+
+/* operation codes for the op argument of sys_futex(), numbered from 0 */
+enum {
+       FUTEX_WAIT,
+       FUTEX_WAKE,
+       FUTEX_FD,               /* has no case in sys_futex(): yields -ENOSYS */
+       FUTEX_REQUEUE,
+       FUTEX_CMP_REQUEUE,
+};
+
+/*
+ * futex system call. Handles FUTEX_WAIT, FUTEX_WAKE, FUTEX_REQUEUE
+ * and FUTEX_CMP_REQUEUE; every other op returns -ENOSYS.
+ *
+ * Futex records hang off the area that contains addr and are looked
+ * up by address. Returns -EFAULT when the process has no memory
+ * space or addr is not inside a mapped area; otherwise the result of
+ * the operation (see the individual cases).
+ */
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3)
+{
+       Space *space;
+       Seg *seg;
+       Area *a;
+       Futex *fu, *fu2;
+       int err, val2;
+       vlong timeout;
+
+       trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3);
+
+       seg = nil;
+       err = -EFAULT;
+       if((space = current->mem) == 0)
+               goto out;
+
+       qlock(space);
+       if((seg = addr2seg(space, (ulong)addr)) == nil){
+               qunlock(space);
+               goto out;
+       }
+       /* from here on only the segment lock is held; it is released at out */
+       qunlock(space);
+       if((a = addr2area(seg, (ulong)addr)) == nil)
+               goto out;
+       /* fu is the existing record for addr, or nil when there is none */
+       for(fu = a->futex; fu; fu = fu->next)
+               if(fu->addr == addr)
+                       break;
+
+       switch(op){
+       case FUTEX_WAIT:
+               trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr);
+
+               if(fu == nil){
+                       /* first waiter: take a record from the free list or allocate one */
+                       if(fu = seg->freefutex){
+                               seg->freefutex = fu->next;
+                       } else {
+                               fu = kmallocz(sizeof(Futex), 1);
+                       }
+                       fu->ref = 1;
+                       fu->addr = addr;
+                       /* insert at the head of the area's futex list */
+                       if(fu->next = a->futex)
+                               fu->next->link = &fu->next;
+                       fu->link = &a->futex;
+                       a->futex = fu;
+               } else {
+                       incref(fu);
+               }
+
+               err = 0;
+               timeout = 0;
+               if(ptime != nil){
+                       struct linux_timespec *ts = ptime;
+                       vlong now;
+
+                       wakeme(1);
+                       now = nsec();
+                       /* a restarted call keeps the deadline saved by the interrupted one */
+                       if(current->restart->syscall){
+                               timeout = current->restart->futex.timeout;
+                       } else {
+                               timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec;
+                       }
+                       if(now < timeout){
+                               current->timeout = timeout;
+                               setalarm(timeout);
+                       } else {
+                               err = -ETIMEDOUT;
+                       }
+               }
+               if(err == 0){
+                       /* only sleep while the futex word still holds the expected value */
+                       if(*addr != val){
+                               err = -EWOULDBLOCK;
+                       } else {
+                               err = sleepq(fu, seg, 1);
+                       }
+               }
+               if(ptime != nil){
+                       current->timeout = 0;
+                       wakeme(0);
+               }
+               /* remember the absolute deadline for the restarted call */
+               if(err == -ERESTART)
+                       current->restart->futex.timeout = timeout;
+
+               /* last user: unlink the record if still linked and put it on the free list */
+               if(!decref(fu)){
+                       if(fu->link){
+                               if(*fu->link = fu->next)
+                                       fu->next->link = fu->link;
+                               fu->link = nil;
+                               fu->next = nil;
+                       }
+                       fu->next = seg->freefutex;
+                       seg->freefutex = fu;
+               }
+               break;
+
+       case FUTEX_WAKE:
+               trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr);
+               /* a negative val is passed to wakeq() as 0; without a record the result is 0 */
+               err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+               break;
+
+       case FUTEX_CMP_REQUEUE:
+               trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr);
+               if(*addr != val3){
+                       err = -EAGAIN;
+                       break;
+       /*
+        * FUTEX_REQUEUE enters in the middle of the if body above: it
+        * skips the comparison with val3, traces and then shares the
+        * wake/requeue code that follows the if.
+        */
+       case FUTEX_REQUEUE:
+                       trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr);
+               }
+               err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+               if(err > 0){
+                       /* for the requeue ops the ptime argument is used as an integer and passed to requeue() */
+                       val2 = (int)ptime;
+
+                       /* BUG: fu2 has to be in the same segment as fu */
+                       if(a = addr2area(seg, (ulong)addr2)){
+                               for(fu2 = a->futex; fu2; fu2 = fu2->next){
+                                       if(fu2->addr == addr2){
+                                               err += requeue(fu, fu2, val2);
+                                               break;
+                                       }
+                               }
+                       }
+               }
+               break;
+
+       default:
+               err = -ENOSYS;
+       }
+
+out:
+       if(seg)
+               qunlock(seg);
+       return err;
+}
diff --git a/linux_emul_base/miscdev.c b/linux_emul_base/miscdev.c
new file mode 100644 (file)
index 0000000..21ee4aa
--- /dev/null
@@ -0,0 +1,156 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <mp.h>
+#include <libsec.h>
+
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+enum   /* kinds of pseudo-device served by this file; the value is stored in Miscfile.m */
+{
+       Mnull,          /* /dev/null */
+       Mzero,          /* /dev/zero */
+       Mfull,          /* /dev/full */
+       Mrandom,        /* /dev/random */
+       Murandom,       /* /dev/urandom */
+       Mmax,           /* one past the last kind; not referenced elsewhere in this file */
+};
+
+typedef struct Miscfile Miscfile;
+struct Miscfile        /* an open misc device: the generic file header plus its kind */
+{
+       Ufile;                  /* unnamed embedded Ufile; openmisc() fills its ref, mode, path, fd and dev members */
+       int     m;              /* device kind (Mnull..Murandom) as returned by path2m() */
+};
+
+/*
+ * Translate an absolute device path into its M* kind.
+ * Returns -1 when the path is not one of the five devices
+ * handled by this file.
+ */
+static int
+path2m(char *path)
+{
+       if(strcmp(path, "/dev/null") == 0)
+               return Mnull;
+       if(strcmp(path, "/dev/zero") == 0)
+               return Mzero;
+       if(strcmp(path, "/dev/full") == 0)
+               return Mfull;
+       if(strcmp(path, "/dev/random") == 0)
+               return Mrandom;
+       if(strcmp(path, "/dev/urandom") == 0)
+               return Murandom;
+       return -1;
+}
+
+/*
+ * Open handler: allocate a Miscfile for a known device path.
+ * The unnamed third parameter is ignored.  On success the new file
+ * (reference count 1, no backing descriptor) is stored in *pf and
+ * 0 is returned; an unknown path yields -ENOENT.
+ */
+static int
+openmisc(char *path, int mode, int, Ufile **pf)
+{
+       Miscfile *mf;
+       int kind;
+
+       kind = path2m(path);
+       if(kind < 0)
+               return -ENOENT;
+
+       mf = kmallocz(sizeof(*mf), 1);
+       mf->m = kind;
+       mf->dev = MISCDEV;
+       mf->fd = -1;            /* there is no underlying descriptor */
+       mf->path = kstrdup(path);
+       mf->mode = mode;
+       mf->ref = 1;
+       *pf = mf;
+       return 0;
+}
+
+static int     /* close handler: releases nothing here and always returns 0 */
+closemisc(Ufile *)
+{
+       return 0;
+}
+
+/*
+ * Read handler.  The unnamed offset parameter is ignored.
+ *     /dev/null               end of file (returns 0)
+ *     /dev/zero, /dev/full    buf is filled with len zero bytes
+ *     /dev/random             len bytes from genrandom()
+ *     /dev/urandom            len bytes from prng()
+ * Returns the number of bytes stored in buf, or -EIO when the
+ * file's kind is not one of the above.
+ */
+static int
+readmisc(Ufile *f, void *buf, int len, vlong)
+{
+       switch(((Miscfile*)f)->m){
+       case Mnull:
+               return 0;
+       case Mzero:
+       case Mfull:
+               /* Linux /dev/full reads like /dev/zero; only writes to it fail */
+               memset(buf, 0, len);
+               return len;
+       case Mrandom:
+               genrandom(buf, len);
+               return len;
+       case Murandom:
+               prng(buf, len);
+               return len;
+       default:
+               return -EIO;
+       }
+}
+
+/*
+ * Write handler.  The data and offset parameters are unnamed and
+ * ignored: /dev/full reports -ENOSPC, the other four devices
+ * accept and discard everything (returning len), and an unknown
+ * kind yields -EIO.
+ */
+static int
+writemisc(Ufile *f, void *, int len, vlong)
+{
+       int kind;
+
+       kind = ((Miscfile*)f)->m;
+       if(kind == Mfull)
+               return -ENOSPC;
+       if(kind == Mnull || kind == Mzero || kind == Mrandom || kind == Murandom)
+               return len;
+       return -EIO;
+}
+
+/*
+ * Stat handler: describe one of the misc devices by path.
+ * The unnamed second parameter is ignored.  Returns -ENOENT for a
+ * path path2m() does not know, otherwise fills *s with a 0666
+ * character device owned by the current process and returns 0.
+ */
+static int
+statmisc(char *path, int, Ustat *s)
+{
+       if(path2m(path) < 0)
+               return -ENOENT;
+
+       s->ino = hashpath(path);
+       s->mode = S_IFCHR | 0666;
+       s->size = 0;
+       s->dev = 0;
+       s->rdev = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       /* NOTE(review): the division by 1e9 suggests boottime is in nanoseconds -- confirm */
+       s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+       return 0;
+}
+
+/*
+ * Fstat handler: stat an open misc file by handing the path that
+ * was recorded at open time to fsstat().  Returns fsstat()'s result.
+ */
+static int
+fstatmisc(Ufile *f, Ustat *s)
+{
+       return fsstat(f->path, 0, s);
+}
+
+static Udev miscdev =  /* handler table for the misc devices; registered and mounted by miscdevinit() */
+{
+       .open = openmisc,
+       .read = readmisc,
+       .write = writemisc,
+       .close = closemisc,
+       .stat = statmisc,
+       .fstat = fstatmisc,
+};
+
+/*
+ * Register the misc device table in devtab, mount it on every
+ * path it serves, and seed rand() from truerand().
+ */
+void miscdevinit(void)
+{
+       static char *paths[] = {
+               "/dev/null",
+               "/dev/zero",
+               "/dev/full",
+               "/dev/random",
+               "/dev/urandom",
+               nil,
+       };
+       char **pp;
+
+       devtab[MISCDEV] = &miscdev;
+
+       for(pp = paths; *pp != nil; pp++)
+               fsmount(&miscdev, *pp);
+
+       srand(truerand());
+}
diff --git a/linux_emul_base/mkfile b/linux_emul_base/mkfile
new file mode 100644 (file)
index 0000000..422112c
--- /dev/null
@@ -0,0 +1,67 @@
+</$objtype/mkfile
+
+TARG=linuxemu
+BIN=$home/bin/$objtype
+RCBIN=$home/bin/rc
+CFLAGS=-FTVw
+
+OFILES=\
+       bits.$O \
+       bufproc.$O \
+       error.$O \
+       exec.$O \
+       file.$O \
+       fs.$O \
+       main.$O \
+       mem.$O \
+       poll.$O \
+       proc.$O \
+       signal.$O \
+       stat.$O \
+       time.$O \
+       tls.$O \
+       trace.$O \
+       trap.$O \
+       linuxcall.$O \
+       consdev.$O \
+       dspdev.$O \
+       miscdev.$O \
+       pipedev.$O \
+       ptydev.$O \
+       rootdev.$O \
+       sockdev.$O \
+       procdev.$O \
+
+
+HFILES=fns.h dat.h linux.h
+
+CLEANFILES=linuxcalltab.out linuxdat.acid
+
+</sys/src/cmd/mkone
+
+linuxcalltab.out:      linuxcalltab linuxcalltab.awk
+       ./linuxcalltab.awk <linuxcalltab >$target
+
+linuxcall.$O:  linuxcalltab.out
+
+linuxdat.acid: $HFILES main.c trace.c signal.c mem.c file.c
+       rm -f $target
+       for(i in main.c){
+               $CC -a $i >>$target
+       }
+       for(i in bufproc.c error.c exec.c file.c fs.c mem.c poll.c \
+               proc.c signal.c stat.c time.c tls.c trace.c trap.c \
+               consdev.c dspdev.c miscdev.c pipedev.c \
+               ptydev.c rootdev.c sockdev.c procdev.c){
+               $CC -aa $i >>$target
+       }
+
+$RCBIN/linux:  linux
+       cp linux $RCBIN/linux
+       
+acid:V:        linuxdat.acid
+
+install:V:     $RCBIN/linux
+       
+
+
diff --git a/linux_emul_base/pipedev.c b/linux_emul_base/pipedev.c
new file mode 100644 (file)
index 0000000..dd4f5f6
--- /dev/null
@@ -0,0 +1,202 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Pipe Pipe;
+
+struct Pipe    /* one end of an emulated pipe, created by sys_pipe() */
+{
+       Ufile;                  /* unnamed embedded Ufile; its fd member holds the descriptor returned by pipe() */
+       void    *bufproc;       /* helper from newbufproc(); nil until bufprocpipe() first needs it */
+       ulong atime;            /* time(nil) at creation, refreshed after each read that returns data */
+       ulong mtime;            /* time(nil) at creation, refreshed after each write that transfers data */
+       int ino;                /* inode number reported by fstatpipe(), handed out by sys_pipe() */
+};
+
+enum{
+       Maxatomic = 64*1024,    /* largest chunk pipewrite() passes to a single write() call */
+};
+
+/*
+ * Write len bytes from buf to fd in chunks of at most Maxatomic.
+ * Each chunk is bracketed by notifyme(1)/notifyme(0); a nonzero
+ * result from notifyme(1) is treated as -ERESTART for that chunk.
+ * Returns the number of bytes written so far, or the error itself
+ * when it strikes before anything has been written.
+ */
+int
+pipewrite(int fd, void *buf, int len)
+{
+       uchar *start, *cur, *end;
+       int chunk, r;
+
+       start = buf;
+       end = start + len;
+       for(cur = start; cur < end; cur += r){
+               chunk = end - cur;
+               if(chunk > Maxatomic)
+                       chunk = Maxatomic;
+
+               if(notifyme(1)){
+                       r = -ERESTART;
+               } else {
+                       r = write(fd, cur, chunk);
+                       notifyme(0);
+                       if(r < 0)
+                               r = mkerror();
+               }
+
+               if(r < 0){
+                       /* report the error only if no data went out; else report the partial count */
+                       if(cur == start)
+                               return r;
+                       break;
+               }
+       }
+       return cur - start;
+}
+
+/*
+ * Close handler: close the pipe end's descriptor and hand its
+ * bufproc pointer to freebufproc().  Always returns 0.
+ */
+static int
+closepipe(Ufile *file)
+{
+       Pipe *p;
+
+       p = (Pipe*)file;
+       close(p->fd);
+       freebufproc(p->bufproc);
+       return 0;
+}
+
+/*
+ * Return the pipe's bufproc, creating it with newbufproc() on the
+ * pipe's descriptor the first time it is asked for.
+ */
+static void*
+bufprocpipe(Pipe *pipe)
+{
+       if(pipe->bufproc != nil)
+               return pipe->bufproc;
+       pipe->bufproc = newbufproc(pipe->fd);
+       return pipe->bufproc;
+}
+
+/*
+ * Poll handler: delegate to pollbufproc() on the pipe's bufproc
+ * (created on demand) and return its result.
+ */
+static int
+pollpipe(Ufile *file, void *tab)
+{
+       Pipe *p;
+       void *bp;
+
+       p = (Pipe*)file;
+       bp = bufprocpipe(p);
+       return pollbufproc(bp, p, tab);
+}
+
+/*
+ * Read handler.  The unnamed offset parameter is ignored.
+ * A blocking pipe that has no bufproc yet reads the descriptor
+ * directly, bracketed by notifyme(1)/notifyme(0); otherwise the
+ * read goes through readbufproc().  A read that returns data
+ * refreshes atime.  Returns the byte count or a negative error.
+ */
+static int
+readpipe(Ufile *file, void *buf, int len, vlong)
+{
+       Pipe *p;
+       int n;
+
+       p = (Pipe*)file;
+       if((p->mode & O_NONBLOCK) == 0 && p->bufproc == nil){
+               if(notifyme(1))
+                       return -ERESTART;
+               n = read(p->fd, buf, len);
+               notifyme(0);
+               if(n < 0)
+                       n = mkerror();
+       } else {
+               n = readbufproc(bufprocpipe(p), buf, len, 0, (p->mode & O_NONBLOCK));
+       }
+
+       if(n > 0)
+               p->atime = time(nil);
+       return n;
+}
+
+/*
+ * Write handler.  The unnamed offset parameter is ignored.
+ * Forwards to pipewrite() and refreshes mtime when at least one
+ * byte was written.  Returns pipewrite()'s result.
+ */
+static int
+writepipe(Ufile *file, void *buf, int len, vlong)
+{
+       Pipe *p;
+       int n;
+
+       p = (Pipe*)file;
+       n = pipewrite(p->fd, buf, len);
+       if(n > 0)
+               p->mtime = time(nil);
+       return n;
+}
+
+/*
+ * Ioctl handler.  Only request 0x541B (FIONREAD in the Linux
+ * ioctl numbering) is supported: it stores the count reported by
+ * nreadablebufproc() in *(int*)arg.  Returns 0 on success,
+ * -EINVAL for a nil arg, the negative count itself (with *arg
+ * set to 0) on failure, and -ENOTTY for every other request.
+ */
+static int
+ioctlpipe(Ufile *file, int cmd, void *arg)
+{
+       Pipe *p;
+       int *np;
+       int n;
+
+       p = (Pipe*)file;
+       if(cmd != 0x541B)
+               return -ENOTTY;
+       if(arg == nil)
+               return -EINVAL;
+
+       np = arg;
+       n = nreadablebufproc(bufprocpipe(p));
+       if(n < 0){
+               *np = 0;
+               return n;
+       }
+       *np = n;
+       return 0;
+}
+
+/*
+ * Emulated pipe system call: create a pipe with pipe(), wrap each
+ * end in a Pipe file, and store the two descriptors handed out by
+ * newfd() in fds[0] and fds[1].  Returns 0 on success, mkerror()
+ * when pipe() fails, or newfd()'s negative result.
+ */
+int sys_pipe(int *fds)
+{
+       static int ino = 0x1234;        /* next inode number to hand out */
+       Pipe *f;
+       int ends[2];
+       int i, nfd;
+
+       trace("sys_pipe(%p)", fds);
+
+       if(pipe(ends) < 0)
+               return mkerror();
+
+       for(i = 0; i < 2; i++){
+               f = kmallocz(sizeof(Pipe), 1);
+               f->ref = 1;
+               f->mode = O_RDWR;
+               f->dev = PIPEDEV;
+               f->fd = ends[i];
+               f->ino = ino++;
+               f->atime = f->mtime = time(nil);
+
+               nfd = newfd(f, 0);
+               if(nfd < 0){
+                       /*
+                        * NOTE(review): f is not released on this path, and
+                        * when i > 0 ends[0] may already have been closed by
+                        * sys_close() through closepipe() before the close()
+                        * below -- verify against newfd() and sys_close().
+                        */
+                       if(i > 0)
+                               sys_close(fds[0]);
+                       close(ends[0]);
+                       close(ends[1]);
+                       return nfd;
+               }
+               fds[i] = nfd;
+       }
+       return 0;
+}
+
+/*
+ * Fill *s for a pipe: FIFO mode 0666, the current process's
+ * uid and gid, the pipe's inode number and timestamps, size 0.
+ * No other member of *s is written.
+ */
+static void
+fillstat(Pipe *pipe, Ustat *s)
+{
+       s->mode = S_IFIFO | 0666;
+       s->ino = pipe->ino;
+       s->size = 0;
+       s->uid = current->uid;
+       s->gid = current->gid;
+       s->mtime = pipe->mtime;
+       s->atime = pipe->atime;
+}
+
+/*
+ * Fstat handler: describe an open pipe end via fillstat().
+ * Always returns 0.
+ */
+static int
+fstatpipe(Ufile *file, Ustat *s)
+{
+       Pipe *pipe = (Pipe*)file;
+
+       fillstat(pipe, s);
+       return 0;
+}
+
+static Udev pipedev =  /* handler table for pipe files; no open or stat entry is set, pipes are created by sys_pipe() */
+{
+       .read = readpipe,
+       .write = writepipe,
+       .poll = pollpipe,
+       .close = closepipe,
+       .ioctl = ioctlpipe,
+       .fstat = fstatpipe,
+};
+
+void pipedevinit(void) /* install the pipe handler table in devtab[PIPEDEV] */
+{
+       devtab[PIPEDEV] = &pipedev;
+}
diff --git a/linux_emul_base/poll.c b/linux_emul_base/poll.c
new file mode 100644 (file)
index 0000000..79fb8f5
--- /dev/null
@@ -0,0 +1,250 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * Queue a wait on q for file f and push it onto the list whose
+ * head pointer is t (a Uwait**).  The new entry holds the file
+ * returned by getfile(f).  Does nothing when any argument is nil.
+ */
+void pollwait(Ufile *f, Uwaitq *q, void *t)
+{
+       Uwait **head;
+       Uwait *w;
+
+       if(f == nil || t == nil || q == nil)
+               return;
+
+       head = t;
+       w = addwaitq(q);
+       w->file = getfile(f);
+
+       /* link in at the front of the caller's list */
+       w->next = *head;
+       *head = w;
+}
+
+/*
+ * Empty the wait list at *p: unlink each entry from the front and
+ * pass it to delwaitq().  *p is nil afterwards.
+ */
+static void
+clearpoll(Uwait **p)
+{
+       Uwait *w;
+
+       for(w = *p; w != nil; w = *p){
+               *p = w->next;
+               delwaitq(w);
+       }
+}
+
+struct linux_pollfd    /* element type of the array that sys_poll() receives through its first argument */
+{
+       int                     fd;             /* descriptor to examine */
+       short           events;         /* presumably the requested event mask, as in Linux struct pollfd -- confirm */
+       short           revents;        /* presumably the returned event mask, as in Linux struct pollfd -- confirm */
+};
+
+int sys_poll(void *p, int nfd, long timeout)
+{
+       int i, e, err;
+       Uwait *tab;
+       Ufile *file;
+       vlong now, t;
+       struct linux_pollfd *fds = p;
+
+       trace("sys_poll(%p, %d, %ld)", p, nfd, timeout);
+
+       if(nfd < 0)
+               return -EINVAL;
+
+       t = 0;
+       wakeme(1);
+       if(timeout > 0){
+               now = nsec();
+               if(current->restart->syscall){
+                       t = current->restart->poll.timeout;
+               } else {
+                       t = now + timeout*1000000LL;
+               }
+               if(now < t){
+                       current->timeout = t;
+                       setalarm(t);
+               }
+       }
+
+       tab = nil;
+       for(;;){
+               clearpoll(&tab);
+
+               err = 0;
+               for(i=0; i<nfd; i++){
+            &nb