--- /dev/null
+2008-08-16
+Creation of a CHANGES file
+
+An entry starts with the date followed by a newline and then
+the content follows. Usually, the first line after the date
+is a short description and then a longer one follows.
+To terminate the entry, insert two newlines at the end.
+
+So this entry serves as an example. Hope this is simple enough :-)
+
+
+2008-08-16
+New debugging implemented
+
+Debug code removed from mem.c and trap.c and rewrote in
+acid. See the DEBUGGING section in the HOWTO file for further
+information.
+
+
+2008-08-17
+Fontconfig crash fixed, Debug code fixes
+
+libfontconfig mapped some config files with len == 0, this
+was not handled correctly so it crashed.
+
+the umem() acid function didn't check for zero segment
+pointers so it showed invalid data for the mostly unused
+SEGSHARED segment.
+
+
+2008-08-18
+Some minor fixes
+
+more checking in memory manager
+sys_[gs]etpgrp implemented for pid != current->pid
+renamed emu.c to main.c
+set UID/GID/EUID/EGID in AUXVEC on exec()
+
+
+
+2008-08-21
+Making prof(1) work (at least dont let it crash)
+
+mem.c: convertseg():
+
+Replaced read() calls with pread() to prevent a profiling related crash.
+The problem was that convertseg() detaches the DATA segment reattaches
+a new one and used read() to get the contents back. read() was a
+profiled function and the profiler finds its structures cleared to
+zero and crashes. We now use pread() that is a unprofiled assembly
+syscall stub.
+
+Still, child processes and kprocs are not currently profiled.
+
+
+2008-08-22
+AF_UNIX client sockets implemented
+
+Its a little bit of a hack. We do the AF_UNIX handling like APE
+does so we can interact with the ported Xservers Xbr and equis.
+
+
+2008-08-23
+Workaround for mozilla GPFAULT bug
+
+If a process is notified that it has pending signals with the
+"sig" message and if the note interrupts the execution of an
+INT 0x80 instruction, a syscall in the handler causes mysterious
+crashes I don't understand.
+
+The workaround detects the condition and delays the
+handling of the signals returning back to userspace.
+
+I could reproduce the condition and this hack seems
+to work. I should write some testcase to analyze this
+condition further. Maybe its some kernel bug.
+
+
+2008-08-23
+Minor file related fixes/cleanups
+
+- Implemented in miscdev.c for writable /dev/zero
+- sys_umask() now returns the previous umask
+- default umask set to 022
+- sys_umask() sys_cwd() moved from proc.c to file.c
+
+
+2008-08-24
+linuxemu.rc script updated
+
+- resolve relative rootpath
+- generate /etc files for hostname and resolv.conf if not readable
+- removed environment user->USER home->HOME conversion in main.c
+
+
+2008-09-10
+Better workaround for mozilla GPFAULT bug
+
+The problem arises because of the handling of notes in Plan9 kernel.
+
+It happens if linux code does a syscall while there are usernotes (like
+the "sig" ones) queued in the process note[] array. Then the trap()
+function in the kernel will enqueue the trap in the queue and the user
+note gets handled first.
+
+Now, after we have done a syscall in the note handler of the user
+signal, notify() gets called in the kernel that detects the next note
+to be a trap and thinks the note handler itself caused it and kills
+the process.
+
+I suggested a patch that makes sure traps get enqueued on the head of
+the note[] array so that they get handled before a user note but it's
+not decided to be applied yet so here is the workaround.
+
+The workaround avoids posting notes to a process that could possibly
+cause a trap in the future before the usernote gets handled. (this
+excludes all linux code because it can issue a syscall anytime) The
+only time it is safe to post notes is if we are in the linuxemu syscall
+handler.
+
+This is mostly the case anyway when notes are used to interrupt
+blocking syscalls (waking a process sleeping in a sigsuspend for
+example)
+
+Linux code that spins in an endless loop will not be interrupted by
+notes/signals anymore.
+
+
+2008-09-20
+Minor stuff
+
+Masking more fp-errors in trap.c. (To get some dos game running in
+bochs)
+
+prboom and zsnes were bumpy sometimes because our select() didn't set
+the struct timeval *tvp to the time the select call did not sleep.
+
+The stat function pointer was removed from the Ufile struct and all the *dev.c
+files. I have no need to return custom stats from the devices right now
+(except for /dev/zero (mode & 0222)) so I moved the only case where it's
+needed into the hackish dir2statmode function in p9dev.c. There is also
+an fd field added to the generic Ufile struct because most devices
+use plan9 filedescriptors at some point and we use it in fstat(). Saves some
+casting and fstat can use the fd (when it's available) to make a dirstat() and
+convert to linux format.
+
+For the postnote stuff mentioned in the last changes entry, there is a global
+flag in main.c (notehack = 1) to enable/disable the workaround.
+
+
+2008-09-21
+atime/mtime, AT_CLKTCK
+
+new syscalls
+- sys_utime
+- sys_utimes
+- sys_times
+
+the AT_CLKTCK entry in exec is set to _tos->cyclefreq
+now.
+
+
+2008-10-05
+faster dead proc detection
+
+Using linuxemu from terminal caused huge network load
+because we did a lookup of /proc/# on every timer round.
+
+now we keep open filedescriptors for /proc/#/args and
+/proc/#/note in the proc structure and only check for
+dead procs every second.
+
+
+2008-10-05
+sound (/dev/dsp)
+
+OSS sound implemented, its only tested with some SDL
+apps (zsnes, prboom).
+
+The output frequency is hardcoded at 44100 Hz and 2
+channels stereo at the moment.
+
+
+2008-10-20
+dsp, stat, mkfile, -d, dns, fs
+
+dspdev detects output frequency now and implement more
+ioctls.
+
+there are universal Ustat and Udirent structs now to separate
+the linux formats from driver code.
+
+big change in all devices, added indirection layer fs.c that
+has some kind of mapping from path -> device and forwards
+filesystem calls to device drivers. now drivers can return
+correct stat information.
+
+this is needed for implementing /dev/pts later.
+
+removed the /etc/(hosts resolv.conf) generation code from
+linuxemu.rc because it doesn't work in some cases and
+causes maintenance nightmares.
+
+use the -d switch for turning on trace to stderr. so we dont
+need to always change the if(0) from trace.c and recompile.
+
+force compiler flags in mkfile, the -T from new mkone breaks
+build on current distribution.
+
+
+2008-10-27
+lots of fixes
+
+- off by one error basepath()
+- chmod used access which resolves always to link target
+- runlink
+- the note hack broke sigprocmask, and sometimes failed
+ to interrupt a sleepproc(). this is now handled in
+ interruptproc(). (this makes drawterm work on linuxemu)
+- write() to pty was not synchronized/not blocking that
+ caused some apps to spin in write() or others to drop
+ characters (curses).
+
+
+2008-10-28
+grow filedescriptor table in dup2
+
+- if the new fd supplied is out of range, don't return an error but
+ grow the fdtab so it fits in. (this makes configure work)
+
+2008-11-15
+lots of changes
+
+- Updated HOWTO
+
+- included <ureg.h> in some modules. this fixed the type
+ signature errors of 8l when compiling with -T flag.
+
+- fixed bug in exec()
+ exec needs to run outside the note context, because it
+ is deleting memory segments and that can cause the removal
+ of the stack segment as well. we have to set current->syscall
+ to nil to avoid getting a note posted that could interrupt us in
+ the middle of some malloc() or something.
+
+- sys_pipe() FD_CLOEXEC
+ sys_pipe() create the filedescriptors with FD_CLOEXEC bit
+ set as open() does. this is wrong and caused gimp plugins
+ to fail.
+
+- new signal handling code
+ signal.c has changed a bit. now CLONE_THREAD procs
+ share a signal queue and proc() uses wantsignal() to
+ figure out what process to interrupt.
+
+- restartable syscalls
+ we process the SA_RESTART flag now and are able to restart syscalls
+ that got interrupted by such signals. sleepproc() returns -ERESTART
+ by default now. sys_poll() and sys_sleep() will return -EINTR in
+ any case.
+
+- fs reorganized, [sg]etxattr added, p9cwd added as an optimization
+ to avoid walks().
+
+- ptydev emits SIGINTR to process group, added tty to Uproc.
+
+- added fddev (/dev/fd)
+
+- exitproc() now emits SIGCHLD.
+
+- mem.c: addrok() now takes a prot flags parameter so we can test
+ for expected memory protection too.
+
+- profme(): profiling support for child processes added
+
+- initialization completely moved in main()
+
+- fchmod, fchown, ftruncate moved to file.c
+
+
+2008-10-19
+just code dressing...
+
+renamed some files, added typedef for Ureg, abstracted syscall specific
+code in linuxcall.c, more tracing...
+
+
+2009-02-06
+Fixed the opera fork() no more threads bug
+
+There was a problem of dns resolver zombie processes created by
+opera that seemed to use up all the process table due to incomplete
+implementation of clone(). Linux specifies an exit signal in the lower
+byte of the clone-flags parameter to tell if the process should hang
+around as a zombie so that the parent can wait for it. If no signal is
+specified here, then the process should exit without becoming a
+zombie. There is also the case that the parent ignores the SIGCHLD
+signal or has the SA_NOCLDWAIT flag on the SIGCHLD set. In that case
+the child should also purge itself. I implemented reparenting,
+because I suspected the bug there but this was not the case.
+
+Here is a new uprocs() acid function that dumps the proctab.
+
+
+2009-02-18
+Minor changes
+
+Added anonymous area merging to reduce the area count and removed
+redundant clearmem calls. Changed Uwaitq lock from QLock to normal
+spinlock. Removed some trace() calls.
+
+The biggest change is that linuxemu.rc now is able to start equis. This
+simplifies writing wrapper scripts to start a browser or other X11 apps.
+
+
+2009-03-25
+Simplified area merging in memory manager
+
+Areas are doubly linked now so its easier to get the previous area for
+mergearea().
+
+
+2009-03-30
+Fixed man-bug. (Restarting syscalls)
+
+Restarting syscalls failed if the signal that was sent to the interrupted
+process was blocked. This is fixed now.
+
+
+2009-04-01
+fixed awd-bug (use builtin cd), make errors more verbose in exec.c
+
+bla
+
+2009-05-11
+fix pipeseek, added pread64, pwrite64 syscalls.
+
+forgot to return -ESPIPE on seekpipe.
+implemented pread64/pwrite64 (needed by git).
+
+
+2009-07-25
+random stuff
+
+- increased bufproc read and queuesize to 4K/64K (fixes links2 -g hang)
+- reuse buffers in bufproc
+- cleanup timer stuff, introduce 5ms min sleep time, avoid interrupt note
+- fix sys_select() to always modify tv
+- fix format mismatch in nextsignal
+- dont combine in sys_readv/sys_writev
+- fix open in devdsp.c
+- s/memcpy/memmove/g
+- dont reset segment registers for signal handlers
+- possibly more that i forgot... use history(1)
+
+
+2009-07-27
+fixed audio delay
+
+keeping track of how many samples (time) have been submitted
+to /dev/audio already and wait when the buffer grows over some
+threshold. this removes the audio delay from games :)
+
+
+2009-07-29
+doc
+
+put documentation in doc subdirectory.
+
+
+2009-07-31
+mremap, segbrk shrinking, pagesize, doc
+
+rewrote mremap implementation to correctly clear area for shrinking.
+handles more error cases and checks for overlap. this fixes the gimp
+invalid pointer bug.
+
+removed segment shrinking with segbrk as this feature may be removed
+in newer kernel versions as it introduced a bug where it is possible to
+unmap pages while the kernel touches them and cause a panic.
+
+removed the ROUNDPAGE() macro from dat.h. heres a pagealign()
+function in mem.c now and the global variable pagesize that is initialized
+in main.
+
+some documentation cleanups.
+
+
+2009-08-24
+dspdev improvements.
+
+do some linear interpolation in audio resampling to get better sound quality.
+avoid copy when no resampling is required. reflect queue full in
+GETOSPACE ioctl. cleanup code to avoid vlong calculations.
+
+
+2009-08-26
+bugs
+
+fixed uninitialized values in stat which caused -EOVERFLOW on linux
+kernel build. removed wakeableproc() (changes in signal.c, ptydev.c,
+bufproc.c, poll.c). fixed sigsuspend race. simpler waitq code (uses
+lesser memory too). fixed waitpid race. added /dev/dsp0 to dspdev
+(makes mikmod work). fixed rfork/notify crash.
+
+
+2009-08-30
+mplayer, bb, audacity play cursor, bugs
+
+refactored timers, alarm and deadproccheck into one timerproc and
+removed timer.c. every Uproc has a timeout field now that is the
+time in nanoseconds when the timeout expires. on expiration, the
+timerproc sets the value to zero and does a wakeup on the
+timed out process. a process sets/resets its timeout with
+settimeout(delta). the remaining time in ms can be queried with
+timeoutremain().
+
+fixed missing protection flags in setupstack.
+
+more accurate GETOSPACE (mplayer, bb) and new GETOPTR
+ioctl (needed for audacity play cursor) in dspdev.c.
+
+handle kill note as SIGKILL in trap.c.
+
+handle illegal instruction as SIGILL and pass/restore sigcontext
+(needed for mplayer runtime SSE check).
+
+sys_sigreturn now uses current->ureg->sp to find the restore
+information.
+
+preallocate all Uprocs.
+
+
+2009-09-06
+cleanup
+
+removed dev argument from fdgetfile()
+
+return correct -ENOTSOCK in socketcall()
+
+fixed pread/pwrite, dev->read/dev->write now take an offset
+argument.
+
+fixed time diff overflow in dspdev
+
+
+2009-09-08
+fuckup, O_TRUNC, restarting syscalls, rc, getdents
+
+fucked up:
+- seek didnt work for whence == 1 as the plan9 seek pointer was
+ never moved in read due to change to pread. this caused cp to
+ corrupt the output file when it skipped null blocks.
+- basepath in fs.c was broken
+- readv/writev didnt increment the file offset
+
+what we have now is that file.c does all the offset tracking, and
+devices provide a size() function that returns the actual file size.
+
+added O_TRUNC for open.
+
+signal restarting sometimes resulted in returning -ERESTART to
+userspace. this could happen when another thread had stolen
+our signal. we now restart the syscall in nextsignal() even if
+there was no signal pending for us.
+
+removed the exitsig function from linuxemu.rc as we can use
+the -terminate option of the xserver to get it shutdown.
+
+read the whole directory, then calculate file offset for directory
+entries.
+
+
+2009-09-12
+signal handling changes, acid, rc
+
+
+2009-09-20
+sockets, basepath, alarm
+
+fixed error in basepath (*ps vs ps) and implemented nonblocking connect,
+server sockets, socketpair, sys_alarm
+
+
+2009-10-13
+lots of changes
+
+simplified locking by making process wakeup non blocking.
+to not miss wakeups, the to be suspended proc should call wakeme(1)
+before it goes to sleep.
+
+timers for alarm/itimer have been moved to the per "process" signal
+data. current->timeout is still local to the current "thread".
+
+sys_kill() now makes sure we only send one signal per "process".
+
+syscall restarting now can use the Urestart (current->restart)
+structure to remember state. (implemented for nanosleep, poll and
+select)
+
+changed default to non tracing.
+
+pty now handles winsize changes. fixing current tty changing. (ssh bug,
+rxvt bug)
+
+added /dev/random and /dev/urandom to miscdev.
+
+more ioctls for dspdev.
+
+enforce non reentrancy for traps.
+
+
+2009-10-15
+fixed sys_brk()
+
+we now use a separate segment for the BSS and dont intermix mmap and brk.
+thanks jibanes for reporting!
+
+
+2010-02-27
+futex, TLS, mprotect
+
+implemented sys_futex() finally
+
+changed tls to use the new /dev/gdt interface to change its
+process segment descriptors
+
+fixed mprotect
+
+
+2010-04-30
+linuxemu.rc gone, documentation
+
+removed linuxemu.rc and replaced it with linux.
+
+usage: linux [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...]
+
+linuxroot is now an optional parameter (-r). it will default to /sys/lib/linux.
+
+dont hide /lib/tls anymore and bind devarch. if you dont want to
+patch your kernel with the segdescr patch and use mroot[-linuxemu].tbz
+you can rename /lib/tls to /lib/_tls_disabled_.
+
+
+2010-05-02
+exit_group, exec, futex, waitpid, quoted arguments
+
+properly implement exit_group and zap all threads. notify
+all parent threads.
+
+zap threads in exec.
+
+implement FUTEX_REQUEUE and FUTEX_CMP_REQUEUE.
+
+handle WALL, WCLONE and WNOHANG in waitpid.
+
+preserve quoted arguments to linux.
+
+
+2010-05-11
+select/poll and EBADF, execve malloc, set_thread_area, initproc, SIGSTOP/SIGCONT,
+tty, getsid, getpeeraddr, /proc
+
+select and poll never return -EBADF but ignore the offending
+filedescriptor. this is wrong in the manpage! (this was needed
+to survive the python configure script)
+
+handle malloc errors in execve and dont panic when elf
+loading fails but kill the process.
+
+detect empty descriptors in set_thread_area so descriptors
+can be freed.
+
+move some of the initialization from main to initproc.
+
+SIGSTOP/SIGCONT handling now works for thread groups. for this
+we now have stopproc() and contproc() that are called from the
+signal code when SIGSTOP or SIGCONT signal is received. each Uproc
+now has a traceproc callback that is called when we enter or
+exit the kernel. zapthreads() and stopproc() use this to get all threads
+in the wanted state. for stopped procs, waiting happens in
+the signal code so calling handlesignals() of a stopped proc will
+block until it gets killed or continued.
+
+new fields in Uproc:
+traceproc, tracearg - called when entering or exiting the kernel
+wstate - current wait state of this process. WEXITED, WSTOPPED, WCONTINUED.
+wevent - like wstate, but reset by waitpid
+comm - double null terminated string array. first entry is the full exe name
+followed by the execve arguments.
+
+heres a new format %S for signal numbers.
+
+the per thread tty is gone. the tty is now in the per process signal queue.
+gettty() and settty() can be used to modify it. ptydev now allows opening
+the slave tty multiple times. (fixes midnight commander error)
+
+implemented sys_getsid().
+
+fix AF_INET padding and byte order for getpeername socketcall.
+
+implemented /proc (procdev). fddev is gone. /dev/tty handled by
+ptydev now. this makes pkill, ps, top and inkscape work!
+
+
+2010-05-28
+fixed pipe filedescriptor leak in AF_UNIX
+
+we leaked the sock->other descriptor when failing to connect
+a AF_UNIX socket. thanks yarikos for reporting!
+
+
+2011-08-05
+rename to existing symlink target bug, profine -> profile
+
+renaming a symlink to an existing symlink would cause the
+file to be renamed to .udir.L.udir.L....
+
+fix profine/profile typo
+
+2014-11-20
+change uname release to 3.2.1 to make debian 7.0 not complain
+(thanks henesy)
--- /dev/null
+INTRO
+
+Linuxemu is a program that can execute Linux/i386 ELF binaries on
+Plan9. It was started by Russ Cox and development was continued by
+me. It's open source; I don't care what you are doing with it, but maybe
+Russ does, I don't know :-)
+
+If you found some bugs or have some other improvements/ideas send an
+email to:
+
+cinap_lenrek AT gmx DOT de
+
+
+SOURCE
+
+linuxemu is available on sources. On Plan9 do:
+
+% 9fs sources
+% cp /n/sources/contrib/cinap_lenrek/linuxemu3.tgz .
+
+Another source is my server on the web:
+
+% hget http://9hal.ath.cx/usr/cinap_lenrek/linuxemu3.tgz >linuxemu3.tgz
+
+
+DOCUMENTATION
+
+documentation is provided in the doc directory:
+
+doc/linuxemu.txt
+doc/todo.txt
+
+
+COMPILE
+
+% tar xzf linuxemu3.tgz
+% cd linuxemu3
+% mk
+
+
+INSTALL
+
+% mk install
+
+
+BOOTSTRAP
+
+You need a linux root filesystem packed in a tarball. Go
+get some linux rootfs:
+
+http://9hal.ath.cx/usr/cinap_lenrek/mroot.tbz
+http://9hal.ath.cx/usr/cinap_lenrek/mroot-linuxemu.tbz
+
+the -linuxemu version contains no symlinks and can be extracted with
+plain plan9 tools bunzip/tar so you can skip the BOOTSTRAP section.
+:-)
+
+You can create your own with debootstrap on debian linux... or help
+me write an installer that unpacks and installs slackware on plan9...
+In any case, linuxemu is not hardwired to any linux distribution!
+
+Extract your linux rootfilesystem with the static linked gnutar from
+the bootstrap directory. (This will create all the fake symlinks for
+you)
+
+% 8.out bootstrap/tar xf /tmp/mroot.tar
+
+
+RUNNING
+
+Then you can use the linux script to "chroot" into your linux
+rootfs. the linux script is necessary because for linux programs
+to run, shared libraries from your linux root have to appear at /lib
+and /usr/lib and configuration files are expected to be in /etc.
+the script will build a private namespace and bind the linuxroot
+over the plan9 root. the original plan9 namespace is mounted to /9.
+
+% linux -r ./mroot /bin/bash -i
+
+if you omit the -r option, the linuxroot defaults to /sys/lib/linux. you
+may put your linux root there or add a bind to your $home/lib/profile.
+
+You should change /etc/resolv.conf to match your network nameserver
+setup. Also, you may want to edit /etc/apt/sources.list to change the
+debian mirror.
+
+
+DEBUGGING
+
+If linuxemu crashes, use acid to figure out what's going on:
+
+% mk acid
+% acid -l linuxemu.acid <pid>
+
+then you can issue the following commands:
+
+ustk() dump a (userspace) stacktrace for the current thread
+umem(Current()) dump the memory mappings
+ufds(Current()) dump the filedescriptor table
+utrace(Current()) dump the internal tracebuffer (enabled by -d option)
+
+use xasm()/xcasm() for disassembly for linux code.
+
+You can also enable full trace logging:
+
+% linux -r ./mroot -dd /bin/bash -i >[2]/tmp/linuxemu.log
+
+This slows linuxemu down. In case of race conditions, it often
+happens that the bug disappears when doing full trace logging!
+
+
+NPTL/thread-local storage
+
+If you get one of these errors:
+
+"cannot set up thread-local storage: cannot set up LDT for thread-local storage"
+
+this is glibc/libpthread complaining! the problem is the following:
+glibc on i386 decided at some point to use the extra segment registers
+GS and FS as an indirection pointer for thread local storage. the
+operating system kernel therefore must have a mechanism to let userspace
+change descriptor table entries and swap them in/out on context
+switch.
+
+to make it work there are several options:
+
+1) recompile and link the program with a pre NPTL version of glibc.
+
+2) on some distributions, a non-tls version of libc/libpthread is available.
+in my debian mroot, the NPTL version is in /lib/tls, the older version
+is in /lib. by renaming /lib/tls to /lib/_tls_disabled_ the loader will
+use the non-tls version.
+
+3) i made a kernel patch that adds support for per process descriptors to
+plan9:
+/n/sources/contrib/cinap_lenrek/segdescpatch
+http://9hal.ath.cx/usr/cinap_lenrek/segdescpatch.tgz
+it will add the files gdt and ldt to devarch (#P).
--- /dev/null
+TEXT incref(SB),$0	/* atomically increment the word addressed by the first argument */
+ MOVL l+0(FP),AX	/* AX = pointer argument */
+ LOCK	/* bus-lock prefix: makes the following INCL atomic */
+ INCL 0(AX)
+ RET
+
+TEXT decref(SB),$0	/* atomically decrement the word addressed by the first argument; returns 0 in AX when it reached zero, 1 otherwise */
+ MOVL l+0(FP),AX	/* AX = pointer argument */
+ LOCK	/* bus-lock prefix: makes the following DECL atomic */
+ DECL 0(AX)
+ JZ iszero	/* zero flag set by DECL: count dropped to zero */
+ MOVL $1, AX	/* still nonzero */
+ RET
+iszero:
+ MOVL $0, AX	/* count reached zero */
+ RET
+
+TEXT jumpureg(SB), 1, $0	/* load the register set saved in *ureg and continue execution at its PC; does not return to the caller */
+ MOVL ureg+0(FP), AX /* ureg in AX */
+ MOVL 68(AX), SP /* restore SP */
+ SUBL $12, SP	/* scratch space below the restored SP for the saved AX and PC */
+ MOVL 28(AX), BX /* put AX on 4(SP) */
+ MOVL BX, 4(SP)
+ MOVL 56(AX), BX /* put PC on 8(SP) */
+ MOVL BX, 8(SP)
+ MOVL 0(AX), DI /* restore registers */
+ MOVL 4(AX), SI
+ MOVL 8(AX), BP
+ MOVL 16(AX), BX
+ MOVL 20(AX), DX
+ MOVL 24(AX), CX
+ MOVL 4(SP), AX /* restore AX */
+ ADDL $8, SP	/* SP now points at the stored PC */
+ RET	/* pops the stored PC, leaving SP at the restored value */
+
+TEXT linux_sigreturn(SB), 1, $0	/* stub that issues Linux system call 119 through INT 0x80 */
+ MOVL $119, AX /* sys_sigreturn */
+ INT $0x80
+ RET
+
+TEXT linux_rtsigreturn(SB), 1, $0	/* stub that issues Linux system call 173 through INT 0x80 */
+ MOVL $173, AX /* sys_rt_sigreturn */
+ INT $0x80
+ RET
+
+TEXT get_ds(SB), 1, $0	/* return the current DS segment selector in AX */
+ PUSHL DS
+ POPL AX
+ RET
+TEXT get_cs(SB), 1, $0	/* return the current CS segment selector in AX */
+ PUSHL CS
+ POPL AX
+ RET
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Bufproc Bufproc;
+typedef struct Bufq Bufq;
+
+struct Bufq	/* one read buffer; chained through next on a Bufproc's filled queue or free list */
+{
+ Bufq *next;
+
+ uchar *start;	/* first unread byte inside data[] */
+ uchar *end;	/* one past the last valid byte inside data[] */
+
+ uchar data[8*1024];	/* payload filled by read() in bufproc() */
+};
+
+struct Bufproc	/* state shared between readers and the background bufproc() process */
+{
+ Ref;	/* reference count; newbufproc() starts it at 2 */
+ QLock;	/* taken around accesses to the fields below */
+
+ int fd;	/* descriptor bufproc() reads from; -1 after freebufproc() or a read error */
+ int error;	/* mkerror() result recorded when read() failed, else 0 */
+ int notefd;	/* open /proc/<pid>/note of the bufproc, used to send it "interrupt" */
+
+ Bufq *qf;	/* free list of reusable buffers */
+ Bufq *qh;	/* head of the queue of filled buffers */
+ Bufq **qt;	/* tail link of the filled queue; points at qh when the queue is empty */
+
+ int wr;	/* set while bufproc() waits in rendezvous() because the queue is full */
+ Uwaitq wq;	/* wait queue woken whenever data arrives or bufproc() exits */
+};
+
+/*
+ * queuesize: total number of unread bytes held by the buffer
+ * chain starting at q (0 for an empty chain).
+ */
+static int
+queuesize(Bufq *q)
+{
+	int total;
+
+	for(total = 0; q != nil; q = q->next)
+		total += q->end - q->start;
+	return total;
+}
+
+/*
+ * freebufproc: drop one reference to the Bufproc bp (nil is ignored).
+ *
+ * The descriptor is marked closed (fd = -1) under the lock.  If another
+ * reference remains, the background process is kicked so it notices:
+ * through rendezvous() when it is parked on a full queue, otherwise by
+ * writing "interrupt" to its note file.  When the last reference goes
+ * away, all queued and free buffers and the structure itself are freed.
+ */
+void
+freebufproc(void *bp)
+{
+	Bufproc *b;
+	Bufq *q, *next;
+
+	b = bp;
+	if(b == nil)
+		return;
+
+	qlock(b);
+	b->fd = -1;
+	if(decref(b) == 0){
+		/* last reference: nobody else can touch b anymore */
+		qunlock(b);
+
+		/* append the free list to the filled queue and release it all */
+		*b->qt = b->qf;
+		for(q = b->qh; q != nil; q = next){
+			next = q->next;
+			free(q);
+		}
+		close(b->notefd);
+		free(b);
+		return;
+	}
+
+	if(b->wr){
+		/* bufproc is parked on a full queue: release it */
+		b->wr = 0;
+		while(rendezvous(&b->wr, 0) == (void*)~0)
+			;
+	} else {
+		/* bufproc may be blocked in read(): interrupt it */
+		write(b->notefd, "interrupt", 9);
+	}
+	qunlock(b);
+}
+
+static void
+bufproc(void *aux)	/* background reader: queues data read from b->fd until the fd is closed or read() fails */
+{
+ Bufproc *b = aux;
+ Bufq *q;
+ int ret;
+ int fd;
+
+ setprocname("bufproc()");
+
+ q = nil;
+ qlock(b);
+ for(;;){
+ while((b->fd >= 0) && (queuesize(b->qh) >= 64*1024)){	/* queue holds 64K or more: park until a reader or freebufproc() releases us */
+ b->wr = 1;
+ qunlock(b);
+ while(rendezvous(&b->wr, 0) == (void*)~0)	/* retry while rendezvous() reports failure */
+ ;
+ qlock(b);
+ }
+ if((fd = b->fd) < 0)	/* closed by freebufproc() */
+ break;
+ if((q == nil) && (q = b->qf))	/* holding no buffer: try to reuse one from the free list */
+ b->qf = q->next;
+ qunlock(b);
+
+ if(q == nil)
+ q = kmalloc(sizeof(*q));
+ q->next = nil;
+ q->end = q->start = &q->data[0];
+ ret = read(fd, q->start, sizeof(q->data));	/* performed without holding the lock */
+
+ qlock(b);
+ if(ret < 0){
+ ret = mkerror();
+ if(ret == -EINTR || ret == -ERESTART)	/* interrupted: loop again, keeping q for the next read */
+ continue;
+ b->error = ret;	/* remember the error for readers and mark the fd dead */
+ b->fd = -1;
+ break;
+ }
+ q->end = q->start + ret;	/* NOTE(review): ret == 0 queues an empty buffer, which readbufproc() reports as a 0-byte read */
+ *b->qt = q;	/* append to the tail of the filled queue */
+ b->qt = &q->next;
+ q = nil;
+ wakeq(&b->wq, MAXPROC);
+ }
+ if(q){	/* return the unused buffer to the free list */
+ q->next = b->qf;
+ b->qf = q;
+ }
+ wakeq(&b->wq, MAXPROC);	/* wake waiters so they observe fd < 0 */
+ qunlock(b);
+ freebufproc(b);	/* drop this process's reference */
+}
+
+/*
+ * newbufproc: create a Bufproc that reads fd in a background process.
+ *
+ * The structure starts with two references: one for the returned
+ * handle and one for the forked bufproc().  The note file of the new
+ * process is opened so freebufproc() can interrupt it later.
+ * Panics if the process cannot be forked.
+ */
+void*
+newbufproc(int fd)
+{
+	Bufproc *b;
+	char notepath[80];
+	int kpid;
+
+	b = kmallocz(sizeof(*b), 1);
+	b->fd = fd;
+	b->qt = &b->qh;
+	b->ref = 2;
+
+	kpid = procfork(bufproc, b, 0);
+	if(kpid < 0)
+		panic("unable to fork bufproc: %r");
+
+	snprint(notepath, sizeof(notepath), "/proc/%d/note", kpid);
+	b->notefd = open(notepath, OWRITE);
+
+	return b;
+}
+
+/*
+ * readbufproc: copy up to len queued bytes into data.
+ *
+ * With an empty queue the call returns -EAGAIN when noblock is set,
+ * 0 when peek is set, the recorded error (or -EIO) once the
+ * descriptor is dead, and otherwise sleeps until data arrives; a
+ * failing sleepq() result is returned as is.
+ *
+ * When peek is set the queue is left untouched.  Otherwise consumed
+ * buffers move to the free list and, if bufproc() is parked on a full
+ * queue, it is released through rendezvous().
+ *
+ * Returns the number of bytes copied or a negative error code.
+ */
+int
+readbufproc(void *bp, void *data, int len, int peek, int noblock)
+{
+	Bufproc *b;
+	Bufq *q, *nq;
+	uchar *dst;
+	int ret, n;
+
+	b = bp;
+	qlock(b);
+
+	/* wait for the first filled buffer */
+	for(;;){
+		q = b->qh;
+		if(q != nil)
+			break;
+		if(noblock){
+			ret = -EAGAIN;
+			goto out;
+		}
+		if(peek){
+			ret = 0;
+			goto out;
+		}
+		if(b->fd < 0){
+			ret = b->error;
+			if(ret == 0)
+				ret = -EIO;
+			goto out;
+		}
+		ret = sleepq(&b->wq, b, 1);
+		if(ret < 0)
+			goto out;
+	}
+
+	dst = data;
+	ret = 0;
+	while(q != nil){
+		n = q->end - q->start;
+		if(n == 0)
+			break;
+		if(n > len - ret)
+			n = len - ret;
+		memmove(dst, q->start, n);
+		dst += n;
+		ret += n;
+
+		if(q->start + n < q->end){
+			/* buffer only partially consumed: caller's space is used up */
+			if(!peek)
+				q->start += n;
+			break;
+		}
+
+		/* buffer fully consumed: step to the next one */
+		nq = q->next;
+		if(!peek){
+			b->qh = nq;
+			if(nq == nil)
+				b->qt = &b->qh;
+			q->next = b->qf;
+			b->qf = q;
+		}
+		q = nq;
+	}
+
+	if(b->wr && !peek){
+		/* space was made: release the parked bufproc */
+		b->wr = 0;
+		while(rendezvous(&b->wr, 0) == (void*)~0)
+			;
+	}
+out:
+	qunlock(b);
+	return ret;
+}
+
+/*
+ * pollbufproc: register file on the Bufproc's wait queue and report
+ * the current poll state: POLLIN when data is queued, POLLOUT while
+ * the descriptor is open, POLLERR when it is closed with an error.
+ */
+int
+pollbufproc(void *bp, Ufile *file, void *tab)
+{
+	Bufproc *b;
+	int events;
+
+	b = bp;
+	qlock(b);
+	pollwait(file, &b->wq, tab);
+
+	events = 0;
+	if(b->qh != nil)
+		events |= POLLIN;
+	if(b->fd >= 0)
+		events |= POLLOUT;
+	else if(b->error < 0)
+		events |= POLLERR;
+	qunlock(b);
+
+	return events;
+}
+
+/*
+ * nreadablebufproc: number of bytes currently queued and readable
+ * without blocking.
+ */
+int
+nreadablebufproc(void *bp)
+{
+	Bufproc *b;
+	int nbytes;
+
+	b = bp;
+	qlock(b);
+	nbytes = queuesize(b->qh);
+	qunlock(b);
+	return nbytes;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Cons Cons;
+
+struct Cons	/* open /dev/cons file: a Ufile plus an optional background reader */
+{
+ Ufile;	/* embedded generic file header */
+ void *bufproc;	/* handle from newbufproc(0), created on first use by bufproccons(); nil until then */
+};
+
+/*
+ * closecons: release the background reader (if any) of a console
+ * file.  Always returns 0.
+ */
+static int
+closecons(Ufile *file)
+{
+	freebufproc(((Cons*)file)->bufproc);
+	return 0;
+}
+
+/*
+ * bufproccons: return the console's bufproc handle, starting a
+ * background reader on descriptor 0 the first time it is needed.
+ */
+static void*
+bufproccons(Cons *cons)
+{
+	void *bp;
+
+	bp = cons->bufproc;
+	if(bp == nil){
+		bp = newbufproc(0);
+		cons->bufproc = bp;
+	}
+	return bp;
+}
+
+/*
+ * pollcons: poll the console through its background reader
+ * (created on demand).
+ */
+static int
+pollcons(Ufile *file, void *tab)
+{
+	Cons *cons;
+	void *bp;
+
+	cons = (Cons*)file;
+	bp = bufproccons(cons);
+	return pollbufproc(bp, cons, tab);
+}
+
+/*
+ * readcons: read from the console.
+ *
+ * Non-blocking files, and files that already have a background
+ * reader, go through the bufproc queue.  Otherwise descriptor 0 is
+ * read directly, bracketed by notifyme() so the read can be
+ * interrupted; -ERESTART is returned when notifyme(1) reports a
+ * pending interruption.  Returns the byte count or a negative error.
+ */
+static int
+readcons(Ufile *file, void *buf, int len, vlong)
+{
+	Cons *cons;
+	int n, nonblock;
+
+	cons = (Cons*)file;
+	nonblock = cons->mode & O_NONBLOCK;
+
+	if(!nonblock && cons->bufproc == nil){
+		/* plain blocking read straight from stdin */
+		if(notifyme(1))
+			return -ERESTART;
+		n = read(0, buf, len);
+		notifyme(0);
+		if(n < 0)
+			return mkerror();
+		return n;
+	}
+
+	return readbufproc(bufproccons(cons), buf, len, 0, nonblock);
+}
+
+/*
+ * writecons: write len bytes to descriptor 1, bracketed by
+ * notifyme() so the call can be interrupted.  Returns the byte count,
+ * -ERESTART if interrupted before the write, or the mkerror() code.
+ */
+static int
+writecons(Ufile *, void *buf, int len, vlong)
+{
+	int n;
+
+	if(notifyme(1) != 0)
+		return -ERESTART;
+	n = write(1, buf, len);
+	notifyme(0);
+	if(n >= 0)
+		return n;
+	return mkerror();
+}
+
+/*
+ * ioctlcons: console ioctl handler.
+ *
+ * Only request 0x541B (FIONREAD in the Linux ioctl numbering) is
+ * supported: it stores the number of bytes readable without blocking
+ * into the int pointed to by arg.  Returns -EINVAL for a nil arg and
+ * -ENOTTY for every other request.
+ */
+static int
+ioctlcons(Ufile *file, int cmd, void *arg)
+{
+	Cons *cons;
+	int nbytes;
+
+	cons = (Cons*)file;
+
+	if(cmd != 0x541B)
+		return -ENOTTY;
+	if(arg == nil)
+		return -EINVAL;
+
+	nbytes = nreadablebufproc(bufproccons(cons));
+	if(nbytes < 0){
+		*((int*)arg) = 0;
+		return nbytes;
+	}
+	*((int*)arg) = nbytes;
+	return 0;
+}
+
+/*
+ * opencons: open /dev/cons.
+ *
+ * path must be exactly "/dev/cons", otherwise -ENOENT is returned.
+ * On success a zeroed Cons with one reference is stored in *pf and
+ * 0 is returned.  The background reader is not started here; it is
+ * created lazily on the first poll, ioctl or non-blocking read.
+ *
+ * The file is allocated with kmallocz(), the allocator newbufproc()
+ * uses, instead of raw mallocz() whose nil result was dereferenced
+ * unchecked.  NOTE(review): kmallocz() is presumed not to return nil
+ * (its other caller does not check either) -- confirm in its definition.
+ */
+static int
+opencons(char *path, int mode, int, Ufile **pf)
+{
+	Cons *file;
+
+	if(strcmp(path, "/dev/cons") != 0)
+		return -ENOENT;
+
+	file = kmallocz(sizeof(*file), 1);
+	file->ref = 1;
+	file->mode = mode;
+	file->dev = CONSDEV;
+	file->fd = 0;
+	file->path = kstrdup(path);
+	*pf = file;
+
+	return 0;
+}
+
+/*
+ * statcons: fill *s for /dev/cons: a character device with mode
+ * 0666 owned by the current process's uid/gid.  Any other path
+ * yields -ENOENT.
+ */
+static int
+statcons(char *path, int, Ustat *s)
+{
+	if(strcmp(path, "/dev/cons") == 0){
+		s->ino = hashpath(path);
+		s->mode = S_IFCHR | 0666;
+		s->uid = current->uid;
+		s->gid = current->gid;
+		s->size = 0;
+		s->dev = 0;
+		s->rdev = 0;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/*
+ * fstatcons: stat an open console file by forwarding its path to
+ * fsstat().  Returns whatever fsstat() returns.
+ */
+static int
+fstatcons(Ufile *f, Ustat *s)
+{
+	return fsstat(f->path, 0, s);
+}
+
+static Udev consdev =	/* operation table for the console device; members not listed here are left zero */
+{
+ .open = opencons,
+ .read = readcons,
+ .write = writecons,
+ .poll = pollcons,
+ .close = closecons,
+ .ioctl = ioctlcons,
+ .fstat = fstatcons,
+ .stat = statcons,
+};
+
+/*
+ * consdevinit: register the console driver in devtab[] under
+ * CONSDEV and mount it at /dev/cons.
+ */
+void
+consdevinit(void)
+{
+	devtab[CONSDEV] = &consdev;
+	fsmount(&consdev, "/dev/cons");
+}
--- /dev/null
+typedef struct Ref Ref;
+typedef struct Urestart Urestart;
+typedef struct Uproc Uproc;
+typedef struct Uproctab Uproctab;
+typedef struct Uwaitq Uwaitq;
+typedef struct Uwait Uwait;
+
+typedef struct Udev Udev;
+typedef struct Ufile Ufile;
+typedef struct Ustat Ustat;
+typedef struct Udirent Udirent;
+
+typedef struct Ureg Ureg;
+typedef struct Usiginfo Usiginfo;
+
+enum {	/* global emulator limits and sizes */
+ HZ = 100,	/* presumably the clock tick rate reported to linux programs -- TODO confirm */
+ PAGESIZE = 0x1000,	/* 4096 bytes */
+
+ MAXPROC = 128,	/* size of the Uproctab.proc[] array */
+ MAXFD = 256,
+
+ USTACK = 8*1024*1024,	/* 8 MB; presumably the user stack size -- TODO confirm */
+ KSTACK = 8*1024,	/* 8 KB */
+};
+
+struct Ref	/* reference count embedded at the start of shared objects */
+{
+ long ref;	/* the count */
+};
+
+struct Urestart	/* state remembered so an interrupted system call can be restarted */
+{
+ Urestart *link;
+ char *syscall;
+
+ union {	/* per-syscall saved state; each variant currently holds only a timeout */
+ struct {
+ vlong timeout;
+ } nanosleep;
+ struct {
+ vlong timeout;
+ } poll;
+ struct {
+ vlong timeout;
+ } select;
+ struct {
+ vlong timeout;
+ } futex;
+ };
+};
+
+struct Uproc	/* one emulated linux process/thread */
+{
+ QLock;
+
+ int tid;
+ int pid;
+ int ppid;
+ int pgid;
+ int psid;
+ int uid;
+ int gid;
+ int umask;
+ int tlsmask;
+
+ int kpid;
+ int notefd;	/* per the CHANGES log: kept-open descriptor for /proc/#/note */
+ int argsfd;	/* per the CHANGES log: kept-open descriptor for /proc/#/args */
+
+ int wstate;	/* current wait state: WEXITED, WSTOPPED, WCONTINUED (per CHANGES) */
+ int wevent;	/* like wstate, but reset by waitpid (per CHANGES) */
+ int exitcode;
+ int exitsignal;
+
+ int *cleartidptr;
+
+ vlong timeout;	/* time in nanoseconds when the timeout expires; the timerproc zeroes it on expiry (per CHANGES) */
+
+ vlong alarm;
+ Uproc *alarmq;
+
+ char *state;
+ char *xstate;
+ int innote;
+ int notified;
+ Ureg *ureg;
+ char *syscall;
+ void (*sysret)(int errno);
+ Urestart *restart;	/* restart state for the interrupted syscall (current->restart in CHANGES) */
+ Urestart restart0;
+ Uwait *freewait;
+
+ void (*traceproc)(void *arg);	/* callback invoked when entering or exiting the kernel (per CHANGES) */
+ void *tracearg;	/* argument for traceproc */
+
+ int linkloop;
+ char *root;
+ char *cwd;
+ char *kcwd;
+
+ void *fdtab;
+ void *mem;
+ void *trace;
+ void *signal;
+
+ char *comm;	/* double-null-terminated string array: exe name followed by the execve arguments (per CHANGES) */
+ int ncomm;
+ ulong codestart;
+ ulong codeend;
+ ulong stackstart;
+ vlong starttime;
+};
+
+struct Uproctab	/* table holding all Uproc slots */
+{
+ QLock;
+ int nextpid;
+ int alloc;
+ Uproc proc[MAXPROC];	/* fixed array of MAXPROC process slots */
+};
+
+struct Uwaitq	/* wait queue: a lock plus a list of Uwait entries */
+{
+ QLock;
+ Uwait *w;	/* first waiter entry, or nil */
+};
+
+struct Uwait	/* one waiter entry linking a process (and optionally a file) to a Uwaitq */
+{
+ Uwait *next;
+ Uwaitq *q;	/* queue this entry belongs to */
+ Uwait *nextq;
+ Uproc *proc;	/* waiting process */
+ Ufile *file;	/* presumably the file being polled, as passed to pollwait() -- TODO confirm */
+};
+
+enum {	/* device ids; used as index into devtab[] and stored in Ufile.dev */
+ ROOTDEV,
+ SOCKDEV,
+ PIPEDEV,
+ CONSDEV,
+ MISCDEV,
+ DSPDEV,
+ PTYDEV,
+ PROCDEV,
+ MAXDEV,	/* number of device ids above */
+};
+
+/* device: table of driver entry points; a driver fills in only the operations it supports */
+struct Udev
+{
+ int (*open)(char *path, int mode, int perm, Ufile **pf);	/* path-based operations */
+ int (*access)(char *path, int perm);
+ int (*stat)(char *path, int link, Ustat *ps);
+
+ int (*link)(char *old, char *new, int sym);
+ int (*unlink)(char *path, int rmdir);
+ int (*readlink)(char *path, char *buf, int len);
+ int (*rename)(char *old, char *new);
+ int (*mkdir)(char *path, int mode);
+ int (*utime)(char *path, long atime, long mtime);
+ int (*chmod)(char *path, int mode);
+ int (*chown)(char *path, int uid, int gid, int link);
+ int (*truncate)(char *path, vlong size);
+
+ int (*read)(Ufile *file, void *buf, int len, vlong off);	/* operations on an open file */
+ int (*write)(Ufile *file, void *buf, int len, vlong off);
+
+ vlong (*size)(Ufile *file);
+ int (*poll)(Ufile *file, void *tab);
+ int (*ioctl)(Ufile *file, int cmd, void *arg);
+ int (*close)(Ufile *file);
+
+ int (*fstat)(Ufile *file, Ustat *ps);
+ int (*readdir)(Ufile *file, Udirent **pd);
+
+ int (*fchmod)(Ufile *file, int mode);
+ int (*fchown)(Ufile *file, int uid, int gid);
+ int (*ftruncate)(Ufile *file, vlong size);
+};
+
+struct Ufile /* common file header; devices extend it by embedding it as their first member */
+{
+ Ref; /* unnamed member (Plan 9 C): gives Ufile a ref field */
+
+ int mode;
+ int dev; /* presumably one of the *DEV numbers, i.e. an index into devtab[] -- confirm */
+ char *path;
+ int fd;
+ vlong off;
+
+ Udirent *rdaux; /* aux pointer to hold Udirent* chains */
+};
+
+struct Ustat /* file status record filled in through the Ustat* argument of Udev.stat and Udev.fstat */
+{
+ int mode;
+ int uid;
+ int gid;
+ int dev;
+ int rdev;
+ vlong size;
+ ulong atime; /* the three timestamps are unsigned longs; units are not stated in this header */
+ ulong mtime;
+ ulong ctime;
+ uvlong ino;
+};
+
+struct Udirent /* one directory entry; entries are chained through next */
+{
+ Udirent *next;
+
+ uvlong ino;
+ int mode;
+ char name[]; /* flexible array member: the name bytes are stored directly after the struct */
+};
+
+struct Usiginfo /* signal information: three common ints, a per-signal-kind union, then two extra ints */
+{
+ int signo;
+ int errno;
+ int code;
+
+ union { /* unnamed union: which member applies is given by the comment above each one */
+ /* kill() */
+ struct {
+ int pid; /* sender's pid */
+ int uid; /* sender's uid */
+ } kill;
+
+ /* POSIX.1b timers */
+ struct {
+ int tid; /* timer id */
+ int overrun; /* overrun count */
+ int val; /* same as below */
+ int sys_private; /* not to be passed to user */
+ } timer;
+
+ /* POSIX.1b signals */
+ struct {
+ int pid; /* sender's pid */
+ int uid; /* sender's uid */
+ int val;
+ } rt;
+
+ /* SIGCHLD */
+ struct {
+ int pid; /* which child */
+ int uid; /* sender's uid */
+ int status; /* exit code */
+ long utime;
+ long stime;
+ } chld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ void *addr; /* faulting insn/memory ref. */
+ int trapno; /* TRAP # which caused the signal */
+ } fault;
+
+ /* SIGPOLL */
+ struct {
+ long band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int fd;
+ } poll;
+ };
+
+ int topid; /* NOTE(review): presumably the pid the signal is addressed to -- confirm */
+ int group; /* NOTE(review): presumably a flag for group-wide delivery -- confirm */
+};
+
+int debug; /* globals are declared without extern in this header */
+long *kstack;
+long *exitjmp;
+Uproc **pcurrent; /* always accessed through the current macro below, which dereferences it once */
+#define current (*pcurrent)
+vlong boottime;
+
+Udev *devtab[MAXDEV]; /* one Udev pointer slot per device number below MAXDEV */
+Uproctab proctab; /* the process table, holding MAXPROC Uproc slots inline */
--- /dev/null
+Ubuntu Manpage Repository
+
+Provided by: manpages-de-dev_0.5-2ubuntu1_all
+
+BEZEICHNUNG
+
+ ioctl_list - Liste der ioctl-Aufrufe im Linux/i386-Kernel
+
+BESCHREIBUNG
+
+ Dies ist die Ioctl-Liste 1.3.27, eine Liste von ioctl-Aufrufen im
+ Linux/i386-Kernel 1.3.27. Sie enthält 421 ioctls aus
+ /usr/include/{asm,linux}/*.h. Für jeden ioctl wird der numerische
+ Wert, der Name und der Argumenttyp aufgelistet.
+
+ Ein Argumenttyp const struct foo * bedeutet, dass das Argument Eingabe
+ für den Kernel ist. struct foo * bedeutet, der Kernel gibt das Argu‐
+ ment aus. Wenn der Kernel das Argument für Ein- und Ausgabe benutzt,
+ wird dies durch // I-O markiert.
+
+ Einige ioctls benötigen mehr Argumente oder geben mehr Werte zurück als
+ eine einzige Struktur. Diese werden durch // MORE markiert und weiter
+ dokumentiert in einem separaten Abschnitt.
+
+ Diese Liste ist nicht vollständig. Sie enthält nicht:
+
+ Ioctls, die intern im Kernel definiert sind (scsi_ioctl.h).
+
+ Ioctls, die in Modulen definiert sind, die separat vom Kernel
+ verbreitet werden.
+
+ Und natürlich hat die Liste Fehler und Auslassungen.
+
+ Bitte wenden Sie sich wegen Änderungen und Kommentaren an
+ <mec@duracef.shout.net>. Ich bin besonders interessiert an Modulen,
+ die ihre eigenen ioctls definieren. Wenn Sie solch ein Modul kennen,
+ teilen Sie es mir bitte mit, damit ich es mir per ftp besorgen kann,
+ und ich berücksichtige seine ioctls in der nächsten Ausgabe dieser
+ Liste.
+
+ Bitte wenden Sie sich wegen der Übersetzung in’s Deutsche nicht an
+ <krd@burn.rhein-ruhr.de>. ;-)
+
+Haupttabelle
+
+ // <include/asm-i386/socket.h>
+ 0x00008901 FIOSETOWN const int *
+ 0x00008902 SIOCSPGRP const int *
+ 0x00008903 FIOGETOWN int *
+ 0x00008904 SIOCGPGRP int *
+ 0x00008905 SIOCATMARK int *
+ 0x00008906 SIOCGSTAMP timeval *
+
+ // <include/asm-i386/termios.h>
+ 0x00005401 TCGETS struct termios *
+ 0x00005402 TCSETS const struct termios *
+ 0x00005403 TCSETSW const struct termios *
+ 0x00005404 TCSETSF const struct termios *
+ 0x00005405 TCGETA struct termio *
+ 0x00005406 TCSETA const struct termio *
+ 0x00005407 TCSETAW const struct termio *
+ 0x00005408 TCSETAF const struct termio *
+ 0x00005409 TCSBRK int
+ 0x0000540A TCXONC int
+ 0x0000540B TCFLSH int
+ 0x0000540C TIOCEXCL void
+ 0x0000540D TIOCNXCL void
+ 0x0000540E TIOCSCTTY int
+ 0x0000540F TIOCGPGRP pid_t *
+ 0x00005410 TIOCSPGRP const pid_t *
+ 0x00005411 TIOCOUTQ int *
+ 0x00005412 TIOCSTI const char *
+ 0x00005413 TIOCGWINSZ struct winsize *
+ 0x00005414 TIOCSWINSZ const struct winsize *
+ 0x00005415 TIOCMGET int *
+ 0x00005416 TIOCMBIS const int *
+ 0x00005417 TIOCMBIC const int *
+ 0x00005418 TIOCMSET const int *
+ 0x00005419 TIOCGSOFTCAR int *
+ 0x0000541A TIOCSSOFTCAR const int *
+ 0x0000541B FIONREAD int *
+ 0x0000541B TIOCINQ int *
+ 0x0000541C TIOCLINUX const char * // MORE
+ 0x0000541D TIOCCONS void
+ 0x0000541E TIOCGSERIAL struct serial_struct *
+ 0x0000541F TIOCSSERIAL const struct serial_struct *
+ 0x00005420 TIOCPKT const int *
+ 0x00005421 FIONBIO const int *
+ 0x00005422 TIOCNOTTY void
+ 0x00005423 TIOCSETD const int *
+ 0x00005424 TIOCGETD int *
+ 0x00005425 TCSBRKP int
+ 0x00005426 TIOCTTYGSTRUCT struct tty_struct *
+ 0x00005450 FIONCLEX void
+ 0x00005451 FIOCLEX void
+ 0x00005452 FIOASYNC const int *
+ 0x00005453 TIOCSERCONFIG void
+ 0x00005454 TIOCSERGWILD int *
+ 0x00005455 TIOCSERSWILD const int *
+ 0x00005456 TIOCGLCKTRMIOS struct termios *
+ 0x00005457 TIOCSLCKTRMIOS const struct termios *
+ 0x00005458 TIOCSERGSTRUCT struct async_struct *
+ 0x00005459 TIOCSERGETLSR int *
+ 0x0000545A TIOCSERGETMULTI struct serial_multiport_struct *
+ 0x0000545B TIOCSERSETMULTI const struct serial_multiport_struct *
+
+ // <include/linux/ax25.h>
+ 0x000089E0 SIOCAX25GETUID const struct sockaddr_ax25 *
+ 0x000089E1 SIOCAX25ADDUID const struct sockaddr_ax25 *
+ 0x000089E2 SIOCAX25DELUID const struct sockaddr_ax25 *
+ 0x000089E3 SIOCAX25NOUID const int *
+ 0x000089E4 SIOCAX25DIGCTL const int *
+ 0x000089E5 SIOCAX25GETPARMS struct ax25_parms_struct * // I-O
+ 0x000089E6 SIOCAX25SETPARMS const struct ax25_parms_struct *
+
+ // <include/linux/cdk.h>
+ 0x00007314 STL_BINTR void
+ 0x00007315 STL_BSTART void
+ 0x00007316 STL_BSTOP void
+ 0x00007317 STL_BRESET void
+
+ // <include/linux/cdrom.h>
+ 0x00005301 CDROMPAUSE void
+ 0x00005302 CDROMRESUME void
+ 0x00005303 CDROMPLAYMSF const struct cdrom_msf *
+ 0x00005304 CDROMPLAYTRKIND const struct cdrom_ti *
+ 0x00005305 CDROMREADTOCHDR struct cdrom_tochdr *
+ 0x00005306 CDROMREADTOCENTRY struct cdrom_tocentry * // I-O
+ 0x00005307 CDROMSTOP void
+ 0x00005308 CDROMSTART void
+ 0x00005309 CDROMEJECT void
+ 0x0000530A CDROMVOLCTRL const struct cdrom_volctrl *
+ 0x0000530B CDROMSUBCHNL struct cdrom_subchnl * // I-O
+ 0x0000530C CDROMREADMODE2 const struct cdrom_msf * // MORE
+ 0x0000530D CDROMREADMODE1 const struct cdrom_msf * // MORE
+ 0x0000530E CDROMREADAUDIO const struct cdrom_read_audio * // MORE
+ 0x0000530F CDROMEJECT_SW int
+ 0x00005310 CDROMMULTISESSION struct cdrom_multisession * // I-O
+ 0x00005311 CDROM_GET_UPC struct { char [8]; } *
+ 0x00005312 CDROMRESET void
+ 0x00005313 CDROMVOLREAD struct cdrom_volctrl *
+ 0x00005314 CDROMREADRAW const struct cdrom_msf * // MORE
+ 0x00005315 CDROMREADCOOKED const struct cdrom_msf * // MORE
+ 0x00005316 CDROMSEEK const struct cdrom_msf *
+
+ // <include/linux/cm206.h>
+ 0x00002000 CM206CTL_GET_STAT int
+ 0x00002001 CM206CTL_GET_LAST_STAT int
+
+ // <include/linux/cyclades.h>
+ 0x00435901 CYGETMON struct cyclades_monitor *
+ 0x00435902 CYGETTHRESH int *
+ 0x00435903 CYSETTHRESH int
+ 0x00435904 CYGETDEFTHRESH int *
+ 0x00435905 CYSETDEFTHRESH int
+ 0x00435906 CYGETTIMEOUT int *
+ 0x00435907 CYSETTIMEOUT int
+ 0x00435908 CYGETDEFTIMEOUT int *
+ 0x00435909 CYSETDEFTIMEOUT int
+
+ // <include/linux/ext2_fs.h>
+ 0x80046601 EXT2_IOC_GETFLAGS int *
+ 0x40046602 EXT2_IOC_SETFLAGS const int *
+ 0x80047601 EXT2_IOC_GETVERSION int *
+ 0x40047602 EXT2_IOC_SETVERSION const int *
+
+ // <include/linux/fd.h>
+ 0x00000000 FDCLRPRM void
+ 0x00000001 FDSETPRM const struct floppy_struct *
+ 0x00000002 FDDEFPRM const struct floppy_struct *
+ 0x00000003 FDGETPRM struct floppy_struct *
+ 0x00000004 FDMSGON void
+ 0x00000005 FDMSGOFF void
+ 0x00000006 FDFMTBEG void
+ 0x00000007 FDFMTTRK const struct format_descr *
+ 0x00000008 FDFMTEND void
+ 0x0000000A FDSETEMSGTRESH int
+ 0x0000000B FDFLUSH void
+ 0x0000000C FDSETMAXERRS const struct floppy_max_errors *
+ 0x0000000E FDGETMAXERRS struct floppy_max_errors *
+ 0x00000010 FDGETDRVTYP struct { char [16]; } *
+ 0x00000014 FDSETDRVPRM const struct floppy_drive_params *
+ 0x00000015 FDGETDRVPRM struct floppy_drive_params *
+ 0x00000016 FDGETDRVSTAT struct floppy_drive_struct *
+ 0x00000017 FDPOLLDRVSTAT struct floppy_drive_struct *
+ 0x00000018 FDRESET int
+ 0x00000019 FDGETFDCSTAT struct floppy_fdc_state *
+ 0x0000001B FDWERRORCLR void
+ 0x0000001C FDWERRORGET struct floppy_write_errors *
+ 0x0000001E FDRAWCMD struct floppy_raw_cmd * // MORE // I-O
+ 0x00000028 FDTWADDLE void
+
+ // <include/linux/fs.h>
+ 0x0000125D BLKROSET const int *
+ 0x0000125E BLKROGET int *
+ 0x0000125F BLKRRPART void
+ 0x00001260 BLKGETSIZE int *
+ 0x00001261 BLKFLSBUF void
+ 0x00001262 BLKRASET int
+ 0x00001263 BLKRAGET int *
+ 0x00000001 FIBMAP int * // I-O
+ 0x00000002 FIGETBSZ int *
+
+ // <include/linux/hdreg.h>
+ 0x00000301 HDIO_GETGEO struct hd_geometry *
+ 0x00000302 HDIO_GET_UNMASKINTR int *
+ 0x00000304 HDIO_GET_MULTCOUNT int *
+ 0x00000307 HDIO_GET_IDENTITY struct hd_driveid *
+ 0x00000308 HDIO_GET_KEEPSETTINGS int *
+ 0x00000309 HDIO_GET_CHIPSET int *
+ 0x0000030A HDIO_GET_NOWERR int *
+ 0x0000030B HDIO_GET_DMA int *
+ 0x0000031F HDIO_DRIVE_CMD int * // I-O
+ 0x00000321 HDIO_SET_MULTCOUNT int
+ 0x00000322 HDIO_SET_UNMASKINTR int
+ 0x00000323 HDIO_SET_KEEPSETTINGS int
+ 0x00000324 HDIO_SET_CHIPSET int
+ 0x00000325 HDIO_SET_NOWERR int
+ 0x00000326 HDIO_SET_DMA int
+
+ // <include/linux/if_eql.h>
+ 0x000089F0 EQL_ENSLAVE struct ifreq * // MORE // I-O
+ 0x000089F1 EQL_EMANCIPATE struct ifreq * // MORE // I-O
+ 0x000089F2 EQL_GETSLAVECFG struct ifreq * // MORE // I-O
+ 0x000089F3 EQL_SETSLAVECFG struct ifreq * // MORE // I-O
+ 0x000089F4 EQL_GETMASTRCFG struct ifreq * // MORE // I-O
+ 0x000089F5 EQL_SETMASTRCFG struct ifreq * // MORE // I-O
+
+ // <include/linux/if_plip.h>
+ 0x000089F0 SIOCDEVPLIP struct ifreq * // I-O
+
+ // <include/linux/if_ppp.h>
+ 0x00005490 PPPIOCGFLAGS int *
+ 0x00005491 PPPIOCSFLAGS const int *
+ 0x00005492 PPPIOCGASYNCMAP int *
+ 0x00005493 PPPIOCSASYNCMAP const int *
+ 0x00005494 PPPIOCGUNIT int *
+ 0x00005495 PPPIOCSINPSIG const int *
+ 0x00005497 PPPIOCSDEBUG const int *
+ 0x00005498 PPPIOCGDEBUG int *
+ 0x00005499 PPPIOCGSTAT struct ppp_stats *
+ 0x0000549A PPPIOCGTIME struct ppp_ddinfo *
+ 0x0000549B PPPIOCGXASYNCMAP struct { int [8]; } *
+ 0x0000549C PPPIOCSXASYNCMAP const struct { int [8]; } *
+ 0x0000549D PPPIOCSMRU const int *
+ 0x0000549E PPPIOCRASYNCMAP const int *
+ 0x0000549F PPPIOCSMAXCID const int *
+
+ // <include/linux/ipx.h>
+ 0x000089E0 SIOCAIPXITFCRT const char *
+ 0x000089E1 SIOCAIPXPRISLT const char *
+ 0x000089E2 SIOCIPXCFGDATA struct ipx_config_data *
+
+ // <include/linux/kd.h>
+ 0x00004B60 GIO_FONT struct { char [8192]; } *
+ 0x00004B61 PIO_FONT const struct { char [8192]; } *
+ 0x00004B6B GIO_FONTX struct console_font_desc * // MORE I-O
+ 0x00004B6C PIO_FONTX const struct console_font_desc * //MORE
+ 0x00004B70 GIO_CMAP struct { char [48]; } *
+ 0x00004B71 PIO_CMAP const struct { char [48]; }
+ 0x00004B2F KIOCSOUND int
+ 0x00004B30 KDMKTONE int
+ 0x00004B31 KDGETLED char *
+ 0x00004B32 KDSETLED int
+ 0x00004B33 KDGKBTYPE char *
+ 0x00004B34 KDADDIO int // MORE
+ 0x00004B35 KDDELIO int // MORE
+ 0x00004B36 KDENABIO void // MORE
+ 0x00004B37 KDDISABIO void // MORE
+ 0x00004B3A KDSETMODE int
+ 0x00004B3B KDGETMODE int *
+ 0x00004B3C KDMAPDISP void // MORE
+ 0x00004B3D KDUNMAPDISP void // MORE
+ 0x00004B40 GIO_SCRNMAP struct { char [E_TABSZ]; } *
+ 0x00004B41 PIO_SCRNMAP const struct { char [E_TABSZ]; } *
+ 0x00004B69 GIO_UNISCRNMAP struct { short [E_TABSZ]; } *
+ 0x00004B6A PIO_UNISCRNMAP const struct { short [E_TABSZ]; } *
+ 0x00004B66 GIO_UNIMAP struct unimapdesc * // MORE // I-O
+ 0x00004B67 PIO_UNIMAP const struct unimapdesc * // MORE
+ 0x00004B68 PIO_UNIMAPCLR const struct unimapinit *
+ 0x00004B44 KDGKBMODE int *
+ 0x00004B45 KDSKBMODE int
+ 0x00004B62 KDGKBMETA int *
+ 0x00004B63 KDSKBMETA int
+ 0x00004B64 KDGKBLED int *
+ 0x00004B65 KDSKBLED int
+ 0x00004B46 KDGKBENT struct kbentry * // I-O
+ 0x00004B47 KDSKBENT const struct kbentry *
+ 0x00004B48 KDGKBSENT struct kbsentry * // I-O
+ 0x00004B49 KDSKBSENT const struct kbsentry *
+ 0x00004B4A KDGKBDIACR struct kbdiacrs *
+ 0x00004B4B KDSKBDIACR const struct kbdiacrs *
+ 0x00004B4C KDGETKEYCODE struct kbkeycode * // I-O
+ 0x00004B4D KDSETKEYCODE const struct kbkeycode *
+ 0x00004B4E KDSIGACCEPT int
+
+ // <include/linux/lp.h>
+ 0x00000601 LPCHAR int
+ 0x00000602 LPTIME int
+ 0x00000604 LPABORT int
+ 0x00000605 LPSETIRQ int
+ 0x00000606 LPGETIRQ int *
+ 0x00000608 LPWAIT int
+ 0x00000609 LPCAREFUL int
+ 0x0000060A LPABORTOPEN int
+ 0x0000060B LPGETSTATUS int *
+ 0x0000060C LPRESET void
+ 0x0000060D LPGETSTATS struct lp_stats *
+
+ // <include/linux/mroute.h>
+ 0x000089E0 SIOCGETVIFCNT struct sioc_vif_req * // I-O
+ 0x000089E1 SIOCGETSGCNT struct sioc_sg_req * // I-O
+
+ // <include/linux/mtio.h>
+ 0x40086D01 MTIOCTOP const struct mtop *
+ 0x801C6D02 MTIOCGET struct mtget *
+ 0x80046D03 MTIOCPOS struct mtpos *
+ 0x80206D04 MTIOCGETCONFIG struct mtconfiginfo *
+ 0x40206D05 MTIOCSETCONFIG const struct mtconfiginfo *
+
+ // <include/linux/netrom.h>
+ 0x000089E0 SIOCNRGETPARMS struct nr_parms_struct * // I-O
+ 0x000089E1 SIOCNRSETPARMS const struct nr_parms_struct *
+ 0x000089E2 SIOCNRDECOBS void
+ 0x000089E3 SIOCNRRTCTL const int *
+
+ // <include/linux/sbpcd.h>
+ 0x00009000 DDIOCSDBG const int *
+ 0x00005382 CDROMAUDIOBUFSIZ int
+
+ // <include/linux/scc.h>
+ 0x00005470 TIOCSCCINI void
+ 0x00005471 TIOCCHANINI const struct scc_modem *
+ 0x00005472 TIOCGKISS struct ioctl_command * // I-O
+ 0x00005473 TIOCSKISS const struct ioctl_command *
+ 0x00005474 TIOCSCCSTAT struct scc_stat *
+
+ // <include/linux/scsi.h>
+ 0x00005382 SCSI_IOCTL_GET_IDLUN struct { int [2]; } *
+ 0x00005383 SCSI_IOCTL_TAGGED_ENABLE void
+ 0x00005384 SCSI_IOCTL_TAGGED_DISABLE void
+ 0x00005385 SCSI_IOCTL_PROBE_HOST const int * // MORE
+
+ // <include/linux/smb_fs.h>
+ 0x80027501 SMB_IOC_GETMOUNTUID uid_t *
+
+ // <include/linux/sockios.h>
+ 0x0000890B SIOCADDRT const struct rtentry * // MORE
+ 0x0000890C SIOCDELRT const struct rtentry * // MORE
+ 0x00008910 SIOCGIFNAME char []
+ 0x00008911 SIOCSIFLINK void
+ 0x00008912 SIOCGIFCONF struct ifconf * // MORE // I-O
+ 0x00008913 SIOCGIFFLAGS struct ifreq * // I-O
+ 0x00008914 SIOCSIFFLAGS const struct ifreq *
+ 0x00008915 SIOCGIFADDR struct ifreq * // I-O
+ 0x00008916 SIOCSIFADDR const struct ifreq *
+ 0x00008917 SIOCGIFDSTADDR struct ifreq * // I-O
+ 0x00008918 SIOCSIFDSTADDR const struct ifreq *
+ 0x00008919 SIOCGIFBRDADDR struct ifreq * // I-O
+ 0x0000891A SIOCSIFBRDADDR const struct ifreq *
+ 0x0000891B SIOCGIFNETMASK struct ifreq * // I-O
+ 0x0000891C SIOCSIFNETMASK const struct ifreq *
+ 0x0000891D SIOCGIFMETRIC struct ifreq * // I-O
+ 0x0000891E SIOCSIFMETRIC const struct ifreq *
+ 0x0000891F SIOCGIFMEM struct ifreq * // I-O
+ 0x00008920 SIOCSIFMEM const struct ifreq *
+ 0x00008921 SIOCGIFMTU struct ifreq * // I-O
+ 0x00008922 SIOCSIFMTU const struct ifreq *
+ 0x00008923 OLD_SIOCGIFHWADDR struct ifreq * // I-O
+ 0x00008924 SIOCSIFHWADDR const struct ifreq * // MORE
+ 0x00008925 SIOCGIFENCAP int *
+ 0x00008926 SIOCSIFENCAP const int *
+ 0x00008927 SIOCGIFHWADDR struct ifreq * // I-O
+ 0x00008929 SIOCGIFSLAVE void
+ 0x00008930 SIOCSIFSLAVE void
+ 0x00008931 SIOCADDMULTI const struct ifreq *
+ 0x00008932 SIOCDELMULTI const struct ifreq *
+ 0x00008940 SIOCADDRTOLD void
+ 0x00008941 SIOCDELRTOLD void
+ 0x00008950 SIOCDARP const struct arpreq *
+ 0x00008951 SIOCGARP struct arpreq * // I-O
+ 0x00008952 SIOCSARP const struct arpreq *
+ 0x00008960 SIOCDRARP const struct arpreq *
+ 0x00008961 SIOCGRARP struct arpreq * // I-O
+ 0x00008962 SIOCSRARP const struct arpreq *
+ 0x00008970 SIOCGIFMAP struct ifreq * // I-O
+ 0x00008971 SIOCSIFMAP const struct ifreq *
+
+ // <include/linux/soundcard.h>
+ 0x00005100 SNDCTL_SEQ_RESET void
+ 0x00005101 SNDCTL_SEQ_SYNC void
+ 0xC08C5102 SNDCTL_SYNTH_INFO struct synth_info * // I-O
+ 0xC0045103 SNDCTL_SEQ_CTRLRATE int * // I-O
+ 0x80045104 SNDCTL_SEQ_GETOUTCOUNT int *
+ 0x80045105 SNDCTL_SEQ_GETINCOUNT int *
+ 0x40045106 SNDCTL_SEQ_PERCMODE void
+ 0x40285107 SNDCTL_FM_LOAD_INSTR const struct sbi_instrument *
+ 0x40045108 SNDCTL_SEQ_TESTMIDI const int *
+ 0x40045109 SNDCTL_SEQ_RESETSAMPLES const int *
+ 0x8004510A SNDCTL_SEQ_NRSYNTHS int *
+ 0x8004510B SNDCTL_SEQ_NRMIDIS int *
+ 0xC074510C SNDCTL_MIDI_INFO struct midi_info * // I-O
+ 0x4004510D SNDCTL_SEQ_THRESHOLD const int *
+ 0xC004510E SNDCTL_SYNTH_MEMAVL int * // I-O
+ 0x4004510F SNDCTL_FM_4OP_ENABLE const int *
+ 0xCFB85110 SNDCTL_PMGR_ACCESS struct patmgr_info * // I-O
+ 0x00005111 SNDCTL_SEQ_PANIC void
+ 0x40085112 SNDCTL_SEQ_OUTOFBAND const struct seq_event_rec *
+ 0xC0045401 SNDCTL_TMR_TIMEBASE int * // I-O
+ 0x00005402 SNDCTL_TMR_START void
+ 0x00005403 SNDCTL_TMR_STOP void
+ 0x00005404 SNDCTL_TMR_CONTINUE void
+ 0xC0045405 SNDCTL_TMR_TEMPO int * // I-O
+ 0xC0045406 SNDCTL_TMR_SOURCE int * // I-O
+ 0x40045407 SNDCTL_TMR_METRONOME const int *
+ 0x40045408 SNDCTL_TMR_SELECT int * // I-O
+ 0xCFB85001 SNDCTL_PMGR_IFACE struct patmgr_info * // I-O
+ 0xC0046D00 SNDCTL_MIDI_PRETIME int * // I-O
+ 0xC0046D01 SNDCTL_MIDI_MPUMODE const int *
+ 0xC0216D02 SNDCTL_MIDI_MPUCMD struct mpu_command_rec * // I-O
+ 0x00005000 SNDCTL_DSP_RESET void
+ 0x00005001 SNDCTL_DSP_SYNC void
+ 0xC0045002 SNDCTL_DSP_SPEED int * // I-O
+ 0xC0045003 SNDCTL_DSP_STEREO int * // I-O
+ 0xC0045004 SNDCTL_DSP_GETBLKSIZE int * // I-O
+ 0xC0045006 SOUND_PCM_WRITE_CHANNELS int * // I-O
+ 0xC0045007 SOUND_PCM_WRITE_FILTER int * // I-O
+ 0x00005008 SNDCTL_DSP_POST void
+ 0xC0045009 SNDCTL_DSP_SUBDIVIDE int * // I-O
+ 0xC004500A SNDCTL_DSP_SETFRAGMENT int * // I-O
+ 0x8004500B SNDCTL_DSP_GETFMTS int *
+ 0xC0045005 SNDCTL_DSP_SETFMT int * // I-O
+ 0x800C500C SNDCTL_DSP_GETOSPACE struct audio_buf_info *
+ 0x800C500D SNDCTL_DSP_GETISPACE struct audio_buf_info *
+ 0x0000500E SNDCTL_DSP_NONBLOCK void
+ 0x80045002 SOUND_PCM_READ_RATE int *
+ 0x80045006 SOUND_PCM_READ_CHANNELS int *
+ 0x80045005 SOUND_PCM_READ_BITS int *
+ 0x80045007 SOUND_PCM_READ_FILTER int *
+ 0x00004300 SNDCTL_COPR_RESET void
+ 0xCFB04301 SNDCTL_COPR_LOAD const struct copr_buffer *
+ 0xC0144302 SNDCTL_COPR_RDATA struct copr_debug_buf * // I-O
+ 0xC0144303 SNDCTL_COPR_RCODE struct copr_debug_buf * // I-O
+ 0x40144304 SNDCTL_COPR_WDATA const struct copr_debug_buf *
+ 0x40144305 SNDCTL_COPR_WCODE const struct copr_debug_buf *
+ 0xC0144306 SNDCTL_COPR_RUN struct copr_debug_buf * // I-O
+ 0xC0144307 SNDCTL_COPR_HALT struct copr_debug_buf * // I-O
+ 0x4FA44308 SNDCTL_COPR_SENDMSG const struct copr_msg *
+ 0x8FA44309 SNDCTL_COPR_RCVMSG struct copr_msg *
+ 0x80044D00 SOUND_MIXER_READ_VOLUME int *
+ 0x80044D01 SOUND_MIXER_READ_BASS int *
+ 0x80044D02 SOUND_MIXER_READ_TREBLE int *
+ 0x80044D03 SOUND_MIXER_READ_SYNTH int *
+ 0x80044D04 SOUND_MIXER_READ_PCM int *
+ 0x80044D05 SOUND_MIXER_READ_SPEAKER int *
+ 0x80044D06 SOUND_MIXER_READ_LINE int *
+ 0x80044D07 SOUND_MIXER_READ_MIC int *
+ 0x80044D08 SOUND_MIXER_READ_CD int *
+ 0x80044D09 SOUND_MIXER_READ_IMIX int *
+ 0x80044D0A SOUND_MIXER_READ_ALTPCM int *
+ 0x80044D0B SOUND_MIXER_READ_RECLEV int *
+ 0x80044D0C SOUND_MIXER_READ_IGAIN int *
+ 0x80044D0D SOUND_MIXER_READ_OGAIN int *
+ 0x80044D0E SOUND_MIXER_READ_LINE1 int *
+ 0x80044D0F SOUND_MIXER_READ_LINE2 int *
+ 0x80044D10 SOUND_MIXER_READ_LINE3 int *
+ 0x80044D1C SOUND_MIXER_READ_MUTE int *
+ 0x80044D1D SOUND_MIXER_READ_ENHANCE int *
+ 0x80044D1E SOUND_MIXER_READ_LOUD int *
+ 0x80044DFF SOUND_MIXER_READ_RECSRC int *
+ 0x80044DFE SOUND_MIXER_READ_DEVMASK int *
+ 0x80044DFD SOUND_MIXER_READ_RECMASK int *
+ 0x80044DFB SOUND_MIXER_READ_STEREODEVS int *
+ 0x80044DFC SOUND_MIXER_READ_CAPS int *
+ 0xC0044D00 SOUND_MIXER_WRITE_VOLUME int * // I-O
+ 0xC0044D01 SOUND_MIXER_WRITE_BASS int * // I-O
+ 0xC0044D02 SOUND_MIXER_WRITE_TREBLE int * // I-O
+ 0xC0044D03 SOUND_MIXER_WRITE_SYNTH int * // I-O
+ 0xC0044D04 SOUND_MIXER_WRITE_PCM int * // I-O
+ 0xC0044D05 SOUND_MIXER_WRITE_SPEAKER int * // I-O
+ 0xC0044D06 SOUND_MIXER_WRITE_LINE int * // I-O
+ 0xC0044D07 SOUND_MIXER_WRITE_MIC int * // I-O
+ 0xC0044D08 SOUND_MIXER_WRITE_CD int * // I-O
+ 0xC0044D09 SOUND_MIXER_WRITE_IMIX int * // I-O
+ 0xC0044D0A SOUND_MIXER_WRITE_ALTPCM int * // I-O
+ 0xC0044D0B SOUND_MIXER_WRITE_RECLEV int * // I-O
+ 0xC0044D0C SOUND_MIXER_WRITE_IGAIN int * // I-O
+ 0xC0044D0D SOUND_MIXER_WRITE_OGAIN int * // I-O
+ 0xC0044D0E SOUND_MIXER_WRITE_LINE1 int * // I-O
+ 0xC0044D0F SOUND_MIXER_WRITE_LINE2 int * // I-O
+ 0xC0044D10 SOUND_MIXER_WRITE_LINE3 int * // I-O
+ 0xC0044D1C SOUND_MIXER_WRITE_MUTE int * // I-O
+ 0xC0044D1D SOUND_MIXER_WRITE_ENHANCE int * // I-O
+ 0xC0044D1E SOUND_MIXER_WRITE_LOUD int * // I-O
+ 0xC0044DFF SOUND_MIXER_WRITE_RECSRC int * // I-O
+
+ // <include/linux/umsdos_fs.h>
+ 0x000004D2 UMSDOS_READDIR_DOS struct umsdos_ioctl * // I-O
+ 0x000004D3 UMSDOS_UNLINK_DOS const struct umsdos_ioctl *
+ 0x000004D4 UMSDOS_RMDIR_DOS const struct umsdos_ioctl *
+ 0x000004D5 UMSDOS_STAT_DOS struct umsdos_ioctl * // I-O
+ 0x000004D6 UMSDOS_CREAT_EMD const struct umsdos_ioctl *
+ 0x000004D7 UMSDOS_UNLINK_EMD const struct umsdos_ioctl *
+ 0x000004D8 UMSDOS_READDIR_EMD struct umsdos_ioctl * // I-O
+ 0x000004D9 UMSDOS_GETVERSION struct umsdos_ioctl *
+ 0x000004DA UMSDOS_INIT_EMD void
+ 0x000004DB UMSDOS_DOS_SETUP const struct umsdos_ioctl *
+ 0x000004DC UMSDOS_RENAME_DOS const struct umsdos_ioctl *
+
+ // <include/linux/vt.h>
+ 0x00005600 VT_OPENQRY int *
+ 0x00005601 VT_GETMODE struct vt_mode *
+ 0x00005602 VT_SETMODE const struct vt_mode *
+ 0x00005603 VT_GETSTATE struct vt_stat *
+ 0x00005604 VT_SENDSIG void
+ 0x00005605 VT_RELDISP int
+ 0x00005606 VT_ACTIVATE int
+ 0x00005607 VT_WAITACTIVE int
+ 0x00005608 VT_DISALLOCATE int
+ 0x00005609 VT_RESIZE const struct vt_sizes *
+ 0x0000560A VT_RESIZEX const struct vt_consize *
+ Einige ioctls benötigen einen Pointer auf eine Struktur, die
+ zusätzliche Pointer enthält. Diese sind hier in alphabetischer Reihen‐
+ folge dokumentiert.
+
+ CDROMREADAUDIO benötigt einen Eingabe-Pointer const struct
+ cdrom_read_audio *. Das Feld buf zeigt auf einen Ausgabepuffer der
+ Länge nframes * CD_FRAMESIZE_RAW.
+
+ CDROMREADCOOKED, CDROMREADMODE1, CDROMREADMODE2 und CDROMREADRAW
+ benötigen einen Eingabe-Pointer const struct cdrom_msf *. Sie benutzen
+ denselben Pointer als Ausgabe-Pointer auf char []. Die Länge ändert
+ sich durch Anforderung. Bei CDROMREADMODE1 benutzen die meisten
+ Treiber CD_FRAMESIZE, jedoch benutzt der Optics Storage-Treiber
+ stattdessen OPT_BLOCKSIZE (beide haben den numerischen Wert 2048).
+ CDROMREADCOOKED char [CD_FRAMESIZE]
+ CDROMREADMODE1 char [CD_FRAMESIZE oder OPT_BLOCKSIZE]
+ CDROMREADMODE2 char [CD_FRAMESIZE_RAW0]
+ CDROMREADRAW char [CD_FRAMESIZE_RAW]
+ EQL_ENSLAVE, EQL_EMANCIPATE, EQL_GETSLAVECFG, EQL_SETSLAVECFG,
+ EQL_GETMASTERCFG und EQL_SETMASTERCFG benötigen eine struct ifreq *. Das Feld
+ ifr_data ist ein Pointer auf eine weitere Struktur wie folgt:
+ EQL_ENSLAVE const struct slaving_request *
+ EQL_EMANCIPATE const struct slaving_request *
+ EQL_GETSLAVECFG struct slave_config * // I-O
+ EQL_SETSLAVECFG const struct slave_config *
+ EQL_GETMASTERCFG struct master_config *
+ EQL_SETMASTERCFG const struct master_config *
+ FDRAWCMD benötigt eine struct floppy_raw_cmd *. Wenn flags &
+ FD_RAW_WRITE nicht Null ist, dann zeigt data auf einen Eingabepuffer
+ der Länge length. Wenn flags & FD_RAW_READ nicht Null ist, dann zeigt
+ data auf einen Ausgabepuffer der Länge length.
+
+ GIO_FONTX und PIO_FONTX benötigen eine struct console_font_desc *
+ beziehungsweise eine const struct console_font_desc *. chardata zeigt
+ auf einen Puffer von char [charcount]. Dies ist ein Ausgabepuffer für
+ GIO_FONTX und ein Eingabepuffer für PIO_FONTX.
+
+ GIO_UNIMAP und PIO_UNIMAP benötigen eine struct unimapdesc *
+ beziehungsweise eine const struct unimapdesc *. entries zeigt auf
+ einen Puffer von struct unipair [entry_ct]. Dies ist ein Ausgabepuffer
+ für GIO_UNIMAP und ein Eingabepuffer für PIO_UNIMAP.
+
+ KDADDIO, KDDELIO, KDDISABIO und KDENABIO geben Zugriff frei oder sper‐
+ ren Zugriff auf I/O-Ports. Sie sind im Wesentlichen alternative
+ Schnittstellen zu ioperm.
+
+ KDMAPDISP und KDUNMAPDISP geben frei oder sperren Memory-Mappings oder
+ Zugriff auf I/O-Ports. Sie sind nicht im Kernel implementiert.
+
+ SCSI_IOCTL_PROBE_HOST benötigt einen Eingabe-Pointer const int *, der
+ eine Länge ist. Es benutzt den selben Pointer als Ausgabe-Pointer auf
+ einen Puffer char [] dieser Länge.
+
+ SIOCADDRT und SIOCDELRT benötigen einen Eingabe-Pointer, dessen Typ vom
+ Protokoll abhängt:
+ Die meisten Protokolle const struct rtentry *
+ AX.25 const struct ax25_route *
+ NET/ROM const struct nr_route_struct *
+ SIOCGIFCONF benötigt eine struct ifconf *. Das Feld ifc_buf zeigt auf
+ einen Puffer der Länge ifc_len Byte, wohinein der Kernel eine Liste des
+ Typs struct ifreq [] schreibt.
+
+ SIOCSIFHWADDR benötigt einen Eingabe-Pointer, dessen Typ vom Protokoll
+ abhängt:
+ Die meisten Protokolle const struct ifreq *
+ AX.25 const char [AX25_ADDR_LEN]
+ TIOCLINUX benötigt eine const char *. Es benutzt dies, um zwischen
+ diversen unabhängigen Fällen zu unterscheiden. In der Tabelle unten
+ bedeutet »N + foo« so viel wie »foo« nach einem N-byte-Block. struct
+ selection ist definiert in drivers/char/selection.c.
+ TIOCLINUX-2 1 + const struct selection *
+ TIOCLINUX-3 void
+ TIOCLINUX-4 void
+ TIOCLINUX-5 4 + const struct { long [8]; } *
+ TIOCLINUX-6 char *
+ TIOCLINUX-7 char *
+ TIOCLINUX-10 1 + const char *
+
+ Doppelte ioctls
+ Diese Liste enthält keine ioctls der Gruppen SIOCDEVPRIVATE und
+ SIOCPROTOPRIVATE.
+ 0x00000001 FDSETPRM FIBMAP
+ 0x00000002 FDDEFPRM FIGETBSZ
+ 0x00005382 CDROMAUDIOBUFSIZ SCSI_IOCTL_GET_IDLUN
+ 0x00005402 SNDCTL_TMR_START TCSETS
+ 0x00005403 SNDCTL_TMR_STOP TCSETSW
+ 0x00005404 SNDCTL_TMR_CONTINUE TCSETSF
+
+=======
+
+Powered by the Ubuntu Manpage Repository generator
+Maintained by Dustin Kirkland
--- /dev/null
+SYSCALLS
+
+on linux/i386, the machine code puts the arguments of a syscall in the
+registers AX, BX, CX, DX, DI, SI and makes a soft interrupt 0x80.
+
+as the plan9 kernel doesn't care about the interrupt vector 0x80, it
+sends a note to the process that trapped and, if the note is not handled,
+kills it. in a note handler, it is possible to access the machine state the
+process had when the trap/interrupt happened through the ureg argument.
+
+in linuxemu, we install a note handler that checks if the trap was a
+linux syscall and, if so, calls our handler function from our systab.
+
+after our syscall handler returned, we move the program counter
+in the machine state structure after the int 0x80 instruction and
+continue execution by accepting the note as handled with a call to
+noted(NCONT).
+
+to do automatic conversion to a plan9 function call, the number of
+arguments and the function name of the handler must be known. this
+information is provided by the linuxcalltab input file that is fed through
+linuxcalltab.awk to build the necessary tables.
+
+the linux specific syscall handling and argument conversion is done in
+linuxcall.c only. the idea is to later add support for other syscall
+personalities like bsd without having to change the handler code.
+
+
+MEMORY
+
+unlike shared libraries, which are position independent, binaries have to be
+loaded to a fixed address location. (elf supports position independent
+programs that can be loaded anywhere, but it's not used on i386)
+
+the emulator doesn't need to load and relocate shared libraries itself. this is
+done by the runtime linker (/lib/ld-linux.so). it just needs to load
+the binary and the runtime linker to their preferred locations and jump into
+the entry point. then the runtime linker will parse the elf sections of the
+binary and call mmap to load further shared libraries.
+
+the first thing we need is an implementation of mmap that allows us
+to copy files to fixed addresses into memory. to do that on plan9,
+segments are used.
+
+it is not possible to create a segment for every memory mapping
+because plan9 limits the number of segments per process to a small
+number. instead we create a fixed number of segments and
+expand/shrink them on demand. the linux stack area is fixed size and
+uses the fact that plan9 doesn't allocate physical memory until pages
+are touched.
+
+there are 3 segments created for a linux process:
+
+"private" is used for all MAP_PRIVATE mappings and can be shared if
+processes run in the same address space. code, data and files are mapped there.
+
+"shared" for shared memory mappings.
+
+"stack" is like "private", but lives just below the plan9 stack segment.
+this is needed because glibc expands the stack down by mmap() pages
+below the current stack area. we cannot use the plan9 stack segment
+because that segment is copied on rfork and is never shared between
+processes.
+
+the data structures of the emulator itself ("kernel memory") need to
+be shared by all processes even if the linux process runs in its own
+private address space, so the plan9 Bss and Data segments are made
+shared on startup by copying the contents of the original segment into a
+temporary file, segdetach()ing it, segattach()ing a new shared segment
+in the same place and copying the data back in from the file.
+
+with this memory layout, it is possible for the linux process to damage
+data structures in the emulator. but we seem to be lucky for now :)
+
+
+USER PROCESSES (UPROCS)
+
+linuxemu does not switch and schedule linux processes itself. every user
+process has its own plan9 process. memory sharing semantics are translated
+to rfork flags on fork/clone.
+
+we have a global process table of Uproc structures to track states and
+resources for all user processes:
+
+fs: filesystem mount table
+fdtab: the filedescriptor table
+mem: memory mappings
+signal: signal handler and queue
+trace: debug trace buffer
+
+resources that can be shared are reference counted and get freed when
+the last process referencing them exits.
+
+
+KERNEL PROCESSES (KPROCS)
+
+if linuxemu needs to defer work or do asynchronous i/o, it can spawn a
+kernel process with kprocfork. kernel processes don't have a Uproc
+structure associated and have the userspace memory segments detached,
+and therefore can't access userspace memory.
+
+bufprocs and timers are implemented with kernel processes.
+
+
+DEVICES
+
+earlier versions mapped linux files directly to plan9 files. this made
+the implementation of ioctls, symlinks, remove on close, and
+select/poll hard and also had problems with implementing fork sharing
+semantics.
+
+current linuxemu does it all by itself. there is a global device table
+of Udev structures. devices can implement all i/o related syscalls by
+providing a function pointer in their Udev. when a device has to deal
+with asynchronous io on real plan9 files it uses bufprocs.
+
+
--- /dev/null
+- AF_INET6
+ i dont need that too yet
+
+- VDSO
+ we could gain quite a bit of performance if we could avoid
+ the trapping overhead and let linux code jump directly
+ into the linuxemu handler.
+
+- dsp / mixer
+ implement mixer ioctls in devdsp
+ mmap and trigger caps for quake
+
+- ptrace
+ implement ptrace support so we can use a native debugger
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+enum {
+ FREQUENCY = 44100,
+ CHANNELS = 2,
+ DELAY = 100,
+ FRAGSIZE = 4096,
+};
+
+typedef struct Chan Chan;
+typedef struct DSP DSP;
+
+struct Chan
+{
+ ulong phase;
+ int last;
+};
+
+struct DSP
+{
+ Ufile;
+
+ int channels; /* number of channels (2 for stereo) */
+ int freq; /* frequency of sound stream */
+
+ int rfreq; /* frequency of /dev/audio */
+
+ uchar *buf; /* resampling */
+ ulong nbuf;
+ Chan chan[CHANNELS];
+
+ vlong time; /* time point of the last sample in device buffer */
+
+ ulong written; /* number of bytes written to dsp */
+ ulong written2; /* same as written, will be reset on every GETOPTR ioctl */
+};
+
+/*
+ * close callback of the dsp device: frees the resampling buffer
+ * and closes the fd stored in the DSP. always returns 0.
+ */
+static int
+closedsp(Ufile *file)
+{
+	DSP *d;
+
+	d = (DSP*)file;
+	trace("dsp: closedsp");
+	free(d->buf);
+	close(d->fd);
+	return 0;
+}
+
+/*
+ * poll callback: unconditionally reports the device as writable.
+ * both arguments are ignored.
+ */
+static int
+polldsp(Ufile *, void *)
+{
+	int events = POLLOUT;
+
+	return events;
+}
+
+/*
+ * read callback: reading from the dsp is not implemented,
+ * so every read returns 0 bytes.
+ */
+static int
+readdsp(Ufile *, void *, int, vlong)
+{
+	int nread = 0;	/* not implemented */
+
+	return nread;
+}
+
+/*
+ * resample one channel of signed 16 bit little-endian samples using
+ * linear interpolation between neighbouring input samples.
+ *
+ * c      per-channel state (phase and last input sample) kept across calls
+ * src    first input sample; consecutive samples are sstep bytes apart
+ * dst    first output sample; consecutive samples are dstep bytes apart
+ * delta  input advance per output sample, 16.16 fixed point
+ * count  number of input samples available at src
+ *
+ * returns the number of output samples stored at dst.
+ */
+static int
+resample(Chan *c, uchar *src, uchar *dst, int sstep, int dstep, ulong delta, ulong count)
+{
+ int last, val, out;
+ ulong phase, pos;
+ uchar *dp, *sp;
+
+ dp = dst;
+ last = val = c->last;
+ phase = c->phase;
+ pos = phase >> 16; /* integer part of phase: index of the current input sample */
+ while(pos < count){
+ sp = src + sstep*pos;
+ val = sp[0] | (sp[1] << 8);
+ val = (val & 0x7FFF) - (val & 0x8000); /* sign-extend the 16 bit value */
+ if(pos){
+ /* previous sample is in this buffer; at pos 0 the saved c->last is used instead */
+ sp -= sstep;
+ last = sp[0] | (sp[1] << 8);
+ last = (last & 0x7FFF) - (last & 0x8000);
+ }
+ /* interpolate between last and val by the fractional part of phase */
+ out = last + (((val - last) * (phase & 0xFFFF)) >> 16);
+ dp[0] = out;
+ dp[1] = out >> 8;
+ dp += dstep;
+ phase += delta;
+ pos = phase >> 16;
+ }
+ /* remember the most recently read input sample for the next call */
+ c->last = val;
+ if(delta < 0x10000){
+ /* keep only the fractional part of phase */
+ c->phase = phase & 0xFFFF;
+ } else {
+ /* keep how far phase ran past the end of this buffer */
+ c->phase = phase - (count << 16);
+ }
+ return (dp - dst) / dstep;
+}
+
+/*
+ * convert len bytes of client samples in buf (16 bit samples,
+ * dsp->channels interleaved channels at dsp->freq) into CHANNELS
+ * channels at dsp->rfreq.
+ *
+ * on return *out points to the converted data: buf itself when no
+ * conversion is needed, otherwise dsp->buf, which is grown on demand.
+ * returns the number of bytes at *out, or -EINVAL when the configured
+ * rates or channel count would lead to a division by zero.
+ */
+static int
+convertout(DSP *dsp, uchar *buf, int len, uchar **out)
+{
+	int ret, ch;
+	ulong count, delta;
+
+	/*
+	 * freq, rfreq and channels are used as divisors below (and rfreq
+	 * by the caller after a successful conversion); refuse values
+	 * that would trap instead of crashing the emulator.
+	 */
+	if(dsp->channels < 1 || dsp->freq < 1 || dsp->rfreq < 1)
+		return -EINVAL;
+
+	/* no conversion required? */
+	if(dsp->freq == dsp->rfreq && dsp->channels == CHANNELS){
+		*out = buf;
+		return len;
+	}
+
+	/*
+	 * delta is the number of input samples to
+	 * produce one output sample. scaled by 16 bit to
+	 * get fractional part.
+	 */
+	delta = ((ulong)dsp->freq << 16) / dsp->rfreq;
+	if(delta == 0)	/* ratio too small or shift overflowed; would divide by zero */
+		return -EINVAL;
+	count = len / (2 * dsp->channels);
+
+	/*
+	 * get maximum required size of output buffer. this is not exact!
+	 * number of output samples depends on phase!
+	 */
+	ret = (((count << 16) + delta-1) / delta) * 2*CHANNELS;
+	if(ret > dsp->nbuf){
+		free(dsp->buf);
+		dsp->buf = kmalloc(ret);
+		dsp->nbuf = ret;
+	}
+
+	/* fill each output channel from input channel (ch % dsp->channels) */
+	for(ch=0; ch < CHANNELS; ch++)
+		ret = resample(dsp->chan + ch,
+			buf + 2*(ch % dsp->channels),
+			dsp->buf + 2*ch,
+			2*dsp->channels,
+			2*CHANNELS,
+			delta,
+			count);
+
+	*out = dsp->buf;
+	return ret * 2*CHANNELS;
+}
+
+/*
+ * write callback: converts the client samples, writes them to the
+ * audio fd and keeps dsp->time (estimated time of the last queued
+ * sample) up to date. when more than DELAY milliseconds are queued
+ * ahead of now, sleeps for the excess. returns len on success,
+ * the convertout() result when that is <= 0, or mkerror() when the
+ * write fails.
+ */
+static int
+writedsp(Ufile *file, void *buf, int len, vlong)
+{
+	DSP *dsp;
+	uchar *data;
+	vlong now;
+	int n, ms;
+
+	dsp = (DSP*)file;
+
+	n = convertout(dsp, buf, len, &data);
+	if(n <= 0)
+		return n;
+
+	n = write(dsp->fd, data, n);
+	if(n < 0)
+		return mkerror();
+
+	now = nsec();
+	if(dsp->time >= now){
+		/* still data queued: throttle if we are too far ahead */
+		ms = (dsp->time - now) / 1000000;
+		if(ms > DELAY)
+			sleep(ms - DELAY);
+	} else {
+		/* queue ran empty: restart the accounting from now */
+		dsp->time = now;
+		dsp->written = 0;
+		dsp->written2 = 0;
+	}
+	dsp->time += ((1000000000LL) * n / (dsp->rfreq * 2*CHANNELS));
+	dsp->written += len;
+	dsp->written2 += len;
+
+	return len;
+}
+
+enum
+{
+ AFMT_S16_LE = 0x10,
+};
+
+/*
+ * ioctl callback implementing a subset of the OSS /dev/dsp ioctls.
+ * arg is read and written through casts according to cmd. returns 0
+ * on success, -ENOTTY for unknown commands, -EPERM for the
+ * recording-only queries and -EINVAL for a channel count or sample
+ * rate below 1 (those values are later used as divisors when
+ * converting samples).
+ */
+static int
+ioctldsp(Ufile *file, int cmd, void *arg)
+{
+	DSP *dsp = (DSP*)file;
+	int ret, i;
+	vlong now;
+
+	ret = 0;
+	switch(cmd){
+	default:
+		trace("dsp: unknown ioctl %lux %p", (ulong)cmd, arg);
+		ret = -ENOTTY;
+		break;
+
+	case 0xC004500A:
+		trace("dsp: SNDCTL_DSP_SETFRAGMENT(%lux)", *(ulong*)arg);
+		break;
+
+	case 0xC0045004:
+		trace("dsp: SNDCTL_DSP_GETBLKSIZE");
+		*((int*)arg) = FRAGSIZE;
+		break;
+
+	case 0x800c5011:
+		trace("dsp: SNDCTL_DSP_GETIPTR");
+		ret = -EPERM;
+		break;
+
+	case 0x800c5012:
+		trace("dsp: SNDCTL_DSP_GETOPTR");
+		((int*)arg)[0] = dsp->written; // Total # of bytes processed
+		((int*)arg)[1] = dsp->written2 / FRAGSIZE; // # of fragment transitions since last time
+		dsp->written2 = 0;
+		((int*)arg)[2] = 0; // Current DMA pointer value
+		break;
+
+	case 0x8010500D:
+		trace("dsp: SNDCTL_DSP_GETISPACE");
+		ret = -EPERM;
+		break;
+
+	case 0x8010500C:
+		trace("dsp: SNDCTL_DSP_GETOSPACE");
+		/* total buffer: DELAY milliseconds worth of client samples */
+		i = (2 * dsp->channels) * ((dsp->freq*DELAY)/1000);
+		((int*)arg)[1] = i / FRAGSIZE; // fragstot
+		((int*)arg)[2] = FRAGSIZE; // fragsize
+		now = nsec();
+		if(now < dsp->time){
+			/* subtract what is still queued ahead of now */
+			i -= ((2 * dsp->channels) * (((dsp->time - now) * (vlong)dsp->freq) / 1000000000));
+			if(i < 0)
+				i = 0;
+		}
+		((int*)arg)[0] = i / FRAGSIZE; // available fragment count
+		((int*)arg)[3] = i; // available space in bytes
+		break;
+
+	case 0x8004500B:
+		trace("dsp: SNDCTL_DSP_GETFMTS(%d)", *(int*)arg);
+		*(int*)arg = AFMT_S16_LE;
+		break;
+
+	case 0x8004500F:
+		trace("dsp: SNDCTL_DSP_GETCAPS");
+		*(int*)arg = 0x400;
+		break;
+
+	case 0xC0045005:
+		trace("dsp: SNDCTL_DSP_SETFMT(%d)", *(int*)arg);
+		*(int*)arg = AFMT_S16_LE;
+		break;
+
+	case 0xC0045006:
+		trace("dsp: SOUND_PCM_WRITE_CHANNELS(%d)", *(int*)arg);
+		/* the channel count is a divisor in the sample conversion */
+		if(*(int*)arg < 1){
+			ret = -EINVAL;
+			break;
+		}
+		dsp->channels = *(int*)arg;
+		break;
+
+	case 0xC0045003:
+		trace("dsp: SNDCTL_DSP_STEREO(%d)", *(int*)arg);
+		dsp->channels = 2;
+		*(int*)arg = 1;
+		break;
+
+	case 0xC0045002:
+		trace("dsp: SNDCTL_DSP_SPEED(%d)", *(int*)arg);
+		/* a rate of 0 would yield a zero resampling step */
+		if(*(int*)arg < 1){
+			ret = -EINVAL;
+			break;
+		}
+		dsp->freq = *(int*)arg;
+		/* restart the resampler state for the new rate */
+		for(i=0; i<CHANNELS; i++){
+			dsp->chan[i].phase = 0;
+			dsp->chan[i].last = 0;
+		}
+		break;
+
+	case 0x00005000:
+		trace("dsp: SNDCTL_DSP_RESET");
+		break;
+
+	case 0x00005001:
+		trace("dsp: SNDCTL_DSP_SYNC");
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * determine the output sample rate of the audio device by looking
+ * for "speed out <n>" in /dev/volume. returns FREQUENCY when the
+ * file cannot be opened or read, the entry is missing, or the parsed
+ * value is not positive (the rate is used as a divisor by callers).
+ */
+static int
+getaudiofreq(void)
+{
+	int ret, n, fd, v;
+	char buf[1024];
+	char *p;
+
+	ret = FREQUENCY;
+	if((fd = open("/dev/volume", OREAD)) < 0)
+		return ret;
+	if((n = read(fd, buf, sizeof(buf)-1)) > 0){
+		buf[n] = 0;
+		if((p = strstr(buf, "speed out ")) != nil){
+			/* atoi yields 0 on garbage; keep the default then */
+			v = atoi(p + 10);
+			if(v > 0)
+				ret = v;
+		}
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * open callback: for "/dev/dsp" and "/dev/dsp0" opens /dev/audio for
+ * writing and returns a new DSP file in *pf with client and device
+ * rate both set to the rate reported by getaudiofreq().
+ * returns 0 on success, -ENOENT for any other path, mkerror() when
+ * /dev/audio cannot be opened and -ENOMEM when allocation fails.
+ */
+int opendsp(char *path, int mode, int, Ufile **pf)
+{
+	DSP *dsp;
+	int freq;
+	int fd;
+
+	if(strcmp(path, "/dev/dsp")==0 || strcmp(path, "/dev/dsp0")==0){
+		if((fd = open("/dev/audio", OWRITE)) < 0)
+			return mkerror();
+
+		freq = getaudiofreq();
+		dsp = mallocz(sizeof(DSP), 1);
+		if(dsp == nil){
+			/* dont leak the audio fd or dereference nil */
+			close(fd);
+			return -ENOMEM;
+		}
+		dsp->ref = 1;
+		dsp->mode = mode;
+		dsp->dev = DSPDEV;
+		dsp->fd = fd;
+		dsp->path = kstrdup(path);
+		dsp->rfreq = freq;
+		dsp->freq = freq;
+		dsp->channels = CHANNELS;
+
+		*pf = dsp;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * fstat callback: describes the dsp as a character device with mode
+ * 0666, owned by the current process, size 0 and an inode number
+ * derived from the file path. always returns 0.
+ */
+static int
+fstatdsp(Ufile *f, Ustat *s)
+{
+	s->mode = 0666 | S_IFCHR;
+	s->uid = current->uid;
+	s->gid = current->gid;
+	s->ino = hashpath(f->path);
+	s->size = 0;
+	return 0;
+}
+
+/*
+ * stat callback: fills *s for "/dev/dsp" and "/dev/dsp0" (character
+ * device, mode 0666, owned by the current process, size 0) and
+ * returns 0; any other path yields -ENOENT.
+ */
+static int
+statdsp(char *path, int , Ustat *s)
+{
+	if(strcmp(path, "/dev/dsp")!=0 && strcmp(path, "/dev/dsp0")!=0)
+		return -ENOENT;
+
+	s->mode = 0666 | S_IFCHR;
+	s->uid = current->uid;
+	s->gid = current->gid;
+	s->ino = hashpath(path);
+	s->size = 0;
+	return 0;
+}
+
+/*
+ * device operations of the dsp device; installed in devtab and
+ * mounted by dspdevinit().
+ */
+static Udev dspdev =
+{
+ .open = opendsp,
+ .read = readdsp,
+ .write = writedsp,
+ .poll = polldsp,
+ .close = closedsp,
+ .ioctl = ioctldsp,
+ .stat = statdsp,
+ .fstat = fstatdsp,
+};
+
+/*
+ * register the dsp device in devtab and mount it on
+ * /dev/dsp and /dev/dsp0.
+ */
+void dspdevinit(void)
+{
+	Udev *dev = &dspdev;
+
+	devtab[DSPDEV] = dev;
+
+	fsmount(dev, "/dev/dsp");
+	fsmount(dev, "/dev/dsp0");
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * format verb handler for error numbers (used as %E by trace calls
+ * in this code). takes an int argument; a negative value whose
+ * magnitude has a name in the table prints as "-N [ENAME]",
+ * everything else prints as the plain number.
+ */
+int
+Efmt(Fmt *f)
+{
+	static char *t[] = {
+		[EPERM] "EPERM",
+		[ENOENT] "ENOENT",
+		[ESRCH] "ESRCH",
+		[EINTR] "EINTR",
+		[EIO] "EIO",
+		[ENXIO] "ENXIO",
+		[E2BIG] "E2BIG",
+		[ENOEXEC] "ENOEXEC",
+		[EBADF] "EBADF",
+		[ECHILD] "ECHILD",
+		[EAGAIN] "EAGAIN",
+		[ENOMEM] "ENOMEM",
+		[EACCES] "EACCES",
+		[EFAULT] "EFAULT",
+		[ENOTBLK] "ENOTBLK",
+		[EBUSY] "EBUSY",
+		[EEXIST] "EEXIST",
+		[EXDEV] "EXDEV",
+		[ENODEV] "ENODEV",
+		[ENOTDIR] "ENOTDIR",
+		[EISDIR] "EISDIR",
+		[EINVAL] "EINVAL",
+		[ENFILE] "ENFILE",
+		[EMFILE] "EMFILE",
+		[ENOTTY] "ENOTTY",
+		[ETXTBSY] "ETXTBSY",
+		[EFBIG] "EFBIG",
+		[ENOSPC] "ENOSPC",
+		[ESPIPE] "ESPIPE",
+		[EROFS] "EROFS",
+		[EMLINK] "EMLINK",
+		[EPIPE] "EPIPE",
+		[EDOM] "EDOM",
+		[ERANGE] "ERANGE",
+		[EDEADLK] "EDEADLK",
+		[ENAMETOOLONG] "ENAMETOOLONG",
+		[ENOLCK] "ENOLCK",
+		[ENOSYS] "ENOSYS",
+		[ENOTEMPTY] "ENOTEMPTY",
+		[ELOOP] "ELOOP",
+		[ENOMSG] "ENOMSG",
+		[EIDRM] "EIDRM",
+		[ECHRNG] "ECHRNG",
+		[EL2NSYNC] "EL2NSYNC",
+		[EL3HLT] "EL3HLT",
+		[EL3RST] "EL3RST",
+		[ELNRNG] "ELNRNG",
+		[EUNATCH] "EUNATCH",
+		[ENOCSI] "ENOCSI",
+		[EL2HLT] "EL2HLT",
+		[EBADE] "EBADE",
+		[EBADR] "EBADR",
+		[EXFULL] "EXFULL",
+		[ENOANO] "ENOANO",
+		[EBADRQC] "EBADRQC",
+		[EBADSLT] "EBADSLT",
+		[EBFONT] "EBFONT",
+		[ENOSTR] "ENOSTR",
+		[ENODATA] "ENODATA",
+		[ETIME] "ETIME",
+		[ENOSR] "ENOSR",
+		[ENONET] "ENONET",
+		[ENOPKG] "ENOPKG",
+		[EREMOTE] "EREMOTE",
+		[ENOLINK] "ENOLINK",
+		[EADV] "EADV",
+		[ESRMNT] "ESRMNT",
+		[ECOMM] "ECOMM",
+		[EPROTO] "EPROTO",
+		[EMULTIHOP] "EMULTIHOP",
+		[EDOTDOT] "EDOTDOT",
+		[EBADMSG] "EBADMSG",
+		[EOVERFLOW] "EOVERFLOW",
+		[ENOTUNIQ] "ENOTUNIQ",
+		[EBADFD] "EBADFD",
+		[EREMCHG] "EREMCHG",
+		[ELIBACC] "ELIBACC",
+		[ELIBBAD] "ELIBBAD",
+		[ELIBSCN] "ELIBSCN",
+		[ELIBMAX] "ELIBMAX",
+		[ELIBEXEC] "ELIBEXEC",
+		[EILSEQ] "EILSEQ",
+		[ERESTART] "ERESTART",
+		[ESTRPIPE] "ESTRPIPE",
+		[EUSERS] "EUSERS",
+		[ENOTSOCK] "ENOTSOCK",
+		[EDESTADDRREQ] "EDESTADDRREQ",
+		[EMSGSIZE] "EMSGSIZE",
+		[EPROTOTYPE] "EPROTOTYPE",
+		[ENOPROTOOPT] "ENOPROTOOPT",
+		[EPROTONOSUPPORT] "EPROTONOSUPPORT",
+		[ESOCKTNOSUPPORT] "ESOCKTNOSUPPORT",
+		[EOPNOTSUPP] "EOPNOTSUPP",
+		[EPFNOSUPPORT] "EPFNOSUPPORT",
+		[EAFNOSUPPORT] "EAFNOSUPPORT",
+		[EADDRINUSE] "EADDRINUSE",
+		[EADDRNOTAVAIL] "EADDRNOTAVAIL",
+		[ENETDOWN] "ENETDOWN",
+		[ENETUNREACH] "ENETUNREACH",
+		[ENETRESET] "ENETRESET",
+		[ECONNABORTED] "ECONNABORTED",
+		[ECONNRESET] "ECONNRESET",
+		[ENOBUFS] "ENOBUFS",
+		[EISCONN] "EISCONN",
+		[ENOTCONN] "ENOTCONN",
+		[ESHUTDOWN] "ESHUTDOWN",
+		[ETOOMANYREFS] "ETOOMANYREFS",
+		[ETIMEDOUT] "ETIMEDOUT",
+		[ECONNREFUSED] "ECONNREFUSED",
+		[EHOSTDOWN] "EHOSTDOWN",
+		[EHOSTUNREACH] "EHOSTUNREACH",
+		[EALREADY] "EALREADY",
+		[EINPROGRESS] "EINPROGRESS",
+		[ESTALE] "ESTALE",
+		[EUCLEAN] "EUCLEAN",
+		[ENOTNAM] "ENOTNAM",
+		[ENAVAIL] "ENAVAIL",
+		[EISNAM] "EISNAM",
+		[EREMOTEIO] "EREMOTEIO",
+		[EDQUOT] "EDQUOT",
+		[ENOMEDIUM] "ENOMEDIUM",
+		[EMEDIUMTYPE] "EMEDIUMTYPE",
+	};
+
+	int e;
+	char *name;
+
+	e = va_arg(f->args, int);
+	if(e >= 0 || -e >= nelem(t))
+		return fmtprint(f, "%d", e);
+	/*
+	 * slots without an initializer are nil (whether such holes exist
+	 * depends on the error numbering in linux.h); print those as a
+	 * plain number instead of handing nil to %s.
+	 */
+	name = t[-e];
+	if(name == nil)
+		return fmtprint(f, "%d", e);
+	return fmtprint(f, "%d [%s]", e, name);
+}
+
+/*
+ * translate the current plan9 error string (rerrstr) into a negative
+ * linux error number. the table is searched in order with strstr()
+ * and the first entry whose text occurs in the error string wins;
+ * -EIO is returned when nothing matches.
+ *
+ * because of the first-match rule the order of entries matters and
+ * an entry is never reached when an earlier entry has the same text
+ * (e.g. the second "no free file descriptors" and the second
+ * "connection refused").
+ */
+int
+mkerror(void)
+{
+ static struct {
+ int num;
+ char *msg;
+ } t[] = {
+ /* from /sys/src/9/port/errstr.h */
+ {EINVAL, "inconsistent mount"},
+ {EINVAL, "not mounted"},
+ {EINVAL, "not in union"},
+ {EIO, "mount rpc error"},
+ {EIO, "mounted device shut down"},
+ {EPERM, "mounted directory forbids creation"},
+ {ENOENT, "does not exist"},
+ {ENXIO, "unknown device in # filename"},
+ {ENOTDIR, "not a directory"},
+ {EISDIR, "file is a directory"},
+ {EINVAL, "bad character in file name"},
+ {EINVAL, "file name syntax"},
+ {EPERM, "permission denied"},
+ {EPERM, "inappropriate use of fd"},
+ {EINVAL, "bad arg in system call"},
+ {EBUSY, "device or object already in use"},
+ {EIO, "i/o error"},
+ {EIO, "read or write too large"},
+ {EIO, "read or write too small"},
+ {EADDRINUSE, "network port not available"},
+ {ESHUTDOWN, "write to hungup stream"},
+ {ESHUTDOWN, "i/o on hungup channel"},
+ {EINVAL, "bad process or channel control request"},
+ {EBUSY, "no free devices"},
+ {ESRCH, "process exited"},
+ {ECHILD, "no living children"},
+ {EIO, "i/o error in demand load"},
+ {ENOMEM, "virtual memory allocation failed"},
+ {EBADF, "fd out of range or not open"},
+ {EMFILE, "no free file descriptors"},
+ {ESPIPE, "seek on a stream"},
+ {ENOEXEC, "exec header invalid"},
+ {ETIMEDOUT, "connection timed out"},
+ {ECONNREFUSED, "connection refused"},
+ {ECONNREFUSED, "connection in use"},
+ {ERESTART, "interrupted"},
+ {ENOMEM, "kernel allocate failed"},
+ {EINVAL, "segments overlap"},
+ {EIO, "i/o count too small"},
+ {EINVAL, "bad attach specifier"},
+
+ /* from exhausted() calls in kernel */
+ {ENFILE, "no free file descriptors"},
+ {EBUSY, "no free mount devices"},
+ {EBUSY, "no free mount rpc buffer"},
+ {EBUSY, "no free segments"},
+ {ENOMEM, "no free memory"},
+ {ENOBUFS, "no free Blocks"},
+ {EBUSY, "no free routes"},
+
+ /* from ken */
+ {EINVAL, "attach -- bad specifier"},
+ {EBADF, "unknown fid"},
+ {EINVAL, "bad character in directory name"},
+ {EBADF, "read/write -- on non open fid"},
+ {EIO, "read/write -- count too big"},
+ {EIO, "phase error -- directory entry not allocated"},
+ {EIO, "phase error -- qid does not match"},
+ {EACCES, "access permission denied"},
+ {ENOENT, "directory entry not found"},
+ {EINVAL, "open/create -- unknown mode"},
+ {ENOTDIR, "walk -- in a non-directory"},
+ {ENOTDIR, "create -- in a non-directory"},
+ {EIO, "phase error -- cannot happen"},
+ {EEXIST, "create -- file exists"},
+ {EINVAL, "create -- . and .. illegal names"},
+ {ENOTEMPTY, "remove -- directory not empty"},
+ {EINVAL, "attach -- privileged user"},
+ {EPERM, "wstat -- not owner"},
+ {EPERM, "wstat -- not in group"},
+ {EINVAL, "create/wstat -- bad character in file name"},
+ {EBUSY, "walk -- too many (system wide)"},
+ {EROFS, "file system read only"},
+ {ENOSPC, "file system full"},
+ {EINVAL, "read/write -- offset negative"},
+ {EBUSY, "open/create -- file is locked"},
+ {EBUSY, "close/read/write -- lock is broken"},
+
+ /* from sockets */
+ {ENOTSOCK, "not a socket"},
+ {EPROTONOSUPPORT, "protocol not supported"},
+ {ECONNREFUSED, "connection refused"},
+ {EAFNOSUPPORT, "address family not supported"},
+ {ENOBUFS, "insufficient buffer space"},
+ {EOPNOTSUPP, "operation not supported"},
+ {EADDRINUSE, "address in use"},
+
+ /* other */
+ {EEXIST, "file already exists"},
+ {EEXIST, "is a directory"},
+ {ENOTEMPTY, "directory not empty"},
+ };
+
+ int r, i;
+ char msg[ERRMAX];
+
+ rerrstr(msg, sizeof(msg));
+
+ /* default when no table entry matches */
+ r = -EIO;
+ for(i=0; i<nelem(t); i++){
+ /* substring match: first hit in table order decides */
+ if(strstr(msg, t[i].msg)){
+ r = -t[i].num;
+ break;
+ }
+ }
+
+ trace("mkerror(%s): %E", msg, r);
+ return r;
+}
+
+/*
+ * handler for unimplemented system calls: traces the name of the
+ * current syscall and fails with -ENOSYS.
+ */
+int sys_nosys(void)
+{
+	int err = -ENOSYS;
+
+	trace("syscall %s not implemented", current->syscall);
+	return err;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <tos.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Elfhdr Elfhdr;
+typedef struct Proghdr Proghdr;
+typedef struct ElfEx ElfEx;
+
+struct Elfhdr {
+ uchar ident[16];
+ ushort type;
+ ushort machine;
+ ulong version;
+ ulong entry;
+ ulong phoff;
+ ulong shoff;
+ ulong flags;
+ ushort ehsize;
+ ushort phentsize;
+ ushort phnum;
+ ushort shentsize;
+ ushort shnum;
+ ushort shstrndx;
+};
+
+struct Proghdr {
+ ulong type;
+ ulong offset;
+ ulong vaddr;
+ ulong paddr;
+ ulong filesz;
+ ulong memsz;
+ ulong flags;
+ ulong align;
+};
+
+struct ElfEx
+{
+ ulong ientry;
+ ulong ibase;
+
+ ulong entry;
+ ulong base;
+
+ ulong phdr;
+ ulong phnum;
+ ulong phent;
+};
+
+/*
+ * zero the memory from addr up to pagealign(addr).
+ * does nothing when addr is already aligned.
+ */
+static void
+padzero(ulong addr)
+{
+	ulong top;
+
+	top = pagealign(addr);
+	if(top == addr)
+		return;
+	memset((void*)addr, 0, top - addr);
+}
+
+enum {
+ /* file types */
+ ElfTNone = 0,
+ ElfTReloc = 1,
+ ElfTExec = 2,
+ ElfTShared = 3,
+ ElfTCore = 4,
+ ElfTMax = 5,
+
+ /* machine architectures */
+ ElfMNone = 0,
+ ElfM32 = 1,
+ ElfMSparc = 2,
+ ElfM386 = 3,
+ ElfM68 = 4,
+ ElfM88 = 5,
+ ElfM860 = 7,
+ ElfMMips = 8,
+ ElfMMax = 9,
+
+ /* program segment types */
+ ElfPNull = 0,
+ ElfPLoad = 1,
+ ElfPDynamic = 2,
+ ElfPInterp = 3,
+ ElfPNote = 4,
+ ElfPShlib = 5,
+ ElfPPhdr = 6,
+ ElfPMax = 7,
+
+ /* program segment flags */
+ ElfPFX = 1,
+ ElfPFW = 2,
+ ElfPFR = 4,
+};
+
+/*
+ * map the ELF image named by file into memory and describe it in *ex.
+ *
+ * every PT_LOAD segment is mmap'ed from the file; for the top level
+ * image (depth == 0) the data area and break are set up as well and
+ * current->codestart/codeend are recorded. if the image names an
+ * interpreter (PT_INTERP) it is loaded recursively with depth+1 and
+ * its entry/base are stored in ex->ientry/ex->ibase; otherwise
+ * ex->ientry is the image's own entry and ex->ibase is 0.
+ *
+ * returns 0 on success and -1 with the error string set on failure.
+ */
+static int
+loadelf(char *file, ElfEx *ex, int depth)
+{
+	int fd;
+	int i, l;
+	int mapprot;
+	int mapflags;
+	ulong mapbase;
+	ulong loadaddr;
+	ulong bss;
+
+	Elfhdr hdr;
+	Proghdr *phdr;
+	char *interpreter;
+
+	interpreter = nil;
+	phdr = nil;
+
+	if((fd = sys_open(file, O_RDONLY, 0)) < 0){
+		werrstr("cant open %s", file);
+		goto errout;
+	}
+
+	if(sys_read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)){
+		werrstr("cant read elf header");
+		goto errout;
+	}
+
+	if(memcmp(hdr.ident, "\x7fELF", 4)!=0){
+		werrstr("no elf magic");
+		goto errout;
+	}
+
+	/*
+	 * the table is indexed as an array of Proghdr below; with any
+	 * other entry size phdr[i] would read past the allocated table
+	 * or decode entries at the wrong offsets.
+	 */
+	if(hdr.phentsize != sizeof(Proghdr)){
+		werrstr("bad program header size");
+		goto errout;
+	}
+
+	l = hdr.phnum * hdr.phentsize;
+	phdr = kmalloc(l);
+	sys_lseek(fd, hdr.phoff, 0);
+	if(sys_read(fd, phdr, l) != l){
+		werrstr("cant read program headers");
+		goto errout;
+	}
+
+	loadaddr = 0;
+	mapbase = 0;
+	mapflags = MAP_PRIVATE;
+	if(hdr.type != ElfTShared)
+		mapflags |= MAP_FIXED;
+
+	trace("loadelf(): phnum=%d", hdr.phnum);
+
+	bss = 0;
+	for(i=0; i<hdr.phnum; i++){
+		Proghdr *p;
+
+		p = &phdr[i];
+		if(p->type == ElfPInterp){
+			if(interpreter){
+				werrstr("multiple interpeter sections");
+				goto errout;
+			}
+			l = p->filesz;
+
+			interpreter = kmalloc(l+1);
+			sys_lseek(fd, p->offset, 0);
+			if(sys_read(fd, interpreter, l)!=l){
+				werrstr("cant read interpreter section");
+				goto errout;
+			}
+			interpreter[l] = '\0';
+		}
+
+		if(p->type == ElfPLoad){
+			ulong a;
+			int diff;
+
+			trace("loadelf(): phdr %d: vaddr=%lux memsz=%lux filesz=%lux offset=%lux flags=%lux",
+				i,
+				p->vaddr,
+				p->memsz,
+				p->filesz,
+				p->offset,
+				p->flags);
+
+			mapprot = 0;
+			if(p->flags & ElfPFR)
+				mapprot |= PROT_READ;
+			if(p->flags & ElfPFW)
+				mapprot |= PROT_WRITE;
+			if(p->flags & ElfPFX)
+				mapprot |= PROT_EXEC;
+
+			/* the segment holding the entry point is always made executable */
+			if(hdr.entry >= p->vaddr && hdr.entry < p->vaddr + p->memsz)
+				mapprot |= PROT_EXEC;
+
+			/* offset of vaddr within its page */
+			diff = p->vaddr - (p->vaddr & ~(PAGESIZE-1));
+
+			/* have to call mapdata() before we do the first mmap */
+			if(loadaddr == 0 && depth == 0){
+				if(hdr.type == ElfTShared){
+					mapbase = pagealign((ulong)end + 0x4000000);
+					mapflags |= MAP_FIXED;
+				}
+				mapdata((mapbase + p->vaddr) - diff);
+			}
+
+			a = sys_mmap(
+				(mapbase + p->vaddr) - diff,
+				p->filesz + diff,
+				mapprot,
+				mapflags,
+				fd,
+				(p->offset - diff)/PAGESIZE);
+
+			/* small negative values are error codes, not addresses */
+			if(((int)a < 0) && ((int)a > -EMAX)){
+				werrstr("mmap failed: %E", (int)a);
+				goto errout;
+			}
+			if(loadaddr == 0)
+				loadaddr = a;
+			if(hdr.type == ElfTShared && mapbase == 0){
+				/* first mapping picked the base; pin the rest relative to it */
+				mapbase = a + diff;
+				mapflags |= MAP_FIXED;
+			}
+			if(mapprot & PROT_WRITE)
+				padzero(mapbase + p->vaddr + p->filesz);
+			if(depth == 0)
+				if(mapbase + p->vaddr + p->memsz > bss)
+					bss = mapbase + p->vaddr + p->memsz;
+		} else {
+			trace("loadelf(): phdr %d: type=%lux", i, p->type);
+		}
+	}
+
+	ex->base = loadaddr;
+	ex->entry = hdr.entry + ((hdr.type == ElfTShared) ? loadaddr : 0);
+
+	ex->phdr = loadaddr + hdr.phoff;
+	ex->phent = hdr.phentsize;
+	ex->phnum = hdr.phnum;
+
+	if(depth == 0){
+		sys_brk(pagealign(bss));
+
+		current->codestart = loadaddr;
+		current->codeend = bss;
+	}
+
+	if(interpreter){
+		ElfEx interpex;
+
+		if(loadelf(interpreter, &interpex, depth+1) < 0){
+			werrstr("cant load interpreter: %r");
+			goto errout;
+		}
+		free(interpreter);
+
+		ex->ientry = interpex.entry;
+		ex->ibase = interpex.base;
+	} else {
+		ex->ientry = ex->entry;
+		ex->ibase = 0; /* no interpreter */
+	}
+
+	sys_close(fd);
+	free(phdr);
+	return 0;
+
+errout:
+	if(fd >= 0)
+		sys_close(fd);
+	free(interpreter);
+	free(phdr);
+	return -1;
+}
+
+
+enum {
+ AT_NULL,
+ AT_IGNORE,
+ AT_EXECFD,
+ AT_PHDR,
+ AT_PHENT,
+ AT_PHNUM,
+ AT_PAGESZ,
+ AT_BASE,
+ AT_FLAGS,
+ AT_ENTRY,
+ AT_NOTELF,
+ AT_UID,
+ AT_EUID,
+ AT_GID,
+ AT_EGID,
+ AT_PLATFORM,
+ AT_HWCAP,
+ AT_CLKTCK,
+ AT_SECURE = 23,
+
+ AT_SYSINFO = 32,
+ AT_SYSINFO_EHDR = 33,
+};
+
+/*
+ * map the user stack and build the initial process stack image on it:
+ * argc, the argv pointer array, the envp pointer array, the auxiliary
+ * vector and finally the argument and environment strings.
+ *
+ * returns the initial stack pointer (also stored in
+ * current->stackstart) or nil with the error string set.
+ */
+static void*
+setupstack(ElfEx *ex, char *argv[], char *envp[])
+{
+ int envc;
+ int argc;
+
+ char **dargv;
+ char **denv;
+
+ ulong *stack;
+ ulong *p;
+ char *x;
+ int i, n;
+
+ /*
+ * calculate the size we need on stack
+ */
+ argc=0;
+ while(argv && argv[argc]) argc++;
+
+ envc=0;
+ while(envp && envp[envc]) envc++;
+
+ n = 0;
+ n += sizeof(ulong); // argc
+ n += (argc+1)*sizeof(char*); // argv + nil
+ n += (envc+1)*sizeof(char*); // envp + nil
+ n += 16*(2*sizeof(ulong)); // aux
+
+ for(i=0; i<argc; i++)
+ n += (strlen(argv[i])+1);
+ for(i=0; i<envc; i++)
+ n += (strlen(envp[i])+1);
+
+ if(USTACK - n < PAGESIZE){
+ werrstr("too many arguments passed on stack");
+ return nil;
+ }
+
+ stack = mapstack(USTACK);
+
+ /* small negative values are error codes, not addresses */
+ if(((int)stack < 0) && ((int)stack > -EMAX)){
+ werrstr("mapstack failed: %E", (int)stack);
+ return nil;
+ }
+ /* reserve n bytes below the returned address, rounded down to 8 bytes */
+ stack = (ulong*)(((ulong)stack - n) & ~7);
+
+ current->stackstart = (ulong)stack;
+
+ p = stack;
+
+ *p++ = argc;
+
+ /* pointer arrays are filled in below, once the strings are copied */
+ dargv = (char**)p;
+ p += (argc + 1);
+
+ denv = (char**)p;
+ p += (envc + 1);
+
+ /* 16 entries; must match the aux size reserved in n above */
+#define AUXENT(k, v) {p[0]=k; p[1]=v; p+=2;}
+ AUXENT(AT_PAGESZ, PAGESIZE);
+ AUXENT(AT_CLKTCK, HZ);
+ AUXENT(AT_PHDR, ex->phdr);
+ AUXENT(AT_PHENT, ex->phent);
+ AUXENT(AT_PHNUM, ex->phnum);
+ AUXENT(AT_BASE, ex->ibase);
+ AUXENT(AT_FLAGS, 0);
+ AUXENT(AT_ENTRY, ex->entry);
+ AUXENT(AT_UID, current->uid);
+ AUXENT(AT_EUID, current->uid);
+ AUXENT(AT_GID, current->gid);
+ AUXENT(AT_EGID, current->gid);
+ AUXENT(AT_NULL, 0);
+ AUXENT(AT_NULL, 0);
+ AUXENT(AT_NULL, 0);
+ AUXENT(AT_NULL, 0);
+#undef AUXENT
+
+ /* strings follow the aux vector; record each copy in dargv/denv */
+ x = (char*)p;
+
+ for(i=0; i<argc; i++)
+ x += (strlen(dargv[i] = strcpy(x, argv[i])) + 1);
+ dargv[argc] = 0;
+ for(i=0; i<envc; i++)
+ x += (strlen(denv[i] = strcpy(x, envp[i])) + 1);
+ denv[envc] = 0;
+
+ return stack;
+}
+
+/*
+ * duplicate a nil terminated string vector into a single kmalloc'ed
+ * block: the pointer array (including the terminating nil) comes
+ * first, followed by the string data. returns nil when a is nil.
+ */
+static char**
+copystrings(char *a[])
+{
+	char **vec, *s;
+	int cnt, size, k;
+
+	if(a == nil)
+		return nil;
+
+	/* one slot for the terminator plus slot and bytes per string */
+	size = sizeof(a[0]);
+	for(cnt = 0; a[cnt]; cnt++)
+		size += sizeof(a[0]) + (strlen(a[cnt]) + 1);
+
+	vec = kmalloc(size);
+	s = (char*)&vec[cnt+1];
+	for(k = 0; k < cnt; k++){
+		vec[k] = strcpy(s, a[k]);
+		s += strlen(vec[k])+1;
+	}
+	vec[cnt] = 0;
+	return vec;
+}
+
+/*
+ * set the plan9 process name to "name arg0 arg1 ..." and store the
+ * command line in current->comm as exe and each argument, every one
+ * terminated by a 0 byte, followed by one extra 0 byte;
+ * current->ncomm is the total length. a nil argv is treated as an
+ * empty argument list.
+ */
+static void
+setcomm(char *exe, char *name, char *argv[])
+{
+	static char *noargs[] = { nil };
+	char *buf, *p;
+	int i, n;
+
+	/* the caller may pass nil when execve was called without argv */
+	if(argv == nil)
+		argv = noargs;
+
+	/* one buffer, large enough for both layouts built below */
+	n = strlen(exe) + strlen(name) +2;
+	for(i=0; argv[i]; i++)
+		n += strlen(argv[i])+1;
+
+	buf = kmalloc(n);
+
+	p = buf;
+	p += strlen(strcpy(p, name));
+	for(i=0; argv[i]; i++){
+		p += strlen(strcpy(p, " "));
+		p += strlen(strcpy(p, argv[i]));
+	}
+	setprocname(buf);
+
+	/* comm contains the full exe name + argv */
+	p = buf;
+	p += strlen(strcpy(p, exe));
+	*p++ = 0;
+	for(i=0; argv[i]; i++){
+		p += strlen(strcpy(p, argv[i]));
+		*p++ = 0;
+	}
+	*p++ = 0;
+
+	free(current->comm);
+	current->comm = buf;
+	current->ncomm = p - buf;
+}
+
+/*
+ * used by kexecve() to get out of a note before the user stack is
+ * destroyed: calls notejmp() with a jmp_buf set up right here, so
+ * execution continues in this function, then puts the pc, sp and ax
+ * values saved on entry back into ureg.
+ */
+static void
+clinote(struct Ureg *ureg)
+{
+ jmp_buf jmp;
+ ulong pc;
+ ulong sp;
+ ulong ax;
+
+ /* save the registers that are restored after the jump */
+ pc = ureg->pc;
+ sp = ureg->sp;
+ ax = ureg->ax;
+
+ /* setjmp returns 0 first; the second, nonzero return skips notejmp */
+ if(!setjmp(jmp))
+ notejmp(ureg, jmp, 1);
+
+ ureg->pc = pc;
+ ureg->sp = sp;
+ ureg->ax = ax;
+}
+
+struct kexecveargs
+{
+ char *name;
+ char **argv;
+ char **envp;
+};
+
+#pragma profile off
+
+/*
+ * body of execve; arg points to a struct kexecveargs.
+ *
+ * resolves "#!" scripts by rewriting name/argv inside the scratch
+ * buffer b and retrying, then copies name/argv/envp, tears down the
+ * old address space and signal state, loads the ELF image and builds
+ * the new stack.
+ *
+ * phase records how far we got so the errout code knows what to free:
+ *   0 only b is allocated, 1 name copied, 2 argv copied,
+ *   3 envp copied (from here on the old image is being destroyed),
+ *   4 new image is ready to run.
+ * at phase 4 retuser() is called, at phase 3 the process is killed
+ * with exitproc(); in earlier phases the negative error in r is returned.
+ */
+static int
+kexecve(void *arg)
+{
+ struct kexecveargs *args;
+ Ufile *f;
+ ElfEx ex;
+ Ureg u;
+ int r, n;
+ char *b, *p, *e, *x, **a;
+ void *stack;
+ char *name, *exe;
+ char **argv;
+ char **envp;
+ int phase;
+
+ args = arg;
+ name = args->name;
+ argv = args->argv;
+ envp = args->envp;
+
+ phase = 0;
+ /* scratch buffer: holds the exe path, the file head and the #! argv */
+ n = 8192;
+ b = kmalloc(n);
+ p = b;
+ e = b + n;
+again:
+ if(r = sys_access(name, 05)){
+ if(r > 0)
+ r = -EACCES;
+ goto errout;
+ }
+ if((r = sys_open(name, O_RDONLY, 0)) < 0)
+ goto errout;
+ exe = "/dev/null";
+ if(f = fdgetfile(r)){
+ if(f->path != nil){
+ /* keep the path of the opened file in b; exe points at it */
+ strncpy(p, f->path, e-p);
+ p += strlen(exe = p)+1;
+ }
+ putfile(f);
+ }
+ /* read the beginning of the file to detect its type */
+ n = sys_read(r, p, (e-p)-1);
+ sys_close(r);
+
+ r = -ENOEXEC;
+ if(n < 4)
+ goto errout;
+
+ if(memcmp(p, "#!", 2) == 0){
+ p[n] = 0;
+
+ r = -ENAMETOOLONG;
+ if((x = strchr(p, '\n')) == nil)
+ goto errout;
+ *x = 0;
+
+ /* build the new argv in the rest of b, right after the #! line */
+ a = (char**)&x[1];
+ n = (e - (char*)a) / sizeof(a[0]);
+ if(n < 2)
+ goto errout;
+ n = getfields(&p[2], a, n, 1, "\t\r\n ");
+ if(n < 1)
+ goto errout;
+ r = -E2BIG;
+ if(&a[n+1] >= (char**)e)
+ goto errout;
+ /* interpreter fields, then the script name, then the original arguments after argv[0] */
+ a[n++] = name;
+ if(argv != nil){
+ argv++;
+ while(*argv){
+ if(&a[n+1] >= (char**)e)
+ goto errout;
+ a[n++] = *argv++;
+ }
+ }
+ a[n++] = 0;
+ p = (char*)&a[n];
+ if(e - p < 4)
+ goto errout;
+ argv = a;
+ name = argv[0];
+
+ /* retry with the interpreter as the program to execute */
+ goto again;
+ }
+
+ if(memcmp(p, "\x7fELF", 4)!=0)
+ goto errout;
+
+ /*
+ * the contents on envp[] or argv[] maybe stored in b[], stack or bss of the calling linux
+ * process that is destroyed on free(b) and exitmem()... so we need to temporary
+ * copy them.
+ */
+ r = -ENOMEM;
+ name = kstrdup(name);
+ phase++;
+ if(argv)
+ argv = copystrings(argv);
+ phase++;
+ if(envp)
+ envp = copystrings(envp);
+ phase++;
+
+ /* get out of the note before we destroy user stack */
+ if(current->innote){
+ clinote(current->ureg);
+ current->innote = 0;
+ }
+
+ /* this is the point of no return! */
+ qlock(&proctab);
+ zapthreads();
+ exitmem();
+ exitsignal();
+
+ initmem();
+ initsignal();
+ inittls();
+ qunlock(&proctab);
+
+ closexfds();
+
+ setcomm(exe, name, argv);
+
+ if(loadelf(name, &ex, 0) < 0){
+ trace("kexecve(): loadelf failed: %r");
+ goto errout;
+ }
+
+ if((stack = setupstack(&ex, argv, envp)) == nil){
+ trace("kexecve(): setupstack failed: %r");
+ goto errout;
+ }
+
+ /* fresh register set: only sp and pc are nonzero */
+ memset(&u, 0, sizeof(u));
+ u.sp = (ulong)stack;
+ u.pc = (ulong)ex.ientry;
+ current->ureg = &u;
+ current->syscall = nil;
+ phase++;
+
+ trace("kexecve(): startup pc=%lux sp=%lux", current->ureg->pc, current->ureg->sp);
+
+errout:
+ /* reached on success too; cases fall through to free everything allocated so far */
+ switch(phase){
+ default: free(envp);
+ case 2: free(argv);
+ case 1: free(name);
+ case 0: free(b);
+ }
+ /* phase 4 enters the new image; case 3 kills the process (old image is gone) */
+ switch(phase){
+ case 4: retuser();
+ case 3: exitproc(current, SIGKILL, 1);
+ }
+ return r;
+}
+
+/*
+ * execve system call: packs the arguments into a kexecveargs
+ * and runs kexecve() via onstack() on kstack.
+ */
+int sys_execve(char *name, char *argv[], char *envp[])
+{
+	struct kexecveargs a;
+
+	trace("sys_execve(%s, %p, %p)", name, argv, envp);
+
+	a.envp = envp;
+	a.argv = argv;
+	a.name = name;
+
+	return onstack(kstack, kexecve, &a);
+}
+
+#pragma profile on
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Fd Fd;
+typedef struct Fdtab Fdtab;
+
+struct Fd
+{
+ int flags;
+ Ufile *file;
+};
+
+struct Fdtab
+{
+ Ref;
+ QLock;
+ int lastfd;
+ int nfd;
+ Fd *fd;
+};
+
+/*
+ * take an additional reference on file and return it.
+ * nil is passed through unchanged.
+ */
+Ufile*
+getfile(Ufile *file)
+{
+	if(file == nil)
+		return nil;
+	incref(file);
+	return file;
+}
+
+/*
+ * drop one reference on file. when the last reference is gone the
+ * device close callback (if any) is invoked and the path, the
+ * rdaux list and the file itself are freed. nil is ignored.
+ */
+void
+putfile(Ufile *file)
+{
+	Udirent *d, *next;
+
+	if(file == nil || decref(file))
+		return;
+
+	trace("putfile(): closing %p %s", file, file->path);
+	if(devtab[file->dev]->close)
+		devtab[file->dev]->close(file);
+	free(file->path);
+	for(d = file->rdaux; d; d = next){
+		next = d->next;
+		file->rdaux = next;
+		free(d);
+	}
+	free(file);
+}
+
+/*
+ * allocate an empty file descriptor table holding one reference.
+ */
+static Fdtab*
+newfdtab(void)
+{
+	Fdtab *t;
+
+	t = kmallocz(sizeof(*t), 1);
+	t->fd = nil;
+	t->nfd = 0;
+	t->lastfd = -1;
+	t->ref = 1;
+	return t;
+}
+
+enum {
+ CHUNK = 64,
+};
+
+/* assumes tab->lock aquired */
+/*
+ * append one zeroed slot to the table and return its index, or
+ * -EMFILE when the table already has MAXFD slots. the slot array is
+ * reallocated in steps of CHUNK entries.
+ */
+static int
+grow1(Fdtab *tab)
+{
+	int slot;
+
+	slot = tab->nfd;
+	if(slot >= MAXFD)
+		return -EMFILE;
+	if((slot % CHUNK) == 0)
+		tab->fd = krealloc(tab->fd, sizeof(tab->fd[0]) * (slot + CHUNK));
+	memset(&tab->fd[slot], 0, sizeof(tab->fd[0]));
+	tab->nfd = slot + 1;
+	return slot;
+}
+
+/*
+ * look up fd in the descriptor table of proc and return the file with
+ * an extra reference, or nil when proc has no table, fd is out of
+ * range or the slot is empty.
+ */
+Ufile *procfdgetfile(Uproc *proc, int fd)
+{
+	Fdtab *tab;
+	Ufile *file;
+
+	tab = proc->fdtab;
+	if(tab == nil)
+		return nil;
+
+	qlock(tab);
+	if(fd < 0 || fd >= tab->nfd)
+		file = nil;
+	else
+		file = getfile(tab->fd[fd].file);
+	qunlock(tab);
+	return file;
+}
+
+/*
+ * procfdgetfile() for the current process.
+ */
+Ufile*
+fdgetfile(int fd)
+{
+	Ufile *file;
+
+	file = procfdgetfile(current, fd);
+	return file;
+}
+
+/*
+ * install file in a free slot of the current process' descriptor
+ * table and return the descriptor. the slot remembered in lastfd is
+ * tried first, then the lowest free slot, then the table is grown.
+ * the caller's reference is consumed: it moves into the table on
+ * success and is dropped when no slot could be obtained, in which
+ * case the negative grow1() error is returned.
+ */
+int
+newfd(Ufile *file, int flags)
+{
+	Fdtab *tab;
+	int fd;
+
+	tab = current->fdtab;
+	qlock(tab);
+	fd = tab->lastfd;
+	if(fd < 0 || fd >= tab->nfd || tab->fd[fd].file != nil){
+		/* hint is unusable: scan for a hole, else extend the table */
+		for(fd = 0; fd < tab->nfd; fd++)
+			if(tab->fd[fd].file == nil)
+				break;
+		if(fd >= tab->nfd)
+			fd = grow1(tab);
+	}
+	if(fd >= 0){
+		tab->fd[fd].file = file;
+		tab->fd[fd].flags = flags;
+		file = nil;
+	}
+	qunlock(tab);
+	putfile(file);
+
+	return fd;
+}
+
+/*
+ * share or duplicate a descriptor table. without copy the table just
+ * gains a reference; with copy a new table is built whose slots
+ * reference the same files (each file gains a reference) and carry
+ * the same flags.
+ */
+static Fdtab*
+getfdtab(Fdtab *tab, int copy)
+{
+	Fdtab *dup;
+	Ufile *f;
+	int i, slots;
+
+	if(!copy){
+		incref(tab);
+		return tab;
+	}
+
+	qlock(tab);
+	dup = newfdtab();
+	dup->lastfd = tab->lastfd;
+	dup->nfd = tab->nfd;
+	/* capacity is nfd rounded up to a multiple of CHUNK */
+	slots = ((tab->nfd+CHUNK-1)/CHUNK)*CHUNK;
+	dup->fd = kmallocz(sizeof(dup->fd[0]) * slots, 1);
+	for(i = 0; i < dup->nfd; i++){
+		f = tab->fd[i].file;
+		if(f != nil){
+			incref(f);
+			dup->fd[i].file = f;
+			dup->fd[i].flags = tab->fd[i].flags;
+		}
+	}
+	qunlock(tab);
+	return dup;
+}
+
+/*
+ * drop one reference on the table; the last reference releases every
+ * file still installed and frees the table.
+ */
+static void
+putfdtab(Fdtab *tab)
+{
+	Ufile *f;
+	int i;
+
+	if(decref(tab))
+		return;
+	for(i = 0; i < tab->nfd; i++){
+		f = tab->fd[i].file;
+		if(f != nil){
+			tab->fd[i].file = nil;
+			putfile(f);
+		}
+	}
+	free(tab->fd);
+	free(tab);
+}
+
+/*
+ * duplicate descriptor old onto descriptor new. a negative new
+ * selects any free descriptor (this is how sys_dup() uses it).
+ * an open file already at new is released. the FD_CLOEXEC flag of
+ * the target slot is cleared.
+ *
+ * returns the new descriptor, -EBADF when old is not open or new is
+ * >= MAXFD, or the grow1() error when the table cannot be extended.
+ */
+int sys_dup2(int old, int new)
+{
+	Ufile *file;
+	Fdtab *tab;
+	int err;
+
+	trace("sys_dup2(%d, %d)", old, new);
+
+	tab = current->fdtab;
+
+	if((file = fdgetfile(old)) == nil)
+		return -EBADF;
+	if(new < 0)
+		return newfd(file, 0);	/* newfd consumes our reference */
+	if(new >= MAXFD){
+		/* drop the reference taken by fdgetfile() */
+		putfile(file);
+		return -EBADF;
+	}
+	qlock(tab);
+	/* extend the table until slot new exists */
+	while(new >= tab->nfd){
+		err = grow1(tab);
+		if(err < 0){
+			qunlock(tab);
+			putfile(file);
+			return err;
+		}
+	}
+	if(tab->fd[new].file != nil)
+		putfile(tab->fd[new].file);
+	tab->fd[new].file = file;
+	tab->fd[new].flags &= ~FD_CLOEXEC;
+	qunlock(tab);
+
+	return new;
+}
+
+/*
+ * dup system call: sys_dup2() with a negative target, which
+ * picks any free descriptor.
+ */
+int sys_dup(int fd)
+{
+	int new;
+
+	new = sys_dup2(fd, -1);
+	return new;
+}
+
+struct linux_flock
+{
+ short l_type;
+ short l_whence;
+ ulong l_start;
+ ulong l_len;
+ int l_pid;
+};
+
+struct linux_flock64
+{
+ short l_type;
+ short l_whence;
+ uvlong l_start;
+ uvlong l_len;
+ int l_pid;
+};
+
+enum {
+ F_RDLCK,
+ F_WRLCK,
+ F_UNLCK,
+};
+
+/*
+ * fcntl system call. returns -EBADF when fd is not open and -EINVAL
+ * for unknown commands or an out of range F_DUPFD argument.
+ * the lock commands are accepted but do nothing: queries always
+ * report F_UNLCK and set requests report success.
+ *
+ * the reference taken by fdgetfile() is dropped at out:, except when
+ * F_DUPFD stored it in the table (file is set to nil then).
+ */
+int sys_fcntl(int fd, int cmd, int arg)
+{
+ int ret;
+ Ufile *file;
+ Fdtab *tab;
+
+ trace("sys_fcntl(%d, %lux, %lux)", fd, (ulong)cmd, (ulong)arg);
+
+ tab = current->fdtab;
+
+ ret = -EBADF;
+ if((file = fdgetfile(fd)) == nil)
+ goto out;
+ ret = -EINVAL;
+ switch(cmd){
+ default:
+ trace("sys_fcntl() cmd %lux not implemented", (ulong)cmd);
+ break;
+
+ case F_DUPFD:
+ if(arg < 0 || arg >= MAXFD)
+ break;
+ qlock(tab);
+ /* lowest free slot at or above arg */
+ for(ret=arg; ret<tab->nfd; ret++)
+ if(tab->fd[ret].file == nil)
+ goto found;
+ /* none free: grow until a slot with index >= arg exists */
+ do {
+ if((ret = grow1(tab)) < 0)
+ break;
+ } while(ret < arg);
+found:
+ if(ret >= 0){
+ tab->fd[ret].file = file;
+ tab->fd[ret].flags = tab->fd[fd].flags & ~FD_CLOEXEC;
+ /* reference now owned by the table */
+ file = nil;
+ }
+ qunlock(tab);
+ break;
+
+ case F_GETFD:
+ case F_SETFD:
+ qlock(tab);
+ if(cmd == F_GETFD){
+ ret = tab->fd[fd].flags & FD_CLOEXEC;
+ } else {
+ tab->fd[fd].flags = (arg & FD_CLOEXEC);
+ ret = 0;
+ }
+ qunlock(tab);
+ break;
+
+ case F_GETFL:
+ ret = file->mode;
+ break;
+ case F_SETFL:
+ /* NOTE(review): replaces the whole mode word with arg — confirm this is intended */
+ trace("sys_fcntl() changing mode from %o to %o", file->mode, arg);
+ file->mode = arg;
+ ret = 0;
+ break;
+
+ case F_GETLK:
+ /* arg is the address of a struct linux_flock */
+ ((struct linux_flock*)arg)->l_type = F_UNLCK;
+ /* fall through */
+ case F_SETLK:
+ case F_SETLKW:
+ ret = 0;
+ break;
+
+ case F_GETLK64:
+ /* arg is the address of a struct linux_flock64 */
+ ((struct linux_flock64*)arg)->l_type = F_UNLCK;
+ /* fall through */
+ case F_SETLK64:
+ ret = 0;
+ break;
+ }
+out:
+ putfile(file);
+ return ret;
+}
+
+/*
+ * close system call: removes fd from the current descriptor table,
+ * remembers the slot in lastfd and drops the table's reference on
+ * the file. returns -EBADF when fd is not an open descriptor.
+ */
+int sys_close(int fd)
+{
+	Fdtab *tab;
+	Ufile *file;
+
+	trace("sys_close(%d)", fd);
+
+	file = nil;
+	tab = current->fdtab;
+	qlock(tab);
+	if(fd >= 0 && fd < tab->nfd && (file = tab->fd[fd].file) != nil){
+		tab->fd[fd].file = nil;
+		tab->lastfd = fd;
+	}
+	qunlock(tab);
+
+	/* release outside of the table lock */
+	if(file == nil)
+		return -EBADF;
+	putfile(file);
+	return 0;
+}
+
+/*
+ * ioctl system call: forwards to the ioctl callback of the file's
+ * device. returns -EBADF for a bad descriptor and -ENOTTY when the
+ * device has no ioctl callback.
+ */
+int sys_ioctl(int fd, int cmd, void *arg)
+{
+	Ufile *file;
+	int ret;
+
+	trace("sys_ioctl(%d, %lux, %p)", fd, (ulong)cmd, arg);
+
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	if(devtab[file->dev]->ioctl)
+		ret = devtab[file->dev]->ioctl(file, cmd, arg);
+	else
+		ret = -ENOTTY;
+	putfile(file);
+	return ret;
+}
+
+/*
+ * read len bytes at offset off through the device read callback.
+ * for O_NONBLOCK files whose device has a poll callback, -EAGAIN is
+ * returned when poll does not report POLLIN. returns 0 when the
+ * device has no read callback.
+ */
+int preadfile(Ufile *file, void *buf, int len, vlong off)
+{
+	if((file->mode & O_NONBLOCK)
+	&& devtab[file->dev]->poll != nil
+	&& (devtab[file->dev]->poll(file, nil) & POLLIN) == 0){
+		trace("readfile(): nonblocking read blocked");
+
+		return -EAGAIN;
+	}
+	if(devtab[file->dev]->read != nil)
+		return devtab[file->dev]->read(file, buf, len, off);
+	return 0;
+}
+
+/*
+ * read at the file's current offset and advance the offset by the
+ * number of bytes read. errors and 0 are returned unchanged.
+ */
+int readfile(Ufile *file, void *buf, int len)
+{
+	int n;
+
+	n = preadfile(file, buf, len, file->off);
+	if(n > 0)
+		file->off += n;
+	return n;
+}
+
+/*
+ * write len bytes at offset off through the device write callback.
+ * for O_APPEND files whose device has a size callback the offset is
+ * replaced by the current size (a negative size is returned as the
+ * error). returns 0 when the device has no write callback.
+ */
+int pwritefile(Ufile *file, void *buf, int len, vlong off)
+{
+	if(devtab[file->dev]->write == nil)
+		return 0;
+	if((file->mode & O_APPEND) && devtab[file->dev]->size){
+		off = devtab[file->dev]->size(file);
+		if(off < 0)
+			return (int)off;
+	}
+	return devtab[file->dev]->write(file, buf, len, off);
+}
+
+/*
+ * Write at the file's current offset and advance the offset by the
+ * number of bytes written.  With O_APPEND and a device size hook the
+ * offset is first moved to the current size, even for a zero-length
+ * write.  A device without a write hook writes 0 bytes.
+ */
+int writefile(Ufile *file, void *buf, int len)
+{
+	int n;
+	vlong size;
+
+	if(devtab[file->dev]->write == nil)
+		return 0;
+	if((file->mode & O_APPEND) != 0 && devtab[file->dev]->size != nil){
+		size = devtab[file->dev]->size(file);
+		if(size < 0)
+			return (int)size;
+		file->off = size;
+	}
+	if(len == 0)
+		return 0;
+	n = devtab[file->dev]->write(file, buf, len, file->off);
+	if(n > 0)
+		file->off += n;
+	return n;
+}
+
+/* read(2): readfile() on the file behind fd; -EBADF for an unknown fd. */
+int sys_read(int fd, void *buf, int len)
+{
+	Ufile *file;
+	int n;
+
+	trace("sys_read(%d, %p, %x)", fd, buf, len);
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	n = readfile(file, buf, len);
+	putfile(file);
+	return n;
+}
+
+/* write(2): writefile() on the file behind fd; -EBADF for an unknown fd. */
+int sys_write(int fd, void *buf, int len)
+{
+	Ufile *file;
+	int n;
+
+	trace("sys_write(%d, %p, %x)", fd, buf, len);
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	n = writefile(file, buf, len);
+	putfile(file);
+
+	return n;
+}
+
+/*
+ * pread64(2): positional read that leaves the file offset alone.
+ * Returns -EBADF for an unknown fd.
+ */
+int sys_pread64(int fd, void *buf, int len, ulong off)
+{
+	Ufile *file;
+	int n;
+
+	trace("sys_pread(%d, %p, %x, %lux)", fd, buf, len, off);
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	n = preadfile(file, buf, len, off);
+	putfile(file);
+	return n;
+}
+
+/*
+ * pwrite64(2): positional write that leaves the file offset alone.
+ * Returns -EBADF for an unknown fd.
+ */
+int sys_pwrite64(int fd, void *buf, int len, ulong off)
+{
+	Ufile *file;
+	int n;
+
+	trace("sys_pwrite(%d, %p, %x, %lux)", fd, buf, len, off);
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	n = pwritefile(file, buf, len, off);
+	putfile(file);
+	return n;
+}
+
+/* One element of the vector handed to sys_readv() and sys_writev(). */
+struct linux_iovec
+{
+ void *base; /* start of the buffer */
+ ulong len; /* number of bytes in the buffer */
+};
+
+/*
+ * writev(2): write the n buffers of vec in order with writefile().
+ * Stops at the first error or short write.  Returns the total number
+ * of bytes written, or the error if nothing was written before it.
+ */
+int sys_writev(int fd, void *vec, int n)
+{
+	struct linux_iovec *v = vec;
+	int total, i, w;
+	Ufile *file;
+
+	trace("sys_writev(%d, %p, %d)", fd, vec, n);
+
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	total = 0;
+	i = 0;
+	while(i < n){
+		w = writefile(file, v[i].base, v[i].len);
+		if(w < 0){
+			/* the error is reported only when no byte went out */
+			if(total == 0)
+				total = w;
+			break;
+		}
+		total += w;
+		if(w < v[i].len)	/* short write ends the transfer */
+			break;
+		i++;
+	}
+	putfile(file);
+
+	return total;
+}
+
+/*
+ * readv(2): fill the n buffers of vec in order with readfile().
+ * Stops at the first error or short read.  Returns the total number
+ * of bytes read, or the error if nothing was read before it.
+ */
+int sys_readv(int fd, void *vec, int n)
+{
+	struct linux_iovec *v = vec;
+	int total, i, r;
+	Ufile *file;
+
+	trace("sys_readv(%d, %p, %d)", fd, vec, n);
+
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	total = 0;
+	i = 0;
+	while(i < n){
+		r = readfile(file, v[i].base, v[i].len);
+		if(r < 0){
+			/* the error is reported only when no byte came in */
+			if(total == 0)
+				total = r;
+			break;
+		}
+		total += r;
+		if(r < v[i].len)	/* short read ends the transfer */
+			break;
+		i++;
+	}
+	putfile(file);
+
+	return total;
+}
+
+/*
+ * Reposition file->off.  whence 0 sets the offset to off, 1 adds off
+ * to the current offset, 2 sets it to the device-reported size plus
+ * off.  Returns 0 on success, -ESPIPE when the device has no size
+ * hook, -EINVAL for any other whence, or the (negative) result of the
+ * size hook.
+ */
+int seekfile(Ufile *file, vlong off, int whence)
+{
+	vlong end;
+
+	if(devtab[file->dev]->size == nil)
+		return -ESPIPE;
+
+	if(whence == 0){
+		file->off = off;
+		return 0;
+	}
+	if(whence == 1){
+		file->off += off;
+		return 0;
+	}
+	if(whence == 2){
+		end = devtab[file->dev]->size(file);
+		if(end < 0)
+			return end;
+		file->off = end + off;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * lseek(2) with a 32 bit offset.  Returns the new offset (truncated
+ * to 32 bits), or a negative error code cast to ulong.
+ *
+ * The offset arrives as an unsigned register value but is a signed
+ * quantity for the caller, so it is sign-extended before being handed
+ * to seekfile().  Passing the ulong directly would zero-extend it and
+ * turn e.g. lseek(fd, -10, SEEK_CUR) into a forward seek of almost 4GB.
+ */
+ulong sys_lseek(int fd, ulong off, int whence)
+{
+	Ufile *file;
+	int ret;
+
+	trace("sys_lseek(%d, %lux, %d)", fd, off, whence);
+
+	if((file = fdgetfile(fd)) == nil)
+		return (ulong)-EBADF;
+	ret = seekfile(file, (vlong)(long)off, whence);
+	if(ret == 0)
+		ret = file->off;
+	putfile(file);
+
+	return ret;
+}
+
+/*
+ * _llseek(2): seek with a 64 bit offset passed as two 32 bit halves.
+ * On success the new offset is stored through res (when non-nil) and
+ * 0 is returned; otherwise the error from seekfile() or -EBADF.
+ */
+int sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence)
+{
+	Ufile *file;
+	vlong off;
+	int ret;
+
+	trace("sys_llseek(%d, %lux, %lux, %p, %d)", fd, hioff, looff, res, whence);
+
+	file = fdgetfile(fd);
+	if(file == nil)
+		return -EBADF;
+	off = ((vlong)hioff<<32) | ((vlong)looff);
+	ret = seekfile(file, off, whence);
+	if(ret == 0 && res != nil)
+		*res = file->off;
+	putfile(file);
+
+	return ret;
+}
+
+/*
+ * umask(2): install the low nine permission bits of umask as the
+ * process' creation mask and return the previous mask.
+ */
+int sys_umask(int umask)
+{
+	int prev;
+
+	trace("sys_umask(%#o)", umask);
+
+	prev = current->umask;
+	current->umask = umask & 0777;
+	return prev;
+}
+
+/*
+ * Make the directory behind f the current working directory.
+ *
+ * Returns 0 on success, -ENOTDIR when f has no path, its device has
+ * no fstat hook, or it is not a directory, or the error from fstat.
+ *
+ * current->cwd is replaced by the path as seen below the process root
+ * (fsrootpath).  For ROOTDEV files a successful chdir() to the path
+ * also updates current->kcwd.
+ */
+int
+chdirfile(Ufile *f)
+{
+	Ustat s;
+	int err;
+
+	trace("chdirfile(%s)", f->path);
+
+	err = -ENOTDIR;
+	if(f->path == nil)
+		return err;
+	if(devtab[f->dev]->fstat == nil)
+		return err;
+	if((err = devtab[f->dev]->fstat(f, &s)) < 0)
+		return err;
+	/*
+	 * Compare the file type bits only: masking with ~0777 would also
+	 * keep S_ISUID/S_ISGID/S_ISVTX and wrongly reject setgid or
+	 * sticky directories.
+	 */
+	if((s.mode & S_IFMT) != S_IFDIR)
+		return -ENOTDIR;
+	free(current->cwd);
+	current->cwd = kstrdup(fsrootpath(f->path));
+	if(f->dev == ROOTDEV && chdir(f->path) == 0){
+		free(current->kcwd);
+		current->kcwd = kstrdup(f->path);
+	}
+	return 0;
+}
+
+/* fchdir(2): chdirfile() on the file behind fd; -EBADF for an unknown fd. */
+int
+sys_fchdir(int fd)
+{
+	Ufile *f;
+	int err;
+
+	trace("sys_fchdir(%d)", fd);
+
+	f = fdgetfile(fd);
+	if(f == nil)
+		return -EBADF;
+	err = chdirfile(f);
+	putfile(f);
+	return err;
+}
+
+/*
+ * fchown(2): forward to the device's fchown hook.
+ * Returns -EBADF for an unknown fd, -EPERM when the device has no hook.
+ */
+int
+sys_fchown(int fd, int uid, int gid)
+{
+	Ufile *f;
+	int err;
+
+	trace("sys_fchown(%d, %d, %d)", fd, uid, gid);
+
+	f = fdgetfile(fd);
+	if(f == nil)
+		return -EBADF;
+	if(devtab[f->dev]->fchown != nil)
+		err = devtab[f->dev]->fchown(f, uid, gid);
+	else
+		err = -EPERM;
+	putfile(f);
+
+	return err;
+}
+
+/*
+ * fchmod(2): forward to the device's fchmod hook.
+ * Returns -EBADF for an unknown fd, -EPERM when the device has no hook.
+ */
+int
+sys_fchmod(int fd, int mode)
+{
+	Ufile *f;
+	int err;
+
+	trace("sys_fchmod(%d, %#o)", fd, mode);
+
+	f = fdgetfile(fd);
+	if(f == nil)
+		return -EBADF;
+	if(devtab[f->dev]->fchmod != nil)
+		err = devtab[f->dev]->fchmod(f, mode);
+	else
+		err = -EPERM;
+	putfile(f);
+
+	return err;
+}
+
+/*
+ * ftruncate(2): forward to the device's ftruncate hook.
+ * Returns -EBADF for an unknown fd, -EPERM when the device has no hook.
+ */
+int
+sys_ftruncate(int fd, ulong size)
+{
+	Ufile *f;
+	int err;
+
+	trace("sys_ftruncate(%d, %lux)", fd, size);
+
+	f = fdgetfile(fd);
+	if(f == nil)
+		return -EBADF;
+	if(devtab[f->dev]->ftruncate != nil)
+		err = devtab[f->dev]->ftruncate(f, (uvlong)size);
+	else
+		err = -EPERM;
+	putfile(f);
+
+	return err;
+}
+
+/* Give the current process a fresh descriptor table and a umask of 022. */
+void initfile(void)
+{
+ current->fdtab = newfdtab();
+ current->umask = 022;
+}
+
+/* Detach proc's descriptor table, if any, and release it with putfdtab(). */
+void exitfile(Uproc *proc)
+{
+	Fdtab *tab;
+
+	tab = proc->fdtab;
+	if(tab == nil)
+		return;
+	proc->fdtab = nil;
+	putfdtab(tab);
+}
+
+/*
+ * Set up new's descriptor table from the current process' table via
+ * getfdtab(tab, copy).  A process without a table passes nil on.
+ */
+void clonefile(Uproc *new, int copy)
+{
+	Fdtab *tab;
+
+	tab = current->fdtab;
+	if(tab != nil)
+		new->fdtab = getfdtab(tab, copy);
+	else
+		new->fdtab = nil;
+}
+
+/*
+ * Close every descriptor of the current process that is marked
+ * FD_CLOEXEC.  The whole scan runs with the table locked.
+ */
+void closexfds(void)
+{
+	Fdtab *tab;
+	Ufile *f;
+	int i;
+
+	tab = current->fdtab;
+	if(tab == nil)
+		return;
+	qlock(tab);
+	for(i = 0; i < tab->nfd; i++){
+		f = tab->fd[i].file;
+		if(f != nil && (tab->fd[i].flags & FD_CLOEXEC) != 0){
+			tab->fd[i].file = nil;
+			tab->fd[i].flags = 0;
+
+			putfile(f);
+		}
+	}
+	qunlock(tab);
+}
+
+/* flock(2) stub: only traces the call and reports success; fd is not checked. */
+int sys_flock(int fd, int cmd)
+{
+ trace("sys_flock(%d, %d)", fd, cmd);
+ return 0;
+}
+
+/* fsync(2) stub: only traces the call and reports success; fd is not checked. */
+int sys_fsync(int fd)
+{
+ trace("sys_fsync(%d)", fd);
+ return 0;
+}
+
--- /dev/null
+/* error */
+int mkerror(void);
+#pragma varargck type "E" int
+int Efmt(Fmt *e);
+int sys_nosys(void);
+
+/* linuxcall */
+int linuxcall(void);
+
+/* trap */
+void inittrap(void);
+void retuser(void);
+
+/* bits */
+void incref(Ref *);
+int decref(Ref *);
+void jumpstart(ulong addr, ulong *stack);
+void jumpureg(void *ureg);
+void linux_sigreturn(void);
+void linux_rtsigreturn(void);
+
+/* trace */
+void inittrace(void);
+void exittrace(Uproc *proc);
+void clonetrace(Uproc *new, int copy);
+void tprint(char *fmt, ...);
+#pragma varargck argpos tprint 1
+#define trace if(debug)tprint
+
+/* proc */
+void initproc(void);
+void exitproc(Uproc *proc, int code, int group);
+void stopproc(Uproc *proc, int code, int group);
+void contproc(Uproc *proc, int code, int group);
+int procfork(void (*fproc)(void *aux), void *aux, int flags);
+Uproc* getproc(int tid);
+Uproc* getprocn(int n);
+int threadcount(int pid);
+void zapthreads(void);
+void setprocname(char *s);
+int notifyme(int on);
+void wakeme(int on);
+int sleepproc(QLock *l, int flags);
+Uwait* addwaitq(Uwaitq *q);
+void delwaitq(Uwait *w);
+int sleepq(Uwaitq *q, QLock *l, int flags);
+int wakeq(Uwaitq *q, int nwake);
+int requeue(Uwaitq *q1, Uwaitq *q2, int nrequeue);
+int killproc(Uproc *p, Usiginfo *info, int group);
+void setalarm(vlong t);
+
+int sys_waitpid(int pid, int *pexit, int opt);
+int sys_wait4(int pid, int *pexit, int opt, void *prusage);
+int sys_exit(int code);
+int sys_exit_group(int code);
+int sys_linux_clone(int flags, void *newstack, int *parenttidptr, int *tlsdescr, void *childtidptr);
+int sys_fork(void);
+int sys_vfork(void);
+int sys_getpid(void);
+int sys_getppid(void);
+int sys_gettid(void);
+int sys_setpgid(int pid, int pgid);
+int sys_getpgid(int pid);
+int sys_setpgrp(int pid);
+int sys_getpgrp(void);
+int sys_getuid(void);
+int sys_getgid(void);
+int sys_setgid(int gid);
+int sys_setuid(int uid);
+int sys_setresuid(int ruid, int euid, int suid);
+int sys_getresuid(int *ruid, int *euid, int *suid);
+int sys_setresgid(int rgid, int egid, int sgid);
+int sys_getresgid(int *rgid, int *egid, int *sgid);
+int sys_setreuid(int ruid, int euid);
+int sys_setregid(int rgid, int egid);
+int sys_uname(void *);
+int sys_personality(ulong p);
+int sys_setsid(void);
+int sys_getsid(int pid);
+int sys_getgroups(int size, int *groups);
+int sys_setgroups(int size, int *groups);
+
+int sys_kill(int pid, int sig);
+int sys_tkill(int tid, int sig);
+int sys_tgkill(int pid, int tid, int sig);
+int sys_rt_sigqueueinfo(int pid, int sig, void *info);
+
+int sys_set_tid_address(int *tidptr);
+
+int sys_sched_setscheduler(int pid, int policy, void *param);
+int sys_sched_getscheduler(int pid);
+int sys_sched_setparam(int pid, void *param);
+int sys_sched_getparam(int pid, void *param);
+int sys_sched_yield(void);
+
+int sys_getrlimit(long resource, void *rlim);
+int sys_setrlimit(long resource, void *rlim);
+
+/* signal */
+void initsignal(void);
+void exitsignal(void);
+void clonesignal(Uproc *new, int copyhand, int newproc);
+void settty(Ufile *tty);
+Ufile* gettty(void);
+#pragma varargck type "S" int
+int Sfmt(Fmt *f);
+
+int wantssignal(Uproc *proc, int sig);
+int ignoressignal(Uproc *proc, int sig);
+int signalspending(Uproc *proc);
+
+void handlesignals(void);
+int sendsignal(Uproc *proc, Usiginfo *info, int group);
+
+void siginfo2linux(Usiginfo *, void *);
+void linux2siginfo(void *, Usiginfo *);
+
+int sys_sigaltstack(void *stk, void *ostk);
+int sys_rt_sigaction(int sig, void *pact, void *poact, int setsize);
+int sys_rt_sigpending(uchar *set, int setsize);
+int sys_rt_sigprocmask(int how, uchar *act, uchar *oact, int setsize);
+int sys_rt_sigsuspend(uchar *set, int setsize);
+int sys_sigreturn(void);
+int sys_rt_sigreturn(void);
+
+int sys_setitimer(int which, void *value, void *ovalue);
+int sys_getitimer(int which, void *value);
+int sys_alarm(long seconds);
+
+/* file */
+void initfile(void);
+void exitfile(Uproc *proc);
+void clonefile(Uproc *new, int copy);
+void closexfds(void);
+Ufile *procfdgetfile(Uproc *proc, int fd);
+Ufile* fdgetfile(int fd);
+Ufile* getfile(Ufile *file);
+void putfile(Ufile *file);
+int newfd(Ufile *file, int flags);
+int chdirfile(Ufile *file);
+int readfile(Ufile *file, void *buf, int len);
+int writefile(Ufile *file, void *buf, int len);
+int preadfile(Ufile *file, void *buf, int len, vlong off);
+int pwritefile(Ufile *file, void *buf, int len, vlong off);
+int sys_dup(int fd);
+int sys_dup2(int old, int new);
+int sys_fcntl(int fd, int cmd, int arg);
+int sys_close(int fd);
+int sys_ioctl(int fd, int cmd, void *arg);
+int sys_read(int fd, void *buf, int len);
+int sys_readv(int fd, void *vec, int n);
+int sys_pread64(int fd, void *buf, int len, ulong off);
+int sys_write(int fd, void *buf, int len);
+int sys_pwrite64(int fd, void *buf, int len, ulong off);
+int sys_writev(int fd, void *vec, int n);
+ulong sys_lseek(int fd, ulong off, int whence);
+int sys_llseek(int fd, ulong hioff, ulong looff, vlong *res, int whence);
+int sys_umask(int umask);
+int sys_flock(int fd, int cmd);
+int sys_fsync(int fd);
+int sys_fchdir(int fd);
+int sys_getcwd(char *buf, int len);
+int sys_fchmod(int fd, int mode);
+int sys_fchown(int fd, int uid, int gid);
+int sys_ftruncate(int fd, ulong size);
+
+/* poll */
+void pollwait(Ufile *f, Uwaitq *q, void *t);
+int sys_poll(void *p, int nfd, long timeout);
+int sys_select(int nfd, ulong *rfd, ulong *wfd, ulong *efd, void *ptv);
+
+/* mem */
+void* kmalloc(int size);
+void* kmallocz(int size, int zero);
+void* krealloc(void *ptr, int size);
+char* kstrdup(char *s);
+char* ksmprint(char *fmt, ...);
+#pragma varargck argpos ksmprint 1
+
+ulong pagealign(ulong addr);
+
+void initmem(void);
+void exitmem(void);
+void clonemem(Uproc *new, int copy);
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe);
+void* mapstack(int size);
+void mapdata(ulong base);
+void unmapuserspace(void);
+int okaddr(void *ptr, int len, int write);
+
+ulong sys_linux_mmap(void *a);
+ulong sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff);
+int sys_munmap(ulong addr, ulong len);
+ulong sys_brk(ulong bk);
+int sys_mprotect(ulong addr, ulong len, int prot);
+int sys_msync(ulong addr, ulong len, int flags);
+ulong sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr);
+
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3);
+
+/* exec */
+int sys_execve(char *name, char *argv[], char *envp[]);
+
+/* time */
+void inittime(void);
+int sys_time(long *p);
+int sys_gettimeofday(void *tvp, void *tzp);
+int sys_clock_gettime(int clock, void *t);
+int sys_nanosleep(void *rqp, void *rmp);
+int proctimes(Uproc *p, ulong *t);
+int sys_times(void *times);
+
+/* tls */
+void inittls(void);
+void clonetls(Uproc *new);
+
+int sys_set_thread_area(void *pinfo);
+int sys_get_thread_area(void *pinfo);
+int sys_modify_ldt(int func, void *data, int count);
+
+/* bufproc */
+void *newbufproc(int fd);
+void freebufproc(void *bp);
+int readbufproc(void *bp, void *data, int len, int peek, int noblock);
+int pollbufproc(void *bp, Ufile *file, void *tab);
+int nreadablebufproc(void *bp);
+
+/* main */
+void panic(char *msg, ...);
+int onstack(long *stk, int (*func)(void *arg), void *arg);
+void profme(void);
+
+/* stat */
+int ufstat(int fd, Ustat *ps);
+Udirent *newdirent(char *path, char *name, int mode);
+
+int sys_getxattr(char *path, char *name, void *value, int size);
+int sys_lgetxattr(char *path, char *name, void *value, int size);
+int sys_fgetxattr(int fd, char *name, void *value, int size);
+int sys_setxattr(char *path, char *name, void *value, int flags, int size);
+int sys_lsetxattr(char *path, char *name, void *value, int flags, int size);
+int sys_fsetxattr(int fd, char *name, void *value, int size, int flags);
+
+int sys_linux_fstat(int fd, void *st);
+int sys_linux_fstat64(int fd, void *st);
+int sys_linux_getdents(int fd, void *buf, int nbuf);
+int sys_linux_getdents64(int fd, void *buf, int nbuf);
+int sys_linux_lstat(char *path, void *st);
+int sys_linux_lstat64(char *path, void *st);
+int sys_linux_stat(char *path, void *st);
+int sys_linux_stat64(char *path, void *st);
+
+int sys_statfs(char *name, void *pstatfs);
+
+/* fs */
+void fsmount(Udev *dev, char *path);
+
+char* allocpath(char *base, char *prefix, char *name);
+char* fullpath(char *base, char *name);
+char* shortpath(char *base, char *path);
+char* fsfullpath(char *path);
+char* fsrootpath(char *path);
+char* basepath(char *p, char **ps);
+ulong hashpath(char *s);
+
+int fsaccess(char *path, int mode);
+int fschmod(char *path, int mode);
+int fschown(char *path, int uid, int gid, int link);
+int fslink(char *old, char *new, int sym);
+int fsmkdir(char *path, int mode);
+int fsopen(char *path, int mode, int perm, Ufile **pf);
+int fsreadlink(char *path, char *buf, int len);
+int fsrename(char *old, char *new);
+int fsstat(char *path, int link, Ustat *ps);
+int fstruncate(char *path, vlong size);
+int fsunlink(char *path, int rmdir);
+int fsutime(char *path, int atime, int mtime);
+
+int sys_access(char *name, int mode);
+int sys_chdir(char *name);
+int sys_chroot(char *name);
+int sys_chmod(char *name, int mode);
+int sys_chown(char *name, int uid, int gid);
+int sys_creat(char *name, int perm);
+int sys_lchown(char *name, int uid, int gid);
+int sys_link(char *old, char *new);
+int sys_open(char *name, int mode, int perm);
+int sys_readlink(char *name, char *buf, int len);
+int sys_rename(char *from, char *to);
+int sys_rmdir(char *name);
+int sys_symlink(char *old, char *new);
+int sys_truncate(char *name, ulong size);
+int sys_unlink(char *name);
+int sys_utime(char *name, void *times);
+int sys_utimes(char *name, void *tvp);
+int sys_mkdir(char *name, int mode);
+
+/* drivers */
+void rootdevinit(void);
+void sockdevinit(void);
+int sys_linux_socketcall(int call, int *arg);
+void pipedevinit(void);
+int sys_pipe(int *fds);
+void fddevinit(void);
+void ptsdevinit(void);
+void dspdevinit(void);
+void miscdevinit(void);
+void ptydevinit(void);
+void consdevinit(void);
+void procdevinit(void);
+
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Mount Mount;
+
+/* One entry of the mount table: a path prefix and the device serving it. */
+struct Mount
+{
+ Mount *next; /* next entry in the mtab list */
+ Udev *dev; /* device handling paths below this prefix */
+ int npath; /* strlen(path) */
+ char path[]; /* prefix, stored inline after the header */
+};
+
+/* head of the mount list; fsmount() keeps longer prefixes first */
+static Mount *mtab;
+
+/*
+ * Register dev as the handler for paths below path.  A nil dev is
+ * ignored.  The list is kept ordered by decreasing prefix length
+ * (ties by decreasing strcmp order, newest first among equals), so
+ * path2mount() meets the longest matching prefix first.
+ */
+void
+fsmount(Udev *dev, char *path)
+{
+	Mount *m, *x, **p;
+	int n;
+
+	if(dev == nil)
+		return;
+
+	n = strlen(path);
+	m = kmalloc(sizeof(*m) + n + 1);
+	m->dev = dev;
+	m->next = nil;
+	m->npath = n;
+	strcpy(m->path, path);
+
+	/* walk past every entry that must stay in front of the new one */
+	for(p = &mtab; (x = *p) != nil; p = &x->next){
+		if(m->npath > x->npath)
+			break;
+		if(m->npath == x->npath && strcmp(m->path, x->path) >= 0)
+			break;
+	}
+	m->next = *p;
+	*p = m;
+}
+
+/*
+ * Hash a NUL terminated string: starting from 0, each byte updates
+ * the hash as h = h*13 + (byte - 'a').
+ */
+ulong
+hashpath(char *s)
+{
+	ulong h;
+
+	h = 0;
+	while(*s != 0){
+		h = (h * 13) + (*s - 'a');
+		s++;
+	}
+	return h;
+}
+
+/*
+ * Split p at its last '/'.  Returns a newly allocated copy of the
+ * directory part ("/" when the slash is the first character) and, if
+ * ps is non-nil, stores a newly allocated copy of the final element
+ * in *ps.  When p has no '/' or ends in '/', returns nil and sets
+ * *ps to nil.
+ */
+char*
+basepath(char *p, char **ps)
+{
+	char *dir, *slash;
+	int n;
+
+	slash = strrchr(p, '/');
+	if(slash == nil || slash[1] == 0){
+		if(ps)
+			*ps = nil;
+		return nil;
+	}
+	if(ps)
+		*ps = kstrdup(slash+1);
+	n = slash - p;
+	if(n == 0)
+		n = 1;	/* keep the leading slash for entries of "/" */
+	dir = kmalloc(n+1);
+	memmove(dir, p, n);
+	dir[n] = 0;
+	return dir;
+}
+
+/*
+ * Build base + "/" + prefix + name in a newly allocated string.
+ * prefix may be nil.  The '/' is left out when both prefix and name
+ * are empty, so the result is then a plain copy of base.
+ */
+char*
+allocpath(char *base, char *prefix, char *name)
+{
+	char *buf, *w;
+	int nbase, nname, nprefix;
+
+	nbase = strlen(base);
+	nname = strlen(name);
+	nprefix = 0;
+	if(prefix)
+		nprefix = strlen(prefix);
+
+	buf = kmalloc(nbase+nname+nprefix+2);
+	w = buf;
+	memmove(w, base, nbase);
+	w += nbase;
+	if(nname || nprefix)
+		*w++ = '/';
+	if(nprefix){
+		memmove(w, prefix, nprefix);
+		w += nprefix;
+	}
+	memmove(w, name, nname+1);	/* includes the terminating NUL */
+	return buf;
+}
+
+/*
+ * Return a newly allocated, cleanname()d path for name.  Names that
+ * start with '/' or '#' are taken as they are; anything else is
+ * appended to base.  Returns nil for such a name when base is nil.
+ */
+char*
+fullpath(char *base, char *name)
+{
+	char *s;
+
+	if(*name == '/' || *name == '#')
+		s = kstrdup(name);
+	else if(base)
+		s = allocpath(base, nil, name);
+	else
+		return nil;
+	if(s != nil)
+		cleanname(s);
+	return s;
+}
+
+/*
+ * Express path relative to the directory base.
+ *
+ * When path lies at or below base, returns a pointer to the part of
+ * path after base (without a leading '/'), or "." when the two are
+ * equal.  Otherwise path is returned unchanged.  Nothing is allocated.
+ *
+ * The match has to end on a path element boundary: base "/foo" must
+ * not strip "/foobar" down to "bar".  A base that is empty or already
+ * ends in '/' matches as a plain prefix.
+ */
+char*
+shortpath(char *base, char *path)
+{
+	int n;
+
+	n = strlen(base);
+	/* a successful strncmp() implies path is at least n bytes long */
+	if(strncmp(path, base, n) != 0)
+		return path;
+	if(n > 0 && base[n-1] != '/' && path[n] != '/' && path[n] != 0)
+		return path;
+	path += n;
+	if(*path == '/')
+		path++;
+	if(*path == 0)
+		path = ".";
+	return path;
+}
+
+/*
+ * Turn a path given by the emulated program into a newly allocated
+ * full path: resolve it against current->cwd and, when the process
+ * has a root set, prepend that root.  Returns nil when fullpath()
+ * does.
+ */
+char*
+fsfullpath(char *path)
+{
+	char *root, *rooted;
+
+	path = fullpath(current->cwd, path);
+	if(path == nil)
+		return nil;
+	root = current->root;
+	if(root == nil)
+		return path;
+	/* path+1 drops the first character so allocpath() adds the '/' */
+	rooted = allocpath(root, nil, path+1);
+	free(path);
+	return rooted;
+}
+
+/*
+ * Inverse of the root handling in fsfullpath(): map a full path to
+ * the path seen below current->root.  Returns "/" for the root
+ * itself, a pointer into path (starting at the '/' that follows the
+ * root prefix) for paths below it, and path unchanged when no root is
+ * set or path is not below the root.  Nothing is allocated.
+ */
+char*
+fsrootpath(char *path)
+{
+	char *root, *rel;
+
+	root = current->root;
+	if(root == nil)
+		return path;
+	rel = shortpath(root, path);
+	if(rel == path)
+		return path;	/* nothing was stripped */
+	/*
+	 * shortpath() answers exactly "." for the root itself.  Testing
+	 * only the first character would also catch dot-files directly
+	 * below the root, e.g. "/root/.config".
+	 */
+	if(strcmp(rel, ".") == 0)
+		return "/";
+	/*
+	 * Step back onto the separating '/'.  If the character before
+	 * rel is not a '/', path merely shares a textual prefix with the
+	 * root and is left alone.
+	 */
+	if(rel > path && rel[-1] == '/')
+		return rel-1;
+	return path;
+}
+
+/*
+ * Find the first mount table entry whose path is a prefix of path
+ * ending on an element boundary (end of string or '/').
+ * Returns nil when nothing matches.
+ */
+static Mount*
+path2mount(char *path)
+{
+	Mount *m;
+	char c;
+
+	for(m = mtab; m != nil; m = m->next){
+		if(strncmp(path, m->path, m->npath) != 0)
+			continue;
+		c = path[m->npath];
+		if(c == '\0' || c == '/')
+			return m;
+	}
+	return nil;
+}
+
+/* Device of the mount entry matching path, or nil when none matches. */
+static Udev*
+path2dev(char *path)
+{
+	Mount *m;
+
+	m = path2mount(path);
+	if(m == nil)
+		return nil;
+	return m->dev;
+}
+
+/*
+ * Enter one level of path resolution.  When current->linkloop already
+ * exceeds 8, returns -ELOOP (also stored in *perr if perr is non-nil)
+ * without counting the level.  Otherwise increments the counter and
+ * returns 0; the caller then has to call fsleave().
+ */
+static int
+fsenter(int *perr)
+{
+	if(current->linkloop > 8){
+		if(perr != nil)
+			*perr = -ELOOP;
+		return -ELOOP;
+	}
+	current->linkloop++;
+	return 0;
+}
+
+/* Leave one level of path resolution; undoes a successful fsenter(). */
+static void
+fsleave(void)
+{
+ current->linkloop--;
+}
+
+/*
+ * getcwd(2): copy current->cwd including its NUL into buf.
+ * Returns the number of bytes copied (NUL included), or -ERANGE when
+ * len is too small.
+ */
+int sys_getcwd(char *buf, int len)
+{
+	char *cwd;
+	int need;
+
+	trace("sys_getcwd(%p, %x)", buf, len);
+
+	cwd = current->cwd;
+	need = strlen(cwd)+1;
+	if(len < need)
+		return -ERANGE;
+	memmove(buf, cwd, need);
+	return need;
+}
+
+/*
+ * Open the full path through the device mounted for it.  *pf is set
+ * to nil first and is filled in by the device's open hook.
+ * Returns the hook's result, -ENOENT when no device matches or the
+ * device has no open hook, or -ELOOP from fsenter().
+ */
+int
+fsopen(char *path, int mode, int perm, Ufile **pf)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsopen(%s, %#o, %#o)", path, mode, perm);
+
+	*pf = nil;
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->open != nil)
+		err = dev->open(path, mode, perm, pf);
+	else
+		err = -ENOENT;
+	fsleave();
+	return err;
+}
+
+/*
+ * Check access to the full path.  Returns -ENOENT when no device is
+ * mounted for it, 0 when the device has no access hook, otherwise the
+ * hook's result; -ELOOP from fsenter().
+ */
+int
+fsaccess(char *path, int mode)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsaccess(%s, %#o)", path, mode);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev == nil)
+		err = -ENOENT;
+	else if(dev->access == nil)
+		err = 0;
+	else
+		err = dev->access(path, mode);
+	fsleave();
+
+	return err;
+}
+
+/* access(2): fsaccess() on the resolved name; -EFAULT if it cannot be resolved. */
+int sys_access(char *name, int mode)
+{
+	char *path;
+	int err;
+
+	trace("sys_access(%s, %#o)", name, mode);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsaccess(path, mode);
+	free(path);
+
+	return err;
+}
+
+/*
+ * open(2): resolve name, open it with fsopen() and install the file
+ * in the descriptor table.  Returns the result of newfd() on success,
+ * the fsopen() error otherwise, or -EFAULT if the name cannot be
+ * resolved.
+ */
+int sys_open(char *name, int mode, int perm)
+{
+	Ufile *file;
+	char *path;
+	int err;
+
+	trace("sys_open(%s, %#o, %#o)", name, mode, perm);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsopen(path, mode, perm, &file);
+	free(path);
+	if(err != 0)
+		return err;
+
+	/* NOTE(review): FD_CLOEXEC is passed for every open; confirm against newfd() */
+	return newfd(file, FD_CLOEXEC);
+}
+
+/*
+ * creat(2): by definition the same as
+ * open(name, O_WRONLY|O_CREAT|O_TRUNC, perm).
+ * O_WRONLY has to be given explicitly: the access mode 0 is O_RDONLY,
+ * which would hand back a descriptor the caller cannot write to.
+ */
+int sys_creat(char *name, int perm)
+{
+	trace("sys_create(%s, %#o)", name, perm);
+
+	return sys_open(name, O_WRONLY|O_CREAT|O_TRUNC, perm);
+}
+
+/*
+ * Stat the full path through its device.  *ps is zeroed before the
+ * device's stat hook fills it in.  Returns the hook's result, -EPERM
+ * when no device matches or it has no stat hook (ps is then left
+ * untouched), or -ELOOP from fsenter().
+ */
+int
+fsstat(char *path, int link, Ustat *ps)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsstat(%s, %d)", path, link);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->stat != nil){
+		memset(ps, 0, sizeof(Ustat));
+		err = dev->stat(path, link, ps);
+	} else
+		err = -EPERM;
+	fsleave();
+	return err;
+}
+
+/*
+ * chdir(2): open the resolved name read-only and make it the working
+ * directory with chdirfile().  Returns -EFAULT if the name cannot be
+ * resolved, otherwise the fsopen() or chdirfile() result.
+ */
+int
+sys_chdir(char *name)
+{
+	Ufile *f;
+	char *path;
+	int err;
+
+	trace("sys_chdir(%s)", name);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsopen(path, O_RDONLY, 0, &f);
+	free(path);
+	if(err != 0)
+		return err;
+	err = chdirfile(f);
+	putfile(f);
+	return err;
+}
+
+/*
+ * chroot(2): make the directory name the root for path resolution of
+ * the current process (current->root; nil means "/").
+ *
+ * Returns 0 on success, -EFAULT if the name cannot be resolved,
+ * -ENOTDIR when the file has no path, its device has no fstat hook,
+ * or it is not a directory, otherwise the fsopen() or fstat error.
+ *
+ * Like the other path syscalls, name is resolved with fsfullpath()
+ * first, so relative names work and a process that already has a
+ * root stays below it.
+ */
+int sys_chroot(char *name)
+{
+	Ufile *f;
+	Ustat s;
+	int err;
+
+	trace("sys_chroot(%s)", name);
+
+	if((name = fsfullpath(name)) == nil)
+		return -EFAULT;
+	f = nil;
+	err = fsopen(name, O_RDONLY, 0, &f);
+	free(name);
+	if(err < 0)
+		goto out;
+	err = -ENOTDIR;
+	if(f->path == nil)
+		goto out;
+	if(devtab[f->dev]->fstat == nil)
+		goto out;
+	if((err = devtab[f->dev]->fstat(f, &s)) < 0)
+		goto out;
+	err = -ENOTDIR;
+	/* compare the type bits only; setgid/sticky directories are fine */
+	if((s.mode & S_IFMT) != S_IFDIR)
+		goto out;
+	err = 0;
+	free(current->root);
+	if(strcmp(f->path, "/") == 0){
+		current->root = nil;
+	} else {
+		current->root = kstrdup(f->path);
+	}
+out:
+	/* f is still nil when fsopen() failed */
+	if(f != nil)
+		putfile(f);
+	return err;
+}
+
+/*
+ * Change owner and group of the full path through its device.
+ * link is passed on to the device's chown hook unchanged.
+ * Returns the hook's result, -EPERM when no device matches or it has
+ * no chown hook, or -ELOOP from fsenter().
+ */
+int
+fschown(char *path, int uid, int gid, int link)
+{
+	Udev *dev;
+	int err;
+
+	trace("fschown(%s, %d, %d, %d)", path, uid, gid, link);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->chown != nil)
+		err = dev->chown(path, uid, gid, link);
+	else
+		err = -EPERM;
+	fsleave();
+	return err;
+}
+
+/* chown(2): fschown() with link=0; -EFAULT if the name cannot be resolved. */
+int sys_chown(char *name, int uid, int gid)
+{
+	char *path;
+	int err;
+
+	trace("sys_chown(%s, %d, %d)", name, uid, gid);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fschown(path, uid, gid, 0);
+	free(path);
+
+	return err;
+}
+
+/* lchown(2): fschown() with link=1; -EFAULT if the name cannot be resolved. */
+int sys_lchown(char *name, int uid, int gid)
+{
+	char *path;
+	int err;
+
+	trace("sys_lchown(%s, %d, %d)", name, uid, gid);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fschown(path, uid, gid, 1);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Read the target of the link at the full path into buf (len bytes)
+ * through its device.  Returns the hook's result, -EPERM when no
+ * device matches or it has no readlink hook, or -ELOOP from fsenter().
+ */
+int
+fsreadlink(char *path, char *buf, int len)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsreadlink(%s)", path);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->readlink != nil)
+		err = dev->readlink(path, buf, len);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* readlink(2): fsreadlink() on the resolved name; -EFAULT if it cannot be resolved. */
+int sys_readlink(char *name, char *buf, int len)
+{
+	char *path;
+	int err;
+
+	trace("sys_readlink(%s, %p, %x)", name, buf, len);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsreadlink(path, buf, len);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Rename old to new (both full paths) through old's device.
+ * Returns -EPERM when no device matches old or it has no rename hook,
+ * -EXDEV when new belongs to a different device, otherwise the hook's
+ * result; -ELOOP from fsenter().
+ */
+int
+fsrename(char *old, char *new)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsrename(%s, %s)", old, new);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(old);
+	if(dev == nil || dev->rename == nil)
+		err = -EPERM;
+	else if(dev != path2dev(new))
+		err = -EXDEV;
+	else
+		err = dev->rename(old, new);
+	fsleave();
+
+	return err;
+}
+
+
+/* rename(2): fsrename() on both resolved names; -EFAULT if either cannot be resolved. */
+int sys_rename(char *from, char *to)
+{
+	char *src, *dst;
+	int err;
+
+	trace("sys_rename(%s, %s)", from, to);
+
+	src = fsfullpath(from);
+	if(src == nil)
+		return -EFAULT;
+	dst = fsfullpath(to);
+	if(dst == nil){
+		free(src);
+		return -EFAULT;
+	}
+	err = fsrename(src, dst);
+	free(src);
+	free(dst);
+
+	return err;
+}
+
+/*
+ * Create the directory at the full path through its device.
+ * Returns the hook's result, -EPERM when no device matches or it has
+ * no mkdir hook, or -ELOOP from fsenter().
+ */
+int
+fsmkdir(char *path, int mode)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsmkdir(%s, %#o)", path, mode);
+
+	if(fsenter(&err) < 0)
+		return err;
+
+	dev = path2dev(path);
+	if(dev != nil && dev->mkdir != nil)
+		err = dev->mkdir(path, mode);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* mkdir(2): fsmkdir() on the resolved name; -EFAULT if it cannot be resolved. */
+int sys_mkdir(char *name, int mode)
+{
+	char *path;
+	int err;
+
+	trace("sys_mkdir(%s, %#o)", name, mode);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsmkdir(path, mode);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Set access and modification time of the full path through its
+ * device.  Returns the hook's result, -EPERM when no device matches
+ * or it has no utime hook, or -ELOOP from fsenter().
+ */
+int
+fsutime(char *path, int atime, int mtime)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsutime(%s, %d, %d)", path, atime, mtime);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->utime != nil)
+		err = dev->utime(path, atime, mtime);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* Argument layout read by sys_utime(). */
+struct linux_utimbuf
+{
+ long atime; /* new access time, passed to fsutime() */
+ long mtime; /* new modification time, passed to fsutime() */
+};
+
+/*
+ * utime(2): set both file times from *times, or to the current time
+ * when times is nil.  Returns -EFAULT if the name cannot be resolved.
+ */
+int sys_utime(char *name, void *times)
+{
+	struct linux_utimbuf *t = times;
+	char *path;
+	long now;
+	int err;
+
+	trace("sys_utime(%s, %p)", name, times);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	if(t == nil){
+		now = time(0);
+		err = fsutime(path, now, now);
+	}else
+		err = fsutime(path, t->atime, t->mtime);
+	free(path);
+
+	return err;
+}
+
+/*
+ * utimes(2): like sys_utime() but takes two timevals (access time,
+ * then modification time); only their tv_sec fields are used.
+ * A nil tvp means the current time.
+ */
+int sys_utimes(char *name, void *tvp)
+{
+	struct linux_timeval *t = tvp;
+	char *path;
+	long now;
+	int err;
+
+	trace("sys_utimes(%s, %p)", name, tvp);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	if(t == nil){
+		now = time(0);
+		err = fsutime(path, now, now);
+	}else
+		err = fsutime(path, t[0].tv_sec, t[1].tv_sec);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Change the mode of the full path through its device.
+ * Returns the hook's result, -EPERM when no device matches or it has
+ * no chmod hook, or -ELOOP from fsenter().
+ */
+int
+fschmod(char *path, int mode)
+{
+	Udev *dev;
+	int err;
+
+	trace("fschmod(%s, %#o)", path, mode);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->chmod != nil)
+		err = dev->chmod(path, mode);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* chmod(2): fschmod() on the resolved name; -EFAULT if it cannot be resolved. */
+int sys_chmod(char *name, int mode)
+{
+	char *path;
+	int err;
+
+	trace("sys_chmod(%s, %#o)", name, mode);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fschmod(path, mode);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Truncate the file at the full path to size through its device.
+ * Returns the hook's result, -EPERM when no device matches or it has
+ * no truncate hook, or -ELOOP from fsenter().
+ */
+int
+fstruncate(char *path, vlong size)
+{
+	Udev *dev;
+	int err;
+
+	trace("fstruncate(%s, %llx)", path, size);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->truncate != nil)
+		err = dev->truncate(path, size);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* truncate(2): fstruncate() on the resolved name; -EFAULT if it cannot be resolved. */
+int sys_truncate(char *name, ulong size)
+{
+	char *path;
+	int err;
+
+	trace("sys_truncate(%s, %lux)", name, size);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fstruncate(path, size);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Remove the entry at the full path through its device.  rmdir is
+ * passed on to the unlink hook (sys_unlink() uses 0, sys_rmdir() 1).
+ * Returns the hook's result, -EPERM when no device matches or it has
+ * no unlink hook, or -ELOOP from fsenter().
+ */
+int
+fsunlink(char *path, int rmdir)
+{
+	Udev *dev;
+	int err;
+
+	trace("fsunlink(%s, %d)", path, rmdir);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(path);
+	if(dev != nil && dev->unlink != nil)
+		err = dev->unlink(path, rmdir);
+	else
+		err = -EPERM;
+	fsleave();
+
+	return err;
+}
+
+/* unlink(2): fsunlink() with rmdir=0; -EFAULT if the name cannot be resolved. */
+int sys_unlink(char *name)
+{
+	char *path;
+	int err;
+
+	trace("sys_unlink(%s)", name);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsunlink(path, 0);
+	free(path);
+
+	return err;
+}
+
+/* rmdir(2): fsunlink() with rmdir=1; -EFAULT if the name cannot be resolved. */
+int sys_rmdir(char *name)
+{
+	char *path;
+	int err;
+
+	trace("sys_rmdir(%s)", name);
+
+	path = fsfullpath(name);
+	if(path == nil)
+		return -EFAULT;
+	err = fsunlink(path, 1);
+	free(path);
+
+	return err;
+}
+
+/*
+ * Create the link new referring to old through new's device; sym
+ * selects a symbolic link.  Returns -EPERM when no device matches new
+ * or it has no link hook, -EXDEV for a non-symbolic link whose old
+ * path belongs to a different device, otherwise the hook's result;
+ * -ELOOP from fsenter().
+ */
+int
+fslink(char *old, char *new, int sym)
+{
+	Udev *dev;
+	int err;
+
+	trace("fslink(%s, %s, %d)", old, new, sym);
+
+	if(fsenter(&err) < 0)
+		return err;
+	dev = path2dev(new);
+	if(dev == nil || dev->link == nil)
+		err = -EPERM;
+	else if(!sym && dev != path2dev(old))
+		err = -EXDEV;
+	else
+		err = dev->link(old, new, sym);
+	fsleave();
+
+	return err;
+}
+
+/* link(2): fslink() with sym=0 on both resolved names; -EFAULT if either cannot be resolved. */
+int sys_link(char *old, char *new)
+{
+	char *src, *dst;
+	int err;
+
+	trace("sys_link(%s, %s)", old, new);
+
+	src = fsfullpath(old);
+	if(src == nil)
+		return -EFAULT;
+	dst = fsfullpath(new);
+	if(dst == nil){
+		free(src);
+		return -EFAULT;
+	}
+	err = fslink(src, dst, 0);
+	free(src);
+	free(dst);
+
+	return err;
+}
+
+/*
+ * symlink(2): fslink() with sym=1.  Only new is resolved; old is
+ * passed on exactly as given.  Returns -EFAULT if new cannot be
+ * resolved.
+ */
+int sys_symlink(char *old, char *new)
+{
+	char *path;
+	int err;
+
+	trace("sys_symlink(%s, %s)", old, new);
+
+	path = fsfullpath(new);
+	if(path == nil)
+		return -EFAULT;
+	err = fslink(old, path, 1);
+	free(path);
+
+	return err;
+}
+
--- /dev/null
+#!/bin/rc
+
+arg0=$0
+DISPLAY=:0
+HOME=/tmp
+PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/X11R6/bin:/usr/games
+
+x=''
+e=/bin/linuxemu
+r=/sys/lib/linux
+
+fn eprint {
+ echo $arg0: $* >[1=2]
+}
+
+fn usage {
+ echo usage: $arg0 [-h] [-d...] [-u uid] [-g gid] [-startx] [-display :n] [-e emubin] [-r linuxroot] command [args ...] >[1=2]
+ exit usage
+}
+
+# extract options
+# -r, -e and -display consume the following word and set r, e and DISPLAY;
+# -startx sets x; -u and -g are collected in $o together with their
+# argument; any other -option is collected in $o as is.
+# the loop ends at the first word that does not start with '-'.
+o=()
+while(~ $1 -*){
+ switch($1){
+ case -h
+ usage
+ case -r
+ shift
+ r=$1
+ case -e
+ shift
+ e=$1
+ case -startx
+ x=1
+ case -display
+ shift
+ DISPLAY=$1
+ case -[ug]
+ o=($o $1 $2)
+ shift
+ case -*
+ o=($o $1)
+ }
+ shift
+}
+
+switch($#*){
+case 0
+ usage
+}
+
+if(! ~ $x ''){
+ # find free local display
+ d=(`{{seq 0 32; {echo /srv/UD.X* | sed 's!/srv/UD\.X!!g; s!\ !\
+ !g; s!\*!!g;'}} | sort | uniq -c | awk '/^\ *1\ /{print $2}'})
+ d=$d(1)
+ X11/equis -ac :$d &
+ k=/proc/$apid/notepg
+ $arg0 -e $e -r $r -display :$d $o $*
+ {echo kill >$k} >/dev/null >[2=1]
+ exit
+}
+
+# rewrite the path so it is still accessible after binding $r to /
+fn ninepath {
+ # absolute path that exists here but not below $r: print it with a /9 prefix
+ if(~ $1 /* && test -e $1 && ! test -e $r/$1){
+ echo /9$1
+ }
+ # anything else is printed unchanged
+ if not {
+ echo $1
+ }
+}
+
+w=`{pwd}
+r=`{cleanname -d $w $r}
+if(! test -d $r){
+ eprint bad rootpath: $r
+ exit rootpath
+}
+e=`{cleanname -d $w $e}
+if(! test -x $e){
+ eprint bad emubin: $e
+ exit emubin
+}
+e=`{ninepath $e}
+p=`{ninepath $w}
+a=($e $o)
+while(! ~ $#* 0){
+ x=`{ninepath $1}
+ a=($a $"x)
+ shift
+}
+
+# bind the required plan9 stuff
+# work in a private copy of the namespace (rfork n)
+rfork n
+mntgen $r
+bind -a '#P' /dev
+# make each listed directory visible below $r;
+# the original / becomes $r/9, which is what ninepath relies on
+for(d in /9 /dev /proc /net /env /srv /n /mnt /tmp){
+ t=$r^$d
+ switch($d){
+ case /tmp /env /srv
+ bind -c $d $t
+ case /9
+ bind / $t
+ case *
+ bind $d $t
+ }
+}
+
+# change root and run the emulator
+# $p is the saved working directory, rewritten by ninepath
+builtin cd /
+bind $r /
+builtin cd $p
+exec $a
--- /dev/null
+/*
+ * open flags as passed by the emulated program (octal).
+ * O_ACCMODE masks the access mode; O_NDELAY is an alias of O_NONBLOCK.
+ * NOTE(review): values presumably mirror the Linux i386 headers -- confirm.
+ */
+enum {
+ O_ACCMODE = 0003,
+ O_RDONLY = 00,
+ O_WRONLY = 01,
+ O_RDWR = 02,
+ O_CREAT = 0100,
+ O_EXCL = 0200,
+ O_NOCTTY = 0400,
+ O_TRUNC = 01000,
+ O_APPEND = 02000,
+ O_NONBLOCK = 04000,
+ O_NDELAY = 04000,
+ O_SYNC = 010000,
+ FASYNC = 020000,
+};
+
+/* per-descriptor flag: descriptors carrying it are closed by closexfds() */
+enum {
+ FD_CLOEXEC = 1,
+};
+
+/*
+ * fcntl(2) command numbers as used by Linux programs.
+ * Linux numbers F_SETSIG as 10 and F_GETSIG as 11; without the
+ * F_SETSIG entry, F_GETSIG would take the value 10 and be confused
+ * with it.
+ */
+enum {
+	F_DUPFD = 0,
+	F_GETFD,
+	F_SETFD,
+	F_GETFL,
+	F_SETFL,
+	F_GETLK,
+	F_SETLK,
+	F_SETLKW,
+	F_SETOWN,
+	F_GETOWN,
+	F_SETSIG = 10,
+	F_GETSIG = 11,
+	F_GETLK64 = 12,
+	F_SETLK64 = 13,
+	F_SETLKW64 = 14,
+};
+
+/*
+ * file mode bits (octal): S_IFMT masks the file type field, the
+ * S_IF* values are the types, S_ISUID/S_ISGID/S_ISVTX are the
+ * special permission bits above the 0777 permission bits.
+ */
+enum {
+ S_IFMT = 0170000,
+ S_IFSOCK = 0140000,
+ S_IFLNK = 0120000,
+ S_IFREG = 0100000,
+ S_IFBLK = 0060000,
+ S_IFDIR = 0040000,
+ S_IFCHR = 0020000,
+ S_IFIFO = 0010000,
+ S_ISUID = 0004000,
+ S_ISGID = 0002000,
+ S_ISVTX = 0001000,
+};
+
+/*
+ * memory mapping constants: PROT_* protection bits, MAP_* mapping
+ * flags (MAP_TYPE masks the sharing type) and MREMAP_* flags.
+ * NOTE(review): values presumably mirror the Linux headers -- confirm.
+ */
+enum {
+ PROT_READ = 0x01,
+ PROT_WRITE = 0x02,
+ PROT_EXEC = 0x04,
+ PROT_SEM = 0x08,
+ PROT_NONE = 0x00,
+ PROT_GROWSDOWN = 0x01000000,
+ PROT_GROWSUP = 0x02000000,
+ MAP_SHARED = 0x01,
+ MAP_PRIVATE = 0x02,
+ MAP_TYPE = 0x0f,
+ MAP_FIXED = 0x10,
+ MAP_ANONYMOUS = 0x20,
+
+ MREMAP_MAYMOVE = 1,
+ MREMAP_FIXED = 2,
+};
+
+/*
+ * clone flag bits.
+ * NOTE(review): values presumably mirror the Linux headers -- confirm.
+ */
+enum {
+ CLONE_VM = 0x00000100,
+ CLONE_FS = 0x00000200,
+ CLONE_FILES = 0x00000400,
+ CLONE_SIGHAND = 0x00000800,
+ CLONE_PTRACE = 0x00002000,
+ CLONE_VFORK = 0x00004000,
+ CLONE_PARENT = 0x00008000,
+ CLONE_THREAD = 0x00010000,
+ CLONE_NEWNS = 0x00020000,
+ CLONE_SYSVSEM = 0x00040000,
+ CLONE_SETTLS = 0x00080000,
+ CLONE_PARENT_SETTID = 0x00100000,
+ CLONE_CHILD_CLEARTID = 0x00200000,
+ CLONE_DETACHED = 0x00400000,
+ CLONE_UNTRACED = 0x00800000,
+ CLONE_CHILD_SETTID = 0x01000000,
+ CLONE_STOPPED = 0x02000000,
+};
+
+/*
+ * Linux error numbers. System call handlers return them negated
+ * (e.g. -ENOMEM, -EINVAL).
+ * The numbers 41 and 58 are intentionally absent here; they belong to
+ * EWOULDBLOCK and EDEADLOCK, which are aliases of other codes.
+ * EMAX is one more than the highest error number listed.
+ */
+enum {
+ EPERM = 1,
+ ENOENT = 2,
+ ESRCH = 3,
+ EINTR = 4,
+ EIO = 5,
+ ENXIO = 6,
+ E2BIG = 7,
+ ENOEXEC = 8,
+ EBADF = 9,
+ ECHILD = 10,
+ EAGAIN = 11,
+ ENOMEM = 12,
+ EACCES = 13,
+ EFAULT = 14,
+ ENOTBLK = 15,
+ EBUSY = 16,
+ EEXIST = 17,
+ EXDEV = 18,
+ ENODEV = 19,
+ ENOTDIR = 20,
+ EISDIR = 21,
+ EINVAL = 22,
+ ENFILE = 23,
+ EMFILE = 24,
+ ENOTTY = 25,
+ ETXTBSY = 26,
+ EFBIG = 27,
+ ENOSPC = 28,
+ ESPIPE = 29,
+ EROFS = 30,
+ EMLINK = 31,
+ EPIPE = 32,
+ EDOM = 33,
+ ERANGE = 34,
+ EDEADLK = 35,
+ ENAMETOOLONG = 36,
+ ENOLCK = 37,
+ ENOSYS = 38,
+ ENOTEMPTY = 39,
+ ELOOP = 40,
+ ENOMSG = 42,
+ EIDRM = 43,
+ ECHRNG = 44,
+ EL2NSYNC = 45,
+ EL3HLT = 46,
+ EL3RST = 47,
+ ELNRNG = 48,
+ EUNATCH = 49,
+ ENOCSI = 50,
+ EL2HLT = 51,
+ EBADE = 52,
+ EBADR = 53,
+ EXFULL = 54,
+ ENOANO = 55,
+ EBADRQC = 56,
+ EBADSLT = 57,
+ EBFONT = 59,
+ ENOSTR = 60,
+ ENODATA = 61,
+ ETIME = 62,
+ ENOSR = 63,
+ ENONET = 64,
+ ENOPKG = 65,
+ EREMOTE = 66,
+ ENOLINK = 67,
+ EADV = 68,
+ ESRMNT = 69,
+ ECOMM = 70,
+ EPROTO = 71,
+ EMULTIHOP = 72,
+ EDOTDOT = 73,
+ EBADMSG = 74,
+ EOVERFLOW = 75,
+ ENOTUNIQ = 76,
+ EBADFD = 77,
+ EREMCHG = 78,
+ ELIBACC = 79,
+ ELIBBAD = 80,
+ ELIBSCN = 81,
+ ELIBMAX = 82,
+ ELIBEXEC = 83,
+ EILSEQ = 84,
+ ERESTART = 85,
+ ESTRPIPE = 86,
+ EUSERS = 87,
+ ENOTSOCK = 88,
+ EDESTADDRREQ = 89,
+ EMSGSIZE = 90,
+ EPROTOTYPE = 91,
+ ENOPROTOOPT = 92,
+ EPROTONOSUPPORT = 93,
+ ESOCKTNOSUPPORT = 94,
+ EOPNOTSUPP = 95,
+ EPFNOSUPPORT = 96,
+ EAFNOSUPPORT = 97,
+ EADDRINUSE = 98,
+ EADDRNOTAVAIL = 99,
+ ENETDOWN = 100,
+ ENETUNREACH = 101,
+ ENETRESET = 102,
+ ECONNABORTED = 103,
+ ECONNRESET = 104,
+ ENOBUFS = 105,
+ EISCONN = 106,
+ ENOTCONN = 107,
+ ESHUTDOWN = 108,
+ ETOOMANYREFS = 109,
+ ETIMEDOUT = 110,
+ ECONNREFUSED = 111,
+ EHOSTDOWN = 112,
+ EHOSTUNREACH = 113,
+ EALREADY = 114,
+ EINPROGRESS = 115,
+ ESTALE = 116,
+ EUCLEAN = 117,
+ ENOTNAM = 118,
+ ENAVAIL = 119,
+ EISNAM = 120,
+ EREMOTEIO = 121,
+ EDQUOT = 122,
+ ENOMEDIUM = 123,
+ EMEDIUMTYPE = 124,
+ EMAX = 125,
+};
+
+/* Alternate names that share the number of an existing error code. */
+#define EWOULDBLOCK EAGAIN
+#define EDEADLOCK EDEADLK
+#define ENOATTR ENODATA
+
+/*
+ * Linux poll()/epoll event bits.
+ * Note the gap in the numbering: POLLREMOVE is 0x1000 (1<<12) and
+ * POLLRDHUP is 0x2000 in the Linux headers; bit 11 is unused here.
+ */
+enum {
+	POLLIN = (1<<0),
+	POLLPRI = (1<<1),
+	POLLOUT = (1<<2),
+	POLLERR = (1<<3),
+	POLLHUP = (1<<4),
+	POLLNVAL = (1<<5),
+	POLLRDNORM = (1<<6),
+	POLLRDBAND = (1<<7),
+	POLLWRNORM = (1<<8),
+	POLLWRBAND = (1<<9),
+	POLLMSG = (1<<10),
+	POLLREMOVE = (1<<12),
+	POLLRDHUP = 0x2000,
+	EPOLLONESHOT = (1<<30),
+	EPOLLET = (1<<31),
+};
+
+/*
+ * Linux signal numbers.
+ * Several names share one number (SIGABRT/SIGIOT, SIGIO/SIGPOLL/SIGLOST).
+ * SIGRT1..SIGRT8 follow the 31 classic signals; SIGMAX is one more than
+ * the highest signal number defined here.
+ */
+enum {
+ SIGHUP = 1,
+ SIGINT = 2,
+ SIGQUIT = 3,
+ SIGILL = 4,
+ SIGTRAP = 5,
+ SIGABRT = 6,
+ SIGIOT = 6,
+ SIGBUS = 7,
+ SIGFPE = 8,
+ SIGKILL = 9,
+ SIGUSR1 = 10,
+ SIGSEGV = 11,
+ SIGUSR2 = 12,
+ SIGPIPE = 13,
+ SIGALRM = 14,
+ SIGTERM = 15,
+ SIGSTKFLT = 16,
+ SIGCHLD = 17,
+ SIGCONT = 18,
+ SIGSTOP = 19,
+ SIGTSTP = 20,
+ SIGTTIN = 21,
+ SIGTTOU = 22,
+ SIGURG = 23,
+ SIGXCPU = 24,
+ SIGXFSZ = 25,
+ SIGVTALRM = 26,
+ SIGPROF = 27,
+ SIGWINCH = 28,
+ SIGIO = 29,
+ SIGPOLL = 29,
+ SIGLOST = 29,
+ SIGPWR = 30,
+ SIGSYS = 31,
+
+ SIGRT1 = 32,
+ SIGRT2 = 33,
+ SIGRT3 = 34,
+ SIGRT4 = 35,
+ SIGRT5 = 36,
+ SIGRT6 = 37,
+ SIGRT7 = 38,
+ SIGRT8 = 39,
+
+ SIGMAX = 40,
+};
+
+/* Linux siginfo si_code values describing where a signal came from. */
+enum {
+ SI_USER = 0,
+ SI_QUEUE = -1,
+ SI_TIMER = -2,
+ SI_MESGQ = -3,
+ SI_ASYNCIO = -4,
+ SI_SIGIO = -5,
+ SI_TKILL = -6,
+ SI_DETHREAD = -7,
+};
+
+/* si_code values for SIGILL, numbered 1 through 8. */
+enum {
+	ILL_ILLOPC = 1,
+	ILL_ILLOPN = 2,
+	ILL_ILLADR = 3,
+	ILL_ILLTRP = 4,
+	ILL_PROVOPC = 5,
+	ILL_PRVREG = 6,
+	ILL_COPROC = 7,
+	ILL_BADSTK = 8,
+};
+
+/* si_code values for SIGFPE, numbered 1 through 8. */
+enum {
+	FPE_INTDIV = 1,
+	FPE_INTOVF = 2,
+	FPE_FLTDIV = 3,
+	FPE_FLTOVF = 4,
+	FPE_FLTUND = 5,
+	FPE_FLTRES = 6,
+	FPE_FLTINV = 7,
+	FPE_FLTSUB = 8,
+};
+
+/* Option bits for the Linux wait family (waitpid/wait4). WUNTRACED and WSTOPPED share a bit. */
+enum {
+ WNOHANG =0x00000001,
+ WUNTRACED =0x00000002,
+ WSTOPPED =0x00000002,
+ WEXITED =0x00000004,
+ WCONTINUED =0x00000008,
+ WNOWAIT =0x01000000,
+ WNOTHREAD =0x20000000,
+ WALL =0x40000000,
+ WCLONE =0x80000000,
+};
+
+/* Time value as seconds plus microseconds, in the layout used by the Linux side. */
+struct linux_timeval
+{
+ long tv_sec;
+ long tv_usec;
+};
+
+/* Time value as seconds plus nanoseconds, in the layout used by the Linux side. */
+struct linux_timespec
+{
+ long tv_sec;
+ long tv_nsec;
+};
+
+/*
+ * Segment descriptor description exchanged with the Linux program
+ * (used by modify_ldt and set/get_thread_area).
+ * All bit fields are unsigned: "contents" is two bits wide and must be
+ * able to hold the values 2 and 3 without reading back as negative.
+ */
+struct linux_user_desc {
+	uint entry_number;
+	ulong base_addr;
+	uint limit;
+	uint seg_32bit:1;
+	uint contents:2;
+	uint read_exec_only:1;
+	uint limit_in_pages:1;
+	uint seg_not_present:1;
+	uint useable:1;
+};
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Linuxcall Linuxcall;
+
+/*
+ * One entry of the system call table.
+ * stub extracts the arguments from the saved registers and
+ * calls func with them.
+ */
+struct Linuxcall
+{
+ char *name;
+ void *func;
+ int (*stub)(Ureg *, void *);
+};
+
+/*
+ * Argument marshalling stubs: fcallN casts func to a function taking
+ * N int arguments and calls it with the first N of the registers
+ * bx, cx, dx, si, di, bp (in that order) from the saved Ureg.
+ */
+static int fcall0(Ureg *, void *func){return ((int (*)(void))func)();}
+static int fcall1(Ureg *u, void *func){return ((int (*)(int))func)(u->bx);}
+static int fcall2(Ureg *u, void *func){return ((int (*)(int, int))func)(u->bx, u->cx);}
+static int fcall3(Ureg *u, void *func){return ((int (*)(int, int, int))func)(u->bx, u->cx, u->dx);}
+static int fcall4(Ureg *u, void *func){return ((int (*)(int, int, int, int))func)(u->bx, u->cx, u->dx, u->si);}
+static int fcall5(Ureg *u, void *func){return ((int (*)(int, int, int, int, int))func)(u->bx, u->cx, u->dx, u->si, u->di);}
+static int fcall6(Ureg *u, void *func){return ((int (*)(int, int, int, int, int, int))func)(u->bx, u->cx, u->dx, u->si, u->di, u->bp);}
+
+#include "linuxcalltab.out"
+
+/* Fallback entry used by linuxcall() when the call number lies beyond the table. */
+static Linuxcall nocall = {
+ .name = "nosys",
+ .func = sys_nosys,
+ .stub = fcall0,
+};
+
+/*
+ * Deliver the result of a system call to the current process.
+ * For -ERESTART the call name is remembered in p->restart and the
+ * registers are left untouched (pc still points at the INT 0x80).
+ * Otherwise the result goes into ax, pc is advanced by the two bytes
+ * of the INT 0x80 instruction and the call/restart state is cleared.
+ */
+static void
+linuxret(int errno)
+{
+	Uproc *p;
+	Ureg *u;
+
+	p = current;
+	u = p->ureg;
+	trace("linuxret(%lux: %s, %lux: %E)", u->pc, p->syscall, (ulong)errno, errno);
+	if(errno != -ERESTART){
+		u->ax = (ulong)errno;
+		u->pc += 2;
+		p->restart->syscall = nil;
+		p->syscall = nil;
+		return;
+	}
+	p->restart->syscall = p->syscall;
+}
+
+
+/*
+ * Dispatch the Linux system call the current process is stopped at.
+ * Returns -1 if pc does not point at an INT 0x80 instruction,
+ * 0 after the call has been executed and its result delivered
+ * through linuxret().
+ */
+int
+linuxcall(void)
+{
+	Uproc *p;
+	Ureg *u;
+	Linuxcall *c;
+	uchar *pc;
+
+	p = current;
+	u = p->ureg;
+
+	/* CD 80 = INT 0x80 */
+	pc = (uchar*)u->pc;
+	if(pc[0] != 0xcd || pc[1] != 0x80){
+		trace("linuxcall(): not a syscall pc=%lux sp=%lux", u->pc, u->sp);
+		return -1;
+	}
+
+	/*
+	 * ax is supplied by the emulated program. Check the index itself
+	 * before forming the pointer: &linuxcalltab[ax] can wrap around
+	 * for huge values and then compare below the end of the table.
+	 */
+	if((ulong)u->ax < nelem(linuxcalltab))
+		c = &linuxcalltab[u->ax];
+	else
+		c = &nocall;
+	p->syscall = c->name;
+	p->sysret = linuxret;
+	if(p->restart->syscall)
+		trace("linuxcall(): restarting %s", p->syscall);
+	linuxret(c->stub(u, c->func));
+	return 0;
+}
--- /dev/null
+0 0 restart_syscall sys_nosys
+1 1 exit sys_exit
+2 0 fork sys_fork
+3 3 read sys_read
+4 3 write sys_write
+5 3 open sys_open
+6 1 close sys_close
+7 3 waitpid sys_waitpid
+8 2 creat sys_creat
+9 2 link sys_link
+10 1 unlink sys_unlink
+11 3 execve sys_execve
+12 1 chdir sys_chdir
+13 1 time sys_time
+14 0 mknod sys_nosys
+15 2 chmod sys_chmod
+# lchown takes (path, uid, gid); sys_lchown is registered with 3 args for lchown32 too
+16 3 lchown sys_lchown
+17 0 break sys_nosys
+18 0 oldstat sys_nosys
+19 3 lseek sys_lseek
+20 0 getpid sys_getpid
+21 0 mount sys_nosys
+22 0 umount sys_nosys
+23 1 setuid sys_setuid
+24 0 getuid sys_getuid
+25 0 stime sys_nosys
+26 0 ptrace sys_nosys
+27 1 alarm sys_alarm
+28 0 oldfstat sys_nosys
+29 0 pause sys_nosys
+30 2 utime sys_utime
+31 0 stty sys_nosys
+32 0 gtty sys_nosys
+33 2 access sys_access
+34 0 nice sys_nosys
+35 0 ftime sys_nosys
+36 0 sync sys_nosys
+37 2 kill sys_kill
+38 2 rename sys_rename
+39 2 mkdir sys_mkdir
+40 1 rmdir sys_rmdir
+41 1 dup sys_dup
+42 1 pipe sys_pipe
+43 1 times sys_times
+44 0 prof sys_nosys
+45 1 brk sys_brk
+46 1 setgid sys_setgid
+47 0 getgid sys_getgid
+48 0 signal sys_nosys
+49 0 geteuid sys_nosys
+50 0 getegid sys_nosys
+51 0 acct sys_nosys
+52 0 umount2 sys_nosys
+53 0 lock sys_nosys
+54 3 ioctl sys_ioctl
+55 3 fcntl sys_fcntl
+56 0 mpx sys_nosys
+57 2 setpgid sys_setpgid
+58 0 ulimit sys_nosys
+59 0 oldolduname sys_nosys
+60 1 umask sys_umask
+61 1 chroot sys_chroot
+62 0 ustat sys_nosys
+63 2 dup2 sys_dup2
+64 0 getppid sys_getppid
+65 0 getpgrp sys_getpgrp
+66 0 setsid sys_setsid
+67 0 sigaction sys_nosys
+68 0 sgetmask sys_nosys
+69 0 ssetmask sys_nosys
+70 0 setreuid sys_nosys
+71 0 setregid sys_nosys
+72 0 sigsuspend sys_nosys
+73 0 sigpending sys_nosys
+74 0 sethostname sys_nosys
+75 2 setrlimit sys_setrlimit
+76 2 getrlimit sys_getrlimit
+77 0 getrusage sys_nosys
+78 2 gettimeofday sys_gettimeofday
+79 0 settimeofday sys_nosys
+80 0 getgroups sys_nosys
+81 0 setgroups sys_nosys
+82 0 select sys_nosys
+83 2 symlink sys_symlink
+84 0 oldlstat sys_nosys
+85 3 readlink sys_readlink
+86 0 uselib sys_nosys
+87 0 swapon sys_nosys
+88 0 reboot sys_nosys
+89 0 readdir sys_nosys
+90 1 mmap sys_linux_mmap
+91 2 munmap sys_munmap
+92 2 truncate sys_truncate
+93 2 ftruncate sys_ftruncate
+94 2 fchmod sys_fchmod
+# fchown takes (fd, uid, gid); sys_fchown is registered with 3 args for fchown32 too
+95 3 fchown sys_fchown
+96 0 getpriority sys_nosys
+97 0 setpriority sys_nosys
+98 0 profil sys_nosys
+99 2 statfs sys_statfs
+100 0 fstatfs sys_nosys
+101 0 ioperm sys_nosys
+102 2 socketcall sys_linux_socketcall
+103 0 syslog sys_nosys
+104 3 setitimer sys_setitimer
+105 2 getitimer sys_getitimer
+106 2 stat sys_linux_stat
+107 2 lstat sys_linux_lstat
+108 2 fstat sys_linux_fstat
+109 0 olduname sys_nosys
+110 0 iopl sys_nosys
+111 0 vhangup sys_nosys
+112 0 idle sys_nosys
+113 0 vm86old sys_nosys
+114 4 wait4 sys_wait4
+115 0 swapoff sys_nosys
+116 0 sysinfo sys_nosys
+117 0 ipc sys_nosys
+118 1 fsync sys_fsync
+119 0 sigreturn sys_sigreturn
+120 5 clone sys_linux_clone
+121 0 setdomainname sys_nosys
+122 1 uname sys_uname
+123 3 modify_ldt sys_modify_ldt
+124 0 adjtimex sys_nosys
+125 3 mprotect sys_mprotect
+126 0 sigprocmask sys_nosys
+127 0 create_module sys_nosys
+128 0 init_module sys_nosys
+129 0 delete_module sys_nosys
+130 0 get_kernel_syms sys_nosys
+131 0 quotactl sys_nosys
+132 1 getpgid sys_getpgid
+133 1 fchdir sys_fchdir
+134 0 bdflush sys_nosys
+135 0 sysfs sys_nosys
+136 1 personality sys_personality
+137 0 afs_syscall sys_nosys
+138 0 setfsuid sys_nosys
+139 0 setfsgid sys_nosys
+140 5 _llseek sys_llseek
+141 3 getdents sys_linux_getdents
+142 5 _newselect sys_select
+# flock takes (fd, operation)
+143 2 flock sys_flock
+144 3 msync sys_msync
+145 3 readv sys_readv
+146 3 writev sys_writev
+147 1 getsid sys_getsid
+148 0 fdatasync sys_nosys
+149 0 _sysctl sys_nosys
+150 0 mlock sys_nosys
+151 0 munlock sys_nosys
+152 0 mlockall sys_nosys
+153 0 munlockall sys_nosys
+154 2 sched_setparam sys_sched_setparam
+155 2 sched_getparam sys_sched_getparam
+156 3 sched_setscheduler sys_sched_setscheduler
+157 1 sched_getscheduler sys_sched_getscheduler
+158 0 sched_yield sys_sched_yield
+159 0 sched_get_priority_max sys_nosys
+160 0 sched_get_priority_min sys_nosys
+161 0 sched_rr_get_interval sys_nosys
+162 2 nanosleep sys_nanosleep
+163 5 mremap sys_mremap
+164 3 setresuid sys_setresuid
+165 3 getresuid sys_getresuid
+166 0 vm86 sys_nosys
+167 0 query_module sys_nosys
+168 3 poll sys_poll
+169 0 nfsservctl sys_nosys
+170 3 setresgid sys_setresgid
+171 3 getresgid sys_getresgid
+172 0 prctl sys_nosys
+173 0 rt_sigreturn sys_rt_sigreturn
+174 4 rt_sigaction sys_rt_sigaction
+175 4 rt_sigprocmask sys_rt_sigprocmask
+176 2 rt_sigpending sys_rt_sigpending
+177 0 rt_sigtimedwait sys_nosys
+178 3 rt_sigqueueinfo sys_rt_sigqueueinfo
+179 2 rt_sigsuspend sys_rt_sigsuspend
+180 4 pread64 sys_pread64
+181 4 pwrite64 sys_pwrite64
+# chown takes (path, uid, gid); sys_chown is registered with 3 args for chown32 too
+182 3 chown sys_chown
+183 2 getcwd sys_getcwd
+184 0 capget sys_nosys
+185 0 capset sys_nosys
+186 2 sigaltstack sys_sigaltstack
+187 0 sendfile sys_nosys
+188 0 getpmsg sys_nosys
+189 0 putpmsg sys_nosys
+190 0 vfork sys_vfork
+191 0 ugetrlimit sys_nosys
+192 6 mmap2 sys_mmap
+193 2 truncate64 sys_truncate
+194 2 ftruncate64 sys_ftruncate
+195 2 stat64 sys_linux_stat64
+196 2 lstat64 sys_linux_lstat64
+197 2 fstat64 sys_linux_fstat64
+198 3 lchown32 sys_lchown
+199 0 getuid32 sys_getuid
+200 0 getgid32 sys_getgid
+201 0 geteuid32 sys_getuid
+202 0 getegid32 sys_getgid
+203 2 setreuid32 sys_setreuid
+204 2 setregid32 sys_setregid
+205 2 getgroups32 sys_getgroups
+206 2 setgroups32 sys_setgroups
+207 3 fchown32 sys_fchown
+208 3 setresuid32 sys_setresuid
+209 3 getresuid32 sys_getresuid
+210 3 setresgid32 sys_setresgid
+211 3 getresgid32 sys_getresgid
+212 3 chown32 sys_chown
+213 1 setuid32 sys_setuid
+214 1 setgid32 sys_setgid
+215 0 setfsuid32 sys_nosys
+216 0 setfsgid32 sys_nosys
+217 0 pivot_root sys_nosys
+218 0 mincore sys_nosys
+219 0 madvise sys_nosys
+220 3 getdents64 sys_linux_getdents64
+221 3 fcntl64 sys_fcntl
+224 0 gettid sys_gettid
+225 0 readahead sys_nosys
+226 5 setxattr sys_setxattr
+227 5 lsetxattr sys_lsetxattr
+228 5 fsetxattr sys_fsetxattr
+229 4 getxattr sys_getxattr
+230 4 lgetxattr sys_lgetxattr
+231 4 fgetxattr sys_fgetxattr
+232 0 listxattr sys_nosys
+233 0 llistxattr sys_nosys
+234 0 flistxattr sys_nosys
+235 0 removexattr sys_nosys
+236 0 lremovexattr sys_nosys
+237 0 fremovexattr sys_nosys
+238 2 tkill sys_tkill
+239 0 sendfile64 sys_nosys
+240 6 futex sys_futex
+241 0 sched_setaffinity sys_nosys
+242 0 sched_getaffinity sys_nosys
+243 1 set_thread_area sys_set_thread_area
+244 1 get_thread_area sys_get_thread_area
+245 0 io_setup sys_nosys
+246 0 io_destroy sys_nosys
+247 0 io_getevents sys_nosys
+248 0 io_submit sys_nosys
+249 0 io_cancel sys_nosys
+250 0 fadvise64 sys_nosys
+252 1 exit_group sys_exit_group
+253 0 lookup_dcookie sys_nosys
+254 0 epoll_create sys_nosys
+255 0 epoll_ctl sys_nosys
+256 0 epoll_wait sys_nosys
+257 0 remap_file_pages sys_nosys
+258 1 set_tid_address sys_set_tid_address
+259 0 timer_create sys_nosys
+260 0 timer_settime sys_nosys
+261 0 timer_gettime sys_nosys
+262 0 timer_getoverrun sys_nosys
+263 0 timer_delete sys_nosys
+264 0 clock_settime sys_nosys
+265 2 clock_gettime sys_clock_gettime
+266 0 clock_getres sys_nosys
+267 0 clock_nanosleep sys_nosys
+268 0 statfs64 sys_nosys
+269 0 fstatfs64 sys_nosys
+# tgkill takes (tgid, tid, sig)
+270 3 tgkill sys_tgkill
+271 2 utimes sys_utimes
+272 0 fadvise64_64 sys_nosys
+273 0 vserver sys_nosys
+274 0 mbind sys_nosys
+275 0 get_mempolicy sys_nosys
+276 0 set_mempolicy sys_nosys
+277 0 mq_open sys_nosys
+278 0 mq_unlink sys_nosys
+279 0 mq_timedsend sys_nosys
+280 0 mq_timedreceive sys_nosys
+281 0 mq_notify sys_nosys
+282 0 mq_getsetattr sys_nosys
+283 0 sys_kexec_load sys_nosys
+284 0 waitid sys_nosys
+285 0 setaltroot sys_nosys
+286 0 add_key sys_nosys
+287 0 request_key sys_nosys
+288 0 keyctl sys_nosys
--- /dev/null
+#!/bin/awk -f
+#
+# Generate the C initializer of linuxcalltab[] from the system call table.
+# Input lines are: number  argcount  name  function
+# Lines starting with '#' are ignored. Numbers must be ascending;
+# missing numbers are filled with sys_nosys entries named "nosysN".
+# On a mis-ordered table an error line is printed, no table is
+# emitted and the exit status is non-zero.
+#
+BEGIN {
+	nsys = 0
+	broken = 0
+}
+
+/^#/ {
+	next
+}
+
+{
+	i=$1
+	if(nsys > i){
+		print "BROKEN TABLE: "nsys" > "i
+		broken = 1
+		exit 1
+	}
+	# pad gaps in the numbering with dummy entries
+	while(nsys < i){
+		sysarg[nsys] = 0
+		sysnam[nsys] = "nosys"nsys
+		sysfun[nsys] = "sys_nosys"
+		nsys++;
+	}
+	sysarg[nsys] = $2
+	sysnam[nsys] = $3
+	sysfun[nsys] = $4
+	nsys++
+}
+
+END {
+	# exit in a main rule still runs END: do not emit a partial table
+	if(broken)
+		exit 1
+	print "static Linuxcall linuxcalltab[] = {"
+	for(i=0; i<nsys; i++){
+		print "	{ /* "i" */"
+		print "		.name = \""sysnam[i]"\","
+		print "		.func = "sysfun[i]","
+		print "		.stub = fcall"sysarg[i]","
+		print "	},"
+	}
+	print "};"
+	print ""
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <tos.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* Terminate the emulator process with a nil exit status. */
+static void
+die(void)
+{
+ exits(nil);
+}
+
+/*
+ * Build a nil-terminated array of "name=value" strings from the
+ * entries of /env. Entries named "fn#...", "timezone" and "0" are
+ * left out, as are entries whose value cannot be read.
+ * Returns nil if /env cannot be opened; a failed directory read
+ * yields an empty (but valid) list.
+ */
+static char**
+readenv(void)
+{
+	char **env, *k, *v;
+	int fd, n, i, c;
+	Dir *d;
+
+	if((fd = open("/env", OREAD)) < 0)
+		return nil;
+	d = nil;
+	n = dirreadall(fd, &d);
+	close(fd);
+	if(n < 0)
+		n = 0;	/* read failed: never size or index with a negative count */
+
+	env = kmalloc(sizeof(env[0]) * (n + 1));
+	c = 0;
+	for(i=0; i<n; i++){
+		k = d[i].name;
+
+		// filter out some stuff...
+		if(strncmp(k, "fn#", 3) == 0)
+			continue;
+		if(strcmp(k, "timezone") == 0)
+			continue;
+		if(strcmp(k, "0")==0)
+			continue;
+
+		if((v = getenv(k)) == nil)
+			continue;
+		env[c] = ksmprint("%s=%s", k, v);
+		free(v);	/* the value has been copied (or formatting failed) */
+		if(env[c] == nil)
+			continue;
+		c++;
+	}
+	env[c] = 0;
+
+	free(d);
+
+	return env;
+}
+
+/*
+ * Parameters handed across the stack switch done by onstack():
+ * the target stack, the function and argument to run there,
+ * the function's result, and the jmp_buf used to get back.
+ */
+struct onstackargs
+{
+ long *stk;
+ void *arg;
+ int (*func)(void *);
+ int ret;
+ jmp_buf jmp;
+};
+
+/*
+ * Run func(arg) on the stack stk (KSTACK bytes) and return its result.
+ * If the caller already executes on stk, func is called directly.
+ * Otherwise a jmp_buf is captured, its saved stack pointer is pointed
+ * into stk and longjmp() performs the switch; args->jmp brings control
+ * back to the original stack once func has returned.
+ * Statement order matters here; do not rearrange.
+ */
+int
+onstack(long *stk, int (*func)(void *), void *arg)
+{
+ struct onstackargs a, *args;
+ jmp_buf jmp;
+ long *sp;
+
+ /* the address of a local tells us which stack we are running on */
+ sp = (long*)&a;
+ if((long*)sp >= stk && (long*)sp < stk+(KSTACK / sizeof(long)))
+ return func(arg);
+
+ /*
+ * setjmp returns 0 now; it returns the args pointer when reached
+ * through the longjmp(jmp, (int)args) below, i.e. with the stack
+ * pointer already moved into stk.
+ */
+ if(args = (struct onstackargs*)setjmp(jmp)){
+ args->ret = onstack(args->stk, args->func, args->arg);
+ longjmp(args->jmp, 1);
+ }
+
+ /* redirect the saved stack pointer to 16 longs below the top of stk */
+ sp = &stk[(KSTACK / sizeof(long))-16];
+ jmp[JMPBUFSP] = (long)sp;
+
+ memset(stk, 0, KSTACK);
+
+ args = &a;
+ args->stk = stk;
+ args->func = func;
+ args->arg = arg;
+
+ /* first pass switches stacks; the second return (nonzero) is the way back */
+ if(!setjmp(args->jmp))
+ longjmp(jmp, (int)args);
+
+ return args->ret;
+}
+
+#pragma profile off
+
+/*
+ * Call fn(arg), going through prof() when profiling is
+ * enabled (_tos->prof.what is nonzero).
+ */
+static void
+proff(void (*fn)(void*), void *arg)
+{
+	if(_tos->prof.what != 0){
+		prof(fn, arg, 2000, _tos->prof.what);
+		return;
+	}
+	fn(arg);
+}
+
+/*
+ * Runs under proff(): records the current continuation in exitjmp and
+ * then jumps to the jmp_buf passed in arg. A later longjmp(exitjmp, ...)
+ * makes setjmp return nonzero, so this function simply returns.
+ */
+static void
+profexitjmpfn(void *arg)
+{
+ /*
+ * we are now called by the profiling function on the profstack.
+ * save the current continuation so we can return here on exit.
+ */
+ if(!setjmp(exitjmp))
+ longjmp((long*)arg, 1); /* return from profme() */
+}
+
+/*
+ * Body executed on the profiling stack (see profme()): runs
+ * profexitjmpfn through proff() and exits the process once that
+ * returns. Never returns to its caller.
+ */
+static int
+profmeprofstack(void *arg)
+{
+ proff(profexitjmpfn, arg);
+ for(;;) die();
+}
+
+#pragma profile on
+
+static long *profstack;
+
+/*
+ * Set up the exit continuation on profstack: runs profmeprofstack()
+ * there via onstack() and returns when it jumps back to j.
+ */
+void
+profme(void)
+{
+ jmp_buf j;
+
+ if(!setjmp(j))
+ onstack(profstack, profmeprofstack, j);
+}
+
+
+/*
+ * Print "PANIC: " followed by the formatted message on fd 2.
+ * With debug set the process aborts; otherwise "kill" is written to
+ * the note group file of this process and the process exits
+ * with status "panic".
+ */
+static void
+vpanic(char *msg, va_list arg)
+{
+	char path[32];
+	int notefd;
+
+	fprint(2, "PANIC: ");
+	vfprint(2, msg, arg);
+	fprint(2, "\n");
+
+	if(debug)
+		abort();
+
+	snprint(path, sizeof(path), "/proc/%d/notepg", getpid());
+	notefd = open(path, OWRITE);
+	if(notefd >= 0){
+		write(notefd, "kill", 4);
+		close(notefd);
+	}
+	exits("panic");
+}
+
+/* Report a fatal error (print-style format and arguments) through vpanic(). */
+void
+panic(char *msg, ...)
+{
+ va_list arg;
+
+ va_start(arg, msg);
+ vpanic(msg, arg);
+ va_end(arg);
+}
+
+/* Print the command line synopsis on fd 2 and exit with status "usage". */
+void usage(void)
+{
+ fprint(2, "usage: linuxemu [-d] [-u uid] [-g gid] cmd [args]\n");
+ exits("usage");
+}
+
+/*
+ * Storage that main() keeps on its own stack and publishes through
+ * the profstack, kstack, pcurrent and exitjmp variables.
+ */
+struct mainstack
+{
+ long profstack[KSTACK / sizeof(long)];
+ long kstack[KSTACK / sizeof(long)];
+ Uproc *proc;
+ jmp_buf exitjmp;
+};
+
+/*
+ * Entry point: parse options, initialize the emulated devices and the
+ * first process, attach /dev/cons to descriptors 0-2 and exec the
+ * Linux program named on the command line.
+ */
+void main(int argc, char *argv[])
+{
+ struct mainstack ms;
+ int err;
+ int uid, gid;
+ int fd;
+
+ /* custom print verbs used by trace and error output */
+ fmtinstall('E', Efmt);
+ fmtinstall('S', Sfmt);
+
+ uid = 0;
+ gid = 0;
+ debug = 0;
+
+ ARGBEGIN {
+ case 'd':
+ debug++;
+ break;
+ case 'u':
+ uid = atoi(EARGF(usage()));
+ break;
+ case 'g':
+ gid = atoi(EARGF(usage()));
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ if(argc < 1)
+ usage();
+
+ rootdevinit();
+ procdevinit();
+ ptydevinit();
+ consdevinit();
+ dspdevinit();
+ miscdevinit();
+ sockdevinit();
+ pipedevinit();
+
+ /* the stacks, current-process slot and exit jmp_buf live in ms */
+ kstack = ms.kstack;
+ profstack = ms.profstack;
+ exitjmp = ms.exitjmp;
+ pcurrent = &ms.proc;
+ current = nil;
+
+ /* longjmp(exitjmp, ...) ends up here and terminates the process */
+ if(setjmp(exitjmp))
+ die();
+
+ initproc();
+ current->uid = uid;
+ current->gid = gid;
+
+ /* emulated console */
+ sys_close(0);
+ if((fd = sys_open("/dev/cons", O_RDWR, 0)) != 0)
+ fprint(2, "cant open console for stdin\n");
+ sys_close(1);
+ if(sys_dup(fd) != 1)
+ fprint(2, "cant dup stdout\n");
+ sys_close(2);
+ if(sys_dup(fd) != 2)
+ fprint(2, "cant dup stderr\n");
+
+ /* clear the descriptor flags of 0, 1 and 2 */
+ sys_fcntl(0, F_SETFD, 0);
+ sys_fcntl(1, F_SETFD, 0);
+ sys_fcntl(2, F_SETFD, 0);
+
+ err = sys_execve(*argv, argv, readenv());
+
+ /* only reached with the value sys_execve returned; report it and exit */
+ fprint(2, "%s: %E\n", *argv, err);
+ longjmp(exitjmp, 1);
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Range Range;
+typedef struct Area Area;
+typedef struct Filemap Filemap;
+typedef struct Futex Futex;
+typedef struct Seg Seg;
+typedef struct Space Space;
+
+/*
+ * Segment types, used as index into Space.seg[] and segname[].
+ * keep in order, lowest base address first
+ */
+enum {
+ SEGDATA,
+ SEGPRIVATE,
+ SEGSHARED,
+ SEGSTACK,
+ SEGMAX,
+};
+
+/* printable names of the segment types, indexed by SEG* */
+static char *segname[SEGMAX] = { "data", "private", "shared", "stack" };
+
+/* Address range; the code treats base as inclusive and top as exclusive. */
+struct Range
+{
+ ulong base;
+ ulong top;
+};
+
+/*
+ * Reference counted description of a file mapping, shared by the
+ * areas cut from it.
+ */
+struct Filemap
+{
+ Range addr; /* address range of the mapping */
+
+ Filemap *next; /* link in Seg.freefilemap */
+
+ char *path; /* freed when the last reference goes away */
+ ulong offset; /* file offset corresponding to addr.base */
+ int mode;
+ Ufile *file; /* written back through pwritefile() in syncarea() */
+
+ Ref;
+};
+
+/*
+ * Futex attached to an area; kept on the list headed by Area.futex.
+ */
+struct Futex
+{
+ ulong *addr; /* address the futex belongs to */
+
+ Futex *next; /* next futex on the same list */
+ Futex **link; /* the pointer that points at this futex; nil when unlinked */
+
+ Ref;
+ Uwaitq; /* woken with wakeq() when the futex is unlinked */
+};
+
+/* A mapped range with uniform protection inside a segment. */
+struct Area
+{
+ Range addr;
+
+ Area *next; /* next higher area */
+ Area *prev; /* previous lower area */
+ Seg *seg; /* segment we belong to */
+
+ int prot; /* PROT_* bits */
+
+ Filemap *filemap; /* nil unless backed by a file mapping */
+ Futex *futex; /* list of futexes inside this area */
+};
+
+/*
+ * A segment holding a list of areas.
+ * Reference counted; the QLock is taken around area list manipulation.
+ */
+struct Seg
+{
+ Ref;
+ QLock;
+
+ Range addr;
+ ulong limit; /* maximum address this segment can grow */
+
+ Area *areas; /* orderd by address */
+
+ int type; /* SEGDATA, SEGSHARED, SEGPRIVATE, SEGSTACK */
+
+ /* recycled structures, linked through their next fields */
+ Area *freearea;
+ Filemap *freefilemap;
+ Futex *freefutex;
+};
+
+/* An address space: the current break plus one optional segment per SEG* type. */
+struct Space
+{
+ Ref;
+ QLock;
+
+ ulong brk;
+ Seg *seg[SEGMAX];
+};
+
+
+/*
+ * malloc() that panics instead of returning nil.
+ * The block is tagged with the pc of our caller.
+ */
+void*
+kmalloc(int size)
+{
+ void *p;
+
+ p = malloc(size);
+ if(p == nil)
+ panic("kmalloc: out of memory");
+ setmalloctag(p, getcallerpc(&size));
+ return p;
+}
+/*
+ * realloc() that panics when a request with size > 0 fails.
+ * For size <= 0 the result of realloc() is returned unchecked.
+ */
+void*
+krealloc(void *ptr, int size)
+{
+ void *p;
+
+ p = realloc(ptr, size);
+ if(size > 0){
+ if(p == nil)
+ panic("krealloc: out of memory");
+ setmalloctag(p, getcallerpc(&ptr));
+ }
+ return p;
+}
+
+/*
+ * mallocz() that panics instead of returning nil.
+ * The block is tagged with the pc of our caller.
+ */
+void*
+kmallocz(int size, int zero)
+{
+ void *p;
+
+ p = mallocz(size, zero);
+ if(p == nil)
+ panic("kmallocz: out of memory");
+ setmalloctag(p, getcallerpc(&size));
+ return p;
+}
+
+/*
+ * Return a freshly allocated copy of the string s,
+ * tagged with the pc of our caller. Panics (via kmalloc) when out of memory.
+ */
+char*
+kstrdup(char *s)
+{
+	char *copy;
+	int len;
+
+	len = strlen(s);
+	copy = kmalloc(len+1);
+	memmove(copy, s, len);
+	copy[len] = 0;
+	setmalloctag(copy, getcallerpc(&s));
+	return copy;
+}
+
+/*
+ * Format into a freshly allocated string of exactly the needed size.
+ * Panics when out of memory. The result is tagged with the pc of our
+ * caller and must be released with free().
+ * vsmprint() sizes the buffer itself, so long results are no longer
+ * cut off at a fixed buffer length.
+ */
+char*
+ksmprint(char *fmt, ...)
+{
+	va_list args;
+	char *p;
+
+	va_start(args, fmt);
+	p = vsmprint(fmt, args);
+	va_end(args);
+	if(p == nil)
+		panic("ksmprint: out of memory");
+	setmalloctag(p, getcallerpc(&fmt));
+	return p;
+}
+
+/* Round addr up to the next multiple of PAGESIZE (unchanged if already aligned). */
+ulong
+pagealign(ulong addr)
+{
+	ulong mask;
+
+	mask = PAGESIZE;
+	mask -= 1;
+	addr += mask;
+	return addr & ~mask;
+}
+
+/*
+ * Write the part of range r that lies inside area a back to the file
+ * behind the area's file mapping. Nothing happens for areas without a
+ * file mapping, without an open file or without PROT_WRITE, and for
+ * ranges that do not intersect the area.
+ */
+static void
+syncarea(Area *a, Range r)
+{
+	Filemap *f;
+
+	if((f = a->filemap) == nil)
+		return;
+	if(f->file == nil)
+		return;
+	if((a->prot & PROT_WRITE) == 0)
+		return;
+
+	/* clamp r to the area and to the file mapping */
+	if(r.base < a->addr.base)
+		r.base = a->addr.base;
+	if(r.top > a->addr.top)
+		r.top = a->addr.top;
+	if(r.base < f->addr.base)
+		r.base = f->addr.base;
+	if(r.top > f->addr.top)
+		r.top = f->addr.top;
+
+	/* empty intersection: r.top - r.base would wrap to a huge length */
+	if(r.base >= r.top)
+		return;
+
+	pwritefile(f->file, (void*)r.base, r.top - r.base,
+		(r.base - f->addr.base) + f->offset);
+}
+
+/*
+ * Insert area a into seg's area list, which is kept sorted by
+ * base address, and set a->seg.
+ */
+static void
+linkarea(Seg *seg, Area *a)
+{
+ Area *p;
+
+ a->next = nil;
+ a->prev = nil;
+ a->seg = seg;
+
+ /* stop at the first area above a, or at the last area of the list */
+ for(p = seg->areas; p && p->next; p=p->next)
+ if(p->addr.base > a->addr.base)
+ break;
+ if(p != nil){
+ if(p->addr.base > a->addr.base){
+ /* insert in front of p */
+ a->next = p;
+ if(a->prev = p->prev)
+ a->prev->next = a;
+ p->prev = a;
+ } else {
+ /* p is the last area: append */
+ a->prev = p;
+ p->next = a;
+ }
+ }
+ /* no predecessor means a is the new list head */
+ if(a->prev == nil)
+ seg->areas = a;
+}
+
+/*
+ * Return an unlinked copy of area a (same range, protection and file
+ * mapping). The Area structure is taken from the free list of a's
+ * segment when possible. The file mapping gains a reference;
+ * futexes are not copied.
+ */
+static Area *
+duparea(Area *a)
+{
+	Area *copy;
+
+	copy = a->seg->freearea;
+	if(copy != nil)
+		a->seg->freearea = copy->next;
+	else
+		copy = kmalloc(sizeof(Area));
+
+	copy->addr = a->addr;
+	copy->next = nil;
+	copy->prev = nil;
+	copy->seg = nil;
+	copy->prot = a->prot;
+	copy->filemap = a->filemap;
+	if(copy->filemap != nil)
+		incref(copy->filemap);
+	copy->futex = nil;
+	return copy;
+}
+
+/*
+ * Remove area a from its segment: write back and drop its file
+ * mapping, wake and unlink all its futexes, unlink it from the area
+ * list and put it on the segment's free list.
+ */
+static void
+freearea(Area *a)
+{
+ Filemap *f;
+ Futex *x;
+ Seg *seg;
+
+ seg = a->seg;
+ if(f = a->filemap){
+ syncarea(a, a->addr);
+ a->filemap = nil;
+ /* last reference: release path and file, recycle the structure */
+ if(!decref(f)){
+ free(f->path);
+ putfile(f->file);
+ f->next = seg->freefilemap;
+ seg->freefilemap = f;
+ }
+ }
+ /* pop every futex off the area and wake its waiters */
+ while(x = a->futex){
+ if(a->futex = x->next)
+ x->next->link = &a->futex;
+ x->link = nil;
+ x->next = nil;
+ wakeq(x, MAXPROC);
+ }
+ /* unlink from the doubly linked area list */
+ if(a->prev == nil){
+ if(seg->areas = a->next)
+ a->next->prev = nil;
+ } else {
+ if(a->prev->next = a->next)
+ a->next->prev = a->prev;
+ }
+
+ a->next = seg->freearea;
+ seg->freearea = a;
+}
+
+/*
+ * Allocate a Seg of the given type covering addr with growth limit
+ * limit and reference count 1. When class is non-nil the range is
+ * first attached with segattach(attr, class, ...); failure to attach
+ * at exactly addr.base is fatal.
+ */
+static Seg *
+allocseg(int type, Range addr, ulong limit, int attr, char *class)
+{
+ Seg *seg;
+
+ if(class){
+ trace("allocseg(): segattach %s segment %lux-%lux", segname[type], addr.base, addr.top);
+ if(segattach(attr, class, (void*)addr.base, addr.top - addr.base) != (void*)addr.base)
+ panic("allocseg: segattach %s segment: %r", segname[type]);
+ }
+
+ seg = kmallocz(sizeof(Seg), 1);
+ seg->addr = addr;
+ seg->limit = limit;
+ seg->type = type;
+ seg->ref = 1;
+
+ return seg;
+}
+
+/*
+ * Duplicate a segment description. With copy == 0 the same Seg is
+ * returned with an extra reference. Otherwise a new Seg is allocated
+ * (without segattach) and every area of old is copied into it in order.
+ * Returns nil when old is nil.
+ */
+static Seg *
+dupseg(Seg *old, int copy)
+{
+ Seg *new;
+ Area *a, *p, *x;
+
+ if(old == nil)
+ return nil;
+ if(!copy){
+ incref(old);
+ return old;
+ }
+ new = allocseg(old->type, old->addr, old->limit, 0, nil);
+ /* p tracks the tail of the new list */
+ p = nil;
+ for(a=old->areas; a; a=a->next){
+ x = duparea(a);
+ x->seg = new;
+ if(x->prev = p){
+ p->next = x;
+ } else {
+ new->areas = x;
+ }
+ p = x;
+ }
+
+ return new;
+}
+
+/*
+ * Obtain a Space derived from old. With copy == 0 old itself is
+ * returned with an extra reference. Otherwise a new Space is built
+ * whose segments are copies of old's, except SEGSHARED which is
+ * shared by reference; brk is inherited.
+ */
+static Space *
+getspace(Space *old, int copy)
+{
+ Space *new;
+ Seg *seg;
+ int t;
+
+ if(!copy){
+ incref(old);
+ return old;
+ }
+
+ new = kmallocz(sizeof(Space), 1);
+ new->ref = 1;
+
+ qlock(old);
+ for(t=0; t<SEGMAX; t++){
+ if(seg = old->seg[t]){
+ qlock(seg);
+ new->seg[t] = dupseg(seg, t != SEGSHARED);
+ qunlock(seg);
+ }
+ }
+ new->brk = old->brk;
+ qunlock(old);
+
+ return new;
+}
+
+/*
+ * Drop a reference to space. When it was the last one, every segment
+ * is dereferenced (and torn down with all its areas and free lists if
+ * that was its last reference) and detached with segdetach(); failure
+ * to detach is fatal. Finally the Space itself is freed.
+ */
+static void
+putspace(Space *space)
+{
+ Seg *seg;
+ int t;
+ Area *a;
+ Filemap *f;
+ Futex *x;
+ void *addr;
+
+ if(decref(space))
+ return;
+ for(t=0; t<SEGMAX; t++){
+ if(seg = space->seg[t]){
+ /* remember the base: seg may be freed below */
+ addr = (void*)seg->addr.base;
+ if(!decref(seg)){
+ qlock(seg);
+ /* mark all areas as free */
+ while(a = seg->areas)
+ freearea(a);
+
+ /* clear the free lists */
+ while(a = seg->freearea){
+ seg->freearea = a->next;
+ free(a);
+ }
+ while(f = seg->freefilemap){
+ seg->freefilemap = f->next;
+ free(f);
+ }
+ while(x = seg->freefutex){
+ seg->freefutex = x->next;
+ free(x);
+ }
+ free(seg);
+ }
+ /* detached whether or not the Seg structure is still referenced */
+ if(segdetach(addr) < 0)
+ panic("putspace: segdetach %s segment: %r", segname[t]);
+ }
+ }
+ free(space);
+}
+
+/*
+ * Two areas can be merged when neither has a file mapping or
+ * futexes and both have the same protection.
+ */
+static int
+canmerge(Area *a, Area *b)
+{
+	if(a->filemap != nil || a->futex != nil)
+		return 0;
+	if(b->filemap != nil || b->futex != nil)
+		return 0;
+	return a->prot == b->prot;
+}
+
+/*
+ * Grow area a over its directly adjacent neighbours where
+ * canmerge() allows it; the absorbed neighbours are freed.
+ */
+static void
+mergearea(Area *a)
+{
+ if(a->prev && a->prev->addr.top == a->addr.base && canmerge(a->prev, a)){
+ a->addr.base = a->prev->addr.base;
+ freearea(a->prev);
+ }
+ if(a->next && a->next->addr.base == a->addr.top && canmerge(a->next, a)){
+ a->addr.top = a->next->addr.top;
+ freearea(a->next);
+ }
+}
+
+/*
+ * Look for unmapped space in seg.
+ * fixed != 0: succeed only if *r lies completely inside one hole;
+ *	*r is not modified.
+ * fixed == 0: pick the smallest hole that can take the size of *r
+ *	(stopping early on an exact fit) and place *r at the top of it.
+ * Returns 1 on success, 0 if nothing suitable exists.
+ */
+static int
+findhole(Seg *seg, Range *r, int fixed)
+{
+	Range hole;
+	Area *a;
+	ulong best, want, sz;
+
+	want = r->top - r->base;
+	best = ~0;
+	hole.base = seg->addr.base;
+	for(a = seg->areas;; a = a->next){
+		/* the hole ends at the next area or at the segment top */
+		hole.top = a ? a->addr.base : seg->addr.top;
+		if(hole.top > hole.base){
+			if(fixed){
+				if(hole.base > r->base)
+					break;
+				if((r->base >= hole.base) && (r->top <= hole.top))
+					return 1;
+			} else {
+				sz = hole.top - hole.base;
+				if((sz >= want) && (sz < best)){
+					r->base = hole.top - want;
+					r->top = hole.top;
+					best = sz;
+					if(best == want)
+						return 1;
+				}
+			}
+		}
+		if(a == nil)
+			break;
+		hole.base = a->addr.top;
+	}
+	if(fixed)
+		return 0;
+	return best != ~0;
+}
+
+/* wake up all futexes in range and unlink from area */
+/*
+ * Unlink every futex of area a whose address lies in [addr.base, addr.top)
+ * and wake its waiters.
+ */
+static void
+wakefutexarea(Area *a, Range addr)
+{
+ Futex *fu, *x;
+
+ for(fu = a->futex; fu; fu = x){
+ /* fetch the successor first: fu->next is cleared below */
+ x = fu->next;
+ if((ulong)fu->addr >= addr.base && (ulong)fu->addr < addr.top){
+ if(*fu->link = x)
+ x->link = fu->link;
+ fu->link = nil;
+ fu->next = nil;
+
+ trace("wakefutexarea: fu=%p addr=%p", fu, fu->addr);
+ wakeq(fu, MAXPROC);
+ }
+ }
+}
+
+/*
+ * Unmap range r from seg: every area overlapping r is trimmed, split
+ * or freed, futexes inside the removed part are woken, and the memory
+ * is released with segfree(). Failure of segfree() is fatal.
+ */
+static void
+makehole(Seg *seg, Range r)
+{
+ Area *a, *b, *x;
+ Range f;
+
+ for(a = seg->areas; a; a = x){
+ /* a may be freed or get a new successor below */
+ x = a->next;
+
+ if(a->addr.top <= r.base)
+ continue;
+ if(a->addr.base >= r.top)
+ break;
+
+ /* f = intersection of r with this area */
+ f = r;
+ if(f.base < a->addr.base)
+ f.base = a->addr.base;
+ if(f.top > a->addr.top)
+ f.top = a->addr.top;
+
+ wakefutexarea(a, f);
+ if(f.base == a->addr.base){
+ if(f.top == a->addr.top){
+ freearea(a);
+ } else {
+ a->addr.base = f.top;
+ }
+ } else if(f.top == a->addr.top){
+ a->addr.top = f.base;
+ } else {
+ /* hole in the middle: a keeps the lower part, b becomes the upper part */
+ b = duparea(a);
+ b->addr.base = f.top;
+
+ a->addr.top = f.base;
+ linkarea(seg, b);
+ }
+
+ if(segfree((void*)f.base, f.top - f.base) < 0)
+ panic("makehole: segfree %s segment: %r", segname[seg->type]);
+ }
+}
+
+/*
+ * Find the segment of space that contains addr.
+ * The segment is returned with its QLock held; the caller has to
+ * qunlock() it. Returns nil (nothing locked) if no segment matches.
+ */
+static Seg*
+addr2seg(Space *space, ulong addr)
+{
+ Seg *seg;
+ int t;
+
+ for(t=0; t<SEGMAX; t++){
+ if((seg = space->seg[t]) == nil)
+ continue;
+ qlock(seg);
+ if((addr >= seg->addr.base) && (addr < seg->addr.top))
+ return seg;
+ qunlock(seg);
+ }
+
+ return nil;
+}
+
+/* Return the area of seg that contains addr, or nil if addr is not mapped. */
+static Area*
+addr2area(Seg *seg, ulong addr)
+{
+	Area *a;
+
+	a = seg->areas;
+	while(a != nil){
+		if((addr >= a->addr.base) && (addr < a->addr.top))
+			break;
+		a = a->next;
+	}
+	return a;
+}
+
+/*
+ * Check that the len bytes starting at ptr are mapped in the current
+ * process with read (write == 0) or write (write != 0) permission.
+ * The range may span several adjacent areas of one segment.
+ * Addresses in the first page and ranges that wrap around the end of
+ * the address space (e.g. a negative len) are rejected.
+ * Returns 1 if the access is ok, 0 otherwise.
+ */
+int
+okaddr(void *ptr, int len, int write)
+{
+	ulong addr, end;
+	Space *space;
+	Seg *seg;
+	Area *a;
+	int ok;
+
+	ok = 0;
+	addr = (ulong)ptr;
+	end = addr + (ulong)len;
+	if(addr < PAGESIZE)
+		goto out;
+	/* wrapped range: end would compare below any area top */
+	if(end < addr)
+		goto out;
+	if(space = current->mem){
+		qlock(space);
+		if(seg = addr2seg(space, addr)){
+			/* walk the areas covering the range one by one */
+			while(a = addr2area(seg, addr)){
+				if(write){
+					if((a->prot & PROT_WRITE) == 0)
+						break;
+				} else {
+					if((a->prot & PROT_READ) == 0)
+						break;
+				}
+				if(end <= a->addr.top){
+					ok = 1;
+					break;
+				}
+				addr = a->addr.top;
+			}
+			qunlock(seg);
+		}
+		qunlock(space);
+	}
+out:
+	/* report the range that was asked for, not the walk position */
+	trace("okaddr(%lux-%lux, %d) -> %d", (ulong)ptr, end, write, ok);
+	return ok;
+}
+
+/*
+ * Unmap range r from every segment of space that overlaps it.
+ * Segments are visited in index order and the walk stops at the first
+ * segment that starts at or above r.top.
+ */
+static void
+unmapspace(Space *space, Range r)
+{
+	Seg *seg;
+	int t, above;
+
+	for(t=0; t<SEGMAX; t++){
+		seg = space->seg[t];
+		if(seg == nil)
+			continue;
+		qlock(seg);
+		above = (seg->addr.base >= r.top);
+		if(!above && seg->addr.top > r.base)
+			makehole(seg, r);
+		qunlock(seg);
+		if(above)
+			break;
+	}
+}
+
+/*
+ * Create a new area for range r with protection prot.
+ * The segment is chosen from flags and r.base: private mappings go to
+ * the stack, data or private segment, everything else to the shared
+ * segment. With MAP_FIXED the exact range is used (existing mappings
+ * in a non-shared segment are removed); otherwise r is only a hint and
+ * a hole, or space above the segment top, is chosen.
+ * The segment is grown with segbrk() when needed.
+ * On success the new area is returned and its segment stays locked.
+ * On failure nil is returned, nothing is locked and *perr (if given)
+ * is set to -ENOMEM.
+ */
+static Area*
+mapspace(Space *space, Range r, int flags, int prot, int *perr)
+{
+ Seg *seg;
+ Area *a;
+ Range f;
+ int t;
+
+ /* select the segment type */
+ if(flags & MAP_PRIVATE){
+ if(r.base >= space->seg[SEGSTACK]->addr.base){
+ t = SEGSTACK;
+ } else if(r.base >= space->seg[SEGDATA]->addr.base &&
+ r.base < space->seg[SEGDATA]->limit){
+ t = SEGDATA;
+ } else {
+ t = SEGPRIVATE;
+ }
+ } else {
+ t = SEGSHARED;
+ }
+
+ if((seg = space->seg[t]) == nil)
+ goto nomem;
+
+ qlock(seg);
+ /* try the requested address first */
+ if((r.base >= seg->addr.base) && (r.top <= seg->limit)){
+ if(r.base >= seg->addr.top)
+ goto addrok;
+
+ /* only the part below the current segment top can collide */
+ f = r;
+ if(f.top > seg->addr.top)
+ f.top = seg->addr.top;
+ if(findhole(seg, &f, 1))
+ goto addrok;
+ if(flags & MAP_FIXED){
+ if(seg->type == SEGSHARED){
+ trace("mapspace(): cant make hole %lux-%lux in shared segment",
+ f.base, f.top);
+ goto nomem;
+ }
+ makehole(seg, f);
+ goto addrok;
+ }
+ }
+
+ if(flags & MAP_FIXED){
+ trace("mapspace(): no free hole for fixed mapping %lux-%lux in %s segment",
+ r.base, r.top, segname[seg->type]);
+ goto nomem;
+ }
+
+ if(findhole(seg, &r, 0))
+ goto addrok;
+
+ /* no hole fits: place the range right above the segment top */
+ r.top -= r.base;
+ r.base = seg->addr.top;
+ r.top += r.base;
+
+addrok:
+ trace("mapspace(): addr %lux-%lux", r.base, r.top);
+
+ /* grow the segment if the range reaches beyond its top */
+ if(r.top > seg->addr.top){
+ if(r.top > seg->limit){
+ trace("mapspace(): area top %lux over %s segment limit %lux",
+ r.top, segname[seg->type], seg->limit);
+ goto nomem;
+ }
+ trace("mapspace(): segbrk %s segment %lux-%lux -> %lux",
+ segname[seg->type], seg->addr.base, seg->addr.top, r.top);
+ if(segbrk((void*)seg->addr.base, (void*)r.top) == (void*)-1){
+ trace("mapspace(): segbrk failed: %r");
+ goto nomem;
+ }
+ seg->addr.top = r.top;
+ }
+
+ /* reuse a recycled Area if there is one */
+ if(a = seg->freearea){
+ seg->freearea = a->next;
+ } else {
+ a = kmalloc(sizeof(Area));
+ }
+ a->addr = r;
+ a->prot = prot;
+ a->filemap = nil;
+ a->futex = nil;
+
+ linkarea(seg, a);
+
+ /* keep seg locked */
+ return a;
+
+nomem:
+ if(seg != nil)
+ qunlock(seg);
+ if(perr) *perr = -ENOMEM;
+ return nil;
+}
+
+/*
+ * Move the break of space to bk and return the resulting break.
+ * Shrinking unmaps the pages between the new and the old break;
+ * growing maps new anonymous read/write/exec pages, unless another
+ * mapping is in the way. If the request cannot be satisfied the
+ * break is left unchanged and the old value is returned.
+ */
+static ulong
+brkspace(Space *space, ulong bk)
+{
+ Seg *seg;
+ Area *a;
+ ulong old, new;
+ Range r;
+
+ if((seg = space->seg[SEGDATA]) == nil)
+ goto out;
+
+ qlock(seg);
+ /* break not set up yet: start at the top of the data segment */
+ if(space->brk < seg->addr.base)
+ space->brk = seg->addr.top;
+
+ if(bk < seg->addr.base)
+ goto out;
+
+ old = pagealign(space->brk);
+ new = pagealign(bk);
+
+ /* mappings only change when the break crosses a page boundary */
+ if(old != new){
+ if(bk < space->brk){
+ r.base = new;
+ r.top = old;
+ /* unmapspace() takes the segment locks itself */
+ qunlock(seg);
+ seg = nil;
+
+ unmapspace(space, r);
+ } else {
+ r.base = old;
+ r.top = new;
+
+ trace("brkspace(): new mapping %lux-%lux", r.base, r.top);
+ /* refuse if an existing area overlaps or closely follows the new range */
+ for(a = addr2area(seg, old - PAGESIZE); a; a = a->next){
+ if(a->addr.top <= r.base)
+ continue;
+ if(a->addr.base > r.top + PAGESIZE)
+ break;
+
+ trace("brkspace(): mapping %lux-%lux is in the way", a->addr.base, a->addr.top);
+ goto out;
+ }
+ /* mapspace() locks the segment itself */
+ qunlock(seg);
+ seg = nil;
+
+ a = mapspace(space, r,
+ MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED,
+ PROT_READ|PROT_WRITE|PROT_EXEC, nil);
+
+ if(a == nil)
+ goto out;
+
+ /* mapspace() returned with a->seg locked; unlocked at out */
+ seg = a->seg;
+ mergearea(a);
+ }
+ }
+
+ if(space->brk != bk){
+ trace("brkspace: set new brk %lux", bk);
+ space->brk = bk;
+ }
+
+out:
+ if(seg != nil)
+ qunlock(seg);
+
+ return space->brk;
+}
+
+/*
+ * Resize the mapping at addr from oldlen to newlen bytes (mremap).
+ * Shrinking unmaps the tail; growing is only done in place, when the
+ * area ends at addr+oldlen and nothing is in the way. Moving a
+ * mapping is not implemented.
+ * Returns addr on success or a negated errno value (converted to
+ * ulong): -EINVAL for bad arguments, -EFAULT if the old range is not
+ * mapped, -ENOMEM if the mapping would have to move.
+ */
+static ulong
+remapspace(Space *space, ulong addr, ulong oldlen, ulong newlen, ulong newaddr, int flags)
+{
+ Area *a;
+ Seg *seg;
+ int move;
+ Range r;
+
+ if(pagealign(addr) != addr)
+ return -EINVAL;
+
+ oldlen = pagealign(oldlen);
+ newlen = pagealign(newlen);
+
+ /* reject ranges that wrap around, and a zero new length */
+ if((addr + oldlen) < addr)
+ return -EINVAL;
+ if((addr + newlen) <= addr)
+ return -EINVAL;
+
+ move = 0;
+ if(flags & MREMAP_FIXED){
+ if(pagealign(newaddr) != newaddr)
+ return -EINVAL;
+ if((flags & MREMAP_MAYMOVE) == 0)
+ return -EINVAL;
+ /* old and new range must not overlap */
+ if((newaddr <= addr) && ((newaddr+newlen) > addr))
+ return -EINVAL;
+ if((addr <= newaddr) && ((addr+oldlen) > newaddr))
+ return -EINVAL;
+ move = (newaddr != addr);
+ }
+
+ if(newlen < oldlen){
+ r.base = addr + newlen;
+ r.top = addr + oldlen;
+
+ unmapspace(space, r);
+
+ oldlen = newlen;
+ }
+
+ if((newlen == oldlen) && !move)
+ return addr;
+
+ /* addr2seg() returns the segment locked */
+ if((seg = addr2seg(space, addr)) == nil)
+ return -EFAULT;
+
+ if((a = addr2area(seg, addr)) == nil)
+ goto fault;
+ if(a->addr.top < (addr + oldlen))
+ goto fault;
+
+ /* conditions under which growing in place is impossible */
+ if(move)
+ goto domove;
+ if((addr + oldlen) != a->addr.top)
+ goto domove;
+ if((addr + newlen) > seg->limit)
+ goto domove;
+ if(a->next != nil)
+ if((addr + newlen) > a->next->addr.base)
+ goto domove;
+
+ if((addr + newlen) > seg->addr.top){
+ trace("remapspace(): segbrk %s segment %lux-%lux -> %lux",
+ segname[seg->type], seg->addr.base, seg->addr.top, (addr + newlen));
+ if(segbrk((void*)seg->addr.base, (void*)(addr + newlen)) == (void*)-1){
+ trace("remapspace(): segbrk: %r");
+ goto domove;
+ }
+
+ seg->addr.top = (addr + newlen);
+ }
+ a->addr.top = (addr + newlen);
+ mergearea(a);
+ qunlock(seg);
+
+ return addr;
+
+domove:
+ trace("remapspace(): domove not implemented");
+ if(seg != nil)
+ qunlock(seg);
+ return -ENOMEM;
+
+fault:
+ if(seg != nil)
+ qunlock(seg);
+ return -EFAULT;
+}
+
+/*
+ * syncspace calls syncarea() with range r on the areas of the segment
+ * that contains r.base, starting at the area addr2area() finds for
+ * r.base and following the area list.
+ */
+static void
+syncspace(Space *space, Range r)
+{
+ Seg *seg;
+ Area *a;
+
+ if(seg = addr2seg(space, r.base)){
+ for(a = addr2area(seg, r.base); a; a=a->next){
+ /*
+ * NOTE(review): this stops on an area that ends at or below
+ * r.base. sys_mprotect ends its walk on areas that start at
+ * or beyond the end of the range; check whether the test here
+ * was meant to be "a->addr.base >= r.top".
+ */
+ if(r.base >= a->addr.top)
+ break;
+ syncarea(a, r);
+ }
+ /* presumably addr2seg returned the segment locked - confirm */
+ qunlock(seg);
+ }
+}
+
+/*
+ * mapstack maps size bytes (rounded up to whole pages) of anonymous
+ * read/write memory directly below the top of the linux stack segment.
+ * returns the address just past the new mapping, or nil when the
+ * mapping could not be established.
+ */
+void*
+mapstack(int size)
+{
+	Space *space;
+	ulong base, a;
+
+	space = current->mem;
+	size = pagealign(size);
+	base = space->seg[SEGSTACK]->addr.top - size;
+	a = sys_mmap(base, size,
+		PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
+
+	/*
+	 * sys_mmap reports failure as a negated errno cast to ulong, never
+	 * as 0, so testing for 0 alone lets every error through. the
+	 * request is MAP_FIXED, so a successful mapping is expected to sit
+	 * exactly at base; anything else is treated as failure.
+	 */
+	if(a == 0 || a != base)
+		return nil;
+
+	return (void*)(a + size);
+}
+
+/*
+ * mapdata makes sure every segment slot of the current address space is
+ * populated. missing segments are allocated as one page placeholders:
+ * SEGDATA at base (page aligned), SEGPRIVATE 0x10000000 above base and
+ * SEGSHARED 0x10000000 below the page under the stack segment. the
+ * limit of each segment is set to one page below the base of the
+ * segment in the following slot.
+ */
+void
+mapdata(ulong base)
+{
+ Space *space;
+ Range r;
+ ulong top;
+ int t;
+
+ space = current->mem;
+ base = pagealign(base);
+ top = space->seg[SEGSTACK]->addr.base - PAGESIZE;
+
+ for(t=0; t<SEGMAX; t++){
+ if(space->seg[t] == nil){
+ /*
+ * NOTE(review): there is no case for any other slot (e.g.
+ * SEGSTACK); r.base would be used uninitialized if such a
+ * slot were nil. the stack segment is dereferenced above, so
+ * it is expected to exist already.
+ */
+ switch(t){
+ case SEGDATA:
+ r.base = base;
+ break;
+ case SEGPRIVATE:
+ r.base = base + 0x10000000;
+ break;
+ case SEGSHARED:
+ r.base = top - 0x10000000;
+ break;
+ }
+ r.top = r.base + PAGESIZE;
+ space->seg[t] = allocseg(t, r, r.top, 0, (t == SEGSHARED) ? "shared" : "memory");
+ }
+ /* the previous segment may grow up to one page below this one */
+ if(t > 0 && space->seg[t-1])
+ space->seg[t-1]->limit = space->seg[t]->addr.base - PAGESIZE;
+ }
+}
+
+/*
+ * unmapuserspace is called from kprocfork to get rid of the linux
+ * memory segments of the calling process before current is set to
+ * nil. every segment is only segdetach()ed; the Space and Seg data
+ * structures stay valid for the calling (linux) process.
+ */
+void
+unmapuserspace(void)
+{
+	Space *sp;
+	Seg *s;
+	int i;
+
+	sp = current->mem;
+	qlock(sp);
+	for(i = 0; i < SEGMAX; i++){
+		s = sp->seg[i];
+		if(s != nil && segdetach((void*)s->addr.base) < 0)
+			panic("unmapuserspace: segdetach %s segment: %r", segname[s->type]);
+	}
+	qunlock(sp);
+}
+
+/* hack:
+ * we write the segment out into a file, detach it, attach a new
+ * one of the given class at the same address and read the contents
+ * back. i'm surprised that this even works seamlessly with the
+ * Plan9 Bss! :-)
+ *
+ * r is the address range of the segment, attr and class are handed to
+ * segattach(). failures before the detach panic; after the detach the
+ * process is crashed with a nil store instead.
+ */
+static void
+convertseg(Range r, ulong attr, char *class)
+{
+ char name[64];
+ ulong p;
+ int n;
+ int fd;
+ ulong len;
+
+ /* ORCLOSE: the temporary file is removed when fd is closed */
+ snprint(name, sizeof(name), "/tmp/seg%s%d", class, getpid());
+ fd = create(name, ORDWR|ORCLOSE, 0600);
+ if(fd < 0)
+ panic("convertseg: cant create %s: %r", name);
+
+ len = r.top - r.base;
+
+ if(len > 0){
+ n = write(fd, (void*)r.base, len);
+ if(n != len)
+ panic("convertseg: write: %r");
+ }
+
+ /* copy string to stack because its memory gets detached :-) */
+ /* NOTE(review): strncpy leaves name unterminated if class fills all 64 bytes */
+ strncpy(name, class, sizeof(name));
+
+ trace("detaching %lux-%lux", r.base, r.top);
+
+ /* point of no return */
+ if(segdetach((void*)r.base) < 0)
+ panic("convertseg: segdetach: %r");
+ /* deliberate nil store: crash right here if the segment cannot be reattached */
+ if(segattach(attr, name, (void*)r.base, len) != (void*)r.base)
+ *((int*)0) = 0;
+
+ p = 0;
+ while(p < len) {
+ /*
+ * we use pread directly to avoid hitting profiling code until
+ * data segment is read back again. pread is unprofiled syscall
+ * stub.
+ */
+ n = pread(fd, (void*)(r.base + p), len - p, (vlong)p);
+ if(n <= 0)
+ *((int*)0) = 0;
+ p += n;
+ }
+
+ /* anything normal again */
+ trace("segment %lux-%lux reattached as %s", r.base, r.top, class);
+
+ close(fd);
+}
+
+/*
+ * initmem creates a fresh Space for the current process. it walks
+ * /proc/<pid>/segment; on the very first call the plan9 Data and Bss
+ * segments are converted to "shared" segments. the linux stack segment
+ * is placed one page below the plan9 Stack segment.
+ */
+void initmem(void)
+{
+ Space *space;
+ Range r, x;
+ char buf[80];
+ int fd;
+ int n;
+
+ static int firsttime = 1;
+
+ space = kmallocz(sizeof(Space), 1);
+ space->ref = 1;
+
+ snprint(buf, sizeof(buf), "/proc/%d/segment", getpid());
+ if((fd = open(buf, OREAD)) < 0)
+ panic("initspace: cant open %s: %r", buf);
+
+ /* records are read in fixed chunks of n bytes and cut apart at fixed offsets */
+ n = 10 + 9 + 9 + 4 + 1;
+ x.base = x.top = 0;
+ while(readn(fd, buf, n)==n){
+ char *name;
+
+ buf[8] = 0;
+ buf[18] = 0;
+ buf[28] = 0;
+ buf[33] = 0;
+
+ /* name at offset 0, base and top as hex at offsets 9 and 19 */
+ name = &buf[0];
+ r.base = strtoul(&buf[9], nil, 16);
+ r.top = strtoul(&buf[19], nil, 16);
+
+ trace("initspace(): %s %lux-%lux", name, r.base, r.top);
+
+ if(firsttime){
+ /*
+ * convert Plan9 data+bss segments into shared segments so
+ * that the memory of emulator data structures gets shared across
+ * all processes. This only happens if initspace() is called the first time.
+ */
+ if(strstr(name, "Data")==name)
+ convertseg(r, 0, "shared");
+ if(strstr(name, "Bss")==name)
+ convertseg(r, 0, "shared");
+ }
+
+ if(strstr(name, "Stack")==name){
+ /* linux stack range: ends one page below the plan9 stack */
+ x.top = r.base - PAGESIZE;
+ x.base = x.top - pagealign((MAXPROC / 4) * USTACK);
+
+ /* later calls only need the stack position */
+ if(!firsttime)
+ break;
+ }
+ }
+ close(fd);
+ firsttime = 0;
+
+ /* allocate the linux stack */
+ space->seg[SEGSTACK] = allocseg(SEGSTACK, x, x.top, 0, "memory");
+
+ current->mem = space;
+}
+
+/* exitmem detaches the current process from its address space and drops the reference. */
+void exitmem(void)
+{
+	Space *sp;
+
+	sp = current->mem;
+	if(sp == nil)
+		return;
+	current->mem = nil;
+	putspace(sp);
+}
+
+/*
+ * clonemem gives new the result of getspace() on the address space of
+ * the current process; copy is handed through to getspace(). if the
+ * current process has no address space, new gets none either.
+ */
+void clonemem(Uproc *new, int copy)
+{
+	Space *parent;
+
+	parent = current->mem;
+	if(parent != nil)
+		new->mem = getspace(parent, copy);
+	else
+		new->mem = nil;
+}
+
+/*
+ * procmemstat adds up the sizes of all areas mapped in proc's address
+ * space. the out parameters (each may be nil) receive the byte totals
+ * of the areas in the data, private, shared and stack segments, and
+ * pexe the total of all areas mapped with PROT_EXEC. returns the size
+ * of all areas together, or 0 if proc has no address space.
+ */
+ulong procmemstat(Uproc *proc, ulong *pdat, ulong *plib, ulong *pshr, ulong *pstk, ulong *pexe)
+{
+	Space *space;
+	ulong size, z;
+	int i;
+
+	if(pdat) *pdat = 0;
+	if(plib) *plib = 0;
+	if(pshr) *pshr = 0;
+	if(pstk) *pstk = 0;
+	if(pexe) *pexe = 0;
+
+	if((space = proc->mem) == nil)
+		return 0;
+
+	size = 0;
+	qlock(space);
+	for(i=0; i<SEGMAX; i++){
+		Area *a;
+		Seg *seg;
+
+		if((seg = space->seg[i]) == nil)
+			continue;
+		qlock(seg);
+		for(a = seg->areas; a; a = a->next){
+			z = a->addr.top - a->addr.base;
+			switch(i){
+			case SEGDATA:
+				if(pdat)
+					*pdat += z;
+				/* was missing: data areas were also counted in *plib */
+				break;
+			case SEGPRIVATE:
+				if(plib)
+					*plib += z;
+				break;
+			case SEGSHARED:
+				if(pshr)
+					*pshr += z;
+				break;
+			case SEGSTACK:
+				if(pstk)
+					*pstk += z;
+				break;
+			}
+			if(pexe && (a->prot & PROT_EXEC))
+				*pexe += z;
+			size += z;
+		}
+		qunlock(seg);
+	}
+	qunlock(space);
+
+	return size;
+}
+
+/*
+ * argument block handed by pointer to sys_linux_mmap(). the fields are
+ * passed on to sys_mmap() in this order; offset is a byte offset that
+ * must be page aligned.
+ */
+struct linux_mmap_args {
+ ulong addr;
+ int len;
+ int prot;
+ int flags;
+ int fd;
+ ulong offset;
+};
+
+/*
+ * sys_linux_mmap unpacks a struct linux_mmap_args and forwards it to
+ * sys_mmap(), converting the byte offset into a page offset. an
+ * offset that is not page aligned yields -EINVAL.
+ */
+ulong
+sys_linux_mmap(void *a)
+{
+	struct linux_mmap_args *args;
+	ulong pgoff;
+
+	args = a;
+	if(pagealign(args->offset) != args->offset)
+		return -EINVAL;
+
+	pgoff = args->offset / PAGESIZE;
+	return sys_mmap(args->addr, args->len, args->prot, args->flags, args->fd, pgoff);
+}
+
+/*
+ * sys_mmap maps len bytes (rounded up to whole pages) at addr into the
+ * address space of the current process. for file mappings the file
+ * contents starting at page offset pgoff are read into the new memory
+ * and a Filemap describing the file is attached to the area.
+ * returns the base address of the mapping or a negated errno cast to
+ * ulong.
+ */
+ulong
+sys_mmap(ulong addr, ulong len, int prot, int flags, int fd, ulong pgoff)
+{
+ Space *space;
+ Seg *seg;
+ Range r;
+ ulong o;
+ int e, n;
+ Area *a;
+ Filemap *f;
+ Ufile *file;
+
+ trace("sys_mmap(%lux, %lux, %d, %d, %d, %lux)", addr, len, prot, flags, fd, pgoff);
+
+ if(pagealign(addr) != addr)
+ return (ulong)-EINVAL;
+
+ /* also rejects a zero length and ranges that wrap around */
+ r.base = addr;
+ r.top = addr + pagealign(len);
+ if(r.top <= r.base)
+ return (ulong)-EINVAL;
+
+ file = nil;
+ if((flags & MAP_ANONYMOUS)==0)
+ if((file = fdgetfile(fd))==nil)
+ return (ulong)-EBADF;
+
+ space = current->mem;
+ qlock(space);
+ if((a = mapspace(space, r, flags, prot, &e)) == nil){
+ qunlock(space);
+ /* file is nil for anonymous mappings; putfile is called with nil elsewhere too */
+ putfile(file);
+ return (ulong)e;
+ }
+
+ /* presumably mapspace returns with a->seg locked; it is unlocked below - confirm */
+ seg = a->seg;
+ r = a->addr;
+
+ if(flags & MAP_ANONYMOUS){
+ mergearea(a);
+ qunlock(seg);
+ qunlock(space);
+
+ return r.base;
+ }
+
+ o = pgoff * PAGESIZE;
+
+ /* reuse a Filemap from the segment's free list if there is one */
+ if(f = seg->freefilemap)
+ seg->freefilemap = f->next;
+ if(f == nil)
+ f = kmalloc(sizeof(Filemap));
+ f->ref = 1;
+ f->addr = r;
+ f->next = nil;
+ f->path = kstrdup(file->path);
+ f->offset = o;
+ /* a reference to the file is only kept when it was not opened read-only */
+ if((f->mode = file->mode) != O_RDONLY){
+ f->file = getfile(file);
+ } else {
+ f->file = nil;
+ }
+ a->filemap = f;
+ qunlock(seg);
+ qunlock(space);
+
+ trace("map %s [%lux-%lux] at [%lux-%lux]", file->path, o, o + (r.top - r.base), r.base, r.top);
+
+ /*
+ * read the file contents into the mapping. a short file or a read
+ * error just ends the loop; the mapping itself is still returned.
+ */
+ addr = r.base;
+ while(addr < r.top){
+ n = preadfile(file, (void*)addr, r.top - addr, o);
+ if(n == 0)
+ break;
+ if(n < 0){
+ trace("read failed at offset %lux for address %lux failed: %r", o, addr);
+ break;
+ }
+ addr += n;
+ o += n;
+ }
+
+ putfile(file);
+
+ return r.base;
+}
+
+/*
+ * sys_munmap removes the mappings in [addr, addr+len) from the current
+ * address space; len is rounded up to whole pages. returns 0, or
+ * -EINVAL for an unaligned addr or an empty/wrapping range.
+ */
+int sys_munmap(ulong addr, ulong len)
+{
+	Space *sp;
+	Range rng;
+
+	trace("sys_munmap(%lux, %lux)", addr, len);
+
+	if(addr != pagealign(addr))
+		return -EINVAL;
+
+	rng.base = addr;
+	rng.top = addr + pagealign(len);
+	if(rng.base >= rng.top)
+		return -EINVAL;
+
+	sp = current->mem;
+	qlock(sp);
+	unmapspace(sp, rng);
+	qunlock(sp);
+
+	return 0;
+}
+
+/*
+ * sys_brk runs brkspace() on the current address space with the space
+ * lock held and returns whatever break address brkspace() reports.
+ */
+ulong
+sys_brk(ulong bk)
+{
+	Space *sp;
+	ulong cur;
+
+	trace("sys_brk(%lux)", bk);
+
+	sp = current->mem;
+	qlock(sp);
+	cur = brkspace(sp, bk);
+	qunlock(sp);
+
+	return cur;
+}
+
+/*
+ * sys_mprotect sets the protection of the areas overlapping
+ * [addr, addr+len) to prot. areas that reach beyond the range are
+ * split so that the parts outside keep their old protection.
+ * returns 0 if at least one area was visited, -ENOMEM if none,
+ * -EINVAL for an unaligned addr or zero length.
+ */
+int sys_mprotect(ulong addr, ulong len, int prot)
+{
+ Space *space;
+ Seg *seg;
+ Area *a, *b;
+ int err;
+
+ trace("sys_mprotect(%lux, %lux, %lux)", addr, len, (ulong)prot);
+
+ len = pagealign(len);
+ if(pagealign(addr) != addr)
+ return -EINVAL;
+ if(len == 0)
+ return -EINVAL;
+
+ err = -ENOMEM;
+ space = current->mem;
+ qlock(space);
+ if(seg = addr2seg(space, addr)){
+ for(a = addr2area(seg, addr); a!=nil; a=a->next){
+ /* stop at the first area that starts beyond the range */
+ if(addr + len <= a->addr.base)
+ break;
+ err = 0;
+ if(a->prot == prot)
+ continue;
+ wakefutexarea(a, a->addr);
+ /* split off the part below addr; the copy b keeps the old protection */
+ if(a->addr.base < addr){
+ b = duparea(a);
+ a->addr.base = addr;
+ b->addr.top = addr;
+ linkarea(seg, b);
+ }
+ /* split off the part above addr+len the same way */
+ if(a->addr.top > addr + len){
+ b = duparea(a);
+ a->addr.top = addr + len;
+ b->addr.base = addr + len;
+ linkarea(seg, b);
+ }
+ trace("%lux-%lux %lux -> %lux", a->addr.base, a->addr.top, (ulong)a->prot, (long)prot);
+ a->prot = prot;
+ }
+ qunlock(seg);
+ }
+ qunlock(space);
+
+ return err;
+}
+
+/*
+ * sys_msync runs syncspace() over [addr, addr+len) of the current
+ * address space; len is rounded up to whole pages and flags is only
+ * traced. returns 0, or -EINVAL for an unaligned addr or an
+ * empty/wrapping range.
+ */
+int sys_msync(ulong addr, ulong len, int flags)
+{
+	Space *sp;
+	Range rng;
+
+	trace("sys_msync(%lux, %lux, %x)", addr, len, flags);
+
+	if(addr != pagealign(addr))
+		return -EINVAL;
+
+	rng.base = addr;
+	rng.top = addr + pagealign(len);
+	if(rng.base >= rng.top)
+		return -EINVAL;
+
+	sp = current->mem;
+	qlock(sp);
+	syncspace(sp, rng);
+	qunlock(sp);
+
+	return 0;
+}
+
+/*
+ * sys_mremap runs remapspace() on the current address space with the
+ * space lock held and returns its result: the address of the resized
+ * mapping or a negated errno.
+ */
+ulong
+sys_mremap(ulong addr, ulong oldlen, ulong newlen, int flags, ulong newaddr)
+{
+	Space *space;
+	ulong r;	/* same type as remapspace's result; it was squeezed through an int before */
+
+	trace("sys_mremap(%lux, %lux, %lux, %x, %lux)",
+		addr, oldlen, newlen, flags, newaddr);
+
+	space = current->mem;
+	qlock(space);
+	r = remapspace(space, addr, oldlen, newlen, newaddr, flags);
+	qunlock(space);
+
+	return r;
+}
+
+/*
+ * operation codes for the op argument of sys_futex(), numbered from 0.
+ * sys_futex() has no case for FUTEX_FD, so it ends in -ENOSYS.
+ */
+enum {
+ FUTEX_WAIT,
+ FUTEX_WAKE,
+ FUTEX_FD,
+ FUTEX_REQUEUE,
+ FUTEX_CMP_REQUEUE,
+};
+
+/*
+ * sys_futex implements the FUTEX_WAIT, FUTEX_WAKE, FUTEX_REQUEUE and
+ * FUTEX_CMP_REQUEUE operations on the word at addr. Futex structures
+ * hang off the area that contains addr and are recycled through the
+ * segment's freefutex list. returns the number of woken (plus
+ * requeued) waiters for the wake operations, 0 or a negated errno
+ * for FUTEX_WAIT, -ENOSYS for other operations and -EFAULT when addr
+ * is not mapped.
+ */
+int sys_futex(ulong *addr, int op, int val, void *ptime, ulong *addr2, int val3)
+{
+ Space *space;
+ Seg *seg;
+ Area *a;
+ Futex *fu, *fu2;
+ int err, val2;
+ vlong timeout;
+
+ trace("sys_futex(%p, %d, %d, %p, %p, %d)", addr, op, val, ptime, addr2, val3);
+
+ seg = nil;
+ err = -EFAULT;
+ if((space = current->mem) == 0)
+ goto out;
+
+ /* presumably addr2seg returns the segment locked; it is unlocked at out - confirm */
+ qlock(space);
+ if((seg = addr2seg(space, (ulong)addr)) == nil){
+ qunlock(space);
+ goto out;
+ }
+ qunlock(space);
+ if((a = addr2area(seg, (ulong)addr)) == nil)
+ goto out;
+ /* look for an existing futex on this address; fu stays nil if there is none */
+ for(fu = a->futex; fu; fu = fu->next)
+ if(fu->addr == addr)
+ break;
+
+ switch(op){
+ case FUTEX_WAIT:
+ trace("sys_futex(): FUTEX_WAIT futex=%p addr=%p", fu, addr);
+
+ if(fu == nil){
+ /* first waiter: take a Futex from the free list or allocate one */
+ if(fu = seg->freefutex){
+ seg->freefutex = fu->next;
+ } else {
+ fu = kmallocz(sizeof(Futex), 1);
+ }
+ fu->ref = 1;
+ fu->addr = addr;
+ /* push on the area's list; link points at the pointer that refers to fu */
+ if(fu->next = a->futex)
+ fu->next->link = &fu->next;
+ fu->link = &a->futex;
+ a->futex = fu;
+ } else {
+ incref(fu);
+ }
+
+ err = 0;
+ timeout = 0;
+ if(ptime != nil){
+ struct linux_timespec *ts = ptime;
+ vlong now;
+
+ wakeme(1);
+ now = nsec();
+ /* a restarted call keeps the deadline saved by the interrupted one */
+ if(current->restart->syscall){
+ timeout = current->restart->futex.timeout;
+ } else {
+ timeout = now + (vlong)ts->tv_sec * 1000000000LL + ts->tv_nsec;
+ }
+ if(now < timeout){
+ current->timeout = timeout;
+ setalarm(timeout);
+ } else {
+ err = -ETIMEDOUT;
+ }
+ }
+ if(err == 0){
+ /* only sleep if the futex word still holds the expected value */
+ if(*addr != val){
+ err = -EWOULDBLOCK;
+ } else {
+ err = sleepq(fu, seg, 1);
+ }
+ }
+ if(ptime != nil){
+ current->timeout = 0;
+ wakeme(0);
+ }
+ if(err == -ERESTART)
+ current->restart->futex.timeout = timeout;
+
+ /* last reference gone: unlink from the area and put on the free list */
+ if(!decref(fu)){
+ if(fu->link){
+ if(*fu->link = fu->next)
+ fu->next->link = fu->link;
+ fu->link = nil;
+ fu->next = nil;
+ }
+ fu->next = seg->freefutex;
+ seg->freefutex = fu;
+ }
+ break;
+
+ case FUTEX_WAKE:
+ trace("sys_futex(): FUTEX_WAKE futex=%p addr=%p", fu, addr);
+ err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+ break;
+
+ case FUTEX_CMP_REQUEUE:
+ trace("sys_futex(): FUTEX_CMP_REQUEUE futex=%p addr=%p", fu, addr);
+ /*
+ * the FUTEX_REQUEUE label sits inside this if block: a compare
+ * that succeeds skips the block and continues with the shared
+ * code below, FUTEX_REQUEUE jumps into the block past the break.
+ */
+ if(*addr != val3){
+ err = -EAGAIN;
+ break;
+ case FUTEX_REQUEUE:
+ trace("sys_futex(): FUTEX_REQUEUE futex=%p addr=%p", fu, addr);
+ }
+ err = fu ? wakeq(fu, val < 0 ? 0 : val) : 0;
+ if(err > 0){
+ /* for the requeue operations ptime carries the requeue count */
+ val2 = (int)ptime;
+
+ /* BUG: fu2 has to be in the same segment as fu */
+ if(a = addr2area(seg, (ulong)addr2)){
+ for(fu2 = a->futex; fu2; fu2 = fu2->next){
+ if(fu2->addr == addr2){
+ err += requeue(fu, fu2, val2);
+ break;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ err = -ENOSYS;
+ }
+
+out:
+ if(seg)
+ qunlock(seg);
+ return err;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <mp.h>
+#include <libsec.h>
+
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* kinds of pseudo device files served by this driver; Mmax counts them */
+enum
+{
+ Mnull,
+ Mzero,
+ Mfull,
+ Mrandom,
+ Murandom,
+ Mmax,
+};
+
+/* open file of this driver: a Ufile plus the kind of device (M* constant) */
+typedef struct Miscfile Miscfile;
+struct Miscfile
+{
+ Ufile;
+ int m;
+};
+
+/* path2m maps a device path to its M* constant; unknown paths give -1. */
+static int
+path2m(char *path)
+{
+	if(strcmp(path, "/dev/null") == 0)
+		return Mnull;
+	if(strcmp(path, "/dev/zero") == 0)
+		return Mzero;
+	if(strcmp(path, "/dev/full") == 0)
+		return Mfull;
+	if(strcmp(path, "/dev/random") == 0)
+		return Mrandom;
+	if(strcmp(path, "/dev/urandom") == 0)
+		return Murandom;
+	return -1;
+}
+
+/*
+ * openmisc creates a Miscfile for one of the known device paths and
+ * returns it through pf. returns 0, or -ENOENT for an unknown path.
+ */
+static int
+openmisc(char *path, int mode, int, Ufile **pf)
+{
+	Miscfile *mf;
+	int kind;
+
+	kind = path2m(path);
+	if(kind < 0)
+		return -ENOENT;
+
+	mf = kmallocz(sizeof(*mf), 1);
+	mf->m = kind;
+	mf->dev = MISCDEV;
+	mf->fd = -1;
+	mf->path = kstrdup(path);
+	mf->mode = mode;
+	mf->ref = 1;
+
+	*pf = mf;
+	return 0;
+}
+
+/* closemisc does nothing with the file and always returns 0. */
+static int
+closemisc(Ufile *)
+{
+ return 0;
+}
+
+/*
+ * readmisc fills buf with len bytes according to the kind of device:
+ * null reads as end of file, zero and full deliver zero bytes, random
+ * uses genrandom() and urandom uses prng(). returns the number of
+ * bytes delivered or -EIO for an unknown kind.
+ */
+static int
+readmisc(Ufile *f, void *buf, int len, vlong)
+{
+	switch(((Miscfile*)f)->m){
+	case Mnull:
+		return 0;
+	case Mzero:
+	case Mfull:
+		/* linux /dev/full reads like /dev/zero; only writing to it fails */
+		memset(buf, 0, len);
+		return len;
+	case Mrandom:
+		genrandom(buf, len);
+		return len;
+	case Murandom:
+		prng(buf, len);
+		return len;
+	default:
+		return -EIO;
+	}
+}
+
+/*
+ * writemisc discards the data: every device accepts all len bytes
+ * except full, which answers -ENOSPC. unknown kinds give -EIO.
+ */
+static int
+writemisc(Ufile *f, void *, int len, vlong)
+{
+	int kind;
+
+	kind = ((Miscfile*)f)->m;
+	if(kind == Mfull)
+		return -ENOSPC;
+	if(kind == Mnull || kind == Mzero || kind == Mrandom || kind == Murandom)
+		return len;
+	return -EIO;
+}
+
+/*
+ * statmisc fills s for one of the known device paths: a character
+ * device with mode 0666 owned by the current process, all times set
+ * to the boot time in seconds. returns 0 or -ENOENT.
+ */
+static int
+statmisc(char *path, int, Ustat *s)
+{
+	int kind;
+
+	kind = path2m(path);
+	if(kind < 0)
+		return -ENOENT;
+
+	s->ino = hashpath(path);
+	s->dev = 0;
+	s->rdev = 0;
+	s->size = 0;
+	s->mode = 0666 | S_IFCHR;
+	s->uid = current->uid;
+	s->gid = current->gid;
+	s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+
+	return 0;
+}
+
+/* fstatmisc stats an open file by handing its path to fsstat(). */
+static int
+fstatmisc(Ufile *f, Ustat *s)
+{
+ return fsstat(f->path, 0, s);
+};
+
+/* operations table of this driver; registered by miscdevinit() */
+static Udev miscdev =
+{
+ .open = openmisc,
+ .read = readmisc,
+ .write = writemisc,
+ .close = closemisc,
+ .stat = statmisc,
+ .fstat = fstatmisc,
+};
+
+/*
+ * miscdevinit registers the driver in devtab, mounts it on each of
+ * its device paths and seeds rand() from truerand().
+ */
+void miscdevinit(void)
+{
+	static char *paths[] = {
+		"/dev/null",
+		"/dev/zero",
+		"/dev/full",
+		"/dev/random",
+		"/dev/urandom",
+	};
+	int i;
+
+	devtab[MISCDEV] = &miscdev;
+
+	for(i = 0; i < nelem(paths); i++)
+		fsmount(&miscdev, paths[i]);
+
+	srand(truerand());
+}
--- /dev/null
+</$objtype/mkfile
+
+TARG=linuxemu
+BIN=$home/bin/$objtype
+RCBIN=$home/bin/rc
+CFLAGS=-FTVw
+
+OFILES=\
+ bits.$O \
+ bufproc.$O \
+ error.$O \
+ exec.$O \
+ file.$O \
+ fs.$O \
+ main.$O \
+ mem.$O \
+ poll.$O \
+ proc.$O \
+ signal.$O \
+ stat.$O \
+ time.$O \
+ tls.$O \
+ trace.$O \
+ trap.$O \
+ linuxcall.$O \
+ consdev.$O \
+ dspdev.$O \
+ miscdev.$O \
+ pipedev.$O \
+ ptydev.$O \
+ rootdev.$O \
+ sockdev.$O \
+ procdev.$O \
+
+
+HFILES=fns.h dat.h linux.h
+
+CLEANFILES=linuxcalltab.out linuxdat.acid
+
+</sys/src/cmd/mkone
+
+linuxcalltab.out: linuxcalltab linuxcalltab.awk
+ ./linuxcalltab.awk <linuxcalltab >$target
+
+linuxcall.$O: linuxcalltab.out
+
+linuxdat.acid: $HFILES main.c trace.c signal.c mem.c file.c
+ rm -f $target
+ for(i in main.c){
+ $CC -a $i >>$target
+ }
+ for(i in bufproc.c error.c exec.c file.c fs.c mem.c poll.c \
+ proc.c signal.c stat.c time.c tls.c trace.c trap.c \
+ consdev.c dspdev.c miscdev.c pipedev.c \
+ ptydev.c rootdev.c sockdev.c procdev.c){
+ $CC -aa $i >>$target
+ }
+
+$RCBIN/linux: linux
+ cp linux $RCBIN/linux
+
+acid:V: linuxdat.acid
+
+install:V: $RCBIN/linux
+
+
+
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * one end of a pipe: a Ufile whose fd is the plan9 pipe descriptor.
+ * bufproc is created on demand by bufprocpipe(); atime and mtime are
+ * updated by reads and writes that transfer data; ino is the number
+ * handed out by sys_pipe().
+ */
+typedef struct Pipe Pipe;
+
+struct Pipe
+{
+ Ufile;
+ void *bufproc;
+ ulong atime;
+ ulong mtime;
+ int ino;
+};
+
+/* largest chunk pipewrite() hands to a single write() call */
+enum{
+ Maxatomic = 64*1024,
+};
+
+/*
+ * pipewrite writes len bytes from buf to fd in chunks of at most
+ * Maxatomic bytes. returns the number of bytes written. an error, or
+ * a refusal from notifyme() (-ERESTART), is only returned when nothing
+ * was written yet; otherwise the partial count is returned.
+ */
+int
+pipewrite(int fd, void *buf, int len)
+{
+	uchar *src;
+	int done, chunk, r;
+
+	src = buf;
+	done = 0;
+	while(done < len){
+		chunk = len - done;
+		if(chunk > Maxatomic)
+			chunk = Maxatomic;
+
+		if(notifyme(1)){
+			r = -ERESTART;
+		} else {
+			r = write(fd, src + done, chunk);
+			notifyme(0);
+			if(r < 0)
+				r = mkerror();
+		}
+
+		if(r < 0)
+			return (done == 0) ? r : done;
+		done += r;
+	}
+	return done;
+}
+
+/* closepipe closes the plan9 descriptor and frees the buffer process. */
+static int
+closepipe(Ufile *file)
+{
+	Pipe *p;
+
+	p = (Pipe*)file;
+	close(p->fd);
+	freebufproc(p->bufproc);
+	return 0;
+}
+
+/* bufprocpipe returns the pipe's buffer process, creating it on first use. */
+static void*
+bufprocpipe(Pipe *pipe)
+{
+	void *bp;
+
+	bp = pipe->bufproc;
+	if(bp == nil){
+		bp = newbufproc(pipe->fd);
+		pipe->bufproc = bp;
+	}
+	return bp;
+}
+
+/* pollpipe polls the pipe through its buffer process. */
+static int
+pollpipe(Ufile *file, void *tab)
+{
+	Pipe *p;
+	void *bp;
+
+	p = (Pipe*)file;
+	bp = bufprocpipe(p);
+	return pollbufproc(bp, p, tab);
+}
+
+/*
+ * readpipe reads up to len bytes. non-blocking pipes, and pipes that
+ * already have a buffer process, read through the buffer process;
+ * otherwise the plan9 descriptor is read directly. atime is updated
+ * when data was read. returns the byte count or a negated errno.
+ */
+static int
+readpipe(Ufile *file, void *buf, int len, vlong)
+{
+	Pipe *p;
+	int nonblock, n;
+
+	p = (Pipe*)file;
+	nonblock = p->mode & O_NONBLOCK;
+
+	if(nonblock || p->bufproc != nil){
+		n = readbufproc(bufprocpipe(p), buf, len, 0, nonblock);
+	} else {
+		if(notifyme(1))
+			return -ERESTART;
+		n = read(p->fd, buf, len);
+		notifyme(0);
+		if(n < 0)
+			n = mkerror();
+	}
+
+	if(n > 0)
+		p->atime = time(nil);
+	return n;
+}
+
+/* writepipe writes through pipewrite() and updates mtime when data was written. */
+static int
+writepipe(Ufile *file, void *buf, int len, vlong)
+{
+	Pipe *p;
+	int n;
+
+	p = (Pipe*)file;
+	n = pipewrite(p->fd, buf, len);
+	if(n > 0)
+		p->mtime = time(nil);
+	return n;
+}
+
+/*
+ * ioctlpipe only knows command 0x541B: it stores the count reported
+ * by nreadablebufproc() in the int arg points to (0 on error).
+ * returns 0, the error from nreadablebufproc(), -EINVAL for a nil arg
+ * or -ENOTTY for any other command.
+ */
+static int
+ioctlpipe(Ufile *file, int cmd, void *arg)
+{
+	Pipe *p;
+	int avail;
+
+	p = (Pipe*)file;
+
+	if(cmd != 0x541B)
+		return -ENOTTY;
+	if(arg == nil)
+		return -EINVAL;
+
+	avail = nreadablebufproc(bufprocpipe(p));
+	if(avail < 0){
+		*((int*)arg) = 0;
+		return avail;
+	}
+	*((int*)arg) = avail;
+	return 0;
+}
+
+/*
+ * sys_pipe creates a plan9 pipe and wraps both ends in Pipe files
+ * registered with newfd(). the two linux descriptors are stored in
+ * fds[0] and fds[1]. returns 0 or a negated errno.
+ */
+int sys_pipe(int *fds)
+{
+	Pipe *file;
+	int p[2];
+	int i, fd;
+	static int ino = 0x1234;
+
+	trace("sys_pipe(%p)", fds);
+
+	if(pipe(p) < 0)
+		return mkerror();
+
+	for(i=0; i<2; i++){
+		file = kmallocz(sizeof(Pipe), 1);
+		file->ref = 1;
+		file->mode = O_RDWR;
+		file->dev = PIPEDEV;
+		file->fd = p[i];
+		file->ino = ino++;
+		file->atime = file->mtime = time(nil);
+		if((fd = newfd(file, 0)) < 0){
+			/*
+			 * once the first end is registered its Pipe owns p[0]
+			 * and sys_close() is expected to close it through
+			 * closepipe(). closing p[0] a second time here could
+			 * hit a descriptor that was reused in the meantime.
+			 */
+			if(i > 0)
+				sys_close(fds[0]);
+			else
+				close(p[0]);
+			close(p[1]);
+			/*
+			 * NOTE(review): the Pipe that newfd() rejected is not
+			 * freed here; whether newfd() releases it on failure
+			 * is not visible from this file.
+			 */
+			return fd;
+		}
+		fds[i] = fd;
+	}
+	return 0;
+}
+
+/*
+ * fillstat describes a pipe as a fifo with mode 0666 and size 0,
+ * owned by the current process, using the times and inode number
+ * kept in the Pipe.
+ */
+static void
+fillstat(Pipe *pipe, Ustat *s)
+{
+	s->mode = 0666 | S_IFIFO;
+	s->size = 0;
+	s->ino = pipe->ino;
+	s->atime = pipe->atime;
+	s->mtime = pipe->mtime;
+	s->uid = current->uid;
+	s->gid = current->gid;
+}
+
+/* fstatpipe fills s for an open pipe end; it cannot fail. */
+static int
+fstatpipe(Ufile *file, Ustat *s)
+{
+	fillstat((Pipe*)file, s);
+	return 0;
+}
+
+/* operations table of the pipe driver; there is no open or stat entry */
+static Udev pipedev =
+{
+ .read = readpipe,
+ .write = writepipe,
+ .poll = pollpipe,
+ .close = closepipe,
+ .ioctl = ioctlpipe,
+ .fstat = fstatpipe,
+};
+
+/* pipedevinit registers the pipe driver in devtab. */
+void pipedevinit(void)
+{
+ devtab[PIPEDEV] = &pipedev;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * pollwait puts the current process on wait queue q on behalf of file
+ * f and pushes the new wait entry on the list whose head t points to.
+ * the entry holds a reference to f. nothing happens if any argument
+ * is nil.
+ */
+void pollwait(Ufile *f, Uwaitq *q, void *t)
+{
+	Uwait **head, *w;
+
+	if(f != nil && t != nil && q != nil){
+		head = t;
+		w = addwaitq(q);
+		w->file = getfile(f);
+		w->next = *head;
+		*head = w;
+	}
+}
+
+/* clearpoll pops every wait entry off the list at *p and hands it to delwaitq(). */
+static void
+clearpoll(Uwait **p)
+{
+	Uwait *w;
+
+	for(;;){
+		w = *p;
+		if(w == nil)
+			break;
+		*p = w->next;
+		delwaitq(w);
+	}
+}
+
+/*
+ * one entry of the array passed to sys_poll(): events is the
+ * requested mask, revents is filled in by sys_poll().
+ */
+struct linux_pollfd
+{
+ int fd;
+ short events;
+ short revents;
+};
+
+/*
+ * sys_poll checks the nfd descriptors in the linux_pollfd array p and
+ * sleeps until one of them is ready, the timeout (milliseconds; a
+ * negative value means no timeout) expires or the sleep is
+ * interrupted. returns the number of entries with non-zero revents,
+ * 0 on timeout, or the negative result of sleepproc().
+ */
+int sys_poll(void *p, int nfd, long timeout)
+{
+ int i, e, err;
+ Uwait *tab;
+ Ufile *file;
+ vlong now, t;
+ struct linux_pollfd *fds = p;
+
+ trace("sys_poll(%p, %d, %ld)", p, nfd, timeout);
+
+ if(nfd < 0)
+ return -EINVAL;
+
+ t = 0;
+ wakeme(1);
+ if(timeout > 0){
+ now = nsec();
+ /* a restarted call keeps the deadline saved by the interrupted one */
+ if(current->restart->syscall){
+ t = current->restart->poll.timeout;
+ } else {
+ t = now + timeout*1000000LL;
+ }
+ if(now < t){
+ current->timeout = t;
+ setalarm(t);
+ }
+ }
+
+ tab = nil;
+ for(;;){
+ /* drop the wait entries of the previous round */
+ clearpoll(&tab);
+
+ err = 0;
+ for(i=0; i<nfd; i++){
+ e = 0;
+ /* negative descriptors are ignored and report no events */
+ if(fds[i].fd >= 0){
+ e = POLLNVAL;
+ if(file = fdgetfile(fds[i].fd)){
+ /* devices without a poll function are always ready */
+ if(devtab[file->dev]->poll == nil){
+ e = POLLIN|POLLOUT;
+ } else {
+ /* once something is ready no more wait entries are collected */
+ e = devtab[file->dev]->poll(file, (err == 0) ? &tab : nil);
+ }
+ putfile(file);
+ e &= fds[i].events | POLLERR | POLLHUP;
+ }
+ }
+ if(fds[i].revents = e){
+ trace("sys_poll(): fd %d is ready with %x", fds[i].fd, fds[i].revents);
+ err++;
+ }
+ }
+ if(err > 0)
+ break;
+ /* current->timeout is zero when no deadline is pending (any more) */
+ if(timeout >= 0 && current->timeout == 0){
+ trace("sys_poll(): timeout");
+ break;
+ }
+ if((err = sleepproc(nil, 1)) < 0){
+ trace("sys_poll(): interrupted");
+ current->restart->poll.timeout = t;
+ break;
+ }
+ }
+ clearpoll(&tab);
+ wakeme(0);
+
+ if(timeout > 0)
+ current->timeout = 0;
+
+ return err;
+}
+
+/*
+ * sys_select waits until one of the descriptors below nfd selected in
+ * the bit sets rfd, wfd and efd (each may be nil) is ready, the
+ * timeout in ptv expires or the sleep is interrupted. on return the
+ * sets hold the ready descriptors and, if ptv is not nil, the
+ * timeval holds the time left. returns the number of bits set in the
+ * result sets or a negative error.
+ */
+int sys_select(int nfd, ulong *rfd, ulong *wfd, ulong *efd, void *ptv)
+{
+ int i, p, e, w, nwrd, nbits, fd, err;
+ ulong m;
+ Uwait *tab;
+ Ufile *file;
+ vlong now, t;
+ struct linux_timeval *tv = ptv;
+ struct {
+ int fd;
+ int ret;
+ } *ardy, astk[16];
+
+ trace("sys_select(%d, %p, %p, %p, %p)", nfd, rfd, wfd, efd, ptv);
+
+ if(nfd < 0)
+ return -EINVAL;
+
+ if(tv != nil)
+ if(tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+ return -EINVAL;
+
+ /* number of ulong words needed to cover nfd bits */
+ nwrd = (nfd + (8 * sizeof(m))-1) / (8 * sizeof(m));
+
+ /* count the descriptors selected in any set: upper bound for ready ones */
+ nbits = 0;
+ for(w=0; w<nwrd; w++)
+ for(m=1; m; m<<=1)
+ if((rfd && rfd[w] & m) || (wfd && wfd[w] & m) || (efd && efd[w] & m))
+ nbits++;
+
+ /* ready descriptors are collected on the stack unless there may be too many */
+ if(nbits > nelem(astk)){
+ ardy = kmalloc(nbits * sizeof(ardy[0]));
+ } else {
+ ardy = astk;
+ }
+
+ t = 0;
+ wakeme(1);
+ if(tv != nil){
+ now = nsec();
+ /* a restarted call keeps the deadline saved by the interrupted one */
+ if(current->restart->syscall){
+ t = current->restart->select.timeout;
+ } else {
+ t = now + tv->tv_sec*1000000000LL + tv->tv_usec*1000;
+ }
+ if(now < t){
+ current->timeout = t;
+ setalarm(t);
+ }
+ }
+
+ tab = nil;
+ for(;;){
+ /* drop the wait entries of the previous round */
+ clearpoll(&tab);
+
+ fd = 0;
+ err = 0;
+ for(w=0; w<nwrd; w++){
+ for(m=1; m; m<<=1, fd++){
+ p = 0;
+ if(rfd && rfd[w] & m)
+ p |= POLLIN;
+ if(wfd && wfd[w] & m)
+ p |= POLLOUT;
+ if(efd && efd[w] & m)
+ p |= POLLERR;
+ /* descriptors that are not selected or not open are skipped */
+ if(!p || ((file = fdgetfile(fd)) == nil))
+ continue;
+ /* devices without a poll function are always ready */
+ if(devtab[file->dev]->poll == nil){
+ e = POLLIN|POLLOUT;
+ } else {
+ /* once something is ready no more wait entries are collected */
+ e = devtab[file->dev]->poll(file, (err == 0) ? &tab : nil);
+ }
+ putfile(file);
+ if(e &= p) {
+ ardy[err].fd = fd;
+ ardy[err].ret = e;
+ /* this only leaves the inner loop; fd is not advanced for the rest of the word */
+ if(++err == nbits)
+ break;
+ }
+ }
+ }
+ if(err > 0)
+ break;
+ if(tv != nil && current->timeout == 0){
+ trace("sys_select(): timeout");
+ break;
+ }
+ if((err = sleepproc(nil, 1)) < 0){
+ trace("sys_select(): interrupted");
+ current->restart->select.timeout = t;
+ break;
+ }
+ }
+ clearpoll(&tab);
+ wakeme(0);
+
+ /* report the remaining time back to the caller */
+ if(tv != nil){
+ current->timeout = 0;
+ t -= nsec();
+ if(t < 0)
+ t = 0;
+ tv->tv_sec = (long)(t/1000000000LL);
+ tv->tv_usec = (long)((t%1000000000LL)/1000);
+ }
+
+ /* rebuild the sets from the collected results and count the bits set */
+ if(err >= 0){
+ if(rfd) memset(rfd, 0, nwrd*sizeof(m));
+ if(wfd) memset(wfd, 0, nwrd*sizeof(m));
+ if(efd) memset(efd, 0, nwrd*sizeof(m));
+
+ nbits = 0;
+ for(i=0; i<err; i++){
+ e = ardy[i].ret;
+ fd = ardy[i].fd;
+ w = fd / (8 * sizeof(m));
+ m = 1 << (fd % (8 * sizeof(m)));
+ if(rfd && (e & POLLIN)){
+ rfd[w] |= m;
+ nbits++;
+ }
+ if(wfd && (e & POLLOUT)){
+ wfd[w] |= m;
+ nbits++;
+ }
+ if(efd && (e & POLLERR)){
+ efd[w] |= m;
+ nbits++;
+ }
+ }
+ err = nbits;
+ }
+
+ if(ardy != astk)
+ free(ardy);
+
+ return err;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+static int timernotefd;
+static void timerproc(void*);
+
+/* pidhash maps a pid to its slot index in proctab.proc. */
+static int
+pidhash(int pid)
+{
+	int slot;
+
+	slot = (pid - 1) % MAXPROC;
+	return slot;
+}
+
+/* getproc returns the process with thread id tid, or nil if there is none. */
+Uproc*
+getproc(int tid)
+{
+	Uproc *p;
+
+	if(tid <= 0)
+		return nil;
+	p = &proctab.proc[pidhash(tid)];
+	return (p->tid == tid) ? p : nil;
+}
+
+/* getprocn returns the process in slot n of proctab.proc, or nil if the slot is unused. */
+Uproc*
+getprocn(int n)
+{
+	Uproc *p;
+
+	p = &proctab.proc[n];
+	return (p->tid > 0) ? p : nil;
+}
+
+/*
+ * allocproc takes pids from proctab.nextpid until one hashes to an
+ * unused slot, giving up after MAXPROC tries. the new process gets
+ * the pid as tid, pid, pgid and psid. returns nil if no slot is free.
+ */
+static Uproc*
+allocproc(void)
+{
+	Uproc *slot;
+	int id, tries;
+
+	for(tries = MAXPROC; tries > 0; tries--){
+		id = proctab.nextpid++;
+		slot = &proctab.proc[pidhash(id)];
+		if(slot->tid > 0)
+			continue;
+
+		proctab.alloc++;
+		slot->tid = id;
+		slot->pid = id;
+		slot->pgid = id;
+		slot->psid = id;
+		return slot;
+	}
+
+	trace("allocproc(): out of processes");
+	return nil;
+}
+
+/*
+ * freeproc releases what hangs off a process slot (cached wait
+ * entries, trace state, strings), clears the slot and lowers the
+ * count of allocated processes.
+ */
+static void
+freeproc(Uproc *p)
+{
+	Uwait *w;
+
+	for(w = p->freewait; w != nil; w = p->freewait){
+		p->freewait = w->next;
+		free(w);
+	}
+
+	exittrace(p);
+
+	free(p->comm);
+	free(p->root);
+	free(p->cwd);
+	free(p->kcwd);
+
+	memset(p, 0, sizeof(*p));
+	proctab.alloc--;
+}
+
+/*
+ * initproc sets up the first process: it allocates its slot, opens
+ * its plan9 note and args files, runs the init functions of the other
+ * subsystems, forks the timer process and records the working
+ * directory and start time.
+ */
+void initproc(void)
+{
+ Uproc *p;
+ char buf[1024];
+ int pid;
+
+ /* pids are handed out starting at 10 */
+ proctab.nextpid = 10;
+
+ /* NOTE(review): the results of allocproc() and open() are not checked here */
+ p = allocproc();
+ p->kpid = getpid();
+ snprint(buf, sizeof(buf), "/proc/%d/note", p->kpid);
+ p->notefd = open(buf, OWRITE);
+ snprint(buf, sizeof(buf), "/proc/%d/args", p->kpid);
+ p->argsfd = open(buf, ORDWR);
+
+ current = p;
+
+ inittrace();
+ inittime();
+ initsignal();
+ initmem();
+ inittls();
+ initfile();
+
+ if((pid = procfork(timerproc, nil, 0)) < 0)
+ panic("initproc: unable to fork timerproc: %r");
+
+ /* setalarm() pokes the timer process through this note file */
+ snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+ timernotefd = open(buf, OWRITE);
+
+ current->root = nil;
+ current->cwd = kstrdup(getwd(buf, sizeof(buf)));
+ current->kcwd = kstrdup(current->cwd);
+ current->linkloop = 0;
+ current->starttime = nsec();
+
+ inittrap();
+}
+
+/*
+ * setprocname writes s to the args file of the calling plan9 process:
+ * through current->argsfd when there is a current process, otherwise
+ * by opening /proc/<pid>/args for the duration of the write.
+ */
+void
+setprocname(char *s)
+{
+	char buf[32];
+	int fd;
+
+	if(current != nil){
+		write(current->argsfd, s, strlen(s));
+		return;
+	}
+
+	snprint(buf, sizeof(buf), "/proc/%d/args", getpid());
+	fd = open(buf, OWRITE);
+	if(fd < 0)
+		return;
+	write(fd, s, strlen(s));
+	close(fd);
+}
+
+/*
+ * intrnote is the note handler installed by kprocfork(): notes that
+ * start with "interrupt" are answered with NCONT, everything else
+ * with NDFLT.
+ */
+static void
+intrnote(void *, char *msg)
+{
+ if(strncmp(msg, "interrupt", 9) == 0)
+ noted(NCONT);
+ noted(NDFLT);
+}
+
+/* arguments procfork() hands to kprocfork(): extra rfork flags and the function to run */
+struct kprocforkargs
+{
+ int flags;
+ void (*func)(void *aux);
+ void *aux;
+};
+
+/*
+ * kprocfork does the rfork for procfork(). the parent returns the
+ * result of rfork(). the child installs intrnote, detaches the linux
+ * segments, clears current, runs func(aux) and then leaves through
+ * longjmp(exitjmp, 1).
+ */
+static int
+kprocfork(void *arg)
+{
+ struct kprocforkargs args;
+ int pid;
+
+ /* take a private copy of the arguments before forking */
+ memmove(&args, arg, sizeof(args));
+
+ if((pid = rfork(RFPROC|RFMEM|args.flags)) != 0)
+ return pid;
+
+ notify(intrnote);
+
+ unmapuserspace();
+ current = nil;
+
+ profme();
+ args.func(args.aux);
+ longjmp(exitjmp, 1);
+ return -1;
+}
+
+/*
+ * procfork starts a kernel process running func(aux) on kstack.
+ * that process has the linux memory segments (stack, private,
+ * shared) unmapped but shares the plan9 segments (text, bss, stack).
+ * there is no Uproc associated with it! current is set to nil, so
+ * you can't call sys_????() functions in there.
+ * procfork returns the plan9 pid (useful for posting notes).
+ */
+int procfork(void (*func)(void *aux), void *aux, int flags)
+{
+	struct kprocforkargs a;
+
+	a.func = func;
+	a.aux = aux;
+	a.flags = flags;
+
+	return onstack(kstack, kprocfork, &a);
+}
+
+/* value the rendezvous() results are compared against to detect an interrupted call */
+static void *Intr = (void*)~0;
+
+/*
+ * values of Uproc.state set by the sleeping side. they are compared
+ * by address, not by content.
+ */
+static char Notifyme[] = "notifyme";
+static char Wakeme[] = "wakeme";
+static char Xchange[] = "xchange";
+
+/* messages that can be delivered by wakeup(); Abort is not used in the code shown here */
+static char Wakeup[] = "wakeup";
+static char Abort[] = "abort";
+
+/*
+ * notifyme(1) marks the current process as willing to be notified
+ * (state Notifyme, unless some other state is already set) and
+ * returns 1 without changing anything if it was already notified or
+ * has signals pending. notifyme(0) clears the state. returns 0 in all
+ * other cases.
+ */
+int notifyme(int on)
+{
+	Uproc *p;
+	int pending;
+
+	p = current;
+	pending = 0;
+
+	qlock(p);
+	if(!on)
+		p->state = nil;
+	else if(p->notified || signalspending(p))
+		pending = 1;
+	else if(p->state == nil)
+		p->state = Notifyme;
+	qunlock(p);
+
+	return pending;
+}
+
+/*
+ * wakeme(1) puts the current process into state Wakeme unless some
+ * other state is already set; wakeme(0) clears the state.
+ */
+void wakeme(int on)
+{
+	Uproc *p;
+
+	p = current;
+
+	qlock(p);
+	if(!on)
+		p->state = nil;
+	else if(p->state == nil)
+		p->state = Wakeme;
+	qunlock(p);
+}
+
+/*
+ * sleepproc puts the current process to sleep until wakeup() delivers
+ * a message. l, if not nil, is unlocked while sleeping and locked
+ * again before returning. with flags set the sleep is not entered
+ * when signals are pending. returns 0 if the message was Wakeup,
+ * -ERESTART otherwise.
+ */
+int sleepproc(QLock *l, int flags)
+{
+ Uproc *p;
+ void *ret;
+ char *x;
+
+ p = current;
+ qlock(p);
+ x = p->state;
+ if(x == nil || x == Wakeme){
+ /* nothing delivered yet: remember the old state and wait in rendezvous */
+ p->xstate = x;
+ p->state = Xchange;
+ if(l != nil)
+ qunlock(l);
+ qunlock(p);
+ if(flags && signalspending(p)){
+ ret = Intr;
+ } else {
+ ret = rendezvous(p, Xchange);
+ }
+ if(ret == Intr){
+ qlock(p);
+ /*
+ * if the state is no longer Xchange, wakeup() has already
+ * committed to the rendezvous, so it has to be completed;
+ * otherwise the sleep is simply undone.
+ */
+ if(p->state != Xchange){
+ while((ret = rendezvous(p, Xchange)) == Intr)
+ ;
+ } else {
+ p->state = x;
+ }
+ qunlock(p);
+ }
+ if(l != nil)
+ qlock(l);
+ } else {
+ /* the state already holds something else: take it as the result without sleeping */
+ p->state = Wakeme;
+ ret = x;
+ qunlock(p);
+ }
+ return (ret == Wakeup) ? 0 : -ERESTART;
+}
+
+/*
+ * wakeup delivers message m to proc. a process in state Wakeme gets m
+ * stored as its state; a process sleeping in rendezvous (Xchange)
+ * gets m through rendezvous(). messages other than Wakeup can also
+ * interrupt a process that is in state Notifyme, or any process if
+ * force is set, by writing "interrupt" to its note file.
+ * returns 1 if the message was delivered, 0 otherwise.
+ */
+static int
+wakeup(Uproc *proc, char *m, int force)
+{
+ char *x;
+
+ if(proc != nil){
+ qlock(proc);
+ x = proc->state;
+
+ if(x == Wakeme){
+ proc->state = m;
+ qunlock(proc);
+ return 1;
+ }
+ if(x == Xchange){
+ /* restore the state sleepproc() saved, then meet the sleeper */
+ proc->state = proc->xstate;
+ proc->xstate = nil;
+ qunlock(proc);
+ while(rendezvous(proc, m) == Intr)
+ ;
+ return 1;
+ }
+ /* at most one note is outstanding at a time, see proc->notified */
+ if((m != Wakeup) && (proc->notified == 0)){
+ if(x == Notifyme)
+ proc->state = nil;
+ if(x == Notifyme || force){
+ proc->notified = 1;
+ qunlock(proc);
+ write(proc->notefd, "interrupt", 9);
+ return 1;
+ }
+ }
+ qunlock(proc);
+ }
+ return 0;
+}
+
+/*
+ * addwaitq puts the current process on wait queue q. the wait entry
+ * comes from the process's freewait list or is freshly allocated and
+ * is pushed on the front of the queue. returns the entry.
+ */
+Uwait* addwaitq(Uwaitq *q)
+{
+	Uproc *self;
+	Uwait *w;
+
+	self = current;
+	w = self->freewait;
+	if(w != nil)
+		self->freewait = w->next;
+	else
+		w = kmalloc(sizeof(*w));
+
+	w->next = nil;
+	w->proc = self;
+	w->file = nil;
+	w->q = q;
+
+	qlock(q);
+	w->nextq = q->w;
+	q->w = w;
+	qunlock(q);
+
+	return w;
+}
+
+/*
+ * delwaitq removes wait entry w from its queue, drops the file
+ * reference it holds and puts the entry on the freewait list of the
+ * current process.
+ */
+void delwaitq(Uwait *w)
+{
+	Uwaitq *q;
+	Uwait **lp;
+
+	q = w->q;
+	qlock(q);
+	lp = &q->w;
+	while(*lp != nil && *lp != w)
+		lp = &(*lp)->nextq;
+	if(*lp == w)
+		*lp = w->nextq;
+	qunlock(q);
+
+	w->q = nil;
+	w->nextq = nil;
+	w->proc = nil;
+
+	putfile(w->file);
+	w->file = nil;
+
+	w->next = current->freewait;
+	current->freewait = w;
+}
+
+/*
+ * requeue moves up to nrequeue wait entries from the front of q1 to
+ * the front of q2 and returns the number moved. returns 0 if both
+ * queue locks could not be obtained after 1000 tries.
+ */
+int requeue(Uwaitq *q1, Uwaitq *q2, int nrequeue)
+{
+ int n;
+ Uwait *w;
+
+ /*
+ * q2 is only tried with canqlock while q1 is held, and q1 is
+ * released again on failure, so this never blocks holding one lock
+ * while waiting for the other.
+ */
+ n = 1000;
+ for(;;){
+ qlock(q1);
+ if(canqlock(q2))
+ break;
+ qunlock(q1);
+ if(--n <= 0)
+ return 0;
+ sleep(0);
+ }
+ n = 0;
+ while((w = q1->w) && (n < nrequeue)){
+ q1->w = w->nextq;
+ w->q = q2;
+ w->nextq = q2->w;
+ q2->w = w;
+ n++;
+ }
+ qunlock(q2);
+ qunlock(q1);
+ return n;
+}
+
+/*
+ * wakeq sends Wakeup to the processes waiting on q, in queue order,
+ * until nwake of them were woken or the queue ends. returns the
+ * number woken; a nil queue gives 0.
+ */
+int wakeq(Uwaitq *q, int nwake)
+{
+	Uwait *w;
+	int woken;
+
+	if(q == nil)
+		return 0;
+
+	woken = 0;
+	qlock(q);
+	w = q->w;
+	while(w != nil && woken < nwake){
+		woken += wakeup(w->proc, Wakeup, 0);
+		w = w->nextq;
+	}
+	qunlock(q);
+
+	return woken;
+}
+
+/*
+ * sleepq sleeps on wait queue q: the current process is queued, put
+ * to sleep with sleepproc(l, flags) and taken off the queue again.
+ * returns the result of sleepproc().
+ */
+int sleepq(Uwaitq *q, QLock *l, int flags)
+{
+	Uwait *entry;
+	int r;
+
+	entry = addwaitq(q);
+	r = sleepproc(l, flags);
+	delwaitq(entry);
+
+	return r;
+}
+
+/* list of processes with a pending alarm, sorted by ascending alarm time */
+static Uproc *alarmq;
+
+/*
+ * procsetalarm arms the alarm of proc for time t and (re)inserts proc
+ * into alarmq at its sorted position. nothing happens if proc already
+ * has an alarm that is not later than t. returns non-zero if t is
+ * earlier than everything that was queued before.
+ * setalarm() and timerproc() call this with proctab locked.
+ */
+int
+procsetalarm(Uproc *proc, vlong t)
+{
+ Uproc **pp;
+ int ret;
+
+ if(proc->alarm && t >= proc->alarm)
+ return 0;
+ ret = (alarmq == nil) || (t < alarmq->alarm);
+ /* unlink proc if it is already queued */
+ for(pp = &alarmq; *pp; pp = &((*pp)->alarmq)){
+ if(*pp == proc){
+ *pp = proc->alarmq;
+ break;
+ }
+ }
+ /* find the first entry that is due later than t and insert in front of it */
+ for(pp = &alarmq; *pp; pp = &((*pp)->alarmq))
+ if((*pp)->alarm > t)
+ break;
+ proc->alarm = t;
+ proc->alarmq = *pp;
+ *pp = proc;
+ return ret;
+}
+
+/*
+ * setalarm arms the alarm of the current process for time t and
+ * interrupts the timer process if procsetalarm() reports that the
+ * new alarm is the earliest one.
+ */
+void
+setalarm(vlong t)
+{
+	int earliest;
+
+	qlock(&proctab);
+	earliest = procsetalarm(current, t);
+	if(earliest)
+		write(timernotefd, "interrupt", 9);
+	qunlock(&proctab);
+}
+
+/* signal.c */
+extern void alarmtimer(Uproc *proc, vlong now);
+
+/*
+ * timerproc is the body of the timer process forked by initproc(). as
+ * long as processes are allocated it takes the expired entries off
+ * alarmq, wakes processes whose timeout has passed, calls
+ * alarmtimer() for each expired entry and then sleeps until the next
+ * alarm is due (2000 ms when alarmq is empty).
+ */
+static void
+timerproc(void*)
+{
+ Uproc *h;
+ vlong now;
+ long m;
+
+ setprocname("timerproc()");
+
+ while(proctab.alloc > 0){
+ qlock(&proctab);
+ m = 2000;
+ now = nsec();
+ while(h = alarmq){
+ /* the queue is sorted: the first entry that is not due ends the scan */
+ if(now < h->alarm){
+ m = (h->alarm - now) / 1000000;
+ break;
+ }
+ alarmq = h->alarmq;
+ h->alarm = 0;
+ h->alarmq = nil;
+ if(h->timeout){
+ if(now >= h->timeout){
+ h->timeout = 0;
+ wakeup(h, Wakeup, 0);
+ } else
+ /* the timeout is still ahead: queue the process again for it */
+ procsetalarm(h, h->timeout);
+ }
+ alarmtimer(h, now);
+ }
+ qunlock(&proctab);
+ /* NOTE(review): m is in milliseconds; confirm the HZ scaling matches what sleep() expects */
+ sleep((m + (1000/HZ-1))/(1000/HZ));
+ }
+}
+
+/*
+static void
+timerproc(void *)
+{
+ Uproc *p;
+ vlong expire, now, wake, dead;
+ int err, i, alive;
+ char c;
+
+ setprocname("timerproc()");
+ dead = 0;
+ for(;;){
+ qlock(&proctab);
+ now = nsec();
+ wake = now + 60000000000LL;
+ alive = 0;
+ for(i=0; i<MAXPROC; i++){
+ if((p = getprocn(i)) == nil)
+ continue;
+ if(p->wstate & WEXITED)
+ continue;
+ if(p->kpid <= 0)
+ continue;
+
+ if(now >= dead){
+ if(read(p->argsfd, &c, 1) < 0){
+ err = mkerror();
+ if(err != -EINTR && err != -ERESTART){
+ p->kpid = 0;
+ qunlock(&proctab);
+ exitproc(p, SIGKILL, 1);
+ qlock(&proctab);
+ continue;
+ }
+ }
+ }
+ alive++;
+ expire = p->timeout;
+ if(expire > 0){
+ if(now >= expire){
+ p->timeout = 0;
+ wakeup(p, Wakeup, 0);
+ } else {
+ if(expire < wake)
+ wake = expire;
+ }
+ }
+ expire = alarmtimer(p, now, wake);
+ if(expire < wake)
+ wake = expire;
+ }
+ qunlock(&proctab);
+
+ if(now >= dead)
+ dead = now + 5000000000LL;
+ if(dead < wake)
+ wake = dead;
+ if(alive == 0)
+ break;
+ wake -= now;
+
+ sleep(wake/1000000LL);
+ }
+}
+*/
+
+/*
+ * waitpid emulation. scans the proc table for children of
+ * the caller selected by pid:
+ *	pid > 0		the child with that pid
+ *	pid == 0	children with the callers pgid
+ *	pid < -1	children with pgid -pid
+ *	pid == -1	any child
+ * and returns the pid of the first one with a pending wait
+ * event, storing its exitcode in *pexit when pexit is set.
+ * returns -ECHILD if no child matches, 0 if WNOHANG is set
+ * and nothing is pending, or the negative sleepproc() result.
+ */
+int sys_waitpid(int pid, int *pexit, int opt)
+{
+ int i, n, m, status;
+ Uproc *p;
+
+ trace("sys_waitpid(%d, %p, %d)", pid, pexit, opt);
+
+ /* m: mask of wait events the caller is interested in */
+ m = WEXITED;
+ if(opt & WUNTRACED)
+ m |= WSTOPPED;
+ if(opt & WCONTINUED)
+ m |= WCONTINUED;
+
+ qlock(&proctab);
+ for(;;){
+ n = 0; /* number of children matching the selection */
+ for(i=0; i<MAXPROC; i++){
+ if((p = getprocn(i)) == nil)
+ continue;
+ if(p == current)
+ continue;
+ /* procs with an exit signal other than SIGCHLD need WALL or WCLONE */
+ if((p->exitsignal != SIGCHLD) && (opt & (WALL|WCLONE))==0)
+ continue;
+ if(p->ppid != current->pid)
+ continue;
+ if(pid > 0){
+ if(p->pid != pid)
+ continue;
+ } else if(pid == 0){
+ if(p->pgid != current->pgid)
+ continue;
+ } else if(pid < -1){
+ if(p->pgid != -pid)
+ continue;
+ }
+ n++;
+ trace("sys_waitpid(): child %d wstate %x", p->pid, p->wstate);
+ if(p->wevent & m)
+ goto found;
+ }
+ if(n == 0){
+ qunlock(&proctab);
+ trace("sys_waitpid(): no children we can wait for");
+ return -ECHILD;
+ }
+ if(opt & WNOHANG){
+ qunlock(&proctab);
+ trace("sys_waitpid(): no exited/stoped/cont children");
+ return 0;
+ }
+ /* sleep with proctab as the lock argument, then rescan */
+ if((i = sleepproc(&proctab, 1)) < 0){
+ qunlock(&proctab);
+ return i;
+ }
+ }
+
+found:
+ pid = p->pid;
+ status = p->exitcode;
+ /* consume the events we report */
+ p->wevent &= ~(p->wevent & m);
+ if(p->wstate & WEXITED){
+ trace("sys_waitpid(): found zombie %d exitcode %d", pid, status);
+ freeproc(p);
+ }
+ qunlock(&proctab);
+ /* NOTE(review): pexit is written without an okaddr() check */
+ if(pexit)
+ *pexit = status;
+ return pid;
+}
+
+/*
+ * resource usage record handed to sys_wait4(), which only
+ * zeroes it. presumably mirrors the linux struct rusage
+ * layout -- verify against the linux headers.
+ */
+struct linux_rusage {
+ struct linux_timeval ru_utime; /* user time used */
+ struct linux_timeval ru_stime; /* system time used */
+ long ru_maxrss; /* maximum resident set size */
+ long ru_ixrss; /* integral shared memory size */
+ long ru_idrss; /* integral unshared data size */
+ long ru_isrss; /* integral unshared stack size */
+ long ru_minflt; /* page reclaims */
+ long ru_majflt; /* page faults */
+ long ru_nswap; /* swaps */
+ long ru_inblock; /* block input operations */
+ long ru_oublock; /* block output operations */
+ long ru_msgsnd; /* messages sent */
+ long ru_msgrcv; /* messages received */
+ long ru_nsignals; /* signals received */
+ long ru_nvcsw; /* voluntary context switches */
+ long ru_nivcsw; /* involuntary context switches */
+};
+
+/*
+ * wait4 emulation: sys_waitpid() plus a resource usage
+ * record, which is just cleared when one is supplied.
+ */
+int sys_wait4(int pid, int *pexit, int opt, void *prusage)
+{
+	struct linux_rusage *usage;
+	int r;
+
+	trace("sys_wait4(%d, %p, %d, %p)", pid, pexit, opt, prusage);
+
+	usage = prusage;
+	r = sys_waitpid(pid, pexit, opt);
+	if(usage == nil)
+		return r;
+	memset(usage, 0, sizeof(*usage));
+	return r;
+}
+
+/*
+ * count the proc table entries whose pid field equals pid.
+ */
+int
+threadcount(int pid)
+{
+	Uproc *p;
+	int slot, count;
+
+	count = 0;
+	slot = 0;
+	while(slot < MAXPROC){
+		p = getprocn(slot++);
+		if(p == nil)
+			continue;
+		if(p->pid == pid)
+			count++;
+	}
+	return count;
+}
+
+/*
+ * queue the signal described by info for p with sendsignal()
+ * and wake a thread up to handle it. a sendsignal() result
+ * <= 0 is returned as is, otherwise 0 is returned. with group
+ * set, and p itself not wanting the signal, another proc
+ * with the same pid that wants it is woken instead, if there
+ * is one. the callers in this file hold the proctab lock.
+ */
+int
+killproc(Uproc *p, Usiginfo *info, int group)
+{
+ int i, n;
+ Uproc *w;
+ int sig, err;
+
+ if((err = sendsignal(p, info, group)) <= 0)
+ return err;
+ w = p; /* proc that gets the wakeup */
+ sig = info->signo;
+ if(group && !wantssignal(w, sig)){
+ /* scan the other slots, starting behind p's tid; p is reused as cursor */
+ for(i=1, n = p->tid + 1; i<MAXPROC; i++, n++){
+ if((p = getprocn(pidhash(n))) == nil)
+ continue;
+ if(p->pid != w->pid)
+ continue;
+ if(!wantssignal(p, info->signo))
+ continue;
+ w = p;
+ break;
+ }
+ }
+ /* last wakeup() argument is set for SIGKILL, SIGSTOP and SIGALRM only */
+ wakeup(w, Abort, (sig == SIGKILL || sig == SIGSTOP || sig == SIGALRM));
+ return 0;
+}
+
+/*
+ * values stored in Usiginfo.code by sendexitsignal() for
+ * SIGCHLD. numbering starts at 1.
+ */
+enum
+{
+ CLD_EXITED = 1,
+ CLD_KILLED,
+ CLD_DUMPED,
+ CLD_TRAPPED,
+ CLD_STOPPED,
+ CLD_CONTINUED,
+};
+
+/*
+ * build the siginfo for signal sig about proc and queue it
+ * on parent with sendsignal(parent, ..., 1). unlike
+ * killproc() nobody is woken up here. for SIGCHLD the low
+ * byte of code selects the CLD_* reason and the child fields
+ * are filled in; other signals carry only the signal number.
+ */
+static int
+sendexitsignal(Uproc *parent, Uproc *proc, int sig, int code)
+{
+	Usiginfo info;
+	int low;
+
+	memset(&info, 0, sizeof(info));
+	info.signo = sig;
+	if(sig == SIGCHLD){
+		low = code & 0xFF;
+		if(low == 0)
+			info.code = CLD_EXITED;
+		else if(low == SIGSTOP)
+			info.code = CLD_STOPPED;
+		else if(low == SIGCONT)
+			info.code = CLD_CONTINUED;
+		else if(low == SIGKILL)
+			info.code = CLD_KILLED;
+		else
+			info.code = CLD_DUMPED;
+		info.chld.pid = proc->pid;
+		info.chld.uid = proc->uid;
+		info.chld.status = code;
+	}
+	return sendsignal(parent, &info, 1);
+}
+
+/*
+ * wakeup() every proc that has the same pid as p, p
+ * included, passing m and force through. must be called
+ * with proctab locked.
+ */
+static void
+wakeupall(Uproc *p, char *m, int force)
+{
+	Uproc *t;
+	int tgid, tid, i;
+
+	tgid = p->pid;
+	tid = p->tid;
+	for(i = 0; i < MAXPROC; i++){
+		t = getprocn(pidhash(tid + i));
+		if(t == nil || t->pid != tgid)
+			continue;
+		wakeup(t, m, force);
+	}
+}
+
+/*
+ * traceproc handler installed by zapthreads(): makes the
+ * proc it runs in exit with code 0 and no group handling.
+ * kexitproc() compares traceproc against this function.
+ */
+static void
+zap(void *)
+{
+ exitproc(current, 0, 0);
+}
+
+/*
+ * make all other procs that share the pid of the current
+ * proc exit. each of them gets zap() installed as traceproc
+ * (with the current proc as tracearg) and is woken with
+ * Abort; this is repeated, sleeping on proctab in between,
+ * until a scan finds no such proc with kpid > 0 anymore.
+ * kexitproc() calls this with proctab locked.
+ */
+void
+zapthreads(void)
+{
+ Uproc *p;
+ int i, n, z;
+
+ for(;;){
+ z = 0; /* procs zapped in this pass */
+ for(i=1, n = current->tid+1; i<MAXPROC; i++, n++){
+ if((p = getprocn(pidhash(n))) == nil)
+ continue;
+ if(p->pid != current->pid || p == current)
+ continue;
+ if(p->kpid <= 0)
+ continue;
+
+ trace("zapthreads() zapping thread %p", p);
+ p->tracearg = current;
+ p->traceproc = zap;
+ wakeup(p, Abort, 1);
+ z++;
+ }
+ if(z == 0)
+ break;
+ /* a zapped proc wakes us from kexitproc() */
+ sleepproc(&proctab, 0);
+ }
+}
+
+/* argument block exitproc() fills in for kexitproc() */
+struct kexitprocargs
+{
+ Uproc *proc; /* proc that exits */
+ int code; /* stored as exitcode of a zombie */
+ int group; /* non zero: zapthreads() is run when proc is current */
+};
+
+#pragma profile off
+
+/*
+ * do the work of exitproc(); arg points to a struct
+ * kexitprocargs. releases the resources of proc, removes it
+ * from the alarm list, detaches its children and then either
+ * leaves it as a zombie for the parent to wait for or frees
+ * it. when proc is the current proc this does not return but
+ * leaves through longjmp(exitjmp, 1); otherwise 0 is
+ * returned.
+ */
+static int
+kexitproc(void *arg)
+{
+ struct kexitprocargs *args;
+ Uproc *proc;
+ int code, group;
+ Uproc *parent, *child, **pp;
+ int i;
+
+ args = arg;
+ proc = args->proc;
+ code = args->code;
+ group = args->group;
+
+ if(proc == current){
+ trace("kexitproc: cleartidptr = %p", proc->cleartidptr);
+ /* clear the tid word if it is writable, then wake futex waiters on it */
+ if(okaddr(proc->cleartidptr, sizeof(*proc->cleartidptr), 1))
+ *proc->cleartidptr = 0;
+ /* NOTE(review): sys_futex() is called even when the okaddr() check failed */
+ sys_futex((ulong*)proc->cleartidptr, 1, MAXPROC, nil, nil, 0);
+
+ qlock(&proctab);
+ exitsignal();
+ qunlock(&proctab);
+
+ exitmem();
+ }
+
+ exitfile(proc);
+
+ close(proc->notefd); proc->notefd = -1;
+ close(proc->argsfd); proc->argsfd = -1;
+
+ qlock(&proctab);
+
+ /* take proc off the alarm list */
+ for(pp = &alarmq; *pp; pp = &((*pp)->alarmq)){
+ if(*pp == proc){
+ *pp = proc->alarmq;
+ proc->alarmq = nil;
+ break;
+ }
+ }
+
+ /* reparent children, and reap when zombies */
+ for(i=0; i<MAXPROC; i++){
+ if((child = getprocn(i)) == nil)
+ continue;
+ if(child->ppid != proc->pid)
+ continue;
+ child->ppid = 0;
+ if(child->wstate & WEXITED)
+ freeproc(child);
+ }
+
+ /* if we got zapped, just free the proc and wakeup zapper */
+ if((proc == current) && (proc->traceproc == zap) && (parent = proc->tracearg)){
+ freeproc(proc);
+ wakeup(parent, Wakeup, 0);
+ goto zapped;
+ }
+
+ if(group && proc == current)
+ zapthreads();
+
+ parent = getproc(proc->ppid);
+ /* zombie only if: last proc with this pid, parent exists, exit signal is SIGCHLD and parent does not ignore it */
+ if((threadcount(proc->pid)==1) && parent &&
+ (proc->exitsignal == SIGCHLD) && !ignoressignal(parent, SIGCHLD)){
+
+ /* we are zombie */
+ proc->exitcode = code;
+ proc->wstate = WEXITED;
+ proc->wevent = proc->wstate;
+ if(proc == current){
+ current->kpid = 0;
+ sendexitsignal(parent, proc, proc->exitsignal, code);
+ wakeupall(parent, Abort, 0);
+ qunlock(&proctab);
+ longjmp(exitjmp, 1);
+ } else {
+ sendexitsignal(parent, proc, proc->exitsignal, code);
+ }
+ } else {
+ /* we are clone */
+ if(parent && proc->exitsignal > 0)
+ sendexitsignal(parent, proc, proc->exitsignal, code);
+ freeproc(proc);
+ }
+ if(parent)
+ wakeupall(parent, Abort, 0);
+
+zapped:
+ qunlock(&proctab);
+
+ if(proc == current)
+ longjmp(exitjmp, 1);
+
+ return 0;
+}
+
+/*
+ * terminate proc with the given exit code. for the current
+ * proc kexitproc() is run through onstack() on kstack, for
+ * any other proc it is called directly.
+ */
+void exitproc(Uproc *proc, int code, int group)
+{
+	struct kexitprocargs a;
+
+	trace("exitproc(%p, %d, %d)", proc, code, group);
+
+	a.group = group;
+	a.code = code;
+	a.proc = proc;
+
+	if(proc != current){
+		kexitproc(&a);
+		return;
+	}
+	onstack(kstack, kexitproc, &a);
+}
+
+/* argument stopproc() hands to the threads it stops through stop() */
+struct kstoparg
+{
+ Uproc *stopper; /* proc that is woken once the thread has stopped */
+ int code; /* code the thread passes on to stopproc() */
+};
+
+/*
+ * traceproc handler installed by stopproc(): stops the proc
+ * it runs in with the code found in the struct kstoparg that
+ * aux points to.
+ */
+static void
+stop(void *aux)
+{
+	struct kstoparg *ka;
+	int code;
+
+	ka = aux;
+	code = ka->code;
+	stopproc(current, code, 0);
+}
+
+/*
+ * put proc into the stopped wait state with code as its
+ * exitcode. with group set, every other running proc with
+ * the same pid is made to stop as well by installing stop()
+ * as its traceproc and waking it. finally the parent is
+ * woken, and sent SIGCHLD if group is set and it does not
+ * ignore that signal.
+ */
+void stopproc(Uproc *proc, int code, int group)
+{
+ struct kstoparg *arg;
+ Uproc *p, *parent;
+ int i, n, z;
+
+ trace("stopproc(%p, %d, %d)", proc, code, group);
+
+ qlock(&proctab);
+ proc->exitcode = code;
+ proc->wstate = WSTOPPED;
+ proc->wevent = proc->wstate;
+
+ /* we were asked to stop by another stopproc(): report back to it and be done */
+ if((proc == current) && (proc->traceproc == stop) && (arg = proc->tracearg)){
+ proc->traceproc = nil;
+ proc->tracearg = nil;
+ wakeup(arg->stopper, Wakeup, 0);
+ qunlock(&proctab);
+ return;
+ }
+
+ /* put all threads in the stopped state */
+ arg = nil;
+ while(group){
+ if(arg == nil){
+ /* allocated once, shared by all threads we stop */
+ arg = kmalloc(sizeof(*arg));
+ arg->stopper = current;
+ arg->code = code;
+ }
+ z = 0; /* threads asked to stop in this pass */
+ for(i=1, n = proc->tid+1; i<MAXPROC; i++, n++){
+ if((p = getprocn(pidhash(n))) == nil)
+ continue;
+ if(p->pid != proc->pid || p == proc)
+ continue;
+ if(p->kpid <= 0)
+ continue;
+ if(p->wstate & (WSTOPPED | WEXITED))
+ continue;
+
+ trace("stopproc() stopping thread %p", p);
+ p->tracearg = arg;
+ p->traceproc = stop;
+ wakeup(p, Abort, 1);
+ z++;
+ }
+ if(z == 0)
+ break;
+ /* wait for a stopped thread to wake us, then scan again */
+ sleepproc(&proctab, 0);
+ }
+ free(arg);
+
+ if(parent = getproc(proc->ppid)){
+ if(group && !ignoressignal(parent, SIGCHLD))
+ sendexitsignal(parent, proc, SIGCHLD, code);
+ wakeupall(parent, Abort, 0);
+ }
+ qunlock(&proctab);
+}
+
+/*
+ * put proc into the continued wait state with code as its
+ * exitcode. with group set, the other procs with the same
+ * pid that are stopped are switched to continued and woken
+ * too. finally the parent is woken, and sent SIGCHLD if
+ * group is set and it does not ignore that signal.
+ */
+void contproc(Uproc *proc, int code, int group)
+{
+ Uproc *p, *parent;
+ int i, n;
+
+ trace("contproc(%p, %d, %d)", proc, code, group);
+
+ qlock(&proctab);
+ proc->exitcode = code;
+ proc->wstate = WCONTINUED;
+ proc->wevent = proc->wstate;
+ if(group){
+ for(i=1, n = proc->tid+1; i<MAXPROC; i++, n++){
+ if((p = getprocn(pidhash(n))) == nil)
+ continue;
+ if(p->pid != proc->pid || p == proc)
+ continue;
+ if(p->kpid <= 0)
+ continue;
+ /* only threads that are currently stopped */
+ if((p->wstate & WSTOPPED) == 0)
+ continue;
+ if(p->wstate & (WCONTINUED | WEXITED))
+ continue;
+
+ trace("contproc() waking thread %p", p);
+ p->exitcode = code;
+ p->wstate = WCONTINUED;
+ p->wevent = p->wstate;
+ wakeup(p, Wakeup, 0);
+ }
+ }
+ if(parent = getproc(proc->ppid)){
+ if(group && !ignoressignal(parent, SIGCHLD))
+ sendexitsignal(parent, proc, SIGCHLD, code);
+ wakeupall(parent, Abort, 0);
+ }
+ qunlock(&proctab);
+}
+
+/*
+ * exit emulation: terminate the current proc only. the low
+ * byte of code is passed on shifted left by 8 bits.
+ */
+int sys_exit(int code)
+{
+	int status;
+
+	trace("sys_exit(%d)", code);
+
+	status = (code & 0xFF) << 8;
+	exitproc(current, status, 0);
+	return -1;
+}
+
+/*
+ * exit_group emulation: like sys_exit() but exitproc() is
+ * called with the group flag set.
+ */
+int sys_exit_group(int code)
+{
+	int status;
+
+	trace("sys_exit_group(%d)", code);
+
+	status = (code & 0xFF) << 8;
+	exitproc(current, status, 1);
+	return -1;
+}
+
+/* argument block sys_linux_clone() fills in for kcloneproc() */
+struct kcloneprocargs
+{
+ int flags; /* CLONE_* bits; low byte becomes the exit signal */
+ void *newstack; /* if not nil, stack pointer of the child */
+ int *parenttidptr; /* receives the new tid with CLONE_PARENT_SETTID */
+ void *tlsdescr; /* struct linux_user_desc, read with CLONE_SETTLS */
+ int *childtidptr; /* used with CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID */
+};
+
+/*
+ * do the work of sys_linux_clone(); arg points to a struct
+ * kcloneprocargs. allocates a new proc table entry, creates
+ * the process with rfork() and lets the parent fill in the
+ * entry and clone the resources before the child continues.
+ * the parent returns the tid of the new proc or a negative
+ * error; the child returns to user mode and does not come
+ * back here in the normal case.
+ */
+static int
+kcloneproc(void *arg)
+{
+ struct kcloneprocargs args;
+ struct linux_user_desc tls;
+ Ureg ureg;
+ int rflags, pid, tid;
+ char buf[80];
+ Uproc *new;
+
+ /* private copies of the arguments and of the callers registers */
+ memmove(&args, arg, sizeof(args));
+ memmove(&ureg, current->ureg, sizeof(ureg));
+ if(args.flags & CLONE_SETTLS){
+ if(!okaddr(args.tlsdescr, sizeof(tls), 0))
+ return -EFAULT;
+ memmove(&tls, args.tlsdescr, sizeof(tls));
+ }
+
+ qlock(&proctab);
+ if((new = allocproc()) == nil){
+ qunlock(&proctab);
+ return -EAGAIN;
+ }
+ tid = new->tid;
+
+ if(args.flags & CLONE_PARENT_SETTID){
+ if(!okaddr(args.parenttidptr, sizeof(*args.parenttidptr), 1)){
+ freeproc(new);
+ qunlock(&proctab);
+ return -EFAULT;
+ }
+ *args.parenttidptr = tid;
+ }
+
+ rflags = RFPROC;
+ if(args.flags & CLONE_VM)
+ rflags |= RFMEM;
+
+ qlock(current);
+ if((pid = rfork(rflags)) < 0){
+ freeproc(new);
+ qunlock(current);
+ qunlock(&proctab);
+
+ trace("kcloneproc(): rfork failed: %r");
+ return mkerror();
+ }
+
+ if(pid){
+ /* parent */
+ new->kpid = pid;
+ /* low byte of the flags is the signal sent on exit */
+ new->exitsignal = args.flags & 0xFF;
+ new->innote = 0;
+ new->ureg = &ureg;
+ new->syscall = current->syscall;
+ new->sysret = current->sysret;
+ new->comm = nil;
+ new->ncomm = 0;
+ new->linkloop = 0;
+ new->root = current->root ? kstrdup(current->root) : nil;
+ new->cwd = kstrdup(current->cwd);
+ new->kcwd = kstrdup(current->kcwd);
+ new->starttime = nsec();
+
+ /* NOTE(review): the results of both open() calls are stored unchecked */
+ snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+ new->notefd = open(buf, OWRITE);
+ snprint(buf, sizeof(buf), "/proc/%d/args", pid);
+ new->argsfd = open(buf, ORDWR);
+
+ /* threads and CLONE_PARENT children are siblings of the caller */
+ if(args.flags & (CLONE_THREAD | CLONE_PARENT)){
+ new->ppid = current->ppid;
+ } else {
+ new->ppid = current->pid;
+ }
+
+ if(args.flags & CLONE_THREAD)
+ new->pid = current->pid;
+
+ new->cleartidptr = nil;
+ if(args.flags & CLONE_CHILD_CLEARTID)
+ new->cleartidptr = args.childtidptr;
+
+ new->pgid = current->pgid;
+ new->psid = current->psid;
+ new->uid = current->uid;
+ new->gid = current->gid;
+
+ clonetrace(new, !(args.flags & CLONE_THREAD));
+ clonesignal(new, !(args.flags & CLONE_SIGHAND), !(args.flags & CLONE_THREAD));
+ clonemem(new, !(args.flags & CLONE_VM));
+ clonefile(new, !(args.flags & CLONE_FILES));
+ clonetls(new);
+ qunlock(&proctab);
+
+ /* meet the child, which waits for us below; retried while rendezvous() fails */
+ while(rendezvous(new, 0) == (void*)~0)
+ ;
+
+ qunlock(current);
+
+ return tid;
+ }
+
+ /* child */
+ current = new;
+ profme();
+
+ /* wait for parent to copy our resources */
+ while(rendezvous(new, 0) == (void*)~0)
+ ;
+
+ trace("kcloneproc(): hello world");
+
+ if(args.flags & CLONE_SETTLS)
+ sys_set_thread_area(&tls);
+
+ if(args.flags & CLONE_CHILD_SETTID)
+ if(okaddr(args.childtidptr, sizeof(*args.childtidptr), 1))
+ *args.childtidptr = tid;
+
+ if(args.newstack != nil)
+ current->ureg->sp = (ulong)args.newstack;
+ /* the child sees 0 as the result of the system call */
+ current->sysret(0);
+ retuser();
+
+ return -1;
+}
+
+#pragma profile on
+
+/*
+ * clone emulation. packs the arguments into a struct
+ * kcloneprocargs and runs kcloneproc() through onstack() on
+ * kstack. returns what kcloneproc() returns: the tid of the
+ * new proc in the parent or a negative error.
+ */
+int sys_linux_clone(int flags, void *newstack, int *parenttidptr, int *tlsdescr, void *childtidptr)
+{
+	struct kcloneprocargs a;
+
+	/* arguments are traced in the order of the parameter list */
+	trace("sys_linux_clone(%x, %p, %p, %p, %p)", flags, newstack, parenttidptr, tlsdescr, childtidptr);
+
+	a.flags = flags;
+	a.newstack = newstack;
+	a.parenttidptr = parenttidptr;
+	a.childtidptr = childtidptr;
+	a.tlsdescr = tlsdescr;
+
+	return onstack(kstack, kcloneproc, &a);
+}
+
+/*
+ * fork emulation: a clone with no flags besides SIGCHLD as
+ * exit signal and all pointers nil.
+ */
+int sys_fork(void)
+{
+	int r;
+
+	trace("sys_fork()");
+
+	r = sys_linux_clone(SIGCHLD, nil, nil, nil, nil);
+	return r;
+}
+
+/* vfork emulation: handled exactly like sys_fork() */
+int sys_vfork(void)
+{
+	int r;
+
+	trace("sys_vfork()");
+
+	r = sys_fork();
+	return r;
+}
+
+/* getpid emulation: the pid field of the current proc */
+int sys_getpid(void)
+{
+	int id;
+
+	trace("sys_getpid()");
+
+	id = current->pid;
+	return id;
+}
+
+/* getppid emulation: the ppid field of the current proc */
+int sys_getppid(void)
+{
+	int id;
+
+	trace("sys_getppid()");
+
+	id = current->ppid;
+	return id;
+}
+
+/* gettid emulation: the tid field of the current proc */
+int sys_gettid(void)
+{
+	int id;
+
+	trace("sys_gettid()");
+
+	id = current->tid;
+	return id;
+}
+
+/*
+ * setpgid emulation. sets the process group of every proc
+ * table entry with the given pid to pgid.
+ *	pid == 0	the calling process
+ *	pgid == 0	use the pid of the target process, i.e.
+ *			make it the leader of a new group
+ * returns 0 on success, -EINVAL for a negative pgid and
+ * -ESRCH if no proc has that pid.
+ */
+int sys_setpgid(int pid, int pgid)
+{
+	int i, n;
+
+	trace("sys_setpgid(%d, %d)", pid, pgid);
+
+	if(pgid < 0)
+		return -EINVAL;
+	/* resolve pid first: a zero pgid refers to the target, not to our current group */
+	if(pid == 0)
+		pid = current->pid;
+	if(pgid == 0)
+		pgid = pid;
+
+	n = 0;
+	qlock(&proctab);
+	for(i=0; i<MAXPROC; i++){
+		Uproc *p;
+
+		if((p = getprocn(i)) == nil)
+			continue;
+		if(p->pid != pid)
+			continue;
+
+		p->pgid = pgid;
+		n++;
+	}
+	qunlock(&proctab);
+
+	return n ? 0 : -ESRCH;
+}
+
+/*
+ * getpgid emulation. pid 0 answers for the current proc
+ * without looking at the table; otherwise the pgid of the
+ * first table entry with that pid is returned, or -ESRCH.
+ */
+int sys_getpgid(int pid)
+{
+	Uproc *p;
+	int slot, found;
+
+	trace("sys_getpgid(%d)", pid);
+
+	if(pid == 0)
+		return current->pgid;
+
+	found = -ESRCH;
+	qlock(&proctab);
+	for(slot = 0; slot < MAXPROC; slot++){
+		p = getprocn(slot);
+		if(p != nil && p->pid == pid){
+			found = p->pgid;
+			break;
+		}
+	}
+	qunlock(&proctab);
+
+	return found;
+}
+
+/* getpgrp emulation: sys_getpgid() of the calling process */
+int sys_getpgrp(void)
+{
+	int grp;
+
+	trace("sys_getpgrp()");
+
+	grp = sys_getpgid(0);
+	return grp;
+}
+
+/* getuid emulation: the uid field of the current proc */
+int sys_getuid(void)
+{
+	int id;
+
+	trace("sys_getuid()");
+
+	id = current->uid;
+	return id;
+}
+
+/* getgid emulation: the gid field of the current proc */
+int sys_getgid(void)
+{
+	int id;
+
+	trace("sys_getgid()");
+
+	id = current->gid;
+	return id;
+}
+
+/*
+ * setuid emulation: stores uid in the current proc without
+ * any permission check and always succeeds.
+ */
+int sys_setuid(int uid)
+{
+	Uproc *self;
+
+	trace("sys_setuid(%d)", uid);
+
+	self = current;
+	self->uid = uid;
+	return 0;
+}
+
+/*
+ * setgid emulation: stores gid in the current proc without
+ * any permission check and always succeeds.
+ */
+int sys_setgid(int gid)
+{
+	Uproc *self;
+
+	trace("sys_setgid(%d)", gid);
+
+	self = current;
+	self->gid = gid;
+	return 0;
+}
+
+/* setresuid stub: traces the call, changes nothing and reports success */
+int sys_setresuid(int ruid, int euid, int suid)
+{
+ trace("sys_setresuid(%d, %d, %d)", ruid, euid, suid);
+
+ return 0;
+}
+
+/* setresgid stub: traces the call, changes nothing and reports success */
+int sys_setresgid(int rgid, int egid, int sgid)
+{
+ trace("sys_setresgid(%d, %d, %d)", rgid, egid, sgid);
+
+ return 0;
+}
+/* setreuid stub: traces the call, changes nothing and reports success */
+int sys_setreuid(int ruid, int euid)
+{
+ trace("sys_setreuid(%d, %d)", ruid, euid);
+
+ return 0;
+}
+
+/* setregid stub: traces the call, changes nothing and reports success */
+int sys_setregid(int rgid, int egid)
+{
+ trace("sys_setregid(%d, %d)", rgid, egid);
+
+ return 0;
+}
+
+/*
+ * getresuid emulation. only one uid is kept per proc, so
+ * the real, effective and saved uid all report it. returns
+ * 0, or -EFAULT when one of the result pointers is nil (the
+ * error a bad address produces, as in sys_getrlimit()).
+ */
+int sys_getresuid(int *ruid, int *euid, int *suid)
+{
+	trace("sys_getresuid(%p, %p, %p)", ruid, euid, suid);
+
+	if(ruid == nil || euid == nil || suid == nil)
+		return -EFAULT;
+
+	*ruid = current->uid;
+	*euid = current->uid;
+	*suid = current->uid;
+
+	return 0;
+}
+
+/*
+ * getresgid emulation. only one gid is kept per proc, so
+ * the real, effective and saved gid all report it. returns
+ * 0, or -EFAULT when one of the result pointers is nil (the
+ * error a bad address produces, as in sys_getrlimit()).
+ */
+int sys_getresgid(int *rgid, int *egid, int *sgid)
+{
+	trace("sys_getresgid(%p, %p, %p)", rgid, egid, sgid);
+
+	if(rgid == nil || egid == nil || sgid == nil)
+		return -EFAULT;
+
+	*rgid = current->gid;
+	*egid = current->gid;
+	*sgid = current->gid;
+
+	return 0;
+}
+
+/*
+ * setsid emulation. refused with -EPERM when the caller
+ * already leads its process group. otherwise every proc
+ * with the callers pid gets that pid as group and session
+ * id, settty(nil) is called, and the callers pgid is
+ * returned.
+ */
+int sys_setsid(void)
+{
+	Uproc *p;
+	int slot;
+
+	trace("sys_setsid()");
+
+	if(current->pgid == current->pid)
+		return -EPERM;
+
+	qlock(&proctab);
+	for(slot = 0; slot < MAXPROC; slot++){
+		p = getprocn(slot);
+		if(p == nil || p->pid != current->pid)
+			continue;
+		p->pgid = current->pid;
+		p->psid = current->pid;
+	}
+	qunlock(&proctab);
+
+	settty(nil);
+
+	return current->pgid;
+}
+
+/*
+ * getsid emulation. returns the session id (psid) of the
+ * process with the given pid, pid 0 meaning the caller, or
+ * -ESRCH if there is no such process. this works for every
+ * member of a session, not only for the session leader.
+ */
+int sys_getsid(int pid)
+{
+	int i, sid;
+
+	trace("sys_getsid(%d)", pid);
+
+	sid = -ESRCH;
+	if(pid == 0)
+		pid = current->pid;
+	qlock(&proctab);
+	for(i=0; i<MAXPROC; i++){
+		Uproc *p;
+
+		if((p = getprocn(i)) == nil)
+			continue;
+		if(p->pid != pid)
+			continue;
+		sid = p->psid;
+		break;
+	}
+	qunlock(&proctab);
+
+	return sid;
+}
+
+/*
+ * getgroups emulation: no supplementary groups exist, so 0
+ * is reported and groups is left alone. a negative size is
+ * rejected with -EINVAL.
+ */
+int sys_getgroups(int size, int *groups)
+{
+	trace("sys_getgroups(%d, %p)", size, groups);
+	return size < 0 ? -EINVAL : 0;
+}
+
+/* setgroups stub: traces the call, changes nothing and reports success */
+int sys_setgroups(int size, int *groups)
+{
+ trace("sys_setgroups(%d, %p)", size, groups);
+ return 0;
+}
+
+/*
+ * record filled in by sys_uname(): six fixed size text
+ * fields of 65 bytes each. presumably matches the linux
+ * struct new_utsname -- verify against the linux headers.
+ */
+struct linux_utsname
+{
+ char sysname[65];
+ char nodename[65];
+ char release[65];
+ char version[65];
+ char machine[65];
+ char domainname[65];
+};
+
+/*
+ * copy src into one 65 byte utsname field. at most 64
+ * bytes are taken so the field is always NUL terminated;
+ * strncpy() pads the rest with zero bytes.
+ */
+static void
+utsfield(char *dst, char *src)
+{
+	strncpy(dst, src, 64);
+	dst[64] = 0;
+}
+
+/*
+ * uname emulation. fills the struct linux_utsname at a with
+ * fixed strings, using sysname() for the node and domain
+ * name. returns 0, or -EFAULT if a is nil.
+ */
+int sys_uname(void *a)
+{
+	struct linux_utsname *p = a;
+
+	trace("sys_uname(%p)", a);
+
+	if(p == nil)
+		return -EFAULT;
+
+	utsfield(p->sysname, "Linux");
+	utsfield(p->nodename, sysname());
+	utsfield(p->release, "3.2.1");
+	utsfield(p->version, "linuxemu");
+	utsfield(p->machine, "i386");
+	utsfield(p->domainname, sysname());
+
+	return 0;
+}
+
+/*
+ * personality emulation: only the values 0 and 0xffffffff
+ * are accepted (returning 0), anything else is -EINVAL.
+ */
+int sys_personality(ulong p)
+{
+	trace("sys_personality(%lux)", p);
+
+	if(p == 0 || p == 0xffffffff)
+		return 0;
+	return -EINVAL;
+}
+
+/*
+ * tkill emulation: send sig to the single proc with the
+ * given tid. returns -EINVAL for tid <= 0, -ESRCH if there
+ * is no such proc, otherwise the killproc() result.
+ */
+int sys_tkill(int tid, int sig)
+{
+	Usiginfo si;
+	Uproc *p;
+	int err;
+
+	trace("sys_tkill(%d, %S)", tid, sig);
+
+	if(tid <= 0)
+		return -EINVAL;
+
+	qlock(&proctab);
+	p = getproc(tid);
+	if(p == nil){
+		err = -ESRCH;
+	} else {
+		memset(&si, 0, sizeof(si));
+		si.signo = sig;
+		si.code = SI_TKILL;
+		si.kill.pid = current->tid;
+		si.kill.uid = current->uid;
+		err = killproc(p, &si, 0);
+	}
+	qunlock(&proctab);
+	return err;
+}
+
+/*
+ * tgkill emulation: like sys_tkill() but the proc found for
+ * tid must also have the given pid. returns -EINVAL for
+ * tid <= 0, -ESRCH if no proc matches, otherwise the
+ * killproc() result.
+ */
+int sys_tgkill(int pid, int tid, int sig)
+{
+	Usiginfo si;
+	Uproc *p;
+	int err;
+
+	trace("sys_tgkill(%d, %d, %S)", pid, tid, sig);
+
+	if(tid <= 0)
+		return -EINVAL;
+
+	qlock(&proctab);
+	p = getproc(tid);
+	if(p == nil || p->pid != pid){
+		err = -ESRCH;
+	} else {
+		memset(&si, 0, sizeof(si));
+		si.signo = sig;
+		si.code = SI_TKILL;
+		si.kill.pid = current->tid;
+		si.kill.uid = current->uid;
+		err = killproc(p, &si, 0);
+	}
+	qunlock(&proctab);
+	return err;
+}
+
+/*
+ * rt_sigqueueinfo emulation: convert the linux siginfo at
+ * info with linux2siginfo(), force the signal number to sig
+ * and the code to SI_QUEUE and deliver it with
+ * killproc(p, ..., 1). returns -ESRCH if getproc(pid) finds
+ * nothing, otherwise the killproc() result.
+ */
+int sys_rt_sigqueueinfo(int pid, int sig, void *info)
+{
+	Usiginfo si;
+	Uproc *p;
+	int err;
+
+	trace("sys_rt_sigqueueinfo(%d, %S, %p)", pid, sig, info);
+
+	qlock(&proctab);
+	p = getproc(pid);
+	if(p == nil){
+		err = -ESRCH;
+	} else {
+		memset(&si, 0, sizeof(si));
+		linux2siginfo(info, &si);
+		si.signo = sig;
+		si.code = SI_QUEUE;
+		err = killproc(p, &si, 1);
+	}
+	qunlock(&proctab);
+	return err;
+}
+
+/* number of bits in one ulong word of the pid bitmap used by sys_kill() */
+enum {
+ PIDMAPBITS1 = 8*sizeof(ulong),
+};
+
+/*
+ * kill emulation. selects live procs (not exited, kpid > 0)
+ * by pid:
+ *	pid > 0		procs with that pid
+ *	pid == 0	procs with the callers pgid
+ *	pid == -1	all procs with pid > 1 except the caller
+ *	pid < -1	procs with pgid -pid
+ * and sends sig once per selected pid with
+ * killproc(p, ..., 1). with sig <= 0 nothing is sent and
+ * only the existence of a target is checked. returns 0 if
+ * at least one target was found, -ESRCH otherwise.
+ */
+int sys_kill(int pid, int sig)
+{
+	int i, j, n;
+	Uproc *p;
+	Usiginfo si;
+	ulong pidmap[(MAXPROC + PIDMAPBITS1-1) / PIDMAPBITS1];
+	ulong m;
+
+	trace("sys_kill(%d, %S)", pid, sig);
+
+	n = 0;
+	memset(pidmap, 0, sizeof(pidmap));
+	qlock(&proctab);
+	for(i=0; i<MAXPROC; i++){
+		if((p = getprocn(i)) == nil)
+			continue;
+		if(p->wstate & WEXITED)
+			continue;
+		if(p->kpid <= 0)
+			continue;
+
+		if(pid == 0){
+			if(p->pgid != current->pgid)
+				continue;
+		} else if(pid == -1){
+			if(p->pid <= 1)
+				continue;
+			if(p->tid == current->tid)
+				continue;
+		} else if(pid < -1) {
+			if(p->pgid != -pid)
+				continue;
+		} else {
+			if(p->pid != pid)
+				continue;
+		}
+
+		/* make sure we send only one signal per pid */
+		j = pidhash(p->pid);
+		/* unsigned constant: the mask has to reach the top bit of a ulong */
+		m = 1UL << (j % PIDMAPBITS1);
+		j /= PIDMAPBITS1;
+		if(pidmap[j] & m)
+			continue;
+		pidmap[j] |= m;
+
+		if(sig > 0){
+			memset(&si, 0, sizeof(si));
+			si.signo = sig;
+			si.code = SI_USER;
+			si.kill.pid = current->tid;
+			si.kill.uid = current->uid;
+			killproc(p, &si, 1);
+		}
+		n++;
+	}
+	qunlock(&proctab);
+	if(n == 0)
+		return -ESRCH;
+	return 0;
+}
+
+/*
+ * set_tid_address emulation: remember tidptr as the word
+ * that kexitproc() clears when this proc exits. returns the
+ * thread id of the caller, as the linux system call does.
+ */
+int sys_set_tid_address(int *tidptr)
+{
+	trace("sys_set_tid_address(%p)", tidptr);
+
+	current->cleartidptr = tidptr;
+	return current->tid;
+}
+
+/* scheduling parameters as written by sys_sched_getparam() */
+struct linux_sched_param
+{
+ int sched_priority;
+};
+
+/*
+ * sched_setscheduler stub: policy and param are ignored.
+ * pid 0 names the calling thread and always succeeds; any
+ * other pid is looked up with proctab locked. returns 0, or
+ * -ESRCH if there is no such proc.
+ */
+int sys_sched_setscheduler(int pid, int policy, void *param)
+{
+	int found;
+
+	trace("sys_sched_setscheduler(%d, %d, %p)", pid, policy, param);
+
+	if(pid == 0)
+		return 0;
+	qlock(&proctab);
+	found = getproc(pid) != nil;
+	qunlock(&proctab);
+	if(!found)
+		return -ESRCH;
+	return 0;
+}
+
+/*
+ * sched_getscheduler stub: every proc reports policy 0.
+ * pid 0 names the calling thread and always succeeds; any
+ * other pid is looked up with proctab locked. returns 0, or
+ * -ESRCH if there is no such proc.
+ */
+int sys_sched_getscheduler(int pid)
+{
+	int found;
+
+	trace("sys_sched_getscheduler(%d)", pid);
+
+	if(pid == 0)
+		return 0;
+	qlock(&proctab);
+	found = getproc(pid) != nil;
+	qunlock(&proctab);
+	if(!found)
+		return -ESRCH;
+	return 0;
+}
+
+/*
+ * sched_setparam stub: param is ignored. pid 0 names the
+ * calling thread and always succeeds; any other pid is
+ * looked up with proctab locked. returns 0, or -ESRCH if
+ * there is no such proc.
+ */
+int sys_sched_setparam(int pid, void *param)
+{
+	int found;
+
+	trace("sys_sched_setparam(%d, %p)", pid, param);
+
+	if(pid == 0)
+		return 0;
+	qlock(&proctab);
+	found = getproc(pid) != nil;
+	qunlock(&proctab);
+	if(!found)
+		return -ESRCH;
+	return 0;
+}
+
+/*
+ * sched_getparam stub: reports priority 0 for every proc.
+ * pid 0 names the calling thread; any other pid is looked
+ * up with proctab locked. returns 0, -ESRCH if there is no
+ * such proc, or -EINVAL if param is nil.
+ */
+int sys_sched_getparam(int pid, void *param)
+{
+	struct linux_sched_param *p = param;
+	int found;
+
+	trace("sys_sched_getparam(%d, %p)", pid, param);
+
+	found = 1;
+	if(pid != 0){
+		qlock(&proctab);
+		found = getproc(pid) != nil;
+		qunlock(&proctab);
+	}
+	if(!found)
+		return -ESRCH;
+	if(p == nil)
+		return -EINVAL;
+	p->sched_priority = 0;
+
+	return 0;
+}
+
+/* sched_yield emulation: give up the processor with sleep(0); always returns 0 */
+int sys_sched_yield(void)
+{
+ trace("sys_sched_yield()");
+
+ sleep(0);
+ return 0;
+}
+
+/*
+ * resource numbers accepted by sys_getrlimit() and
+ * sys_setrlimit(), counted from 0. RLIM_NLIMITS is the
+ * number of resources, RLIM_INFINITY the value reported for
+ * a resource without limit. presumably the numbering
+ * follows the linux i386 ABI -- verify against the linux
+ * headers.
+ */
+enum {
+ RLIMIT_CPU,
+ RLIMIT_FSIZE,
+ RLIMIT_DATA,
+ RLIMIT_STACK,
+ RLIMIT_CORE,
+ RLIMIT_RSS,
+ RLIMIT_NPROC,
+ RLIMIT_NOFILE,
+ RLIMIT_MEMLOCK,
+ RLIMIT_AS,
+ RLIMIT_LOCKS,
+ RLIMIT_SIGPENDING,
+ RLIMIT_MSGQUEUE,
+
+ RLIM_NLIMITS,
+
+ RLIM_INFINITY = ~0UL,
+};
+
+/* limit pair written by sys_getrlimit() */
+struct linux_rlimit
+{
+ ulong rlim_cur; /* current limit */
+ ulong rlim_max; /* maximum limit */
+};
+
+/*
+ * getrlimit emulation. everything is reported as unlimited
+ * except:
+ *	RLIMIT_STACK	USTACK
+ *	RLIMIT_CORE	current limit 0
+ *	RLIMIT_NPROC	MAXPROC
+ *	RLIMIT_NOFILE	MAXFD
+ * returns 0, -EINVAL for a resource number outside of
+ * 0..RLIM_NLIMITS-1 and -EFAULT if rlim is nil.
+ */
+int sys_getrlimit(long resource, void *rlim)
+{
+	struct linux_rlimit *r = rlim;
+
+	trace("sys_getrlimit(%ld, %p)", resource, rlim);
+
+	/* resource is signed, so the lower bound needs a check of its own */
+	if(resource < 0 || resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if(rlim == nil)
+		return -EFAULT;
+
+	r->rlim_cur = RLIM_INFINITY;
+	r->rlim_max = RLIM_INFINITY;
+
+	switch(resource){
+	case RLIMIT_STACK:
+		r->rlim_cur = USTACK;
+		r->rlim_max = USTACK;
+		break;
+	case RLIMIT_CORE:
+		r->rlim_cur = 0;
+		break;
+	case RLIMIT_NPROC:
+		r->rlim_cur = MAXPROC;
+		r->rlim_max = MAXPROC;
+		break;
+	case RLIMIT_NOFILE:
+		r->rlim_cur = MAXFD;
+		r->rlim_max = MAXFD;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * setrlimit emulation: limits cannot be changed. returns
+ * -EINVAL for a resource number outside of
+ * 0..RLIM_NLIMITS-1, -EFAULT if rlim is nil and -EPERM
+ * otherwise.
+ */
+int sys_setrlimit(long resource, void *rlim)
+{
+	trace("sys_setrlimit(%ld, %p)", resource, rlim);
+
+	/* resource is signed, so the lower bound needs a check of its own */
+	if(resource < 0 || resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if(rlim == nil)
+		return -EFAULT;
+
+	return -EPERM;
+}
+
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <mp.h>
+#include <libsec.h>
+
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/*
+ * node types of the proc device. the values index
+ * procdevtab[], so both have to be kept in the same order.
+ * everything from Qpid on belongs to one process.
+ */
+enum {
+ Qproc,
+ Qstat,
+ Qcpuinfo,
+ Qmeminfo,
+ Quptime,
+ Qloadavg,
+ Qself,
+ Qpid,
+ Qcwd,
+ Qcmdline,
+ Qenviron,
+ Qexe,
+ Qroot,
+ Qpidstat,
+ Qpidstatm,
+ Qstatus,
+ Qmaps,
+ Qfd,
+ Qfd1,
+ Qtask,
+ Qtask1,
+ Qmax,
+};
+
+/*
+ * mode and name of every node type, indexed by the Q*
+ * constants. the name "###" marks the entries that
+ * path2q() matches by a leading digit instead of by name
+ * (Qpid, Qfd1, Qtask1).
+ */
+static struct {
+ int mode;
+ char *name;
+} procdevtab[] = {
+ 0555|S_IFDIR, "proc",
+ 0444|S_IFREG, "stat",
+ 0444|S_IFREG, "cpuinfo",
+ 0444|S_IFREG, "meminfo",
+ 0444|S_IFREG, "uptime",
+ 0444|S_IFREG, "loadavg",
+ 0777|S_IFLNK, "self",
+ 0555|S_IFDIR, "###",
+ 0777|S_IFLNK, "cwd",
+ 0444|S_IFREG, "cmdline",
+ 0444|S_IFREG, "environ",
+ 0777|S_IFLNK, "exe",
+ 0777|S_IFLNK, "root",
+ 0444|S_IFREG, "stat",
+ 0444|S_IFREG, "statm",
+ 0444|S_IFREG, "status",
+ 0444|S_IFREG, "maps",
+ 0555|S_IFDIR, "fd",
+ 0777|S_IFLNK, "###",
+ 0555|S_IFDIR, "task",
+ 0555|S_IFDIR, "###",
+};
+
+/* an open node of the proc device; starts with an unnamed Ufile member */
+typedef struct Procfile Procfile;
+struct Procfile
+{
+ Ufile;
+ int q; /* node type, one of the Q* constants */
+ int pid; /* process the node belongs to, as found by path2q() */
+ vlong lastoff; /* offset just behind the last read, see readproc() */
+ char *data; /* contents generated by gendata(), or nil */
+ int ndata; /* number of bytes in data */
+};
+
+/*
+ * translate a path below the proc device into its node type.
+ * the path is consumed one element at a time while walking
+ * procdevtab[] forward. returns the Q* index of the node or
+ * -1 if the path names nothing. *ppid and *pfd (when the
+ * pointers are not nil) receive the pid and fd number found
+ * in the path, or -1. the path is modified in place while
+ * scanning and restored afterwards, except when the final
+ * element matches, where no '/' was overwritten.
+ */
+static int
+path2q(char *path, int *ppid, int *pfd)
+{
+ int i, q, pid, fd;
+ char *x;
+
+ q = -1;
+ pid = -1;
+ fd = -1;
+ path++; /* skip the first character of the path */
+ for(i=Qproc; i<Qmax; i++){
+ /* cut the current element off at the next slash */
+ if(x = strchr(path, '/'))
+ *x = 0;
+ /* an element starting with a digit is a pid or a fd number */
+ if(path[0]>='0' && path[0]<='9'){
+ switch(i){
+ case Qpid:
+ case Qtask1:
+ pid = atoi(path);
+ goto match;
+ case Qfd1:
+ fd = atoi(path);
+ goto match;
+ }
+ }
+ if(strcmp(path, procdevtab[i].name) == 0){
+match: if(x == nil){
+ /* last element matched: this is the node */
+ q = i;
+ break;
+ }
+ if(i == Qself){ /* hack */
+ pid = current->pid;
+ i = Qpid;
+ }
+ /* step into a directory: continue with the next element */
+ if((procdevtab[i].mode & ~0777) == S_IFDIR){
+ path = x+1;
+ /* a task entry is walked like a pid directory */
+ if(i == Qtask1)
+ i = Qpid;
+ }
+ }
+ /* put the slash back */
+ if(x != nil)
+ *x = '/';
+ }
+ if(ppid)
+ *ppid = pid;
+ if(pfd)
+ *pfd = fd;
+ return q;
+}
+
+/*
+ * the proc device also serves /dev/stdin, /dev/stdout,
+ * /dev/stderr and /dev/fd. those names are mapped to the
+ * matching names below /proc/self/fd here; every other path
+ * is just duplicated. the result is always a fresh
+ * allocation that the caller has to free.
+ */
+static char*
+rewritepath(char *path)
+{
+	if(strcmp(path, "/dev/stdin") == 0)
+		return kstrdup("/proc/self/fd/0");
+	if(strcmp(path, "/dev/stdout") == 0)
+		return kstrdup("/proc/self/fd/1");
+	if(strcmp(path, "/dev/stderr") == 0)
+		return kstrdup("/proc/self/fd/2");
+	if(strncmp(path, "/dev/fd", 7) == 0)
+		return allocpath("/proc/self", "fd", path+7);
+	return kstrdup(path);
+}
+
+/* forward declaration: openproc() resolves symlink nodes through it */
+static int
+readlinkproc(char *path, char *buf, int len);
+
+/*
+ * open a node of the proc device. symlink nodes are
+ * resolved with readlinkproc() and the target is opened
+ * through fsopen(). all other nodes can only be opened read
+ * only (-EPERM otherwise) and get a new Procfile stored in
+ * *pf. returns 0 on success, -ENOENT if the path or the
+ * process does not exist.
+ */
+static int
+openproc(char *path, int mode, int perm, Ufile **pf)
+{
+ char buf[256], *t;
+ int n, q, pid, err;
+ Procfile *f;
+
+ err = -ENOENT;
+ /* path is our own copy from here on, freed at out */
+ path = rewritepath(path);
+ if((q = path2q(path, &pid, nil)) < 0)
+ goto out;
+ if((procdevtab[q].mode & ~0777) == S_IFLNK){
+ /* one byte is kept free for the terminator added below */
+ n = readlinkproc(path, buf, sizeof(buf)-1);
+ if(n > 0){
+ buf[n] = 0;
+ err = fsopen(buf, mode, perm, pf);
+ }
+ goto out;
+ }
+ if((mode & O_ACCMODE) != O_RDONLY){
+ err = -EPERM;
+ goto out;
+ }
+ if(q >= Qpid){
+ /* per process node: the process has to exist */
+ qlock(&proctab);
+ if(getproc(pid) == nil){
+ qunlock(&proctab);
+ goto out;
+ }
+ qunlock(&proctab);
+ }
+
+ /* hack */
+ /* store the path with the pid instead of "self" */
+ if(strncmp(path, "/proc/self", 10) == 0){
+ t = ksmprint("/proc/%d%s", pid, path+10);
+ free(path); path = t;
+ }
+
+ f = kmallocz(sizeof(*f), 1);
+ f->ref = 1;
+ f->mode = mode;
+ /* the file takes over the path, so it is not freed at out */
+ f->path = path; path = nil;
+ f->fd = -1;
+ f->dev = PROCDEV;
+ f->q = q;
+ f->pid = pid;
+ *pf = f;
+ err = 0;
+
+out:
+ free(path);
+ return err;
+}
+
+/*
+ * close a proc device file: release the generated contents,
+ * if there are any. always returns 0.
+ */
+static int
+closeproc(Ufile *file)
+{
+	Procfile *pf;
+
+	pf = (Procfile*)file;
+	if(pf->data != nil)
+		free(pf->data);
+	return 0;
+}
+
+/*
+ * indexes of the fields of one /dev/sysstat line as split by
+ * getfields() in sysstat(). SSmax is the number of fields a
+ * line must have.
+ */
+enum {
+ SScpu,
+ SSswitches,
+ SSinterrupts,
+ SSsyscalls,
+ SSpagefaults,
+ SStlbmisses,
+ SStlbpurges,
+ SSloadavg,
+ SSidletime,
+ SSintrtime,
+ SSmax,
+};
+
+/*
+ * read /dev/sysstat and build the cpu, btime and ctxt lines
+ * of /proc/stat from it. the tick counters run, idle and
+ * intr are kept in static storage and advanced by the ticks
+ * that passed since the last call; the idle and interrupt
+ * percentages are taken from the first line only.
+ *
+ * *prun, *pidle and *pload (when the pointers are not nil)
+ * receive the run and idle ticks and the load (100 minus
+ * the idle percentage of the first line).
+ *
+ * returns an allocated string that the caller has to free.
+ * it is never nil: when /dev/sysstat gives nothing an empty
+ * string is returned, so the result can be handed to %s.
+ */
+static char*
+sysstat(ulong *prun, ulong *pidle, ulong *pload)
+{
+	char buf[1024], *p, *e, *t, *data;
+	ulong dt, swtch, user, sys, load;
+	static ulong run, idle, intr;
+	int n, fd;
+
+	data = nil;
+	swtch = user = sys = load = 0;
+
+	/* ticks since the last call */
+	dt = (ulong)(((nsec() - boottime) * HZ) / 1000000000LL) - run;
+	run += dt;
+
+	n = 0;
+	if((fd = open("/dev/sysstat", OREAD)) >= 0){
+		n = read(fd, buf, sizeof(buf)-1);
+		close(fd);
+	}
+	if(n > 0){
+		buf[n] = 0;
+		p = buf;
+		while(e = strchr(p, '\n')){
+			char *f[SSmax];
+			*e = 0;
+			if(getfields(p, f, SSmax, 1, "\t ") != SSmax)
+				break;
+
+			if(p == buf){
+				/* first line: update the counters and emit the summary line */
+				swtch += atoi(f[SSswitches]);
+
+				idle += (atoi(f[SSidletime]) * dt)/100;
+				intr += (atoi(f[SSintrtime]) * dt)/100;
+
+				load = 100-atoi(f[SSidletime]);
+
+				user = run - idle - intr;
+				sys = run - user;
+
+				data = ksmprint("cpu %lud %lud %lud %lud %lud %lud %lud\n",
+					user, 0UL, sys, idle, 0UL, intr, 0UL);
+			}
+			/* one line per cpu, appended to what we have so far */
+			t = ksmprint("%scpu%d %lud %lud %lud %lud %lud %lud %lud\n",
+				data, atoi(f[SScpu]), user, 0UL, sys, idle, 0UL, intr, 0UL);
+			free(data);
+			data = t;
+
+			p = e+1;
+		}
+		/* data is still nil if not even the first line parsed; %s must not see nil */
+		t = ksmprint("%sbtime %lud\nctxt %lud\n", data ? data : "",
+			(ulong)(boottime/1000000000LL), swtch);
+		free(data);
+		data = t;
+	}
+	if(prun)
+		*prun = run;
+	if(pidle)
+		*pidle = idle;
+	if(pload)
+		*pload = load;
+
+	/* callers format the result with %s or free it; give them a real string */
+	if(data == nil)
+		data = kstrdup("");
+	return data;
+}
+
+/*
+ * state of p as shown in /proc: a letter followed by a
+ * description in parentheses. exited wins over stopped,
+ * which wins over the running/sleeping distinction made by
+ * p->state. the result is a string constant.
+ */
+static char*
+procstat(Uproc *p)
+{
+	if(p->wstate & WEXITED)
+		return "Z (zombie)";
+	if(p->wstate & WSTOPPED)
+		return "T (stopped)";
+	if(p->state == nil)
+		return "R (running)";
+	return "S (sleeping)";
+}
+
+/*
+ * name of the program p runs: the part of comm behind the
+ * last slash, taken from the proc that getproc(p->pid)
+ * finds. returns "" if there is no such proc or it has no
+ * comm. the result points into comm and is not a copy.
+ */
+static char*
+procname(Uproc *p)
+{
+	Uproc *lead;
+	char *slash;
+
+	lead = getproc(p->pid);
+	if(lead == nil)
+		return "";
+	if(lead->comm == nil)
+		return "";
+	slash = strrchr(lead->comm, '/');
+	if(slash == nil)
+		return lead->comm;
+	return slash+1;
+}
+
+
+/*
+ * (re)generate the contents of the proc node f into
+ * f->data and f->ndata, releasing the previous contents
+ * first. per process nodes (f->q >= Qpid) are built from the
+ * proc f->pid with proctab locked; if that proc is gone the
+ * file is left empty. node types without a case here stay
+ * empty (f->data nil, f->ndata 0).
+ */
+static void
+gendata(Procfile *f)
+{
+ char *s, *t;
+ int i, nproc, nready;
+ ulong tms[4];
+ Uproc *p;
+
+ f->ndata = 0;
+ if(s = f->data){
+ f->data = nil;
+ free(s);
+ }
+ s = nil;
+
+ if(f->q >= Qpid){
+ ulong vmsize, vmdat, vmlib, vmshr, vmstk, vmexe;
+
+ qlock(&proctab);
+ if((p = getproc(f->pid)) == nil){
+ qunlock(&proctab);
+ return;
+ }
+ switch(f->q){
+ case Qcmdline:
+ p = getproc(p->pid);
+ if(p == nil || p->comm == nil)
+ break;
+ /* the data starts behind the first string of comm; 2 bytes of ncomm are not part of it */
+ i = strlen(p->comm)+1;
+ if(i >= p->ncomm-2)
+ break;
+ f->ndata = p->ncomm-i-2;
+ f->data = kmalloc(f->ndata);
+ memmove(f->data, p->comm + i, f->ndata);
+ /* binary data: set directly, skipping the strlen() at the end */
+ qunlock(&proctab);
+ return;
+
+ case Qenviron:
+ break;
+ case Qpidstat:
+ if(proctimes(p, tms) != 0)
+ memset(tms, 0, sizeof(tms));
+ vmsize = procmemstat(p, nil, nil, nil, nil, nil);
+ /* 39 fields; format and arguments have to stay in step */
+ s = ksmprint(
+ "%d (%s) %c %d %d %d %d %d %lud %lud "
+ "%lud %lud %lud %lud %lud %ld %ld %ld %ld %ld "
+ "%ld %lud %lud %ld %lud %lud %lud %lud %lud %lud "
+ "%lud %lud %lud %lud %lud %lud %lud %d %d\n",
+ p->tid,
+ procname(p),
+ procstat(p)[0],
+ p->ppid,
+ p->pgid,
+ p->psid,
+ 0, /* tty */
+ 0, /* tty pgrp */
+ 0UL, /* flags */
+ 0UL, 0UL, 0UL, 0UL, /* pagefault stats */
+ tms[0], /* utime */
+ tms[1], /* stime */
+ tms[2], /* cutime */
+ tms[3], /* cstime */
+ 0UL, /* priority */
+ 0UL, /* nice */
+ 0UL, /* always 0UL */
+ 0UL, /* time to next alarm */
+ (ulong)(((p->starttime - boottime) * HZ) / 1000000000LL),
+ vmsize, /* vm size in bytes */
+ vmsize, /* vm working set */
+ 0UL, /* rlim */
+ p->codestart,
+ p->codeend,
+ p->stackstart,
+ 0UL, /* SP */
+ 0UL, /* PC */
+ 0UL, /* pending signal mask */
+ 0UL, /* blocked signal mask */
+ 0UL, /* ignored signal mask */
+ 0UL, /* caught signal mask */
+ 0UL, /* wchan */
+ 0UL, /* nswap */
+ 0UL, /* nswap children */
+ p->exitsignal,
+ 0); /* cpu */
+ break;
+ case Qpidstatm:
+ /* sizes are reported in pages */
+ vmsize = procmemstat(p, &vmdat, &vmlib, &vmshr, &vmstk, &vmexe);
+ s = ksmprint("%lud %lud %lud %lud %lud %lud %lud\n",
+ vmsize/PAGESIZE, vmsize/PAGESIZE, vmshr/PAGESIZE,
+ vmexe/PAGESIZE, vmstk/PAGESIZE, vmlib/PAGESIZE, 0UL);
+ break;
+ case Qstatus:
+ s = ksmprint(
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "Uid:\t%d\t%d\t%d\t%d\n"
+ "Gid:\t%d\t%d\t%d\t%d\n"
+ "FDSize:\t%d\n"
+ "Threads:\t%d\n",
+ procname(p),
+ procstat(p),
+ p->pid,
+ p->tid,
+ p->ppid,
+ p->uid, p->uid, p->uid, p->uid,
+ p->gid, p->gid, p->gid, p->gid,
+ MAXFD,
+ threadcount(p->pid));
+ break;
+ case Qmaps:
+ break;
+ }
+ qunlock(&proctab);
+ } else {
+ ulong run, idle, load;
+
+ /* count all procs and those with a nil state */
+ nproc = nready = 0;
+ qlock(&proctab);
+ for(i=0; i<MAXPROC; i++){
+ p = getprocn(i);
+ if(p == nil)
+ continue;
+ nproc++;
+ if(p->state == nil)
+ nready++;
+ }
+ /* from here on i holds proctab.nextpid */
+ i = proctab.nextpid;
+ qunlock(&proctab);
+
+ switch(f->q){
+ case Qstat:
+ /* NOTE(review): s goes to %s unchecked -- confirm sysstat() cannot return nil */
+ s = sysstat(nil, nil, nil);
+ t = ksmprint(
+ "%s"
+ "processes %d\n"
+ "procs_running %d\n"
+ "procs_blocked %d\n",
+ s,
+ i,
+ nready,
+ nproc-nready);
+ free(s);
+ s = t;
+ break;
+ case Qcpuinfo:
+ break;
+ case Qmeminfo:
+ break;
+ case Quptime:
+ /* only the counters are wanted, the text is thrown away */
+ free(sysstat(&run, &idle, nil));
+ s = ksmprint("%lud.%lud %lud.%lud\n", run/HZ, run%HZ, idle/HZ, idle%HZ);
+ break;
+ case Qloadavg:
+ free(sysstat(nil, nil, &load));
+ s = ksmprint("%lud.%lud 0 0 %d/%d %d\n", load/100, load%100, nready, nproc, i);
+ break;
+ }
+ }
+
+ f->data = s;
+ f->ndata = s ? strlen(s) : 0;
+}
+
+/*
+ * Report the length of the generated contents of a proc file.
+ * The contents are produced by gendata() on first use and the
+ * cached length is returned afterwards.
+ */
+static vlong
+sizeproc(Ufile *file)
+{
+	Procfile *pf;
+
+	pf = (Procfile*)file;
+	if(pf->data != nil)
+		return pf->ndata;
+	gendata(pf);
+	return pf->ndata;
+}
+
+/*
+ * Read from the generated contents of a proc file.
+ * The contents are (re)generated when nothing is cached or when the
+ * requested offset does not continue the previous read. Returns the
+ * number of bytes copied into buf, 0 at or past the end of the data.
+ */
+static int
+readproc(Ufile *file, void *buf, int len, vlong off)
+{
+	Procfile *pf;
+	int n;
+
+	pf = (Procfile*)file;
+	if(pf->data == nil || off != pf->lastoff)
+		gendata(pf);
+	if(pf->data == nil || off >= pf->ndata)
+		return 0;
+
+	n = pf->ndata - off;
+	if(n > len)
+		n = len;
+	memmove(buf, pf->data + off, n);
+	/* remember where a sequential reader is expected to continue */
+	pf->lastoff = off + n;
+	return n;
+}
+/*
+ * readlinkproc: resolve a symbolic link below /proc.
+ * The path is passed through rewritepath() and decoded by path2q() into
+ * a file type q plus optional pid and fd. The link target is copied into
+ * buf, truncated to len bytes; no terminating NUL is written.
+ * Returns the number of bytes copied, 0 when the entry yields no target,
+ * or -ENOENT when the path, the process or the descriptor is not found.
+ */
+static int
+readlinkproc(char *path, char *buf, int len)
+{
+ int err, q, pid, fd;
+ char *data;
+ Uproc *p;
+ Ufile *a;
+
+ err = -ENOENT;
+ path = rewritepath(path); /* result is released with free() at out: */
+ if((q = path2q(path, &pid, &fd)) < 0)
+ goto out;
+ data = nil;
+ if(q >= Qpid){ /* per-process entries need the process table locked */
+ qlock(&proctab);
+ if((p = getproc(pid)) == nil){
+ qunlock(&proctab);
+ goto out;
+ }
+ switch(q){
+ case Qcwd:
+ data = kstrdup(p->cwd);
+ break;
+ case Qexe:
+ p = getproc(p->pid); /* NOTE(review): second lookup by p->pid, presumably to reach the process owning comm; confirm */
+ if(p == nil || p->comm == nil)
+ break;
+ data = kstrdup(p->comm);
+ break;
+ case Qroot:
+ data = kstrdup(p->root ? p->root : "/");
+ break;
+ case Qfd1:
+ a = procfdgetfile(p, fd);
+ if(a == nil || a->path == nil){
+ putfile(a);
+ qunlock(&proctab);
+ goto out;
+ }
+ data = kstrdup(a->path);
+ putfile(a);
+ break;
+ }
+ qunlock(&proctab);
+ } else {
+ switch(q){
+ case Qself:
+ data = ksmprint("/proc/%d", current->pid);
+ break;
+ }
+ }
+ err = 0;
+ if(data){
+ err = strlen(data);
+ if(err > len) /* truncate to the caller's buffer */
+ err = len;
+ memmove(buf, data, err);
+ free(data);
+ }
+out:
+ free(path);
+ return err;
+}
+
+/*
+ * readdirproc: list the entries of a directory of the proc device.
+ * New Udirent records are appended through *pd (linked by ->next).
+ * Returns the number of entries produced.
+ *
+ *	Qproc	the fixed top-level files followed by one directory per
+ *		process (entries whose tid equals their pid)
+ *	Qtask	one directory per thread of process f->pid
+ *	Qpid	"task", "fd" and the per-process files
+ *	Qtask1	"fd" and the per-process files
+ *	Qfd	one entry per open descriptor that has a path
+ */
+static int
+readdirproc(Ufile *file, Udirent **pd)
+{
+	Procfile *f = (Procfile*)file;
+	char buf[12];
+	Uproc *p;
+	Ufile *a;
+	int n, i;
+
+	n = 0;
+	switch(f->q){
+	case Qproc:
+		/* static files follow Qproc in procdevtab up to the next directory */
+		for(i=f->q+1; (procdevtab[i].mode & ~0777) != S_IFDIR; i++){
+			if((*pd = newdirent(f->path, procdevtab[i].name, procdevtab[i].mode)) == nil)
+				break;
+			pd = &((*pd)->next);
+			n++;
+		}
+		/* no break */
+	case Qtask:
+		qlock(&proctab);
+		for(i=0; i<MAXPROC; i++){
+			p = getprocn(i);
+			if(p == nil)
+				continue;
+			if((f->q == Qproc) && (p->pid != p->tid))
+				continue;
+			if((f->q == Qtask) && (p->pid != f->pid))
+				continue;
+			snprint(buf, sizeof(buf), "%d", p->tid);
+			/*
+			 * i is a process slot here, not a procdevtab index: take the
+			 * mode from the table entry describing the numeric directory
+			 * (Qpid below /proc, Qtask1 below task/).
+			 */
+			if((*pd = newdirent(f->path, buf, procdevtab[(f->q == Qproc) ? Qpid : Qtask1].mode)) == nil)
+				break;
+			pd = &((*pd)->next);
+			n++;
+		}
+		qunlock(&proctab);
+		break;
+
+	case Qpid:
+		if((*pd = newdirent(f->path, procdevtab[Qtask].name, procdevtab[Qtask].mode)) == nil)
+			break;
+		pd = &((*pd)->next);
+		n++;
+		/* no break */
+	case Qtask1:
+		if((*pd = newdirent(f->path, procdevtab[Qfd].name, procdevtab[Qfd].mode)) == nil)
+			break;
+		pd = &((*pd)->next);
+		n++;
+		/* per-process files follow Qpid in procdevtab up to the next directory */
+		for(i=Qpid+1; (procdevtab[i].mode & ~0777) != S_IFDIR; i++){
+			if((*pd = newdirent(f->path, procdevtab[i].name, procdevtab[i].mode)) == nil)
+				break;
+			pd = &((*pd)->next);
+			n++;
+		}
+		break;
+
+	case Qfd:
+		qlock(&proctab);
+		if((p = getproc(f->pid)) == nil){
+			qunlock(&proctab);
+			break;
+		}
+		for(i=0; i<MAXFD; i++){
+			a = procfdgetfile(p, i);
+			if(a == nil || a->path == nil){
+				putfile(a);
+				continue;
+			}
+			putfile(a);
+			snprint(buf, sizeof(buf), "%d", i);
+			if((*pd = newdirent(f->path, buf, procdevtab[Qfd1].mode)) == nil)
+				break;
+			pd = &((*pd)->next);
+			n++;
+		}
+		qunlock(&proctab);
+		break;
+	}
+
+	return n;
+}
+/*
+ * statproc: fill *s for a path below /proc; the unnamed int argument is
+ * ignored. Per-process entries (q >= Qpid) take owner and timestamp from
+ * the target process, all other entries from the calling process and
+ * boottime. The mode comes from procdevtab[q], the size is always 0.
+ * Returns 0 on success or -ENOENT if the path, the process or the file
+ * descriptor named by the path does not exist.
+ */
+static int
+statproc(char *path, int, Ustat *s)
+{
+ int q, pid, fd, uid, gid, err;
+ ulong ctime;
+ Uproc *p;
+ Ufile *a;
+
+ err = -ENOENT;
+ path = rewritepath(path); /* result is released with free() at out: */
+ if((q = path2q(path, &pid, &fd)) < 0)
+ goto out;
+ if(q >= Qpid){
+ qlock(&proctab);
+ if((p = getproc(pid)) == nil){
+ qunlock(&proctab);
+ goto out;
+ }
+ if(q == Qfd1){ /* fd entry: only check that the descriptor is open and has a path */
+ a = procfdgetfile(p, fd);
+ if(a == nil || a->path == nil){
+ putfile(a);
+ qunlock(&proctab);
+ goto out;
+ }
+ putfile(a);
+ }
+ uid = p->uid;
+ gid = p->gid;
+ ctime = p->starttime/1000000000LL; /* presumably nanoseconds to seconds; confirm unit of starttime */
+ qunlock(&proctab);
+ } else {
+ uid = current->uid;
+ gid = current->gid;
+ ctime = boottime/1000000000LL;
+ }
+ err = 0;
+ s->mode = procdevtab[q].mode;
+ s->uid = uid;
+ s->gid = gid;
+ s->size = 0;
+ s->ino = hashpath(path);
+ s->dev = 0;
+ s->rdev = 0;
+ s->atime = s->mtime = s->ctime = ctime; /* one timestamp serves for all three */
+out:
+ free(path);
+ return err;
+}
+
+/*
+ * fstatproc: stat an open proc file by handing its path to fsstat()
+ * with the link flag cleared. Returns whatever fsstat() returns.
+ */
+static int
+fstatproc(Ufile *f, Ustat *s)
+{
+	return fsstat(f->path, 0, s);
+}
+/*
+ * Operations table of the proc device; procdevinit() stores it in
+ * devtab[PROCDEV] and mounts it. No write operation is provided.
+ */
+static Udev procdev =
+{
+ .open = openproc,
+ .read = readproc,
+ .size = sizeproc,
+ .readlink = readlinkproc,
+ .readdir = readdirproc,
+ .close = closeproc,
+ .stat = statproc,
+ .fstat = fstatproc,
+};
+/*
+ * procdevinit: register the proc device in devtab and mount it on /proc
+ * as well as on /dev/fd, /dev/stdin, /dev/stdout and /dev/stderr.
+ */
+void procdevinit(void)
+{
+ devtab[PROCDEV] = &procdev;
+
+ fsmount(&procdev, "/proc");
+ fsmount(&procdev, "/dev/fd"); /* the /dev names are served by the same device */
+ fsmount(&procdev, "/dev/stdin");
+ fsmount(&procdev, "/dev/stdout");
+ fsmount(&procdev, "/dev/stderr");
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Termios Termios;
+typedef struct Winsize Winsize;
+typedef struct Cbuf Cbuf;
+typedef struct Tty Tty;
+typedef struct Pty Pty;
+typedef struct Ptyfile Ptyfile;
+
+/* iflags: bits of Termios.iflag (input modes), tested in ttywriteinput() */
+enum {
+ IGNBRK = 01,
+ BRKINT = 02,
+ IGNPAR = 04,
+ PARMRK = 010,
+ INPCK = 020,
+ ISTRIP = 040, /* input bytes are masked with 0177 */
+ INLCR = 0100, /* newline becomes carriage return on input */
+ IGNCR = 0200, /* carriage return is dropped on input */
+ ICRNL = 0400, /* carriage return becomes newline on input */
+ IUCLC = 01000,
+ IXON = 02000, /* input processing stops at the VSTOP character */
+ IXANY = 04000,
+ IXOFF = 010000,
+ IMAXBEL = 020000,
+ IUTF8 = 040000,
+};
+
+/* oflags: bits of Termios.oflag (output modes), tested in ttywrite() and echo() */
+enum {
+ OPOST = 01, /* enables output processing; without it bytes are queued untouched */
+ OLCUC = 02, /* 'a'..'z' are written as upper case */
+ ONLCR = 04, /* newline is written as CR NL */
+ OCRNL = 010, /* echo() shows carriage return as newline */
+ ONOCR = 020,
+ ONLRET = 040,
+ OFILL = 0100,
+ OFDEL = 0200,
+ NLDLY = 0400,
+ NL0 = 0,
+ NL1 = 0400,
+ CRDLY = 03000,
+ CR0 = 0,
+ CR1 = 01000,
+ CR2 = 02000,
+ CR3 = 03000,
+ TABDLY = 014000,
+ TAB0 = 0,
+ TAB1 = 04000,
+ TAB2 = 010000,
+ TAB3 = 014000, /* tab is written as 8 spaces */
+ XTABS = 014000,
+ BSDLY = 020000,
+ BS0 = 0,
+ BS1 = 020000,
+ VTDLY = 040000,
+ VT0 = 0,
+ VT1 = 040000,
+ FFDLY = 0100000,
+ FF0 = 0,
+ FF1 = 0100000,
+};
+
+/* cflags: bits of Termios.cflag (control modes); none is tested in this file */
+enum {
+ CSIZE = 060, /* mask covering CS5..CS8 */
+ CS5 = 0,
+ CS6 = 020,
+ CS7 = 040,
+ CS8 = 060,
+ CREAD = 0200,
+ CLOCAL = 04000,
+ HUPCL = 02000,
+};
+
+/* lflags: bits of Termios.lflag (local modes) */
+enum {
+ ISIG = 01, /* VINTR and VQUIT input raises SIGINT / SIGQUIT */
+ ICANON = 02, /* line-at-a-time input with erase and kill editing */
+ XCASE = 04,
+ ECHO = 010, /* input is echoed into the output buffer */
+ ECHOE = 020, /* erase is echoed as backspace, space, backspace */
+ ECHOK = 040, /* a newline is echoed after a line kill */
+ ECHONL = 0100, /* newline is echoed in canonical mode even without ECHO */
+ NOFLSH = 0200,
+ TOSTOP = 0400,
+ ECHOCTL = 01000,
+ ECHOPRT = 02000,
+ ECHOKE = 04000,
+ FLUSH0 = 010000, /* NOTE(review): spelled with a digit zero; Linux names this bit FLUSHO, confirm before renaming */
+ PENDIN = 040000,
+ IEXTEN = 0100000,
+};
+
+/* cc: indices into Termios.cc[] (control characters) */
+enum {
+ VINTR = 0, /* sends SIGINT to the tty process group when ISIG is set */
+ VQUIT, /* sends SIGQUIT to the tty process group when ISIG is set */
+ VERASE, /* deletes the last character of the line (canonical mode) */
+ VKILL, /* deletes the whole current line (canonical mode) */
+ VEOF, /* terminates a line; queued as a NUL byte in canonical mode */
+ VTIME,
+ VMIN,
+ VSWTCH,
+ VSTART,
+ VSTOP, /* stops input processing when IXON is set */
+ VSUSP,
+ VEOL,
+ VREPRINT,
+ VDISCARD,
+ VERASEW,
+ VLNEXT,
+ VEOL2,
+ NCCS, /* number of entries, used to size Termios.cc */
+};
+/*
+ * Terminal settings; copied to and from the caller's buffer as a whole
+ * by the TCGETS / TCSETS ioctls in ioctlpty().
+ * NOTE(review): the layout is presumably meant to match the structure
+ * the Linux program passes to those ioctls; confirm before changing it.
+ */
+struct Termios
+{
+ int iflag; /* input modes */
+ int oflag; /* output modes */
+ int cflag; /* control modes */
+ int lflag; /* local modes */
+ uchar cline;
+ uchar cc[NCCS]; /* control characters */
+};
+/*
+ * Window size record, copied as a whole by the TIOCGWINSZ and
+ * TIOCSWINSZ ioctls in ioctlpty().
+ */
+struct Winsize
+{
+ ushort row;
+ ushort col;
+ ushort px;
+ ushort py;
+};
+/*
+ * Ring buffer of characters. It is empty when rp == wp; cbput() refuses
+ * to let wp catch up with rp, so at most sizeof(cb)-1 bytes are held.
+ * The index arithmetic masks with sizeof(cb)-1, which requires the
+ * size of cb to be a power of two.
+ */
+struct Cbuf
+{
+ int rp; /* index of the next byte to read */
+ int wp; /* index of the next free slot */
+ char cb[256];
+};
+/*
+ * Line discipline state shared by both ends of a terminal.
+ */
+struct Tty
+{
+ Termios t; /* current settings */
+ Winsize winsize;
+
+ int escaped; /* set after an unescaped backslash in canonical input */
+ int eol; /* line terminators counted in by ttywriteinput() and out by ttyread() */
+
+ int pgid; /* process group signalled for VINTR, VQUIT and window changes */
+
+ Cbuf wb; /* output: filled by ttywrite() and echo(), drained by ttyreadoutput() */
+ Cbuf rb; /* input: filled by ttywriteinput(), drained by ttyread() */
+};
+/*
+ * One pseudo terminal. Tty, Ref and QLock are embedded anonymously, so
+ * a Pty is passed directly to the tty functions, incref/decref and
+ * qlock/qunlock.
+ */
+struct Pty
+{
+ Tty;
+
+ int id; /* index of this pty in ptys[] */
+ int closed; /* set when the master side is closed */
+ int locked; /* set through the TIOCSPTLCK ioctl; a locked pty cannot be opened as slave */
+
+ struct {
+ Uwaitq r; /* sleepers waiting to read */
+ Uwaitq w; /* sleepers waiting to write */
+ } q[2]; /* indexed by Ptyfile.master: q[0] slave side, q[1] master side */
+
+ Ref;
+ QLock;
+};
+/*
+ * Open file of the pty device, created by openpty().
+ */
+struct Ptyfile
+{
+ Ufile;
+
+ Pty *pty; /* nil for the /dev/pts directory */
+
+ int master; /* 1 for /dev/ptmx, 0 for a /dev/pts/N slave, -1 for the directory */
+};
+
+static Pty *ptys[64]; /* live ptys indexed by Pty.id; nil marks a free slot */
+
+/*
+ * Append one character to the ring buffer.
+ * Returns 0 on success and -1 when the buffer is full; one slot is
+ * always left unused so that a full buffer differs from an empty one.
+ */
+int cbput(Cbuf *b, char c)
+{
+	int next;
+
+	next = (b->wp + 1) & (sizeof(b->cb) - 1);
+	if(next == b->rp)
+		return -1;
+	b->cb[b->wp] = c;
+	b->wp = next;
+	return 0;
+}
+
+/*
+ * Remove and return the oldest character of the ring buffer.
+ * Returns the byte as a value in the range 0..255, or -1 when the
+ * buffer is empty.
+ */
+int cbget(Cbuf *b)
+{
+	int c;
+
+	if(b->rp == b->wp)
+		return -1;
+	/*
+	 * cb[] holds plain chars, which may be signed: mask the value so that
+	 * bytes >= 0x80 (e.g. UTF-8 sequences) are not returned as negative
+	 * numbers and mistaken for the "empty" result by the callers.
+	 */
+	c = b->cb[b->rp] & 0xff;
+	b->rp = (b->rp + 1) & (sizeof(b->cb)-1);
+	return c;
+}
+
+/*
+ * Return the number of characters currently held in the ring buffer.
+ */
+int cbfill(Cbuf *b)
+{
+	int used;
+
+	/* the mask folds a negative difference back into the buffer range */
+	used = b->wp - b->rp;
+	return used & (sizeof(b->cb)-1);
+}
+
+/*
+ * Reset the terminal settings of t to their defaults: all control
+ * characters zero, CR to NL mapping on input, NL to CR NL mapping on
+ * output and canonical mode with echo. When there is a current process
+ * its process group becomes the group of the tty.
+ */
+void ttyinit(Tty *t)
+{
+	Termios *tio;
+
+	tio = &t->t;
+	memset(tio, 0, sizeof(t->t));
+	tio->iflag = ICRNL;
+	tio->oflag = OPOST|ONLCR|NL0|CR0|TAB0|BS0|VT0|FF0;
+	tio->lflag = ICANON|IEXTEN|ECHO|ECHOE|ECHOK;
+
+	if(!current)
+		return;
+	t->pgid = current->pgid;
+}
+
+/*
+ * ttywrite: queue program output in the output buffer t->wb.
+ * Without OPOST the bytes are queued unchanged. With OPOST a newline is
+ * expanded to CR NL (ONLCR), a tab to 8 spaces (TAB3) and lower case
+ * letters are folded to upper case (OLCUC).
+ * Returns the number of bytes of buf that were consumed; this is less
+ * than len when the buffer fills up. A character is only consumed when
+ * its whole expansion fits, so the caller can retry the remainder
+ * without duplicating or truncating output.
+ */
+int ttywrite(Tty *t, char *buf, int len)
+{
+	char *p, *e;
+	int room;
+
+	for(p=buf, e=buf+len; p<e; p++){
+		char c;
+
+		c = *p;
+		if((t->t.oflag & OPOST) == 0) {
+			if(cbput(&t->wb, c) < 0)
+				break;
+			continue;
+		}
+		/* cbput() keeps one slot unused, so this is what still fits */
+		room = (int)sizeof(t->wb.cb) - 1 - cbfill(&t->wb);
+		switch(c) {
+		case '\n':
+			if(t->t.oflag & ONLCR) {
+				/* never queue the CR without its NL */
+				if(room < 2)
+					goto done;
+				cbput(&t->wb, '\r');
+			}
+			if(cbput(&t->wb, c) < 0)
+				goto done;
+			break;
+
+		case '\t':
+			if((t->t.oflag & TAB3) == TAB3) {
+				int tab;
+
+				/* expand the tab completely or not at all */
+				if(room < 8)
+					goto done;
+				tab = 8;
+				while(tab--)
+					cbput(&t->wb, ' ');
+				break;
+			}
+			/* Fall Through */
+		default:
+			if(t->t.oflag & OLCUC)
+				if(c >= 'a' && c <= 'z')
+					c = 'A' + (c-'a');
+			if(cbput(&t->wb, c) < 0)
+				goto done;
+		}
+	}
+done:
+	return p-buf;
+}
+
+/*
+ * Tell whether ttyread() would return data.
+ * In canonical mode a complete line must be pending (t->eol != 0),
+ * otherwise any buffered byte counts. Returns 1 and stores the number
+ * of buffered bytes in *n (if n is not nil) when readable; returns 0
+ * and leaves *n untouched when not.
+ */
+int ttycanread(Tty *t, int *n)
+{
+	int avail;
+
+	avail = cbfill(&t->rb);
+	if((t->t.lflag & ICANON) ? (t->eol == 0) : (avail == 0))
+		return 0;
+	if(n != nil)
+		*n = avail;
+	return 1;
+}
+/*
+ * ttyread: copy buffered input (t->rb) to the reader.
+ * In canonical mode nothing is returned until a complete line is queued
+ * (t->eol != 0); the read then ends after the newline, or at the NUL
+ * byte that ttywriteinput() queues for VEOF, which is consumed but not
+ * copied. In non-canonical mode bytes are copied until buf is full or
+ * the buffer is empty. Returns the number of bytes stored in buf.
+ */
+int ttyread(Tty *t, char *buf, int len)
+{
+ char *p, *e;
+
+ if((t->t.lflag & ICANON) && t->eol == 0)
+ return 0;
+
+ for(p=buf, e=buf+len; p<e; p++){
+ int c;
+
+ if((c = cbget(&t->rb)) < 0) /* buffer empty */
+ break;
+
+ if(c==0 || c=='\n'){
+ t->eol--; /* NOTE(review): also runs in non-canonical mode for every NUL or newline; check that it pairs with the increments in ttywriteinput() */
+ if(t->t.lflag & ICANON){
+ if(c == 0) /* end-of-file marker: ends the read without being delivered */
+ break;
+ *p++ = c;
+ break;
+ }
+ }
+
+ *p = c;
+ }
+ return p-buf;
+}
+
+/*
+ * echo: queue the echo of input character c in the output buffer.
+ * With ECHO set, CR is shown as NL under OCRNL, NL as CR NL under ONLCR
+ * and a tab as 8 spaces under TAB3; everything else is queued as is.
+ * Without ECHO only a newline is echoed, and only when both ECHONL and
+ * ICANON are set. Results of cbput() are ignored, so echo output is
+ * dropped when the output buffer is full.
+ */
+static void
+echo(Tty *t, char c)
+{
+ if(t->t.lflag & ECHO) {
+ switch(c) {
+ case '\r':
+ if(t->t.oflag & OCRNL) {
+ cbput(&t->wb, '\n');
+ break;
+ }
+ cbput(&t->wb, c);
+ break;
+ case '\n':
+ if(t->t.oflag & ONLCR)
+ cbput(&t->wb, '\r');
+ cbput(&t->wb, '\n');
+ break;
+ case '\t':
+ if((t->t.oflag & TAB3) == TAB3) {
+ int tab;
+
+ tab = 8;
+ while(tab--)
+ cbput(&t->wb, ' ');
+ break;
+ }
+ /* Fall Through */
+ default:
+ cbput(&t->wb, c);
+ break;
+ }
+ }
+ else
+ if(c == '\n' && (t->t.lflag&(ECHONL|ICANON)) == (ECHONL|ICANON)) {
+ if(t->t.oflag & ONLCR)
+ cbput(&t->wb, '\r');
+ cbput(&t->wb, '\n');
+ }
+}
+/*
+ * bs: erase the most recently queued input character.
+ * Nothing is erased when the input buffer is empty or when the last
+ * character is a line terminator (NUL or newline), so editing never
+ * reaches into a completed line. The erase is echoed as backspace,
+ * followed by space and backspace under ECHOE.
+ * Returns 1 if a character was removed, 0 otherwise.
+ */
+static int
+bs(Tty *t)
+{
+ char c;
+ int x;
+
+ if(cbfill(&t->rb) == 0)
+ return 0;
+ x = (t->rb.wp-1)&(sizeof(t->rb.cb)-1); /* index of the last byte written */
+ c = t->rb.cb[x];
+ if(c == 0 || c == '\n')
+ return 0;
+ t->rb.wp = x; /* step the write index back over it */
+ echo(t, '\b');
+ if(t->t.lflag & ECHOE) {
+ echo(t, ' ');
+ echo(t, '\b');
+ }
+ return 1;
+}
+/*
+ * ttywriteinput: feed characters typed at the master side through the
+ * input line discipline into t->rb.
+ * Per character, in this order: ISTRIP masking, IXON/VSTOP handling,
+ * CR/NL translation (IGNCR, ICRNL, INLCR), signal characters (ISIG),
+ * canonical editing (VERASE, VKILL), line counting, echo and queueing.
+ * In canonical mode a backslash escapes the next character: the
+ * backslash itself is not queued unless it is followed by another one.
+ * Returns the number of bytes of buf that were consumed. Results of
+ * cbput() are ignored, so input is dropped when t->rb is full.
+ */
+int ttywriteinput(Tty *t, char *buf, int len)
+{
+ char *p, *e;
+
+ for(p=buf, e=buf+len; p<e; p++){
+ char c;
+
+ c = *p;
+
+ if(t->t.iflag & ISTRIP)
+ c &= 0177;
+
+ if((t->t.iflag & IXON) && c == t->t.cc[VSTOP]) { /* consume the stop character and end this call */
+ p++;
+ break;
+ }
+
+ switch(c) {
+ case '\r':
+ if(t->t.iflag & IGNCR)
+ continue;
+ if(t->t.iflag & ICRNL)
+ c = '\n';
+ break;
+ case '\n':
+ if(t->t.iflag&INLCR)
+ c = '\r';
+ break;
+ }
+
+ if(t->t.lflag & ISIG) {
+ if(c == t->t.cc[VINTR]){
+ if(t->pgid > 0)
+ sys_kill(-t->pgid, SIGINT); /* negative pid: presumably addresses the whole process group, confirm in sys_kill */
+ continue;
+ }
+ if(c == t->t.cc[VQUIT]){
+ if(t->pgid > 0)
+ sys_kill(-t->pgid, SIGQUIT);
+ continue;
+ }
+ }
+
+ if((t->t.lflag & ICANON) && t->escaped == 0) {
+ if(c == t->t.cc[VERASE]) {
+ bs(t);
+ continue;
+ }
+ if(c == t->t.cc[VKILL]) {
+ while(bs(t)) /* erase back to the previous line terminator */
+ ;
+ if(t->t.lflag & ECHOK)
+ echo(t, '\n');
+ continue;
+ }
+ }
+
+ if(t->escaped == 0 && (c == t->t.cc[VEOF] || c == '\n'))
+ t->eol++; /* one more line terminator for ttyread() to find */
+
+ if((t->t.lflag & ICANON) == 0) { /* raw input: echo and queue unchanged */
+ echo(t, c);
+ cbput(&t->rb, c);
+ continue;
+ }
+
+ if(t->escaped)
+ echo(t, '\b'); /* back over the backslash echoed before */
+
+ if(c != t->t.cc[VEOF])
+ echo(t, c);
+
+ if(c != '\\') {
+ if(c == t->t.cc[VEOF])
+ c = 0; /* end-of-file is queued as NUL, see ttyread() */
+ cbput(&t->rb, c);
+ t->escaped = 0;
+ continue;
+ }
+ if(t->escaped) { /* backslash backslash queues one backslash */
+ cbput(&t->rb, '\\');
+ t->escaped = 0;
+ }
+ else
+ t->escaped = 1;
+ }
+
+ return p-buf;
+}
+
+/*
+ * Tell whether the output buffer holds data for the master side.
+ * The number of buffered bytes is stored in *n when n is not nil.
+ * Returns 1 if there is output pending, 0 otherwise.
+ */
+int ttycanreadoutput(Tty *t, int *n)
+{
+	int pending;
+
+	pending = cbfill(&t->wb);
+	if(n != nil)
+		*n = pending;
+	return pending > 0;
+}
+
+/*
+ * Drain up to len bytes of queued output from t->wb into buf.
+ * Returns the number of bytes copied.
+ */
+int ttyreadoutput(Tty *t, char *buf, int len)
+{
+	int got, c;
+
+	for(got = 0; got < len; got++){
+		c = cbget(&t->wb);
+		if(c < 0)
+			break;
+		buf[got] = c;
+	}
+	return got;
+}
+/*
+ * pollpty: poll operation of the pty device.
+ * Registers the caller on the read wait queue of its side and returns
+ * the event mask: POLLOUT always, POLLIN when data can be read, and
+ * POLLIN|POLLHUP for a slave whose pty has been closed.
+ * The /dev/pts directory (no pty) reports no events.
+ */
+static int
+pollpty(Ufile *f, void *tab)
+{
+ Ptyfile *p = (Ptyfile*)f;
+ int err;
+ int n;
+
+ if(p->pty == nil)
+ return 0;
+
+ qlock(p->pty);
+ if(p->master){
+ pollwait(p, &p->pty->q[1].r, tab);
+ n = ttycanreadoutput(p->pty, nil);
+ } else {
+ pollwait(p, &p->pty->q[0].r, tab);
+ n = ttycanread(p->pty, nil);
+ }
+ err = POLLOUT; /* writability is reported unconditionally */
+ if(n){
+ err |= POLLIN;
+ } else if(p->master==0 && p->pty->closed){
+ err |= (POLLIN | POLLHUP);
+ }
+ qunlock(p->pty);
+
+ return err;
+}
+/*
+ * readpty: read operation of the pty device; the offset is ignored.
+ * The master reads what the slave wrote (ttyreadoutput), the slave reads
+ * processed input (ttyread). When nothing is available a slave of a
+ * closed pty gets -EIO, a non-blocking file gets -EAGAIN, and everybody
+ * else sleeps on the read queue of its side and retries.
+ * Returns the number of bytes read or a negative error; the directory
+ * file (no pty) yields -EPERM.
+ */
+static int
+readpty(Ufile *f, void *data, int len, vlong)
+{
+ int err;
+ Ptyfile *p = (Ptyfile*)f;
+
+ if(p->pty == nil)
+ return -EPERM;
+ qlock(p->pty);
+ for(;;){
+ if(p->master){
+ err = ttycanreadoutput(p->pty, nil);
+ } else {
+ err = ttycanread(p->pty, nil);
+ }
+ if(err > 0){
+ if(p->master){
+ err = ttyreadoutput(p->pty, (char*)data, len);
+ }else{
+ err = ttyread(p->pty, (char*)data, len);
+ }
+ } else {
+ if(p->master == 0 && p->pty->closed){
+ err = -EIO;
+ } else if(p->mode & O_NONBLOCK){
+ err = -EAGAIN;
+ } else {
+ if((err = sleepq(&p->pty->q[p->master].r, p->pty, 1)) == 0) /* presumably drops the pty lock while asleep, confirm in sleepq; 0 means woken, so retry */
+ continue;
+ }
+ }
+ wakeq(&p->pty->q[!p->master].w, MAXPROC); /* wake writers sleeping on the other side's queue */
+ break;
+ }
+ qunlock(p->pty);
+
+ return err;
+}
+/*
+ * writepty: write operation of the pty device; the offset is ignored.
+ * The master's data is treated as terminal input (ttywriteinput), the
+ * slave's data as program output (ttywrite). When no byte could be
+ * queued the caller sleeps on the write queue of its side and retries.
+ * Returns the number of bytes accepted, which may be less than len,
+ * -EIO when the pty is closed, or -EPERM for the directory file.
+ */
+static int
+writepty(Ufile *f, void *data, int len, vlong)
+{
+ Ptyfile *p = (Ptyfile*)f;
+ int err;
+
+ if(p->pty == nil)
+ return -EPERM;
+ if(len == 0)
+ return len;
+
+ qlock(p->pty);
+ for(;;){
+ if(p->pty->closed){
+ err = -EIO;
+ break;
+ }
+ if(p->master){
+ err = ttywriteinput(p->pty, (char*)data, len);
+ } else{
+ err = ttywrite(p->pty, (char*)data, len);
+ }
+ if(err == 0){
+ if((err = sleepq(&p->pty->q[p->master].w, p->pty, 1)) == 0) /* 0 means woken: try again */
+ continue;
+ } else {
+ if(ttycanread(p->pty, nil)) /* input became readable for the slave */
+ wakeq(&p->pty->q[0].r, MAXPROC);
+ if(ttycanreadoutput(p->pty, nil)) /* output or echo became readable for the master */
+ wakeq(&p->pty->q[1].r, MAXPROC);
+ }
+ break;
+ }
+ qunlock(p->pty);
+
+ return err;
+}
+/*
+ * closepty: close operation of the pty device.
+ * Closing the master marks the pty closed. The reference held by the
+ * file is dropped; when it was the last one the pty is removed from
+ * ptys[] and freed, otherwise all sleepers on both sides are woken so
+ * that they can notice the change. Always returns 0.
+ */
+static int
+closepty(Ufile *f)
+{
+ Ptyfile *p = (Ptyfile*)f;
+
+ if(p->pty == nil)
+ return 0;
+
+ qlock(p->pty);
+ if(p->master)
+ p->pty->closed = 1;
+ if(!decref(p->pty)){
+ ptys[p->pty->id] = nil;
+ qunlock(p->pty);
+ free(p->pty);
+ } else {
+ wakeq(&p->pty->q[0].r, MAXPROC);
+ wakeq(&p->pty->q[0].w, MAXPROC);
+ wakeq(&p->pty->q[1].r, MAXPROC);
+ wakeq(&p->pty->q[1].w, MAXPROC);
+ qunlock(p->pty);
+ }
+ return 0;
+}
+
+/*
+ * Make tty the controlling terminal of the calling process.
+ * If the process already has a controlling terminal nothing is changed:
+ * the result is 0 when it is tty itself and -EPERM otherwise. Without
+ * one, the pty adopts the caller's process group and is installed with
+ * settty(). Returns 0 on success.
+ */
+static int
+changetty(Ptyfile *tty)
+{
+	Ufile *old;
+
+	old = gettty();
+	if(!old){
+		tty->pty->pgid = current->pgid;
+		settty(tty);
+		return 0;
+	}
+	/* only the identity of the old tty matters, release the reference */
+	putfile(old);
+	if(old == tty)
+		return 0;
+	return -EPERM;
+}
+
+/*
+ * ioctlpty: ioctl operation of the pty device.
+ * cmd is the Linux request number, arg the request specific argument.
+ * Returns 0 on success, -ENOTTY for the directory file, for unknown
+ * requests and for TIOCNOTTY on a tty that is not the controlling
+ * terminal, or the result of changetty() for TIOCSCTTY.
+ */
+static int
+ioctlpty(Ufile *f, int cmd, void *arg)
+{
+	Ptyfile *p = (Ptyfile*)f;
+	int err, pid;
+
+	if(p->pty == nil)
+		return -ENOTTY;
+
+	trace("ioctlpty(%s, %lux, %p)", p->path, (ulong)cmd, arg);
+
+	err = 0;
+	qlock(p->pty);
+	switch(cmd){
+	default:
+		trace("ioctlpty: unknown: 0x%x", cmd);
+		err = -ENOTTY;
+		break;
+
+	case 0x5401:	/* TCGETS */
+		memmove(arg, &p->pty->t, sizeof(Termios));
+		break;
+
+	case 0x5402:	/* TCSETS */
+	case 0x5403:	/* TCSETSW */
+	case 0x5404:	/* TCSETSF */
+		memmove(&p->pty->t, arg, sizeof(Termios));
+		break;
+
+	case 0x5422:	// TIOCNOTTY
+		f = gettty();
+		if(f != nil){
+			/*
+			 * gettty() hands out a reference (see changetty()); release it
+			 * on every path, only the identity of the tty is needed below.
+			 */
+			putfile(f);
+			if(f != p){
+				err = -ENOTTY;
+				break;
+			}
+		}
+		settty(nil);
+		break;
+
+	case 0x540E:	// TIOCSCTTY
+		err = changetty(p);
+		break;
+
+	case 0x540F:	// TIOCGPGRP
+		*(int*)arg = p->pty->pgid;
+		break;
+
+	case 0x5410:	// TIOCSPGRP
+		p->pty->pgid = *(int*)arg;
+		break;
+
+	case 0x5413:	// TIOCGWINSZ
+		memmove(arg, &p->pty->winsize, sizeof(Winsize));
+		break;
+
+	case 0x5414:	// TIOCSWINSZ
+		if(memcmp(&p->pty->winsize, arg, sizeof(Winsize)) == 0)
+			break;
+		memmove(&p->pty->winsize, arg, sizeof(Winsize));
+		if((pid = p->pty->pgid) > 0){
+			/* signal the process group only after the pty lock is released */
+			qunlock(p->pty);
+
+			sys_kill(-pid, SIGWINCH);
+			return 0;
+		}
+		break;
+	case 0x40045431:	// TIOCSPTLCK
+		if(p->master)
+			p->pty->locked = *(int*)arg;
+		break;
+
+	case 0x80045430:	// TIOCGPTN
+		*(int*)arg = p->pty->id;
+		break;
+
+	case 0x541B:	// FIONREAD
+		if(arg == nil)
+			break;
+		/* err doubles as the byte count; it stays 0 when nothing is readable */
+		if(p->master){
+			ttycanreadoutput(p->pty, &err);
+		} else {
+			ttycanread(p->pty, &err);
+		}
+		if(err < 0){
+			*((int*)arg) = 0;
+			break;
+		}
+		*((int*)arg) = err;
+		err = 0;
+		break;
+	}
+	qunlock(p->pty);
+
+	return err;
+}
+
+/*
+ * openpty: open operation of the pty device.
+ *
+ *	/dev/tty	returns the controlling terminal, -ENOTTY without one
+ *	/dev/pts	the directory of slaves (file without a pty)
+ *	/dev/ptmx	allocates a new pty and returns its master side,
+ *			-EBUSY when all slots of ptys[] are taken
+ *	/dev/pts/N	the slave side of pty N; N must be a plain decimal
+ *			number. -ENOENT if there is no such pty, -EIO if
+ *			it is closed or still locked
+ *
+ * A slave opened without O_NOCTTY is offered to changetty() as the
+ * controlling terminal. perm is unused. On success the new file is
+ * stored in *pf and 0 is returned.
+ */
+static int
+openpty(char *path, int mode, int perm, Ufile **pf)
+{
+	Pty *pty;
+	Ptyfile *p;
+	int id;
+	int master;
+
+	USED(perm);
+
+	if(strcmp("/dev/tty", path)==0){
+		if(*pf = gettty())
+			return 0;
+		return -ENOTTY;
+	} else if(strcmp("/dev/pts", path)==0){
+		pty = nil;
+		master = -1;
+	} else if(strcmp("/dev/ptmx", path)==0){
+		master = 1;
+		for(id=0; id<nelem(ptys); id++){
+			if(ptys[id] == nil)
+				break;
+		}
+		if(id == nelem(ptys))
+			return -EBUSY;
+
+		pty = kmallocz(sizeof(*pty), 1);
+		pty->ref = 1;
+
+		ttyinit(pty);
+
+		ptys[pty->id = id] = pty;
+	} else {
+		char *d;
+
+		master = 0;
+		if(strncmp("/dev/pts/", path, 9) != 0)
+			return -ENOENT;
+		/*
+		 * atoi() yields 0 for anything that is not a number, which would
+		 * make names like /dev/pts/foo open pty 0: insist on digits only.
+		 */
+		for(d = path + 9; *d >= '0' && *d <= '9'; d++)
+			;
+		if(d == path + 9 || *d != 0)
+			return -ENOENT;
+		id = atoi(path + 9);
+		if(id < 0 || id >= nelem(ptys))
+			return -ENOENT;
+		if((pty = ptys[id]) == nil)
+			return -ENOENT;
+
+		qlock(pty);
+		if(pty->closed || pty->locked){
+			qunlock(pty);
+			return -EIO;
+		}
+		incref(pty);
+		qunlock(pty);
+	}
+
+	p = kmallocz(sizeof(*p), 1);
+	p->dev = PTYDEV;
+	p->ref = 1;
+	p->fd = -1;
+	p->mode = mode;
+	p->path = kstrdup(path);
+	p->pty = pty;
+	p->master = master;
+
+	/* master is -1 for the directory, so only real slaves get here */
+	if(!master && !(mode & O_NOCTTY))
+		changetty(p);
+
+	*pf = p;
+
+	return 0;
+}
+
+/*
+ * List the /dev/pts directory: one character device entry per pty in
+ * use, named after its index. Entries are appended through *pd.
+ * Returns the number of entries, or -EPERM when f is a pty rather than
+ * the directory.
+ */
+static int
+readdirpty(Ufile *f, Udirent **pd)
+{
+	Ptyfile *p = (Ptyfile*)f;
+	char name[12];
+	int id, count;
+
+	*pd = nil;
+	if(p->pty != nil)
+		return -EPERM;
+
+	count = 0;
+	for(id = 0; id < nelem(ptys); id++){
+		if(ptys[id] == nil)
+			continue;
+		snprint(name, sizeof(name), "%d", id);
+		*pd = newdirent(f->path, name, S_IFCHR | 0666);
+		if(*pd == nil)
+			break;
+		pd = &(*pd)->next;
+		count++;
+	}
+	return count;
+}
+
+/*
+ * fstatpty: stat an open file of the pty device.
+ * A pty is reported as a character device with mode 0666 (rdev 5<<8|2
+ * for the master, 3<<8 for a slave), the directory file as a directory
+ * with mode 0777. Owner is the calling process, all times are the boot
+ * time and the inode is the hash of the path. Always returns 0.
+ */
+static int
+fstatpty(Ufile *f, Ustat *s)
+{
+	Ptyfile *p = (Ptyfile*)f;
+
+	if(p->pty != nil){
+		s->mode = 0666 | S_IFCHR;
+		if(p->master){
+			s->rdev = 5<<8 | 2;
+		} else {
+			s->rdev = 3<<8;
+		}
+	} else {
+		s->mode = 0777 | S_IFDIR;
+		s->rdev = 0;
+	}
+	s->ino = hashpath(p->path);
+	s->dev = 0;
+	s->uid = current->uid;
+	s->gid = current->gid;
+	s->size = 0;
+	s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+	return 0;
+}
+
+/*
+ * statpty: stat a path of the pty device; the unnamed int argument is
+ * ignored. /dev/tty, /dev/ptmx and existing /dev/pts/N slaves are
+ * character devices with mode 0666, /dev/pts is a directory with mode
+ * 0777. Every field of *s that fstatpty() sets is set here as well,
+ * including dev and rdev. Returns 0 on success and -ENOENT for unknown
+ * paths, for slave names that are not plain decimal numbers and for
+ * ptys that do not exist; *s is left untouched in that case.
+ */
+static int
+statpty(char *path, int, Ustat *s)
+{
+	int mode, rdev;
+
+	rdev = 0;
+	if(strcmp("/dev/tty", path)==0){
+		mode = 0666 | S_IFCHR;
+	} else if(strcmp("/dev/ptmx", path)==0){
+		mode = 0666 | S_IFCHR;
+		rdev = 5<<8 | 2;
+	} else if(strcmp("/dev/pts", path)==0){
+		mode = 0777 | S_IFDIR;
+	} else if(strncmp("/dev/pts/", path, 9)==0){
+		char *d;
+		int id;
+
+		/* atoi() alone would map any non-numeric name to pty 0 */
+		for(d = path + 9; *d >= '0' && *d <= '9'; d++)
+			;
+		if(d == path + 9 || *d != 0)
+			return -ENOENT;
+		id = atoi(path + 9);
+		if(id < 0 || id >= nelem(ptys))
+			return -ENOENT;
+		if(ptys[id] == nil)
+			return -ENOENT;
+
+		mode = 0666 | S_IFCHR;
+		rdev = 3<<8;
+	} else {
+		return -ENOENT;
+	}
+	s->mode = mode;
+	s->rdev = rdev;
+	s->dev = 0;
+	s->ino = hashpath(path);
+	s->uid = current->uid;
+	s->gid = current->gid;
+	s->size = 0;
+	s->atime = s->mtime = s->ctime = boottime/1000000000LL;
+	return 0;
+}
+/*
+ * chmodpty: accepted but ignored; the arguments are unused and 0 is
+ * returned unconditionally.
+ */
+static int
+chmodpty(char *path, int mode)
+{
+ USED(path);
+ USED(mode);
+
+ return 0;
+}
+/*
+ * chownpty: accepted but ignored; the arguments are unused and 0 is
+ * returned unconditionally.
+ */
+static int
+chownpty(char *path, int uid, int gid, int link)
+{
+ USED(path);
+ USED(uid);
+ USED(gid);
+ USED(link);
+
+ return 0;
+}
+/*
+ * fchmodpty: accepted but ignored; the arguments are unused and 0 is
+ * returned unconditionally.
+ */
+static int
+fchmodpty(Ufile *f, int mode)
+{
+ USED(f);
+ USED(mode);
+
+ return 0;
+}
+/*
+ * fchownpty: accepted but ignored; the arguments are unused and 0 is
+ * returned unconditionally.
+ */
+static int
+fchownpty(Ufile *f, int uid, int gid)
+{
+ USED(f);
+ USED(uid);
+ USED(gid);
+
+ return 0;
+}
+/*
+ * Operations table of the pty device; ptydevinit() stores it in
+ * devtab[PTYDEV] and mounts it. The chmod and chown entries are stubs
+ * that report success without doing anything.
+ */
+static Udev ptydev =
+{
+ .open = openpty,
+ .read = readpty,
+ .write = writepty,
+ .poll = pollpty,
+ .close = closepty,
+ .readdir = readdirpty,
+ .ioctl = ioctlpty,
+ .fstat = fstatpty,
+ .stat = statpty,
+ .fchmod = fchmodpty,
+ .fchown = fchownpty,
+ .chmod = chmodpty,
+ .chown = chownpty,
+};
+/*
+ * ptydevinit: register the pty device in devtab and mount it on
+ * /dev/pts, /dev/ptmx and /dev/tty.
+ */
+void ptydevinit(void)
+{
+ devtab[PTYDEV] = &ptydev;
+ fsmount(&ptydev, "/dev/pts");
+ fsmount(&ptydev, "/dev/ptmx");
+ fsmount(&ptydev, "/dev/tty");
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+typedef struct Rfile Rfile;
+typedef struct Rpath Rpath;
+/*
+ * Open file of the root device: a Ufile (embedded anonymously) whose fd
+ * is the underlying host file descriptor, see ropen().
+ */
+struct Rfile
+{
+ Ufile;
+ struct
+ {
+ Dir *d;
+ int i;
+ int n;
+ } diraux; /* NOTE(review): not referenced in the visible part of this file; presumably directory read state */
+};
+/*
+ * Entry of rpathtab: one record per path that is currently open through
+ * this device, reference counted per open file (ropen / rclose).
+ */
+struct Rpath
+{
+ Ref;
+
+ Rpath *hash; /* next entry in the same hash bucket */
+
+ int deleted; /* when set, rclose() removes the file once the last reference is gone */
+ char str[]; /* the path, allocated together with the record */
+};
+
+static Rpath *rpathtab[64]; /* hash buckets, indexed by hashpath(path) % nelem(rpathtab) */
+static QLock rpathtablk; /* guards rpathtab and the static buffer in rclose() */
+
+/*
+ * Find the slot of path in rpathtab.
+ * The result points at the link that refers to the matching entry, or
+ * at the nil link terminating the bucket when the path is not present,
+ * so the caller can insert or unlink through it.
+ */
+static Rpath **
+rpathent(char *path)
+{
+	Rpath **pp;
+
+	for(pp = &rpathtab[hashpath(path) % nelem(rpathtab)]; *pp; pp = &((*pp)->hash))
+		if(strcmp(path, (*pp)->str) == 0)
+			break;
+	return pp;
+}
+
+/*
+ * Strip the ".udir.L." prefix of a symlink file name, if present.
+ * Returns a pointer into name.
+ */
+static char*
+linkname(char *name)
+{
+	return (strncmp(name, ".udir.L.", 8) == 0) ? name + 8 : name;
+}
+/*
+ * udirpath: build the path of the special file that stands for name in
+ * directory base. The file name is ".udir.X." followed by name, where X
+ * is the type letter (for instance 'L' for symbolic links).
+ * Returns the result of allocpath(); callers release it with free().
+ */
+static char*
+udirpath(char *base, char *name, char type)
+{
+ char buf[9]; /* 8 characters of prefix plus the terminating NUL */
+
+ strcpy(buf, ".udir.T.");
+ buf[6] = type; /* replace the placeholder letter */
+ return allocpath(base, buf, name);
+}
+
+/*
+ * udirget: read the special ".udir.<type>.<name>" file in directory base.
+ * With val == nil only the existence of the file is checked. Otherwise
+ * the whole contents are returned in *val as a NUL terminated string
+ * that the caller releases with free().
+ * Returns 0 on success and -1 if the file cannot be opened or read;
+ * *val is not modified on failure.
+ */
+static int
+udirget(char *base, char *name, char type, char **val)
+{
+	char *f, *b;
+	int n, r, s;
+	int fd;
+
+	f = udirpath(base, name, type);
+	fd = open(shortpath(current->kcwd, f), OREAD);
+	free(f);
+	/* nothing to clean up yet, and no descriptor to close */
+	if(fd < 0)
+		return -1;
+
+	r = -1;
+	if(val == nil){
+		r = 0;
+		goto out;
+	}
+	/* the file size is taken from the end-of-file offset */
+	if((s = seek(fd, 0, 2)) < 0)
+		goto out;
+	b = kmalloc(s+1);
+	n = 0;
+	if(s > 0){
+		seek(fd, 0, 0);
+		if((n = read(fd, b, s)) < 0){
+			free(b);
+			goto out;
+		}
+	}
+	b[n] = 0;
+
+	r = 0;
+	*val = b;
+out:
+	close(fd);
+	return r;
+}
+/*
+ * resolvepath1: resolve the last symbolic link found in path.
+ * The path is examined component by component from the end towards the
+ * front; for each component the ".udir.L." file in its directory is
+ * looked up with udirget(). The first one found is substituted: the
+ * result is fullpath(directory, link target) followed by the components
+ * that came after the link.
+ * If link is set and the link is the final component, the result is the
+ * path of the ".udir.L." file itself instead of the link target; nil is
+ * returned in that case when the final component already carried the
+ * ".udir.L." prefix.
+ * Returns a newly allocated path, or nil if no link was found.
+ */
+static char*
+resolvepath1(char *path, int link)
+{
+ char *r, *b, *p, *o, *e;
+ char **a;
+
+ int n;
+ int i;
+
+ r = nil;
+ a = nil;
+ n = 0;
+
+ b = kstrdup(path); /* private copy that is cut apart in place */
+ for(p=b; *p; p++){ /* record the position of every '/' in a[] */
+ if(*p == '/'){
+ if((n % 16) == 0)
+ a = krealloc(a, sizeof(a[0]) * (n+16));
+ a[n++] = p;
+ }
+ }
+
+ e = nil;
+ for(i=n-1; i>=0; i--){
+ char *t;
+ char *f;
+
+ o = e; /* slash that ends the current component, nil for the last one */
+ e = a[i];
+ *e++ = 0; /* b is now the directory, e the component name */
+
+ f = linkname(e);
+ t = nil;
+
+ if(!udirget(b, f, 'L', &t)){
+ if(t == nil)
+ break;
+ if(link && o==nil){
+ free(t);
+ if(f != e)
+ break;
+ t = udirpath(b, e, 'L');
+ }
+ r = fullpath(b, t);
+ free(t);
+ if(o && o[1]){ /* append what followed the link */
+ t = r;
+ r = fullpath(t, &o[1]);
+ free(t);
+ }
+ break;
+ }
+
+ --e;
+ if(o) *o = '/'; /* restore the slash cut in the previous round */
+ }
+ free(b);
+ free(a);
+
+ return r;
+}
+
+/*
+ * Resolve all symbolic links in path by applying resolvepath1() until
+ * it finds nothing more to substitute.
+ * Returns a newly allocated path (a copy of path when it contains no
+ * link), or nil when more than 8 substitutions would be needed.
+ */
+static char *
+resolvepath(char *path, int link)
+{
+	char *next;
+	int hops;
+
+	path = kstrdup(path);
+	for(hops = 1; (next = resolvepath1(path, link)) != nil; hops++){
+		if(hops > 8){
+			/* give up: most likely a loop of links */
+			free(next);
+			free(path);
+			return nil;
+		}
+		free(path);
+		path = next;
+	}
+	return path;
+}
+/*
+ * ropen: open operation of the root device.
+ * Opens or creates path on the host. When the path or its directory
+ * cannot be reached directly, a symbolic link in the way is resolved
+ * with resolvepath1() and the request is handed back to fsopen() with
+ * the resolved path. On success the path is entered (or its reference
+ * count raised) in rpathtab, a new Rfile is stored in *pf and 0 is
+ * returned; otherwise a negative error code is returned.
+ */
+static int
+ropen(char *path, int mode, int perm, Ufile **pf)
+{
+ Ufile *f;
+ int err;
+ char *s, *t;
+ int mode9, perm9;
+ int fd;
+ char *base;
+ char *name;
+ Rpath **prp;
+
+ trace("ropen(%s, %#o, %#o, ...)", path, mode, perm);
+
+ base = nil;
+ name = nil;
+ mode9 = mode & 3; /* the access mode bits are passed on to open() unchanged */
+ perm9 = (perm & ~current->umask) & 0777;
+
+ s = shortpath(current->kcwd, path);
+
+ if(mode & O_CREAT) {
+ Dir *d;
+
+ err = -EINVAL;
+ if((base = basepath(path, &name)) == nil)
+ goto out;
+
+ /* resolve base directory */
+ if((d = dirstat(shortpath(current->kcwd, base))) == nil){
+ err = mkerror();
+ if(t = resolvepath1(base, 0)){
+ free(base); base = t;
+ t = allocpath(t, nil, name);
+ err = fsopen(t, mode, perm, pf);
+ free(t);
+ }
+ goto out;
+ }
+ err = -ENOTDIR;
+ if((d->mode & DMDIR) == 0){
+ free(d);
+ goto out;
+ }
+ free(d);
+
+ /* check if here is a symlink in the way */
+ t = udirpath(base, name, 'L');
+ if((fd = open(shortpath(current->kcwd, t), OREAD)) >= 0){
+ free(t);
+ close(fd);
+
+ if(mode & O_EXCL){
+ err = -EEXIST;
+ goto out;
+ }
+
+ if((t = resolvepath1(path, 0)) == nil) /* NOTE(review): err still holds -ENOTDIR from above on this path */
+ goto out;
+ err = fsopen(t, mode, perm, pf);
+ free(t);
+ goto out;
+ }
+ free(t);
+
+ if(mode & (O_EXCL | O_TRUNC)){
+ if(mode & O_EXCL)
+ mode9 |= OEXCL;
+ fd = create(s, mode9, perm9);
+ } else {
+ /* try open first to avoid truncating existing the file */
+ if((fd = open(s, mode9)) < 0)
+ fd = create(s, mode9, perm9);
+ }
+ if(fd < 0){
+ err = mkerror();
+ goto out;
+ }
+ } else {
+ if(((mode & 3) == O_RDWR) || ((mode & 3) == O_WRONLY)) /* truncation is only requested for writable opens */
+ if(mode & O_TRUNC)
+ mode9 |= OTRUNC;
+
+ if((fd = open(s, mode9)) < 0){
+ err = mkerror(); /* taken before resolvepath1() issues further system calls */
+ if(t = resolvepath1(path, 0)){
+ err = fsopen(t, mode, perm, pf);
+ free(t);
+ }
+ goto out;
+ }
+ }
+
+ qlock(&rpathtablk);
+ prp = rpathent(path);
+ if(*prp != nil){
+ incref(*prp);
+ } else {
+ Rpath *rp;
+
+ rp = kmalloc(sizeof(*rp) + strlen(path) + 1);
+ rp->ref = 1;
+ rp->hash = nil;
+ rp->deleted = 0;
+ strcpy(rp->str, path);
+ *prp = rp; /* prp is the end of the bucket chain here */
+ }
+ qunlock(&rpathtablk);
+
+ f = kmallocz(sizeof(Rfile), 1);
+ f->ref = 1;
+ f->path = kstrdup(path);
+ f->dev = ROOTDEV;
+ f->mode = mode;
+ f->fd = fd;
+ f->off = 0;
+ *pf = f;
+
+ err = 0;
+
+out:
+ free(base);
+ free(name);
+
+ return err;
+}
+/*
+ * rclose: close operation of the root device.
+ * Drops the reference on the rpathtab entry of the file's path. When it
+ * was the last one the entry is unlinked and freed, and if the entry
+ * was marked deleted the host file is removed by the name reported by
+ * fd2path(). The host descriptor is closed afterwards. Returns 0.
+ */
+static int
+rclose(Ufile *f)
+{
+ Rpath **prp;
+ Rfile *file = (Rfile*)f;
+ static char path[1024]; /* protected by rpathtablk */
+
+ qlock(&rpathtablk);
+ prp = rpathent(file->path);
+ if(!decref(*prp)){ /* NOTE(review): *prp is used without a nil check; relies on the entry made by ropen() still being present */
+ Rpath *rp = *prp;
+ *prp = rp->hash;
+ if(rp->deleted){
+ if(fd2path(file->fd, path, sizeof(path)) == 0)
+ remove(shortpath(current->kcwd, path));
+ }
+ free(rp);
+ }
+ qunlock(&rpathtablk);
+
+ close(file->fd);
+ return 0;
+}
+/*
+ * rread: read operation of the root device.
+ * Calls pread() repeatedly until len bytes are read, end of file is
+ * reached or an error occurs. Returns the number of bytes read,
+ * -ERESTART when notifyme(1) reports a non-zero result, or the error
+ * from mkerror() when the last pread() failed (bytes read before the
+ * failure are not reported).
+ */
+static int
+rread(Ufile *f, void *buf, int len, vlong off)
+{
+ Rfile *file = (Rfile*)f;
+ int ret, n;
+
+ n = ret = 0;
+ if(notifyme(1)) /* presumably enables interruption by notes for the blocking call; confirm in notifyme */
+ return -ERESTART;
+ while((n < len) && ((ret = pread(file->fd, (uchar*)buf + n, len - n, off + n)) > 0))
+ n += ret;
+ notifyme(0);
+ if(ret < 0)
+ return mkerror();
+ return n;
+}
+
+/*
+ * Write len bytes at offset off with a single pwrite() on the host
+ * descriptor. Returns the number of bytes written, -ERESTART when
+ * notifyme(1) reports a non-zero result, or the error from mkerror()
+ * when pwrite() fails.
+ */
+static int
+rwrite(Ufile *f, void *buf, int len, vlong off)
+{
+	Rfile *file;
+	int n;
+
+	file = (Rfile*)f;
+	if(notifyme(1))
+		return -ERESTART;
+	n = pwrite(file->fd, buf, len, off);
+	notifyme(0);
+	return (n < 0) ? mkerror() : n;
+}
+
+/*
+ * Return the size of the file as the offset reported by seeking the
+ * host descriptor to its end (seek type 2).
+ */
+static vlong
+rsize(Ufile *f)
+{
+	return seek(((Rfile*)f)->fd, 0, 2);
+}
+/*
+ * raccess: access operation of the root device.
+ * mode is a combination of the bits 04 (read), 02 (write) and 01
+ * (execute); any other bit yields -EINVAL. The execute bit is ignored.
+ * Directories and a mode without read or write bit are always granted.
+ * Otherwise the permission bits of the file must allow the access for
+ * someone and a trial open() with the matching mode must succeed.
+ * When the path cannot be stat'ed a symbolic link in it is resolved and
+ * the request is handed to fsaccess().
+ * Returns 0 if access is granted, -EACCES or another error if not.
+ */
+static int
+raccess(char *path, int mode)
+{
+ static char omode[] = {
+ 0, // ---
+ OEXEC, // --x
+ OWRITE, // -w-
+ ORDWR, // -wx
+ OREAD, // r--
+ OEXEC, // r-x
+ ORDWR, // rw-
+ ORDWR // rwx
+ };
+
+ int err;
+ int fd;
+ Dir *d;
+ char *s;
+
+ err = -EINVAL;
+ if(mode & ~07)
+ return err;
+
+ s = shortpath(current->kcwd, path);
+ if((d = dirstat(s)) == nil){
+ err = mkerror();
+ if(path = resolvepath1(path, 0)){
+ err = fsaccess(path, mode);
+ free(path);
+ }
+ goto out;
+ }
+
+ /* ignore the exec bit... firefox gets confused */
+ mode &= ~01;
+ if((mode == 0) || (d->mode & DMDIR)){
+ err = 0;
+ } else {
+ err = -EACCES;
+ if((mode & 01) && ((d->mode & 0111) == 0)) /* never true: bit 01 was cleared above */
+ goto out;
+ if((mode & 02) && ((d->mode & 0222) == 0))
+ goto out;
+ if((mode & 04) && ((d->mode & 0444) == 0))
+ goto out;
+ if((fd = open(s, omode[mode])) >= 0){ /* the real test: can the file be opened this way? */
+ close(fd);
+ err = 0;
+ }
+ }
+out:
+ free(d);
+ return err;
+}
+/*
+ * dir2statmode: derive the Linux st_mode value of a host Dir entry.
+ * The permission bits are taken from d->mode; the file type is chosen
+ * from the directory bit, from well known names (cons, PTS.*, zero,
+ * null), from the type letter of a ".udir.X." special file, or from the
+ * device type character d->type, in this order. Everything else is a
+ * regular file.
+ */
+static ulong
+dir2statmode(Dir *d)
+{
+ ulong mode;
+
+ mode = d->mode & 0777;
+ if(d->mode & DMDIR)
+ mode |= S_IFDIR;
+ else if(strcmp(d->name, "cons") == 0)
+ mode |= S_IFCHR;
+ else if(strncmp(d->name, "PTS.", 4) == 0)
+ mode |= S_IFCHR;
+ else if(strcmp(d->name, "zero") == 0)
+ mode |= S_IFCHR | 0222; /* additionally reported as writable by everyone */
+ else if(strcmp(d->name, "null") == 0)
+ mode |= S_IFCHR | 0222;
+ else if(strncmp(d->name, ".udir.", 6) == 0){
+ switch(d->name[6]){ /* type letter; an unknown letter adds no file type bits */
+ case 'L':
+ mode |= S_IFLNK;
+ break;
+ case 'S':
+ mode |= S_IFSOCK;
+ break;
+ case 'F':
+ mode |= S_IFIFO;
+ break;
+ case 'C':
+ mode |= S_IFCHR;
+ break;
+ case 'B':
+ mode |= S_IFBLK;
+ break;
+ }
+ } else if(d->type == '|')
+ mode |= S_IFIFO;
+ else if(d->type == 'H')
+ mode |= S_IFBLK;
+ else
+ mode |= S_IFREG;
+
+ return mode;
+}
+/*
+ * dir2ustat: fill *s from a host Dir entry. Owner and group are those
+ * of the calling process, not of the file. ino is left at 0 for the
+ * caller to set.
+ */
+static void
+dir2ustat(Dir *d, Ustat *s)
+{
+ s->mode = dir2statmode(d);
+ s->uid = current->uid;
+ s->gid = current->gid;
+ s->size = d->length;
+ s->atime = d->atime;
+ s->mtime = d->mtime;
+ s->ctime = d->mtime; /* the modification time also serves as change time */
+ s->ino = 0; // use d->qid?
+ s->dev = 0;
+ s->rdev = 0;
+}
+/*
+ * rstat: stat operation of the root device.
+ * The path is stat'ed directly first. If that fails and link is set,
+ * the ".udir.L." file standing for a symbolic link of that name is
+ * tried, so that the link itself is reported. If both fail, a symbolic
+ * link in the path is resolved with resolvepath1() and the request is
+ * handed to fsstat(). The inode number is the hash of the path.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+rstat(char *path, int link, Ustat *s)
+{
+ Dir *d;
+ int err;
+ char *t;
+
+ if((d = dirstat(shortpath(current->kcwd, path))) == nil){
+ if(link){
+ char *base;
+ char *name;
+ if(base = basepath(path, &name)){
+ t = udirpath(base, name, 'L');
+ free(name);
+ free(base);
+ d = dirstat(shortpath(current->kcwd, t));
+ free(t);
+ }
+
+ }
+ }
+ if(d == nil){
+ err = mkerror(); /* taken before resolvepath1() issues further system calls */
+ if(t = resolvepath1(path, 0)){
+ err = fsstat(t, link, s);
+ free(t);
+ }
+ return err;
+ }
+
+ dir2ustat(d, s);
+ s->ino = hashpath(path);
+
+ free(d);
+ return 0;
+}
+
+/*
+ * Stat an open file of the root device through its host descriptor.
+ * The inode number is the hash of the path the file was opened with.
+ * Returns 0 on success or the error from mkerror().
+ */
+static int
+rfstat(Ufile *f, Ustat *s)
+{
+	Dir *info;
+
+	info = dirfstat(f->fd);
+	if(info == nil)
+		return mkerror();
+
+	dir2ustat(info, s);
+	s->ino = hashpath(f->path);
+	free(info);
+	return 0;
+}
+
+/*
+ * Strip the ".udir.X." prefix (X being any type letter) of a special
+ * file name so that the entry shows up under its plain name.
+ * Returns a pointer into name, or nil when name is nil.
+ */
+static char*
+fixname(char *name)
+{
+	if(name == 0)
+		return 0;
+	if(strncmp(name, ".udir.", 6) != 0)
+		return name;
+	/* the prefix only counts with a type letter and the closing dot */
+	if(name[6] == 0 || name[7] != '.')
+		return name;
+	return name + 8;
+}
+/*
+ * rreaddir: readdir operation of the root device.
+ * Reads the whole host directory from its start and appends one Udirent
+ * per entry through *pd; special ".udir.X." files appear under their
+ * plain name with the file type given by dir2statmode().
+ * Returns the number of entries produced or the error from mkerror().
+ */
+static int
+rreaddir(Ufile *f, Udirent **pd)
+{
+ Dir *d;
+ int i, n;
+
+ seek(f->fd, 0, 0); /* always list from the beginning */
+ n = dirreadall(f->fd, &d);
+ if(n < 0)
+ return mkerror();
+ for(i=0; i<n; i++){
+ if((*pd = newdirent(f->path, fixname(d[i].name), dir2statmode(&d[i]))) == nil)
+ break;
+ pd = &((*pd)->next);
+ }
+ free(d);
+ return i;
+}
+/*
+ * rreadlink: readlink operation of the root device.
+ * A symbolic link is stored as the contents of the ".udir.L.<name>"
+ * file in the directory of the link. If the directory cannot be opened
+ * a link in the directory path is resolved and the request is handed to
+ * fsreadlink(). A path that can be opened directly is not a link and
+ * yields -EINVAL.
+ * Returns the number of bytes read into buf (at most len, no NUL is
+ * appended) or a negative error code.
+ */
+static int
+rreadlink(char *path, char *buf, int len)
+{
+ int err;
+ int fd;
+
+ char *t;
+ char *name;
+ char *base;
+
+ trace("rreadlink(%s)", path);
+
+ if((base = basepath(path, &name)) == nil)
+ return -EINVAL;
+
+ /* resolve base path */
+ if((fd = open(shortpath(current->kcwd, base), OREAD)) < 0){
+ err = mkerror();
+ if(t = resolvepath1(base, 0)){
+ free(base); base = t;
+ t = allocpath(base, nil, name);
+ err = fsreadlink(t, buf, len);
+ free(t);
+ }
+ goto out;
+ }
+ close(fd);
+
+ /* check if path is regular file */
+ if((fd = open(shortpath(current->kcwd, path), OREAD)) >= 0){
+ close(fd);
+ err = -EINVAL;
+ goto out;
+ }
+
+ t = udirpath(base, name, 'L');
+ if((fd = open(shortpath(current->kcwd, t), OREAD)) < 0){
+ err = mkerror();
+ free(t);
+ goto out;
+ }
+ free(t);
+ if((err = read(fd, buf, len)) < 0) /* the file contents are the link target */
+ err = mkerror();
+ close(fd);
+out:
+ free(base);
+ free(name);
+ return err;
+}
+
+enum {
+ COPYSIZE = 8*1024, /* size in bytes of the transfer buffer used by copyfile() */
+};
+
+/*
+ * copyfile: copy the file or directory tree from to the new name to.
+ * The target must not exist (-EEXIST otherwise). Directories are
+ * created with the mode of the source and copied recursively; files are
+ * copied in chunks of COPYSIZE bytes.
+ * Returns 0 on success or a negative error code. On failure a partial
+ * copy may be left behind for the caller to clean up.
+ */
+static int
+copyfile(char *from, char *to)
+{
+	int err, fromfd, tofd;
+	char *buf, *s;
+	Dir *ent;
+	Dir *dir;
+
+	dir = nil;
+	buf = nil;
+	ent = nil;
+
+	tofd = -1;
+
+	trace("copyfile(%s, %s)", from, to);
+
+	if((fromfd = open(shortpath(current->kcwd, from), OREAD)) < 0){
+		err = mkerror();
+		goto out;
+	}
+	if((dir = dirfstat(fromfd)) == nil){
+		err = mkerror();
+		goto out;
+	}
+	s = shortpath(current->kcwd, to);
+	/* refuse to overwrite: the target must not be openable */
+	if((err = open(s, OREAD)) >= 0){
+		close(err);
+		err = -EEXIST;
+		goto out;
+	}
+	if(dir->mode & DMDIR){
+		int n;
+		if((tofd = create(s, OREAD, dir->mode)) < 0){
+			err = mkerror();
+			goto out;
+		}
+		close(tofd);
+		tofd = -1;
+		while((n = dirread(fromfd, &ent)) > 0){
+			int i;
+
+			for(i=0; i<n; i++){
+				char *froment, *toent;
+
+				froment = allocpath(from, nil, ent[i].name);
+				toent = allocpath(to, nil, ent[i].name);
+				err = copyfile(froment, toent);
+				free(froment);
+				free(toent);
+
+				if(err < 0)
+					goto out;
+			}
+			free(ent); ent = nil;
+		}
+		/*
+		 * A failed directory read must not pass for a complete copy:
+		 * rrename() removes the source once this function succeeds.
+		 */
+		if(n < 0){
+			err = mkerror();
+			goto out;
+		}
+	} else {
+		if((tofd = create(s, OWRITE, dir->mode)) < 0){
+			err = mkerror();
+			goto out;
+		}
+		buf = kmalloc(COPYSIZE);
+		for(;;){
+			err = read(fromfd, buf, COPYSIZE);
+			if(err == 0)
+				break;
+			if(err < 0){
+				err = mkerror();
+				goto out;
+			}
+			if(write(tofd, buf, err) != err){
+				err = mkerror();
+				goto out;
+			}
+		}
+	}
+
+	err = 0;
+out:
+	free(ent);
+	free(dir);
+	free(buf);
+	/* only close what was actually opened */
+	if(fromfd >= 0)
+		close(fromfd);
+	if(tofd >= 0)
+		close(tofd);
+	return err;
+}
+/*
+ * removefile: remove a file or a whole directory tree.
+ * A plain remove() is tried first. If it fails on a directory, the
+ * entries of the directory are removed recursively and the remove() is
+ * repeated. Returns 0 on success or a negative error code; the walk
+ * stops at the first entry that cannot be removed.
+ */
+static int
+removefile(char *path)
+{
+ int err;
+ int n;
+ Dir *d;
+ int fd;
+ char *s;
+
+ trace("removefile(%s)", path);
+
+ s = shortpath(current->kcwd, path);
+
+ if((d = dirstat(s)) == nil)
+ return mkerror();
+ if(remove(s) == 0){ /* files and empty directories go right away */
+ free(d);
+ return 0;
+ }
+ if((d->mode & DMDIR) == 0){
+ free(d);
+ return mkerror();
+ }
+ free(d);
+ if((fd = open(s, OREAD)) < 0)
+ return mkerror();
+ err = 0;
+ d = nil;
+ while((n = dirread(fd, &d)) > 0){
+ char *t;
+ int i;
+
+ for(i=0; i<n; i++){
+ t = allocpath(path, nil, d[i].name);
+ err = removefile(t);
+ free(t);
+
+ if(err < 0)
+ break;
+ }
+ free(d); d = nil;
+
+ if(err < 0)
+ break;
+ }
+ close(fd);
+ if(err < 0)
+ return err;
+ if(n < 0) /* NOTE(review): mkerror() runs after close(fd) here; confirm that close() leaves the error string intact */
+ return mkerror();
+ if(remove(s) < 0) /* the directory should be empty now */
+ return mkerror();
+ return 0;
+}
+
+/*
+ * resolvefromtopath: prepare the two paths of a rename.
+ * Both *from and *to are replaced by newly allocated, link-resolved
+ * copies (resolvepath() with the link flag set) which the caller
+ * releases with free(). When the source turns out to be a symbolic
+ * link, i.e. a ".udir.L." file, the last component of the target is
+ * given the same prefix unless it already names a ".udir." file.
+ * Returns 0 on success. On failure -ELOOP is returned and both *from
+ * and *to are nil.
+ */
+static int
+resolvefromtopath(char **from, char **to)
+{
+	char *t;
+
+	trace("resolvefromtopath(%s, %s)", *from, *to);
+
+	if((*from = resolvepath(*from, 1)) == nil){
+		*to = nil;
+		return -ELOOP;
+	}
+	if((*to = resolvepath(*to, 1)) == nil){
+		free(*from);
+		*from = nil;
+		return -ELOOP;
+	}
+	if(strstr(*from, ".udir.L")){
+		char *x;
+
+		/* find the last slash of the target */
+		x = nil;
+		for(t=*to; *t; t++){
+			if(*t == '/')
+				x = t;
+		}
+
+		/* a target without any slash has no directory part to rewrite */
+		if(x != nil && strncmp(x+1, ".udir.", 6)){
+			*x = 0;
+			t = udirpath(*to, x+1, 'L');
+			free(*to); *to = t;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * rrename: rename operation of the root device.
+ * Both paths are link-resolved first. If source and target live in the
+ * same directory the target is removed and the source is renamed in
+ * place with dirwstat(). Otherwise the source is copied to a temporary
+ * name next to the target, the target is removed, the copy is renamed
+ * to the target name and finally the source is removed; a failed copy
+ * is cleaned up again.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+rrename(char *from, char *to)
+{
+	int err;
+	char *x, *y, *t;
+
+	trace("rrename(%s, %s)", from, to);
+
+	/* from and to are private copies from here on, freed at out: */
+	if((err = resolvefromtopath(&from, &to)) < 0)
+		goto out;
+	if(strcmp(from, to) == 0)
+		goto out;
+	/* x and y: last slash of source and target */
+	x = nil;
+	for(t=from; *t; t++){
+		if(*t == '/')
+			x = t;
+	}
+	y = nil;
+	for(t=to; *t; t++){
+		if(*t == '/')
+			y = t;
+	}
+	if(x && y){
+		char *e;
+
+		/* compare the directory parts by cutting both paths temporarily */
+		e = nil;
+		*x = 0; *y = 0;
+		if(strcmp(from, to) == 0)
+			e = &y[1];
+		*x = '/'; *y = '/';
+
+		if(e != nil){
+			Dir d;
+
+			nulldir(&d);
+			d.name = e;
+
+			/* go through shortpath() like every other host call here */
+			remove(shortpath(current->kcwd, to));
+			if(dirwstat(shortpath(current->kcwd, from), &d) < 0)
+				err = mkerror();
+			goto out;
+		}
+	}
+	t = ksmprint("%s%d%d.tmp", to, current->pid, current->tid);
+	if((err = copyfile(from, t)) == 0){
+		Dir d;
+
+		nulldir(&d);
+		/* without a slash the whole target is the new name */
+		d.name = (y != nil) ? &y[1] : to;
+
+		remove(shortpath(current->kcwd, to));
+		if(dirwstat(shortpath(current->kcwd, t), &d) < 0) {
+			err = mkerror();
+		} else {
+			removefile(from);
+		}
+	}
+	if(err != 0)
+		removefile(t);
+	free(t);
+out:
+	free(from);
+	free(to);
+
+	return err;
+}
+
+/*
+ * rmkdir creates the directory path with the linux permission
+ * bits in mode (reduced by the process umask). if the parent
+ * cannot be stat'ed the request is retried through fsmkdir()
+ * on the parent resolved by resolvepath1(). returns 0,
+ * -EINVAL, -ENOTDIR, -EEXIST or the value of mkerror().
+ */
+static int
+rmkdir(char *path, int mode)
+{
+ int err;
+ Dir *d;
+ int fd;
+ int mode9;
+
+ char *base;
+ char *name;
+ char *t;
+
+ trace("rmkdir(%s, %#o)", path, mode);
+
+ /* base and name are allocated by basepath and freed at out */
+ if((base = basepath(path, &name)) == nil)
+ return -EINVAL;
+
+ if((d = dirstat(shortpath(current->kcwd, base))) == nil){
+ err = mkerror();
+ /* parent not reachable directly: resolve it and let fsmkdir try */
+ if(t = resolvepath1(base, 0)){
+ free(base); base = t;
+ t = allocpath(base, nil, name);
+ err = fsmkdir(t, mode);
+ free(t);
+ }
+ goto out;
+ }
+ err = -ENOTDIR;
+ if((d->mode & DMDIR) == 0){
+ free(d);
+ goto out;
+ }
+ free(d);
+
+ /* refuse if the udirpath(..., 'L') entry for this name already exists */
+ err = -EEXIST;
+ t = udirpath(base, name, 'L');
+ if(d = dirstat(shortpath(current->kcwd, t))){
+ free(d);
+ free(t);
+ goto out;
+ }
+ free(t);
+
+ mode9 = DMDIR | ((mode & ~current->umask) & 0777);
+ if((fd = create(shortpath(current->kcwd, path), OREAD|OEXCL, mode9)) < 0){
+ err = mkerror();
+ goto out;
+ }
+ close(fd);
+ err = 0;
+
+out:
+ free(name);
+ free(base);
+ return err;
+}
+
+/*
+ * combinedir merges the permission bits requested in ndir with
+ * the remaining mode bits of the existing entry odir. a mode
+ * of ~0 in ndir is left alone.
+ */
+static void
+combinedir(Dir *ndir, Dir *odir)
+{
+	if(ndir->mode == ~0)
+		return;
+	ndir->mode = (ndir->mode & 0777) | (odir->mode & ~0777);
+}
+
+/*
+ * uwstat applies the changes in ndir to path with dirwstat().
+ * the mode in ndir is first merged with the existing mode by
+ * combinedir(). if path cannot be stat'ed and link is set, the
+ * call is retried once on the udirpath(..., 'L') name of path.
+ * returns 0 or the value of mkerror().
+ */
+static int
+uwstat(char *path, Dir *ndir, int link)
+{
+ int err;
+ Dir *dir;
+ char *s;
+
+ trace("uwstat(%s, ..., %d)", path, link);
+
+ s = shortpath(current->kcwd, path);
+ if((dir = dirstat(s)) == nil){
+ err = mkerror();
+ if(link){
+ char *base;
+ char *name;
+
+ if(base = basepath(path, &name)){
+ char *t;
+
+ t = udirpath(base, name, 'L');
+ free(base);
+ free(name);
+
+ /* link=0 so the retry cannot recurse any further */
+ err = uwstat(t, ndir, 0);
+ free(t);
+ }
+ }
+ return err;
+ }
+ combinedir(ndir, dir);
+ err = 0;
+ if(dirwstat(s, ndir) < 0)
+ err = mkerror();
+ free(dir);
+ return err;
+}
+
+/*
+ * uwfstat is the file descriptor variant of uwstat: the mode in
+ * ndir is merged with the current mode of f->fd and the result
+ * is written with dirfwstat(). returns 0 or the value of
+ * mkerror().
+ */
+static int
+uwfstat(Ufile *f, Dir *ndir)
+{
+	Dir *odir;
+	int ret;
+
+	odir = dirfstat(f->fd);
+	if(odir == nil){
+		ret = mkerror();
+		free(odir);
+		return ret;
+	}
+	combinedir(ndir, odir);
+	ret = 0;
+	if(dirfwstat(f->fd, ndir) < 0)
+		ret = mkerror();
+	free(odir);
+	return ret;
+}
+
+/*
+ * rutime sets access and modification time of path. if the
+ * wstat fails, the path is resolved with resolvepath1() and
+ * the request is handed to fsutime().
+ */
+static int
+rutime(char *path, long atime, long mtime)
+{
+	Dir wd;
+	char *rp;
+	int ret;
+
+	trace("rutime(%s, %ld, %ld)", path, atime, mtime);
+
+	nulldir(&wd);
+	wd.atime = atime;
+	wd.mtime = mtime;
+
+	ret = uwstat(path, &wd, 1);
+	if(ret >= 0)
+		return ret;
+
+	/* fall back to whatever the resolved path belongs to */
+	rp = resolvepath1(path, 0);
+	if(rp == nil)
+		return ret;
+	ret = fsutime(rp, atime, mtime);
+	free(rp);
+	return ret;
+}
+
+/*
+ * rchmod changes the permission bits of path. if the wstat
+ * fails, the path is resolved with resolvepath1() and the
+ * request is handed to fschmod().
+ */
+static int
+rchmod(char *path, int mode)
+{
+	Dir wd;
+	char *rp;
+	int ret;
+
+	trace("rchmod(%s, %#o)", path, mode);
+
+	nulldir(&wd);
+	wd.mode = mode;
+
+	ret = uwstat(path, &wd, 1);
+	if(ret >= 0)
+		return ret;
+
+	rp = resolvepath1(path, 0);
+	if(rp == nil)
+		return ret;
+	ret = fschmod(rp, mode);
+	free(rp);
+	return ret;
+}
+
+/*
+ * rchown does not change any ownership. it only stats the path
+ * so that the caller gets the matching error code.
+ */
+static int
+rchown(char *path, int uid, int gid, int link)
+{
+	Ustat st;
+
+	USED(uid);
+	USED(gid);
+
+	/* FIXME, just return the right errorcode for now */
+	return fsstat(path, link, &st);
+}
+
+/*
+ * rtruncate sets the length of path to size. if the wstat
+ * fails, the path is resolved with resolvepath1() and the
+ * request is handed to fstruncate().
+ */
+static int
+rtruncate(char *path, vlong size)
+{
+	Dir wd;
+	char *rp;
+	int ret;
+
+	trace("rtruncate(%s, %lld)", path, size);
+
+	nulldir(&wd);
+	wd.length = size;
+
+	ret = uwstat(path, &wd, 0);
+	if(ret >= 0)
+		return ret;
+
+	rp = resolvepath1(path, 0);
+	if(rp == nil)
+		return ret;
+	ret = fstruncate(rp, size);
+	free(rp);
+	return ret;
+}
+
+/* rfchmod changes the permission bits of the open file f. */
+static int
+rfchmod(Ufile *f, int mode)
+{
+	Dir wd;
+
+	nulldir(&wd);
+	wd.mode = mode;
+
+	return uwfstat(f, &wd);
+}
+
+/* rfchown ignores its arguments and always reports success. */
+static int
+rfchown(Ufile *f, int uid, int gid)
+{
+	USED(f);
+	USED(uid);
+	USED(gid);
+
+	return 0;
+}
+
+/* rftruncate sets the length of the open file f to size. */
+static int
+rftruncate(Ufile *f, vlong size)
+{
+	Dir wd;
+
+	nulldir(&wd);
+	wd.length = size;
+
+	return uwfstat(f, &wd);
+}
+
+/*
+ * runlink removes the file (rmdir == 0) or directory (rmdir != 0)
+ * named by path. the entry is looked up under its plain name
+ * first and under its udirpath(..., 'L') name second; if neither
+ * exists the request is handed to fsunlink() on the resolved
+ * path. an entry that still has an rpath table entry is renamed
+ * to ".<name>.<kpid>.deleted" and marked deleted instead of
+ * being removed. returns 0 or a negative error.
+ */
+static int
+runlink(char *path, int rmdir)
+{
+ int err;
+ Dir *dir;
+ char *t, *s;
+ char *base;
+ char *name;
+ char *rpath;
+ Rpath **prp;
+
+ trace("runlink(%s, %d)", path, rmdir);
+
+ rpath = nil;
+ dir = nil;
+ err = -EINVAL;
+ if((base = basepath(path, &name)) == nil)
+ goto out;
+ /* rpath is the name the entry was actually found under */
+ if(dir = dirstat(shortpath(current->kcwd, path))){
+ rpath = kstrdup(path);
+ } else {
+ rpath = udirpath(base, name, 'L');
+ dir = dirstat(shortpath(current->kcwd, rpath));
+ }
+ if(dir == nil){
+ err = mkerror();
+ if(t = resolvepath1(path, 0)){
+ err = fsunlink(t, rmdir);
+ free(t);
+ }
+ goto out;
+ }
+ /* the entry type has to match the kind of removal requested */
+ if(rmdir){
+ if((dir->mode & DMDIR) == 0){
+ err = -ENOTDIR;
+ goto out;
+ }
+ } else {
+ if(dir->mode & DMDIR){
+ err = -EISDIR;
+ goto out;
+ }
+ }
+
+ s = shortpath(current->kcwd, rpath);
+
+ qlock(&rpathtablk);
+ prp = rpathent(path);
+ if(*prp){
+ Dir ndir;
+
+ /* still in use: rename it out of the way and flag it as deleted */
+ t = ksmprint(".%s.%d.deleted", name, current->kpid);
+ nulldir(&ndir);
+ ndir.name = t;
+ trace("runlink: file %s still in use renaming to -> %s", path, t);
+ if(dirwstat(s, &ndir) < 0){
+ qunlock(&rpathtablk);
+ err = mkerror();
+ free(t);
+ goto out;
+ }
+ free(t);
+ (*prp)->deleted = 1;
+ qunlock(&rpathtablk);
+
+ } else {
+ int x;
+ qunlock(&rpathtablk);
+
+ /* retry a failing remove a few times before giving up */
+ x = 0;
+ while(remove(s) < 0){
+ err = mkerror();
+ if(++x > 8){
+ /* old debian bug clashes with mntgen */
+ if(strcmp(base, "/")==0 && strstr(path, ".dpkg-"))
+ err = -ENOENT;
+ goto out;
+ }
+ }
+ }
+ err = 0;
+out:
+ free(dir);
+ free(name);
+ free(base);
+ free(rpath);
+
+ return err;
+}
+
+/*
+ * rlink creates new as a link to old. hard links (sym == 0) are
+ * made by copying the file with copyfile(). symbolic links are
+ * stored as a file named udirpath(base, name, 'L') that holds
+ * the target string. if the directory of new cannot be opened
+ * the request is handed to fslink() on the resolved directory.
+ * returns 0 or a negative error.
+ */
+static int
+rlink(char *old, char *new, int sym)
+{
+ int err;
+ int fd;
+ char *base;
+ char *name;
+ char *t;
+
+ trace("rlink(%s, %s, %d)", old, new, sym);
+
+ if((base = basepath(new, &name)) == nil)
+ return -EINVAL;
+
+ /* resolve base directory */
+ if((fd = open(shortpath(current->kcwd, base), OREAD)) < 0){
+ err = mkerror();
+ if(t = resolvepath1(base, 0)){
+ free(base); base = t;
+ t = allocpath(base, nil, name);
+ err = fslink(old, t, sym);
+ free(t);
+ }
+ goto out;
+ }
+ close(fd);
+
+ if(sym == 0){
+ /* old and new are replaced by allocated copies (or nil on error) */
+ if((err = resolvefromtopath(&old, &new)) == 0)
+ err = copyfile(old, new);
+ free(old);
+ free(new);
+ goto out;
+ }
+
+ /* check if regular file is in the way */
+ err = -EEXIST;
+ if((fd = open(shortpath(current->kcwd, new), OREAD)) >= 0){
+ close(fd);
+ goto out;
+ }
+
+ /* try to create the link, will fail if it already exists */
+ t = udirpath(base, name, 'L');
+ if((fd = create(shortpath(current->kcwd, t), OWRITE|OEXCL, 0777)) < 0){
+ err = mkerror();
+ free(t);
+ goto out;
+ }
+ free(t);
+
+ /* the link file contains the target path without terminating nul */
+ if(write(fd, old, strlen(old)) < 0){
+ err = mkerror();
+ close(fd);
+ goto out;
+ }
+ close(fd);
+ err = 0;
+out:
+ free(base);
+ free(name);
+ return err;
+}
+
+/*
+ * operations table of the root device: path based operations,
+ * file operations and descriptor based attribute changes.
+ */
+static Udev rootdev =
+{
+ .open = ropen,
+ .access = raccess,
+ .stat = rstat,
+ .link = rlink,
+ .unlink = runlink,
+ .rename = rrename,
+ .mkdir = rmkdir,
+ .utime = rutime,
+ .chmod = rchmod,
+ .chown = rchown,
+ .truncate = rtruncate,
+
+ .read = rread,
+ .write = rwrite,
+ .size = rsize,
+ .close = rclose,
+
+ .fstat = rfstat,
+ .readdir = rreaddir,
+ .readlink = rreadlink,
+
+ .fchmod = rfchmod,
+ .fchown = rfchown,
+ .ftruncate = rftruncate,
+};
+
+/*
+ * rootdevinit registers the root device in devtab and mounts
+ * it with fsmount() under the empty path.
+ */
+void rootdevinit(void)
+{
+ devtab[ROOTDEV] = &rootdev;
+
+ fsmount(&rootdev, "");
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+/* private types of the signal code, defined below */
+typedef struct Signal Signal;
+typedef struct Action Action;
+typedef struct Queue Queue;
+typedef struct Timers Timers;
+typedef struct Handlers Handlers;
+typedef struct Private Private;
+
+/* a queued signal: the signal info plus the list link */
+struct Signal
+{
+ Usiginfo;
+ Signal *next; /* next entry on the pending or free list */
+};
+
+/* per signal disposition as set by sys_rt_sigaction */
+struct Action
+{
+ void *handler; /* handler address or one of SIG_DFL, SIG_IGN, ... */
+ int flags; /* SA_* flags */
+ uvlong block; /* mask or'ed into the blocked set when the handler is entered */
+};
+
+/*
+ * reference counted queue of pending signals. entries come
+ * from the fixed array a[] and are kept either on the pending
+ * list (head/tailp) or on the free list.
+ */
+struct Queue
+{
+ Ref;
+ QLock;
+
+ Signal *head; /* first pending signal */
+ Signal **tailp; /* where the next pending signal gets linked in */
+ Signal *free; /* unused entries of a[] */
+ Signal a[64];
+
+ Ufile *tty; /* see settty() / gettty() */
+};
+
+/*
+ * reference counted interval timers. expire is compared
+ * against nsec(); zero means the timer is not armed.
+ * interval is added to expire when the timer fires.
+ */
+struct Timers
+{
+ Ref;
+ struct {
+ vlong interval;
+ vlong expire;
+ } itimer[2];
+};
+
+/* reference counted table of signal actions, indexed by signal number - 1 */
+struct Handlers
+{
+ Ref;
+ QLock;
+ Action a[SIGMAX-1];
+};
+
+/* signal state hanging off Uproc.signal */
+struct Private
+{
+ Handlers *h;
+ Queue *q;
+ Timers *t;
+
+ /* alternate signal stack as set by sys_sigaltstack */
+ struct {
+ ulong sp;
+ ulong size;
+ } altstack;
+
+ uvlong block; /* mask of currently blocked signals, see MASK() */
+
+ Urestart *freerestart; /* list of unused Urestart structures */
+};
+
+/* special values stored in Action.handler instead of a function address */
+enum
+{
+ SIG_ERR = -1,
+ SIG_DFL = 0,
+ SIG_IGN = 1,
+ SIG_HOLD = 2,
+};
+
+/* bits of Action.flags */
+enum
+{
+ SA_NOCLDSTOP = 1,
+ SA_NOCLDWAIT = 2,
+ SA_SIGINFO = 4,
+ SA_ONSTACK = 0x08000000,
+ SA_RESTART = 0x10000000,
+ SA_NODEFER = 0x40000000,
+ SA_RESETHAND = 0x80000000,
+};
+
+/* alternate stack state as returned by linuxstackflags() */
+enum
+{
+ SS_ONSTACK = 1,
+ SS_DISABLE = 2,
+};
+
+/*
+ * MASK yields the bit for signal number sig (counted from 1)
+ * in a 64 bit signal set. the shift is done on an unsigned
+ * value so that signal 64 does not shift into the sign bit.
+ */
+#define MASK(sig) ((uvlong)1 << ((sig)-1))
+
+/* forward declarations, both are defined further down in this file */
+static void
+nextsignal(uvlong rblock, int wait);
+
+static int
+getsignal(Private *p, Usiginfo *pinfo, int wait);
+
+/*
+ * initrestart resets the restart chain of proc to the single
+ * empty entry embedded in the Uproc.
+ */
+static void
+initrestart(Uproc *proc)
+{
+	proc->restart0.syscall = nil;
+	proc->restart0.link = nil;
+	proc->restart = &proc->restart0;
+}
+
+/*
+ * poprestart unlinks entries without a syscall from the top of
+ * the restart chain of the current process and puts them on the
+ * free list of p. it stops at the last entry of the chain or at
+ * the first entry that names a syscall; in the latter case that
+ * syscall becomes current->syscall.
+ */
+static void
+poprestart(Private *p)
+{
+	Urestart *top;
+
+	while((top = current->restart)->link != nil && !top->syscall){
+		current->restart = top->link;
+
+		top->link = p->freerestart;
+		p->freerestart = top;
+	}
+	if(top->syscall)
+		current->syscall = top->syscall;
+}
+
+/*
+ * mkqueue allocates a signal queue with one reference, an
+ * empty pending list and all entries of a[] on the free list
+ * in array order.
+ */
+static Queue*
+mkqueue(void)
+{
+	Queue *q;
+	int i;
+
+	q = kmallocz(sizeof(Queue), 1);
+	q->ref = 1;
+	q->head = nil;
+	q->tailp = &q->head;
+
+	/* push the entries back to front so that a[0] ends up first */
+	q->free = nil;
+	for(i=nelem(q->a)-1; i>=0; i--){
+		q->a[i].next = q->free;
+		q->free = &q->a[i];
+	}
+
+	return q;
+}
+
+/*
+ * mkhandlers allocates a handler table with one reference and
+ * every action set to SIG_DFL.
+ */
+static Handlers*
+mkhandlers(void)
+{
+	Handlers *h;
+	int sig;
+
+	h = kmallocz(sizeof(Handlers), 1);
+	h->ref = 1;
+	sig = 1;
+	while(sig < SIGMAX){
+		h->a[sig-1].handler = (void*)SIG_DFL;
+		sig++;
+	}
+	return h;
+}
+
+/* mktimers allocates a zeroed timer set with one reference. */
+static Timers*
+mktimers(void)
+{
+	Timers *tm;
+
+	tm = kmallocz(sizeof(Timers), 1);
+	tm->ref = 1;
+
+	return tm;
+}
+
+/* bits.s */
+extern int get_ds(void);
+extern int get_cs(void);
+/* segment selectors loaded into the registers when a handler is entered */
+static ulong user_cs, user_ds;
+
+/*
+ * initsignal gives the current process a fresh signal state:
+ * nothing blocked, empty queue, default handlers, unarmed timers
+ * and an empty restart chain. the segment selectors are read
+ * once while both are still zero.
+ */
+void initsignal(void)
+{
+ Private *p;
+
+ if(user_ds==0 && user_cs==0){
+ user_ds = get_ds();
+ user_cs = get_cs();
+ }
+
+ p = kmallocz(sizeof(*p), 1);
+ p->block = 0;
+
+ p->q = mkqueue();
+ p->h = mkhandlers();
+ p->t = mktimers();
+
+ current->signal = p;
+ initrestart(current);
+}
+
+/*
+ * exitsignal tears down the signal state of the current process.
+ * signals queued for this thread only are dropped from the
+ * queue, the references on queue, handlers and timers are
+ * released (freeing them when the last one is gone) and all
+ * allocated restart entries are freed.
+ */
+void exitsignal(void)
+{
+ Private *p;
+ Queue *q;
+ Timers *t;
+ Signal **i;
+ Handlers *h;
+ Urestart *r;
+
+ if((p = current->signal) == nil)
+ return;
+ current->signal = nil;
+ q = p->q;
+ qlock(q);
+again:
+ /* the scan is restarted from the head after every removal */
+ for(i=&q->head; *i; i=&((*i)->next)){
+ Signal *r;
+ r = *i;
+ if(!r->group && (r->topid == current->tid)){
+ if((*i = r->next) == nil)
+ q->tailp = i;
+ r->next = q->free;
+ q->free = r;
+ goto again;
+ }
+ }
+ qunlock(q);
+ if(!decref(q)){
+ putfile(q->tty);
+ q->tty = nil;
+ free(q);
+ }
+ h = p->h;
+ if(!decref(h))
+ free(h);
+ t = p->t;
+ if(!decref(t))
+ free(t);
+ /* move every restart entry except the last one of the chain to the free list */
+ while(r = current->restart){
+ if(r->link == nil)
+ break;
+ current->restart = r->link;
+ r->link = p->freerestart;
+ p->freerestart = r;
+ }
+ current->restart = nil;
+ while(r = p->freerestart){
+ p->freerestart = r->link;
+ free(r);
+ }
+ free(p);
+}
+
+/*
+ * clonesignal creates the signal state of new from that of the
+ * current process. with copyhand set the handler table is
+ * copied, otherwise it is shared. with newproc set, new gets
+ * its own queue (inheriting the tty), its own timers and a copy
+ * of the alternate stack settings; otherwise queue and timers
+ * are shared. the blocked mask is always inherited.
+ */
+void clonesignal(Uproc *new, int copyhand, int newproc)
+{
+ Private *p, *n;
+
+ if((p = current->signal) == nil)
+ return;
+
+ n = kmallocz(sizeof(*n), 1);
+ if(copyhand){
+ n->h = mkhandlers();
+
+ qlock(p->h);
+ memmove(n->h->a, p->h->a, sizeof(n->h->a));
+ qunlock(p->h);
+ } else {
+ incref(p->h);
+ n->h = p->h;
+ }
+
+ qlock(p->q);
+ if(newproc){
+ n->q = mkqueue();
+ n->q->tty = getfile(p->q->tty);
+ n->t = mktimers();
+ n->altstack = p->altstack;
+ } else {
+ incref(p->q);
+ n->q = p->q;
+ incref(p->t);
+ n->t = p->t;
+ }
+ qunlock(p->q);
+
+ n->block = p->block;
+ new->signal = n;
+
+ initrestart(new);
+}
+
+/*
+ * settty replaces the tty of the signal queue of the current
+ * process. a reference to the new tty is taken with getfile()
+ * and the old one is released with putfile() outside the lock.
+ */
+void
+settty(Ufile *tty)
+{
+	Private *p;
+	Queue *q;
+	Ufile *prev;
+
+	p = current->signal;
+	if(p == nil)
+		return;
+
+	tty = getfile(tty);
+	q = p->q;
+	qlock(q);
+	prev = q->tty;
+	q->tty = tty;
+	qunlock(q);
+
+	putfile(prev);
+}
+
+/*
+ * gettty returns the result of getfile() on the tty of the
+ * signal queue of the current process, or nil if the process
+ * has no signal state.
+ */
+Ufile*
+gettty(void)
+{
+	Private *p;
+	Queue *q;
+	Ufile *f;
+
+	p = current->signal;
+	if(p == nil)
+		return nil;
+
+	q = p->q;
+	qlock(q);
+	f = getfile(q->tty);
+	qunlock(q);
+
+	return f;
+}
+
+/*
+ * ignoressignal reports whether sig would have no effect on
+ * proc: the process has no signal state, the handler is SIG_IGN,
+ * SIGCHLD with SA_NOCLDWAIT set, or SIGWINCH/SIGURG with the
+ * default handler. SIGKILL and SIGSTOP are never ignored.
+ * NOTE(review): sig indexes the handler table without a range
+ * check -- presumably callers pass 1..SIGMAX-1 only; confirm.
+ */
+int ignoressignal(Uproc *proc, int sig)
+{
+ Private *p;
+ int a, f;
+
+ if((p = proc->signal) == nil)
+ return 1;
+ qlock(p->h);
+ a = (int)p->h->a[sig-1].handler;
+ f = p->h->a[sig-1].flags;
+ qunlock(p->h);
+ switch(sig){
+ case SIGKILL:
+ case SIGSTOP:
+ return 0;
+ case SIGCHLD:
+ if(f & SA_NOCLDWAIT)
+ return 1;
+ break;
+ case SIGWINCH:
+ case SIGURG:
+ if(a == SIG_DFL)
+ return 1;
+ }
+ return (a == SIG_IGN);
+}
+
+/*
+ * wantssignal reports whether proc has signal state, does not
+ * block sig and does not ignore it.
+ */
+int wantssignal(Uproc *proc, int sig)
+{
+	Private *p;
+
+	p = proc->signal;
+	if(p == nil)
+		return 0;
+	if(p->block & MASK(sig))
+		return 0;
+	return !ignoressignal(proc, sig);
+}
+
+/*
+ * sendsignal appends the signal described by info to the queue
+ * of proc. returns 0 if the signal is ignored or, for signals
+ * below SIGRT1, if the same signal is already pending for the
+ * receiver; -EAGAIN if no free queue entry is left; 1 if the
+ * signal was queued.
+ */
+int sendsignal(Uproc *proc, Usiginfo *info, int group)
+{
+ Private *p;
+ Signal *s;
+
+ trace("sendsignal(%S) to %d from %d",
+ info->signo, proc->tid, (current != nil) ? current->tid : 0);
+
+ if(ignoressignal(proc, info->signo)){
+ trace("sendsignal(): ignored signal %S", info->signo);
+ return 0;
+ }
+
+ p = proc->signal;
+ qlock(p->q);
+ if(info->signo < SIGRT1){
+ for(s=p->q->head; s; s=s->next){
+ /* skip signals directed at other threads sharing the queue */
+ if(!s->group && (s->topid != proc->tid))
+ continue;
+ if(s->signo == info->signo){
+ qunlock(p->q);
+ trace("sendsignal(): droping follow up signal %S", info->signo);
+ return 0;
+ }
+ }
+ }
+ if((s = p->q->free) == nil){
+ qunlock(p->q);
+ trace("sendsignal(): out of signal buffers");
+ return -EAGAIN;
+ }
+ p->q->free = s->next;
+ s->next = nil;
+ /* copies only the Usiginfo part, the next link set above is kept */
+ memmove(s, info, sizeof(*info));
+ s->group = group;
+ s->topid = group ? proc->pid : proc->tid;
+ *p->q->tailp = s;
+ p->q->tailp = &s->next;
+ qunlock(p->q);
+ return 1;
+}
+
+/*
+ * signalspending reports whether proc has at least one queued
+ * signal that is addressed to it (group signal or directed at
+ * its tid) and not blocked. returns 1 if so, otherwise 0.
+ */
+int
+signalspending(Uproc *proc)
+{
+	Private *p;
+	Signal *s;
+	int ret;
+
+	p = proc->signal;
+	/* unlocked peek to avoid taking the lock for an empty queue */
+	if(p == nil || p->q->head == nil)
+		return 0;
+
+	ret = 0;
+	qlock(p->q);
+	for(s=p->q->head; s; s=s->next){
+		/*
+		 * thread directed signals have to be matched against the
+		 * process that is asked about, not against the caller.
+		 */
+		if(!s->group && (s->topid != proc->tid))
+			continue;
+		if(MASK(s->signo) & p->block)
+			continue;
+		ret = 1;
+		break;
+	}
+	qunlock(p->q);
+
+	return ret;
+}
+
+/*
+ * getsignal dequeues the first pending signal that is addressed
+ * to the current thread and not blocked, copying its info to
+ * pinfo. returns the signal number (> 0) if one was dequeued,
+ * the negated number of the first blocked signal if only
+ * blocked ones were seen, or 0 if nothing was found. with wait
+ * set, the queue is rescanned each time sleepproc() returns 0.
+ */
+static int
+getsignal(Private *p, Usiginfo *pinfo, int wait)
+{
+ Signal *r;
+ Signal **i;
+ int sig;
+
+ if(!wait && p->q->head == nil)
+ return 0;
+
+ sig = 0;
+ qlock(p->q);
+ for(;;){
+ for(i=&p->q->head; *i; i=&((*i)->next)){
+ r = *i;
+
+ /* directed at another thread sharing this queue */
+ if(!r->group && (r->topid != current->tid))
+ continue;
+
+ if(p->block & MASK(r->signo)){
+ if(sig == 0)
+ sig = -r->signo;
+ continue;
+ }
+ sig = r->signo;
+
+ /* dequeue nonblocked signal */
+ memmove(pinfo, r, sizeof(*pinfo));
+ if((*i = r->next) == nil)
+ p->q->tailp = i;
+ r->next = p->q->free;
+ p->q->free = r;
+ break;
+ }
+ if(wait && sig <= 0){
+ if(sleepproc(p->q, 0) == 0)
+ continue;
+ }
+ break;
+ }
+ qunlock(p->q);
+
+ return sig;
+}
+
+/*
+ * sigset2uvlong assembles the first setsize bytes of set (at
+ * most sizeof(uvlong)) into a mask, byte 0 being the least
+ * significant one.
+ */
+static uvlong
+sigset2uvlong(uchar *set, int setsize)
+{
+	uvlong mask;
+	int n;
+
+	n = setsize;
+	if(n > sizeof(uvlong))
+		n = sizeof(uvlong);
+
+	mask = 0;
+	while(--n >= 0)
+		mask |= (uvlong)set[n] << (n * 8);
+	return mask;
+}
+
+/*
+ * uvlong2sigset stores mask into the setsize bytes of set,
+ * least significant byte first. bytes beyond the size of a
+ * uvlong are cleared.
+ */
+static void
+uvlong2sigset(uchar *set, int setsize, uvlong mask)
+{
+	int i;
+
+	for(i=0; i<setsize; i++)
+		set[i] = (i < sizeof(uvlong)) ? ((mask >> (i*8)) & 0xff) : 0;
+}
+
+/*
+ * signal info as handed to the linux program. which member of
+ * the union is valid depends on signo, see siginfo2linux().
+ * NOTE(review): layout presumably mirrors the i386 linux
+ * siginfo_t -- confirm against the linux headers.
+ */
+struct linux_siginfo {
+ int signo;
+ int errno;
+ int code;
+
+ union {
+ int _pad[29];
+
+ /* kill() */
+ struct {
+ int pid; /* sender's pid */
+ int uid; /* sender's uid */
+ } kill;
+
+ /* POSIX.1b timers */
+ struct {
+ int tid; /* timer id */
+ int overrun; /* overrun count */
+ int val; /* same as below */
+ } timer;
+
+ /* POSIX.1b signals */
+ struct {
+ int pid; /* sender's pid */
+ int uid; /* sender's uid */
+ int val;
+ } rt;
+
+ /* SIGCHLD */
+ struct {
+ int pid; /* which child */
+ int uid; /* sender's uid */
+ int status; /* exit code */
+ long utime;
+ long stime;
+ } chld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ void *addr; /* faulting insn/memory ref. */
+ int trapno; /* TRAP # which caused the signal */
+ } fault;
+
+ /* SIGPOLL */
+ struct {
+ long band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int fd;
+ } poll;
+ };
+};
+
+/*
+ * siginfo2linux converts the internal signal info to the linux
+ * layout at p (a struct linux_siginfo). only the union member
+ * selected by the signal number is written; all signals not
+ * listed explicitly use the kill member.
+ */
+void
+siginfo2linux(Usiginfo *info, void *p)
+{
+ struct linux_siginfo *li = p;
+ int sig;
+
+ sig = info->signo;
+
+ li->signo = sig;
+ li->errno = info->errno;
+ li->code = info->code;
+
+ switch(sig){
+ case SIGALRM:
+ li->timer.tid = info->timer.tid;
+ li->timer.overrun = info->timer.overrun;
+ li->timer.val = info->timer.val;
+ break;
+ case SIGCHLD:
+ li->chld.pid = info->chld.pid;
+ li->chld.uid = info->chld.uid;
+ li->chld.status = info->chld.status;
+ li->chld.utime = info->chld.utime;
+ li->chld.stime = info->chld.stime;
+ break;
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGSEGV:
+ li->fault.addr = info->fault.addr;
+ li->fault.trapno = info->fault.trapno;
+ break;
+ case SIGPOLL:
+ li->poll.fd = info->poll.fd;
+ li->poll.band = info->poll.band;
+ break;
+ case SIGRT1:
+ case SIGRT2:
+ case SIGRT3:
+ case SIGRT4:
+ case SIGRT5:
+ case SIGRT6:
+ case SIGRT7:
+ case SIGRT8:
+ li->rt.pid = info->rt.pid;
+ li->rt.uid = info->rt.uid;
+ li->rt.val = info->rt.val;
+ break;
+ default:
+ li->kill.pid = info->kill.pid;
+ li->kill.uid = info->kill.uid;
+ }
+}
+
+/*
+ * linux2siginfo is the inverse of siginfo2linux: it converts
+ * the linux signal info at p (a struct linux_siginfo) to the
+ * internal representation. only the union member selected by
+ * the signal number is copied.
+ */
+void
+linux2siginfo(void *p, Usiginfo *info)
+{
+ struct linux_siginfo *li = p;
+ int sig;
+
+ sig = li->signo;
+
+ info->signo = sig;
+ info->errno = li->errno;
+ info->code = li->code;
+
+ switch(sig){
+ case SIGALRM:
+ info->timer.tid = li->timer.tid;
+ info->timer.overrun = li->timer.overrun;
+ info->timer.val = li->timer.val;
+ break;
+ case SIGCHLD:
+ info->chld.pid = li->chld.pid;
+ info->chld.uid = li->chld.uid;
+ info->chld.status = li->chld.status;
+ info->chld.utime = li->chld.utime;
+ info->chld.stime = li->chld.stime;
+ break;
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGSEGV:
+ info->fault.addr = li->fault.addr;
+ info->fault.trapno = li->fault.trapno;
+ break;
+ case SIGPOLL:
+ info->poll.fd = li->poll.fd;
+ info->poll.band = li->poll.band;
+ break;
+ case SIGRT1:
+ case SIGRT2:
+ case SIGRT3:
+ case SIGRT4:
+ case SIGRT5:
+ case SIGRT6:
+ case SIGRT7:
+ case SIGRT8:
+ info->rt.pid = li->rt.pid;
+ info->rt.uid = li->rt.uid;
+ info->rt.val = li->rt.val;
+ break;
+ default:
+ info->kill.pid = li->kill.pid;
+ info->kill.uid = li->kill.uid;
+ }
+}
+
+/*
+ * register state saved in a signal frame, filled in by
+ * ureg2linuxsigcontext() and read back by linuxsigcontext2ureg().
+ * NOTE(review): field order presumably follows the i386 linux
+ * struct sigcontext -- confirm against the linux headers.
+ */
+struct linux_sigcontext {
+ ulong gs;
+ ulong fs;
+ ulong es;
+ ulong ds;
+ ulong di;
+ ulong si;
+ ulong bp;
+ ulong sp;
+ ulong bx;
+ ulong dx;
+ ulong cx;
+ ulong ax;
+ ulong trapno;
+ ulong err;
+ ulong ip;
+ ulong cs;
+ ulong flags;
+ ulong sp_at_signal;
+ ulong ss;
+ void* fpstate;
+ ulong oldmask; /* low 32 bits of the mask to restore on sigreturn */
+ ulong cr2;
+};
+
+/*
+ * ureg2linuxsigcontext saves the registers in u to the signal
+ * context sc. fpstate and oldmask are not touched, cr2 is
+ * cleared.
+ */
+static void
+ureg2linuxsigcontext(Ureg *u, struct linux_sigcontext *sc)
+{
+	/* general purpose registers */
+	sc->ax = u->ax;
+	sc->bx = u->bx;
+	sc->cx = u->cx;
+	sc->dx = u->dx;
+	sc->si = u->si;
+	sc->di = u->di;
+	sc->bp = u->bp;
+
+	/* stack and program counter */
+	sc->sp = u->sp;
+	sc->sp_at_signal = u->sp;
+	sc->ip = u->pc;
+	sc->flags = u->flags;
+
+	/* segment registers */
+	sc->cs = u->cs;
+	sc->ss = u->ss;
+	sc->ds = u->ds;
+	sc->es = u->es;
+	sc->fs = u->fs;
+	sc->gs = u->gs;
+
+	/* trap information */
+	sc->trapno = u->trap;
+	sc->err = u->ecode;
+	sc->cr2 = 0;
+}
+
+/* 64 bit signal mask split in two words, sig[0] holding the low half */
+struct linux_sigset {
+ ulong sig[2];
+};
+
+/* alternate stack description stored in the ucontext of a signal frame */
+struct linux_signalstack {
+ ulong sp;
+ int flags; /* SS_* */
+ ulong size;
+};
+
+/* user context passed to SA_SIGINFO handlers, see linuxsignal() */
+struct linux_ucontext {
+ ulong flags;
+ struct linux_ucontext *link;
+ struct linux_signalstack stack;
+ struct linux_sigcontext context;
+ struct linux_sigset sigmask; /* mask restored by sys_rt_sigreturn */
+};
+
+/*
+ * linuxsigcontext2ureg restores the registers saved in the
+ * signal context sc to u. flags, trap and error code are not
+ * restored.
+ */
+static void
+linuxsigcontext2ureg(struct linux_sigcontext *sc, Ureg *u)
+{
+	/* segment registers */
+	u->gs = sc->gs;
+	u->fs = sc->fs;
+	u->es = sc->es;
+	u->ds = sc->ds;
+	u->ss = sc->ss;
+	u->cs = sc->cs;
+
+	/* general purpose registers */
+	u->bp = sc->bp;
+	u->si = sc->si;
+	u->di = sc->di;
+	u->dx = sc->dx;
+	u->cx = sc->cx;
+	u->bx = sc->bx;
+	u->ax = sc->ax;
+
+	/* stack and program counter */
+	u->sp = sc->sp;
+	u->pc = sc->ip;
+}
+
+/*
+ * frame built by linuxsignal() for a handler. ret is set to
+ * linux_sigreturn or linux_rtsigreturn. handlers without
+ * SA_SIGINFO use sc, handlers with SA_SIGINFO use rt.
+ */
+struct linux_sigframe {
+ void *ret;
+ int sig;
+
+ union {
+ struct linux_sigcontext sc;
+
+ struct {
+ struct linux_siginfo *pinfo; /* points to info below */
+ struct linux_ucontext *puc; /* points to uc below */
+
+ struct linux_siginfo info;
+ struct linux_ucontext uc;
+ } rt;
+ };
+};
+
+#pragma profile off
+
+/*
+ * linuxstackflags classifies the stack pointer sp: SS_DISABLE
+ * if no alternate stack is configured, SS_ONSTACK if sp lies
+ * within the alternate stack, 0 otherwise.
+ */
+static int
+linuxstackflags(Private *p, ulong sp)
+{
+	if(p->altstack.sp == 0 || p->altstack.size == 0)
+		return SS_DISABLE;
+	return (sp - p->altstack.sp < p->altstack.size) ? SS_ONSTACK : 0;
+}
+
+/*
+ * linuxsignal enters the handler of action a for the signal
+ * described by i. it builds a signal frame (on the alternate
+ * stack if SA_ONSTACK is set and the stack is configured but
+ * not in use, otherwise in the local _frame), saves the current
+ * registers and rblock in it, points the registers at the
+ * handler, adds a->block to the blocked mask and leaves through
+ * retuser().
+ */
+static void
+linuxsignal(Private *p, Action *a, Usiginfo *i, uvlong rblock)
+{
+ struct linux_sigframe _frame;
+ struct linux_sigframe *f;
+ Ureg *u;
+ int stackflags;
+
+ u = current->ureg;
+
+ stackflags = linuxstackflags(p, u->sp);
+ if((a->flags & SA_ONSTACK) && (stackflags == 0)){
+ trace("linuxsignal: altstack %lux %lux", p->altstack.sp, p->altstack.size);
+ /* place the frame at the very top of the alternate stack */
+ f = (struct linux_sigframe*)(p->altstack.sp + p->altstack.size);
+ f--;
+ } else {
+ f = &_frame;
+ }
+
+ trace("linuxsignal(): frame %p", f);
+ memset(f, 0, sizeof(*f));
+
+ f->sig = i->signo;
+
+ if(a->flags & SA_SIGINFO){
+ f->ret = linux_rtsigreturn;
+ siginfo2linux(i, &f->rt.info);
+ f->rt.pinfo = &f->rt.info;
+
+ f->rt.uc.stack.sp = p->altstack.sp;
+ f->rt.uc.stack.size = p->altstack.size;
+ f->rt.uc.stack.flags = stackflags;
+
+ ureg2linuxsigcontext(u, &f->rt.uc.context);
+ /* the full 64 bit mask is kept in sigmask, oldmask only has the low half */
+ f->rt.uc.context.oldmask = rblock & 0xFFFFFFFF;
+ f->rt.uc.sigmask.sig[0] = rblock & 0xFFFFFFFF;
+ f->rt.uc.sigmask.sig[1] = (rblock >> 32) & 0xFFFFFFFF;
+ f->rt.puc = &f->rt.uc;
+ u->cx = (ulong)f->rt.puc;
+ u->dx = (ulong)f->rt.pinfo;
+ } else {
+ f->ret = linux_sigreturn;
+ ureg2linuxsigcontext(u, &f->sc);
+ f->sc.oldmask = rblock & 0xFFFFFFFF;
+ u->cx = 0;
+ u->dx = 0;
+ }
+
+ u->di = 0;
+ u->si = 0;
+ u->bp = 0;
+ u->bx = 0;
+
+ u->ax = (ulong)i->signo;
+
+ /* the handler starts with its stack pointer at the frame */
+ u->sp = (ulong)f;
+ u->pc = (ulong)a->handler;
+
+ u->cs = user_cs;
+ u->ss = user_ds;
+ u->ds = user_ds;
+ u->es = user_ds;
+
+ p->block |= a->block;
+
+ trace("linuxsignal(): retuser pc=%lux sp=%lux", u->pc, u->sp);
+ retuser();
+}
+
+/*
+ * sys_sigreturn returns from a signal handler that was entered
+ * without SA_SIGINFO. the registers are restored from the
+ * sigcontext of the frame, the low 32 bits of the blocked mask
+ * are restored from oldmask, further pending signals are
+ * delivered and the interrupted code is resumed with retuser().
+ */
+int
+sys_sigreturn(void)
+{
+	struct linux_sigframe *f;
+	Private *p;
+	Ureg *u;
+
+	trace("sys_sigreturn()");
+
+	p = current->signal;
+	u = current->ureg;
+
+	/* presumably the handler's return consumed f->ret, so sp is 4 past the frame start */
+	f = (struct linux_sigframe*)(u->sp - 4);
+
+	trace("sys_sigreturn(): frame %p", f);
+
+	linuxsigcontext2ureg(&f->sc, u);
+	/*
+	 * only the low half of the mask was saved in the frame. the
+	 * constant has to be widened before it is inverted, a plain
+	 * ~0xFFFFFFFF is zero and would clear the upper half too.
+	 */
+	p->block &= ~(uvlong)0xFFFFFFFF;
+	p->block |= f->sc.oldmask;
+	nextsignal(p->block, 0);
+	poprestart(p);
+
+	trace("sys_sigreturn(): retuser pc=%lux sp=%lux", u->pc, u->sp);
+	retuser();
+
+	return -1;
+}
+
+/*
+ * sys_rt_sigreturn returns from a signal handler that was
+ * entered with SA_SIGINFO. the registers are restored from the
+ * ucontext of the frame, the blocked mask is restored from its
+ * 64 bit sigmask, further pending signals are delivered and the
+ * interrupted code is resumed with retuser().
+ */
+int
+sys_rt_sigreturn(void)
+{
+ struct linux_sigframe *f;
+ Private *p;
+ Ureg *u;
+
+ trace("sys_rt_sigreturn()");
+
+ p = current->signal;
+ u = current->ureg;
+
+ /* presumably the handler's return consumed f->ret, so sp is 4 past the frame start */
+ f = (struct linux_sigframe*)(u->sp - 4);
+ trace("sys_rt_sigreturn(): frame %p", f);
+
+ linuxsigcontext2ureg(&f->rt.uc.context, u);
+ p->block = (uvlong)f->rt.uc.sigmask.sig[0] | (uvlong)f->rt.uc.sigmask.sig[1]<<32;
+ nextsignal(p->block, 0);
+ poprestart(p);
+
+ trace("sys_rt_sigreturn(): pc=%lux sp=%lux", u->pc, u->sp);
+ retuser();
+
+ return -1;
+}
+
+/*
+ * nextsignal transfers execution to the next pending
+ * signal or just returns. after the signal got executed,
+ * the block mask is restored to rblock. if there is no
+ * pending signal and wait is non zero, the current
+ * process is suspended until a signal is available.
+ */
+
+static void
+nextsignal(uvlong rblock, int wait)
+{
+	Private *p;
+	int sig;
+	Usiginfo info;
+	Action a;
+	Urestart *r;
+
+	/* pick the next signal, handling stop and continue right here */
+	for(;;){
+		if((p = current->signal) == nil)
+			return;
+
+		if(current->wstate & WSTOPPED){
+			/* while stopped, wait for SIGCONT or SIGKILL only */
+			p->block = ~(MASK(SIGCONT) | MASK(SIGKILL));
+			sig = getsignal(p, &info, 1);
+			p->block = rblock;
+			if(sig <= 0)
+				return;
+			if(sig == SIGCONT){
+				contproc(current, sig, info.group);
+				continue;
+			}
+		} else {
+			if((sig = getsignal(p, &info, wait)) <= 0)
+				return;
+			if(sig == SIGCONT)
+				continue;
+			if(sig == SIGSTOP){
+				stopproc(current, sig, info.group);
+				continue;
+			}
+		}
+		break;
+	}
+
+	trace("nextsignal(): signal %S", sig);
+
+	/* work on a private copy of the action */
+	qlock(p->h);
+	a = p->h->a[sig-1];
+	if(a.flags & SA_RESETHAND)
+		p->h->a[sig-1].handler = (void*)SIG_DFL;
+	/*
+	 * unless SA_NODEFER is set, the signal itself is blocked
+	 * while its handler runs. the parentheses are required,
+	 * == binds tighter than &.
+	 */
+	if((a.flags & SA_NODEFER) == 0)
+		a.block |= MASK(sig);
+	qunlock(p->h);
+
+	switch((int)a.handler){
+	case SIG_DFL:
+		switch(sig){
+		case SIGCHLD:
+		case SIGWINCH:
+		case SIGURG:
+			goto Ignored;
+		}
+		/* no break */
+	case SIG_ERR:
+		trace("nextsignal(): signal %S causes exit", sig);
+		exitproc(current, sig, 1);
+Ignored:
+	case SIG_IGN:
+	case SIG_HOLD:
+		trace("nextsignal(): signal %S ignored", sig);
+		return;
+	}
+
+	/* a syscall is pending on the restart chain */
+	if(current->restart->syscall){
+		if(a.flags & SA_RESTART){
+			/* push an empty entry, reusing a free one if there is any */
+			if(r = p->freerestart)
+				p->freerestart = r->link;
+			if(r == nil)
+				r = kmalloc(sizeof(*r));
+			r->syscall = nil;
+			r->link = current->restart;
+			current->restart = r;
+		} else {
+			trace("nextsignal(): interrupting syscall %s", current->syscall);
+			current->sysret(-EINTR);
+		}
+	}
+
+	linuxsignal(p, &a, &info, rblock);
+}
+
+/*
+ * handlesignals delivers the next pending signal of the current
+ * process, if it has signal state, without waiting.
+ */
+void handlesignals(void)
+{
+	Private *p;
+
+	p = current->signal;
+	if(p != nil)
+		nextsignal(p->block, 0);
+}
+
+/*
+ * sys_rt_sigsuspend replaces the blocked mask with set (SIGKILL
+ * and SIGSTOP can not be blocked) and waits for a signal. the
+ * previous mask is passed to nextsignal() as the mask to
+ * restore. the loop at the end has no exit of its own.
+ */
+int
+sys_rt_sigsuspend(uchar *set, int setsize)
+{
+ Private *p;
+ uvlong b, rblock;
+
+ trace("sys_rt_sigsuspend(%p, %d)", set, setsize);
+
+ p = current->signal;
+ b = sigset2uvlong(set, setsize);
+ b &= ~(MASK(SIGKILL) | MASK(SIGSTOP));
+
+ rblock = p->block;
+ p->block = b;
+
+ /*
+ * if a signal got handled, it will pop out after the
+ * sigsuspend syscall with return value set to -EINTR
+ */
+ current->sysret(-EINTR);
+
+ for(;;)
+ nextsignal(rblock, 1);
+}
+
+#pragma profile on
+
+/* argument layout of sys_sigaltstack */
+struct linux_altstack
+{
+ ulong sp;
+ int flags; /* SS_* */
+ ulong size;
+};
+
+/*
+ * sys_sigaltstack sets (stk != nil) and/or reports (ostk != nil)
+ * the alternate signal stack. the values reported are those
+ * from before the change. returns -EPERM when a change is
+ * requested while running on the alternate stack, otherwise 0.
+ */
+int sys_sigaltstack(void *stk, void *ostk)
+{
+ Private *p;
+ struct linux_altstack *a = stk, *oa = ostk;
+ int flags;
+ ulong sp, size;
+
+ trace("sys_sigaltstack(%lux, %lux)", (ulong)stk, (ulong)ostk);
+
+ /* remember the old settings before they get overwritten */
+ p = current->signal;
+ sp = p->altstack.sp;
+ size = p->altstack.size;
+ flags = linuxstackflags(p, current->ureg->sp);
+
+ if(a){
+ if(flags == SS_ONSTACK)
+ return -EPERM;
+
+ if(a->flags == SS_DISABLE){
+ p->altstack.sp = 0;
+ p->altstack.size = 0;
+ } else {
+ p->altstack.sp = a->sp;
+ p->altstack.size = a->size;
+ }
+
+ trace("sys_signalstack(): new altstack %lux-%lux",
+ p->altstack.sp, p->altstack.sp + p->altstack.size);
+ }
+ if(oa){
+ oa->sp = sp;
+ oa->size = size;
+ oa->flags = flags;
+ }
+
+ return 0;
+}
+
+/* argument layout of sys_rt_sigaction; mask is setsize bytes long */
+struct linux_sigaction
+{
+ void *handler;
+ ulong flags;
+ void *restorer;
+ uchar mask[];
+};
+
+/*
+ * sys_rt_sigaction installs the action at pact (if not nil) for
+ * signal sig and stores the previous action at poact (if not
+ * nil). the restorer of the old action is always reported as 0.
+ * returns -EINVAL for signal numbers outside 1..SIGMAX-1,
+ * otherwise 0.
+ */
+int sys_rt_sigaction(int sig, void *pact, void *poact, int setsize)
+{
+ Private *p;
+ Action *a;
+ struct linux_sigaction *act;
+ struct linux_sigaction *oact;
+ void *handler;
+ int flags;
+ uvlong block;
+
+ trace("sys_rt_sigaction(%S, %p, %p, %d)", sig, pact, poact, setsize);
+
+ p = current->signal;
+ act = (struct linux_sigaction*)pact;
+ oact = (struct linux_sigaction*)poact;
+
+ if((sig < 1) || (sig >= SIGMAX))
+ return -EINVAL;
+
+ qlock(p->h);
+ a = &p->h->a[sig-1];
+ /* save the old action first, act and oact may describe the same memory */
+ handler = a->handler;
+ flags = a->flags;
+ block = a->block;
+ if(act){
+ /* note: this traces the flags of the action being replaced */
+ trace("flags = %x", a->flags);
+ a->handler = act->handler;
+ a->flags = act->flags;
+ a->block = sigset2uvlong(act->mask, setsize);
+ }
+ if(oact){
+ oact->handler = handler;
+ oact->flags = flags;
+ oact->restorer = 0;
+ uvlong2sigset(oact->mask, setsize, block);
+ }
+ qunlock(p->h);
+
+ return 0;
+}
+
+/*
+ * sys_rt_sigpending stores the set of all queued signals that
+ * are addressed to the current thread (group signals and
+ * signals directed at its tid) in set. always returns 0.
+ */
+int sys_rt_sigpending(uchar *set, int setsize)
+{
+	Private *p;
+	Signal *sg;
+	uvlong pending;
+
+	trace("sys_rt_sigpending(%p, %d)", set, setsize);
+
+	p = current->signal;
+	pending = 0LL;
+	qlock(p->q);
+	sg = p->q->head;
+	while(sg != nil){
+		if(sg->group || sg->topid == current->tid)
+			pending |= MASK(sg->signo);
+		sg = sg->next;
+	}
+	qunlock(p->q);
+
+	uvlong2sigset(set, setsize, pending);
+	return 0;
+}
+
+/* values of the how argument of sys_rt_sigprocmask */
+enum
+{
+ SIG_BLOCK = 0,
+ SIG_UNBLOCK = 1,
+ SIG_SETMASK = 2,
+};
+
+/*
+ * sys_rt_sigprocmask changes the blocked mask of the current
+ * process according to how and act (if not nil) and stores the
+ * mask from before the change in oact (if not nil). SIGKILL and
+ * SIGSTOP can not be blocked. returns -EINVAL for an unknown
+ * how, otherwise 0.
+ */
+int sys_rt_sigprocmask(int how, uchar *act, uchar *oact, int setsize)
+{
+	Private *p;
+	uvlong m, old;
+
+	trace("sys_rt_sigprocmask(%d, %p, %p, %d)", how, act, oact, setsize);
+
+	p = current->signal;
+	old = p->block;
+	if(act){
+		m = sigset2uvlong(act, setsize);
+		m &= ~(MASK(SIGKILL) | MASK(SIGSTOP));
+		if(how == SIG_BLOCK)
+			p->block |= m;
+		else if(how == SIG_UNBLOCK)
+			p->block &= ~m;
+		else if(how == SIG_SETMASK)
+			p->block = m;
+		else
+			return -EINVAL;
+	}
+	if(oact)
+		uvlong2sigset(oact, setsize, old);
+	return 0;
+}
+
+/* argument layout of sys_setitimer and sys_getitimer */
+struct linux_itimer
+{
+ struct linux_timeval it_interval;
+ struct linux_timeval it_value;
+};
+
+/*
+ * hzround rounds the time t, given in nanoseconds, up to the
+ * next multiple of a clock tick (1/HZ seconds). the result is
+ * in nanoseconds again, because it is stored as the timer
+ * interval that alarmtimer() adds to the expire time and
+ * sys_getitimer() converts to seconds and microseconds.
+ */
+static vlong
+hzround(vlong t)
+{
+	vlong q = 1000000000LL/HZ;
+	return ((t + q-1) / q) * q;
+}
+
+/*
+ * sys_setitimer arms or disarms interval timer which with the
+ * settings at value (if not nil) and stores the previous
+ * interval and the remaining time at ovalue (if not nil).
+ * a zero it_value disarms the timer, like sys_alarm(0) does.
+ * returns -EINVAL for an unknown timer, otherwise 0.
+ */
+int sys_setitimer(int which, void *value, void *ovalue)
+{
+	Private *p;
+	Timers *t;
+	vlong now, rem, delta, val;
+	struct linux_itimer *nv = value, *ov = ovalue;
+
+	trace("sys_setitimer(%d, %p, %p)", which, value, ovalue);
+
+	p = current->signal;
+	t = p->t;
+
+	if(which < 0 || which >= nelem(t->itimer))
+		return -EINVAL;
+
+	/* sample the old state before it gets overwritten */
+	now = nsec();
+	delta = t->itimer[which].interval;
+	rem = t->itimer[which].expire - now;
+	if(rem < 0)
+		rem = 0;
+	if(nv != nil){
+		trace("nv->{interval->{%ld, %ld}, value->{%ld, %ld}}",
+			nv->it_interval.tv_sec, nv->it_interval.tv_usec,
+			nv->it_value.tv_sec, nv->it_value.tv_usec);
+		t->itimer[which].interval = hzround(nv->it_interval.tv_sec*1000000000LL +
+			nv->it_interval.tv_usec*1000);
+		val = nv->it_value.tv_sec*1000000000LL + nv->it_value.tv_usec*1000;
+		if(val > 0){
+			t->itimer[which].expire = now + val;
+			setalarm(t->itimer[which].expire);
+		} else {
+			/* expire == 0 is skipped by alarmtimer(), nothing fires */
+			t->itimer[which].expire = 0;
+		}
+	}
+
+	if(ov != nil){
+		ov->it_interval.tv_sec = delta / 1000000000LL;
+		ov->it_interval.tv_usec = (delta % 1000000000LL)/1000;
+		ov->it_value.tv_sec = rem / 1000000000LL;
+		ov->it_value.tv_usec = (rem % 1000000000LL)/1000;
+		trace("ov->{interval->{%ld, %ld}, value->{%ld, %ld}}",
+			ov->it_interval.tv_sec, ov->it_interval.tv_usec,
+			ov->it_value.tv_sec, ov->it_value.tv_usec);
+	}
+
+	return 0;
+}
+
+/*
+ * sys_getitimer stores the interval and the remaining time of
+ * interval timer which at value. returns -EINVAL if value is
+ * nil or the timer is unknown, otherwise 0.
+ */
+int sys_getitimer(int which, void *value)
+{
+	Private *p;
+	Timers *t;
+	vlong left, interval;
+	struct linux_itimer *out = value;
+
+	trace("sys_getitimer(%d, %p)", which, value);
+
+	p = current->signal;
+	t = p->t;
+
+	if(value == nil || which < 0 || which >= nelem(t->itimer))
+		return -EINVAL;
+
+	interval = t->itimer[which].interval;
+	left = t->itimer[which].expire - nsec();
+	if(left < 0)
+		left = 0;
+
+	out->it_interval.tv_sec = interval / 1000000000LL;
+	out->it_interval.tv_usec = (interval % 1000000000LL)/1000;
+	out->it_value.tv_sec = left / 1000000000LL;
+	out->it_value.tv_usec = (left % 1000000000LL)/1000;
+
+	return 0;
+}
+
+/*
+ * sys_alarm rearms timer 0 as a one shot timer that expires in
+ * seconds, or disarms it if seconds is not positive. returns
+ * the whole seconds that were left on the previous setting.
+ */
+int sys_alarm(long seconds)
+{
+	Private *p;
+	Timers *t;
+	vlong left, now;
+
+	trace("sys_alarm(%ld)", seconds);
+	p = current->signal;
+	t = p->t;
+	now = nsec();
+	left = t->itimer[0].expire - now;
+	if(left < 0)
+		left = 0;
+	t->itimer[0].interval = 0;
+	if(seconds <= 0){
+		t->itimer[0].expire = 0;
+	} else {
+		t->itimer[0].expire = now + (vlong)seconds * 1000000000LL;
+		setalarm(t->itimer[0].expire);
+	}
+	return left / 1000000000LL;
+}
+
+/*
+ * Sfmt is the format routine for signal numbers. it takes an
+ * int argument and prints "number [NAME]", or just the number
+ * if it is outside 1..SIGMAX-1.
+ * NOTE(review): the table is indexed up to SIGMAX-1 -- presumably
+ * SIGRT8 is the highest signal number so this stays inside the
+ * table; confirm against the definition of SIGMAX.
+ */
+int
+Sfmt(Fmt *f)
+{
+ static char *t[] = {
+ [SIGHUP] = "SIGHUP",
+ [SIGINT] = "SIGINT",
+ [SIGQUIT] = "SIGQUIT",
+ [SIGILL] = "SIGILL",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGABRT] = "SIGABRT",
+ [SIGBUS] = "SIGBUS",
+ [SIGFPE] = "SIGFPE",
+ [SIGKILL] = "SIGKILL",
+ [SIGUSR1] = "SIGUSR1",
+ [SIGSEGV] = "SIGSEGV",
+ [SIGUSR2] = "SIGUSR2",
+ [SIGPIPE] = "SIGPIPE",
+ [SIGALRM] = "SIGALRM",
+ [SIGTERM] = "SIGTERM",
+ [SIGSTKFLT] = "SIGSTKFLT",
+ [SIGCHLD] = "SIGCHLD",
+ [SIGCONT] = "SIGCONT",
+ [SIGSTOP] = "SIGSTOP",
+ [SIGTSTP] = "SIGTSTP",
+ [SIGTTIN] = "SIGTTIN",
+ [SIGTTOU] = "SIGTTOU",
+ [SIGURG] = "SIGURG",
+ [SIGXCPU] = "SIGXCPU",
+ [SIGXFSZ] = "SIGXFSZ",
+ [SIGVTALRM] = "SIGVTALRM",
+ [SIGPROF] = "SIGPROF",
+ [SIGWINCH] = "SIGWINCH",
+ [SIGIO] = "SIGIO",
+ [SIGPWR] = "SIGPWR",
+ [SIGSYS] = "SIGSYS",
+ [SIGRT1] = "SIGRT1",
+ [SIGRT2] = "SIGRT2",
+ [SIGRT3] = "SIGRT3",
+ [SIGRT4] = "SIGRT4",
+ [SIGRT5] = "SIGRT5",
+ [SIGRT6] = "SIGRT6",
+ [SIGRT7] = "SIGRT7",
+ [SIGRT8] = "SIGRT8",
+ };
+
+ int sig;
+
+ sig = va_arg(f->args, int);
+ if(sig < 1 || sig >= SIGMAX)
+ return fmtprint(f, "%d", sig);
+ return fmtprint(f, "%d [%s]", sig, t[sig]);
+}
+
+/* proc.c */
+/* used by alarmtimer() below to ask for the next timer check of proc */
+extern int procsetalarm(Uproc *proc, vlong t);
+
+/*
+ * alarmtimer checks the interval timers of proc against the
+ * time now. timers that are not due yet are handed back to
+ * procsetalarm(). for every timer that is due, a SIGALRM with
+ * code SI_TIMER is sent to the process with killproc(); timers
+ * with an interval are rearmed for the next multiple of the
+ * interval after now (counting the skipped periods as overrun),
+ * the others are disarmed.
+ */
+void
+alarmtimer(Uproc *proc, vlong now)
+{
+ Private *p;
+ Timers *t;
+ vlong expire, delta;
+ Usiginfo si;
+ int i, overrun;
+
+ if((p = proc->signal) == nil)
+ return;
+ t = p->t;
+ for(i=0; i < nelem(t->itimer); i++){
+ expire = t->itimer[i].expire;
+ /* not armed */
+ if(expire <= 0)
+ continue;
+ /* not due yet */
+ if(now < expire){
+ procsetalarm(proc, expire);
+ continue;
+ }
+ overrun = 0;
+ delta = (t->itimer[i].interval);
+ if(delta > 0){
+ expire += delta;
+ while(expire <= now){
+ expire += delta;
+ overrun++;
+ }
+ procsetalarm(proc, expire);
+ } else {
+ expire = 0;
+ }
+ t->itimer[i].expire = expire;
+
+ memset(&si, 0, sizeof(si));
+ si.signo = SIGALRM;
+ si.code = SI_TIMER;
+ si.timer.tid = i;
+ si.timer.overrun = overrun;
+ killproc(proc, &si, 1);
+ }
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+ /* forward declarations for the structures defined below */
+ typedef struct Socket Socket;
+ typedef struct Connectproc Connectproc;
+ typedef struct Listenproc Listenproc;
+
+ /* size of the buffers used for ctl paths, dial strings and socket names */
+ enum {
+ Ctlsize = 128,
+ };
+
+ /*
+  * Per-socket state.  The anonymous Ufile member comes first so that a
+  * Socket can be used wherever a Ufile is expected (see the casts in the
+  * device handlers).
+  */
+ struct Socket
+ {
+ Ufile;
+
+ /* the three arguments given to socket()/socketpair() */
+ int family;
+ int stype;
+ int protocol;
+
+ /* AF_UNIX: second end of the pipe created by newsock(); -1 when unused */
+ int other;
+ /* network directory, e.g. "/net/tcp"; empty for AF_UNIX */
+ char net[40];
+ /* connection directory "<net>/<n>" */
+ char name[Ctlsize];
+
+ /* raw sockaddr bytes stored by bindsock()/connectsock() */
+ int naddr;
+ uchar addr[40];
+
+ /* helper state created on demand by bufprocsock(), connectsock(), listensock() */
+ void *bufproc;
+ Connectproc *connectproc;
+ Listenproc *listenproc;
+
+ /* set once a connection is established */
+ int connected;
+ /* result of the last connect attempt as stored by connectproc() */
+ int error;
+
+ /* links sockets queued on Listenproc.q until they are accepted */
+ Socket *next;
+ };
+
+ /*
+  * State shared between connectsock() and the forked connectproc().
+  * Created with ref == 2: one reference for the socket, one for the helper.
+  */
+ struct Connectproc
+ {
+ Ref;
+ QLock;
+ /* owning socket; set to nil by freeconnectproc() */
+ Socket *sock;
+ /* fd of /proc/<pid>/note of the helper, used to interrupt it */
+ int notefd;
+ /* woken by connectproc() when the attempt has finished */
+ Uwaitq wq;
+ /* destination produced by sockaddr2str() */
+ char str[Ctlsize];
+ };
+
+ /*
+  * State shared between listensock()/acceptsock() and the forked
+  * listenproc().  Created with ref == 2, like Connectproc.
+  */
+ struct Listenproc
+ {
+ Ref;
+ QLock;
+ /* owning socket; set to nil by freelistenproc() */
+ Socket *sock;
+ /* fd of /proc/<pid>/note of the helper, used to interrupt it */
+ int notefd;
+ /* woken whenever a connection is queued or the helper exits */
+ Uwaitq wq;
+ /* list of sockets waiting to be accepted, linked through Socket.next */
+ Socket *q;
+ /* local address produced by sockaddr2str() */
+ char str[Ctlsize];
+ };
+
+ /* address families understood by newsock() */
+ enum
+ {
+ AF_UNIX =1,
+ AF_INET =2,
+ AF_INET6 =10,
+ };
+
+ /* socket types; newsock() maps STREAM to tcp and DGRAM to udp */
+ enum
+ {
+ SOCK_STREAM =1,
+ SOCK_DGRAM =2,
+ SOCK_RAW =3,
+ };
+
+ /*
+  * Build the /srv file name used for an AF_UNIX socket path: the last
+  * path element of path prefixed with "/srv/UD.".  The result is written
+  * to npath (at most len bytes) and npath is returned.
+  */
+ static char*
+ srvname(char *npath, char *path, int len)
+ {
+ 	char *base;
+
+ 	base = strrchr(path, '/');
+ 	base = (base != 0) ? base+1 : path;
+ 	snprint(npath, len, "/srv/UD.%s", base);
+ 	return npath;
+ }
+
+ /*
+  * Post fd under the /srv name derived from path: any existing entry is
+  * removed, the file is created and the decimal fd number is written to
+  * it.  Returns 0 on success and -1 on any failure.
+  */
+ static int
+ srvunixsock(int fd, char *path)
+ {
+ 	char buf[8+Ctlsize+1];
+ 	int sfd, ret;
+
+ 	if(fd < 0)
+ 		return -1;
+
+ 	srvname(buf, path, sizeof(buf));
+ 	remove(buf);
+ 	sfd = create(buf, OWRITE, 0666);
+ 	if(sfd < 0)
+ 		return -1;
+
+ 	/* buf is reused for the text written to the srv file */
+ 	sprint(buf, "%d", fd);
+ 	ret = (write(sfd, buf, strlen(buf)) < 0) ? -1 : 0;
+ 	close(sfd);
+ 	return ret;
+ }
+
+ /* Remove the /srv entry that srvunixsock() would create for path. */
+ static void
+ unsrvunixsock(char *path)
+ {
+ 	char srv[8+Ctlsize+1];
+
+ 	remove(srvname(srv, path, sizeof(srv)));
+ }
+
+ /*
+  * Allocate a zeroed Socket for the given family/type/protocol with one
+  * reference, read-write mode, SOCKDEV as device and no descriptors yet.
+  */
+ static Socket*
+ allocsock(int family, int stype, int protocol)
+ {
+ 	Socket *s;
+
+ 	s = kmallocz(sizeof(*s), 1);
+
+ 	/* Ufile part */
+ 	s->ref = 1;
+ 	s->dev = SOCKDEV;
+ 	s->mode = O_RDWR;
+ 	s->fd = -1;
+
+ 	/* socket part */
+ 	s->other = -1;
+ 	s->family = family;
+ 	s->stype = stype;
+ 	s->protocol = protocol;
+
+ 	return s;
+ }
+
+ /*
+  * Create a socket and install it in the descriptor table.
+  * AF_UNIX sockets are backed by a pipe(); AF_INET/AF_INET6 sockets clone
+  * a connection from /net/tcp or /net/udp and open its data file.
+  * Returns the result of newfd(), -EINVAL for unsupported family/type
+  * combinations, or the value of mkerror() after a failed system call.
+  */
+ static int
+ newsock(int family, int stype, int protocol)
+ {
+ Socket *sock;
+ char *net;
+ char buf[Ctlsize];
+ int pfd[2];
+ int cfd, dfd;
+ int n;
+ int err;
+
+ trace("newsock(%d, %d, %d)", family, stype, protocol);
+
+ /* pick the network name; nil means "no /net directory" (AF_UNIX) */
+ err = -EINVAL;
+ switch(family){
+ case AF_INET:
+ case AF_INET6:
+ switch(stype){
+ case SOCK_DGRAM:
+ net = "udp";
+ break;
+ case SOCK_STREAM:
+ net = "tcp";
+ break;
+ default:
+ trace("newsock() unknown socket type %d/%d", family, stype);
+ return err;
+ }
+ break;
+ case AF_UNIX:
+ net = nil;
+ break;
+
+ default:
+ trace("newsock() unknown network family %d", family);
+ return err;
+ }
+
+ sock = allocsock(family, stype, protocol);
+ cfd = -1;
+ if(net == nil){
+ /* keep both pipe ends: fd is used for I/O, other is posted or closed later */
+ if(pipe(pfd) < 0){
+ err = mkerror();
+ goto errout;
+ }
+ sock->other = pfd[1];
+ sock->fd = pfd[0];
+ } else {
+ snprint(buf, sizeof(buf), "/net/%s/clone", net);
+ if((cfd = open(buf, ORDWR)) < 0){
+ err = mkerror();
+ goto errout;
+ }
+ /* reading the clone file yields the connection number */
+ n = read(cfd, buf, sizeof(buf)-1);
+ if(n < 0)
+ err = mkerror();
+ /* an empty read leaves err at -EINVAL */
+ if(n <= 0)
+ goto errout;
+ buf[n] = 0;
+ n = atoi(buf);
+ snprint(buf, sizeof(buf), "/net/%s/%d/data", net, n);
+ if((dfd = open(buf, ORDWR)) < 0){
+ err = mkerror();
+ goto errout;
+ }
+ close(cfd);
+ sock->fd = dfd;
+ snprint(sock->net, sizeof(sock->net), "/net/%s", net);
+ snprint(sock->name, sizeof(sock->name), "%s/%d", sock->net, n);
+ }
+ return newfd(sock, FD_CLOEXEC);
+
+ errout:
+ /* cfd is still -1 on the pipe path and when the clone open failed */
+ close(cfd);
+ free(sock);
+ return err;
+ }
+
+ /*
+  * Drop one reference to cp and detach it from its socket.  While another
+  * holder remains, an "interrupt" note is written to the helper process;
+  * the last holder closes the note fd and frees the structure.
+  */
+ static void
+ freeconnectproc(Connectproc *cp)
+ {
+ 	if(cp == nil)
+ 		return;
+
+ 	qlock(cp);
+ 	cp->sock = nil;
+ 	if(decref(cp) == 0){
+ 		/* last reference gone */
+ 		qunlock(cp);
+ 		close(cp->notefd);
+ 		free(cp);
+ 		return;
+ 	}
+ 	write(cp->notefd, "interrupt", 9);
+ 	qunlock(cp);
+ }
+
+ /*
+  * Drop one reference to lp and detach it from its socket.  While another
+  * holder remains, an "interrupt" note is written to the helper process.
+  * The last holder releases every socket still queued for accept, closes
+  * the note fd and frees the structure.
+  */
+ static void
+ freelistenproc(Listenproc *lp)
+ {
+ 	Socket *pending;
+
+ 	if(lp == nil)
+ 		return;
+
+ 	qlock(lp);
+ 	lp->sock = nil;
+ 	if(decref(lp) != 0){
+ 		write(lp->notefd, "interrupt", 9);
+ 		qunlock(lp);
+ 		return;
+ 	}
+
+ 	/* nobody will accept these any more */
+ 	for(pending = lp->q; pending != nil; pending = lp->q){
+ 		lp->q = pending->next;
+ 		putfile(pending);
+ 	}
+ 	qunlock(lp);
+ 	close(lp->notefd);
+ 	free(lp);
+ }
+
+ /*
+  * close handler of the socket device: closes both descriptors and
+  * releases the buffer, connect and listen helpers.  Always returns 0.
+  */
+ static int
+ closesock(Ufile *file)
+ {
+ 	Socket *s;
+
+ 	s = (Socket*)file;
+
+ 	/* descriptors first */
+ 	close(s->fd);
+ 	close(s->other);
+
+ 	/* then the helpers attached to the socket */
+ 	freebufproc(s->bufproc);
+ 	freeconnectproc(s->connectproc);
+ 	freelistenproc(s->listenproc);
+
+ 	return 0;
+ }
+
+
+ /*
+  * Helper process forked by connectsock().  It performs the (possibly
+  * blocking) connect for cp->sock, stores the result in sock->error, marks
+  * the socket connected on success, wakes everybody sleeping on cp->wq and
+  * drops its reference to cp.
+  *
+  * The Connectproc lock is held on entry (taken here) and released around
+  * the blocking operations; after re-locking, cp->sock is checked again
+  * because the socket may have gone away in the meantime.
+  */
+ static void
+ connectproc(void *aux)
+ {
+ 	int fd, cfd, other;
+ 	char buf[Ctlsize], tmp[8+Ctlsize+1];
+ 	Connectproc *cp;
+ 	Socket *sock;
+ 	int err;
+
+ 	cp = (Connectproc*)aux;
+ 	qlock(cp);
+ 	if((sock = cp->sock) == nil)
+ 		goto out;
+
+ 	snprint(buf, sizeof(buf), "connectproc() %s", cp->str);
+ 	setprocname(buf);
+
+ 	err = 0;
+ 	switch(sock->family){
+ 	case AF_UNIX:
+ 		/* copy what we need; sock must not be touched while unlocked */
+ 		fd = sock->fd;
+ 		other = sock->other;
+ 		qunlock(cp);
+
+ 		err = -ECONNREFUSED;
+ 		/*
+ 		 * tmp, not buf, receives the name: pass its own size so long
+ 		 * names are not cut shorter than the name the listener posted
+ 		 * (srvunixsock() formats into a buffer of the same size as tmp).
+ 		 */
+ 		srvname(tmp, cp->str, sizeof(tmp));
+ 		if((cfd = open(tmp, ORDWR)) < 0)
+ 			break;
+
+ 		/* post our end of the pipe under a unique name and tell the listener */
+ 		memset(buf, 0, sizeof(buf));
+ 		snprint(buf, sizeof(buf), "linuxemu.%d.%lux", getpid(), (ulong)sock);
+ 		if(srvunixsock(other, buf) < 0){
+ 			close(cfd);
+ 			break;
+ 		}
+
+ 		/*
+ 		 * write Ctlsize-1 bytes so concurrent writes will not be merged together as
+ 		 * Ctlsize-1 is the size used in read(). see /sys/src/ape/lib/bsd/accept.c:87
+ 		 * this should be fixed in ape's connect() as well.
+ 		 */
+ 		if(write(cfd, buf, sizeof(buf)-1) != sizeof(buf)-1){
+ 			close(cfd);
+ 			unsrvunixsock(buf);
+ 			break;
+ 		}
+ 		close(cfd);
+ 		/* the listener acknowledges by echoing the name back on our pipe */
+ 		if((read(fd, tmp, strlen(buf)) != strlen(buf)) || memcmp(buf, tmp, strlen(buf))){
+ 			unsrvunixsock(buf);
+ 			break;
+ 		}
+ 		unsrvunixsock(buf);
+ 		err = 0;
+ 		break;
+
+ 	default:
+ 		/* network sockets: write "connect <addr>" to the ctl file */
+ 		snprint(buf, sizeof(buf), "%s/ctl", sock->name);
+ 		qunlock(cp);
+ 		if((cfd = open(buf, ORDWR)) < 0){
+ 			err = mkerror();
+ 			break;
+ 		}
+ 		if(fprint(cfd, "connect %s", cp->str) < 0)
+ 			err = mkerror();
+ 		close(cfd);
+ 	}
+
+ 	qlock(cp);
+ 	if((sock = cp->sock) == nil)
+ 		goto out;
+ 	if(err == 0){
+ 		close(sock->other);
+ 		sock->other = -1;
+ 		sock->connected = 1;
+ 	}
+ 	sock->error = err;
+ out:
+ 	wakeq(&cp->wq, MAXPROC);
+ 	qunlock(cp);
+ 	freeconnectproc(cp);
+ }
+
+ /*
+  * Convert a raw sockaddr into text in buf (nbuf bytes).
+  *   AF_INET: "a.b.c.d!port" from address bytes 4..7 and port bytes 2..3.
+  *   AF_UNIX: the path that follows the two family bytes, truncated to fit.
+  * Returns the snprint() result (AF_INET) or the number of bytes copied
+  * (AF_UNIX); -EINVAL for short addresses, AF_INET6 and unknown families.
+  */
+ static int
+ sockaddr2str(Socket *sock, uchar *addr, int addrlen, char *buf, int nbuf)
+ {
+ 	int n;
+
+ 	switch(sock->family){
+ 	case AF_INET:
+ 		if(addrlen < 8)
+ 			return -EINVAL;
+ 		return snprint(buf, nbuf, "%d.%d.%d.%d!%d",
+ 			(int)(addr[4]),
+ 			(int)(addr[5]),
+ 			(int)(addr[6]),
+ 			(int)(addr[7]),
+ 			(int)(((ulong)addr[2]<<8)|(ulong)addr[3]));
+
+ 	case AF_UNIX:
+ 		/* skip the family field; what remains is the path */
+ 		n = addrlen - 2;
+ 		if(n <= 0)
+ 			return -EINVAL;
+ 		if(n >= nbuf)
+ 			n = nbuf-1;
+ 		memmove(buf, addr+2, n);
+ 		buf[n] = 0;
+ 		return n;
+
+ 	case AF_INET6:
+ 		/* TODO */
+ 		break;
+ 	}
+
+ 	return -EINVAL;
+ }
+
+ /*
+  * connect(): start a connectproc() helper for sock and, unless the socket
+  * is non-blocking, sleep until the helper reports a result.
+  * Returns -EISCONN / -EALREADY when a connection exists or is in progress,
+  * the sockaddr2str() error for bad addresses, -EINPROGRESS for
+  * non-blocking sockets, otherwise the sleepq() error or sock->error.
+  */
+ static int
+ connectsock(Socket *sock, uchar *addr, int addrlen)
+ {
+ Connectproc *cp;
+ int err;
+ char buf[Ctlsize];
+ int pid;
+
+ if(sock->connected)
+ return -EISCONN;
+ if(sock->connectproc)
+ return -EALREADY;
+
+ if((err = sockaddr2str(sock, addr, addrlen, buf, sizeof(buf))) < 0)
+ return err;
+
+ /* two references: one for the socket, one for the helper process */
+ cp = kmallocz(sizeof(*cp), 1);
+ cp->ref = 2;
+ cp->sock = sock;
+ strncpy(cp->str, buf, sizeof(cp->str));
+
+ /* the helper blocks on this lock until the setup below is finished */
+ qlock(cp);
+ sock->error = 0;
+ if((pid = procfork(connectproc, cp, 0)) < 0){
+ qunlock(cp);
+ free(cp);
+ return mkerror();
+ }
+ /* note file of the helper; freeconnectproc() writes "interrupt" to it */
+ snprint(buf, sizeof(buf), "/proc/%d/note", pid);
+ cp->notefd = open(buf, OWRITE);
+
+ /* remember the peer address, truncated to the size of sock->addr */
+ if(addrlen > sizeof(sock->addr))
+ addrlen = sizeof(sock->addr);
+ sock->naddr = addrlen;
+ memmove(sock->addr, addr, addrlen);
+
+ sock->connectproc = cp;
+ if(sock->mode & O_NONBLOCK){
+ qunlock(cp);
+ return -EINPROGRESS;
+ }
+ if((err = sleepq(&cp->wq, cp, 1)) == 0)
+ err = sock->error;
+ qunlock(cp);
+
+ /*
+ * An interrupted connect (EINTR/ERESTART) leaves the Connectproc
+ * attached to the socket; in every other case it is released here.
+ * see: http://www.madore.org/~david/computers/connect-intr.html
+ */
+ if(err != -EINTR && err != -ERESTART){
+ sock->connectproc = nil;
+ freeconnectproc(cp);
+ }
+ return err;
+ }
+
+ /*
+  * shutdown(): releases all helpers, closes the data descriptor and marks
+  * the socket as not connected.  The how argument is ignored, so both
+  * directions are always shut down.  Always returns 0.
+  */
+ static int
+ shutdownsock(Socket *sock, int how)
+ {
+ USED(how);
+
+ /* each helper pointer is cleared right after it has been released */
+ freebufproc(sock->bufproc);
+ sock->bufproc = nil;
+ freeconnectproc(sock->connectproc);
+ sock->connectproc = nil;
+ freelistenproc(sock->listenproc);
+ sock->listenproc = nil;
+ close(sock->fd);
+ sock->fd = -1;
+ sock->connected = 0;
+
+ return 0;
+ }
+
+ /*
+  * bind(): for AF_INET the port taken from address bytes 2..3 is announced
+  * and bound through the connection's ctl file; for AF_UNIX nothing is
+  * done here (the name is posted later by listenproc()).  In both cases
+  * the raw address is remembered in sock->addr, truncated to its size.
+  * Returns 0, -EINVAL for bad arguments or unsupported families, or the
+  * value of mkerror() when the ctl file cannot be used.
+  */
+ static int
+ bindsock(Socket *sock, uchar *addr, int addrlen)
+ {
+ 	int port;
+ 	int cfd;
+ 	char buf[Ctlsize];
+
+ 	/*
+ 	 * addr and addrlen come straight from the emulated program; a negative
+ 	 * length or a missing buffer must not reach the memmove() below.
+ 	 */
+ 	if(addrlen < 0 || (addr == nil && addrlen > 0))
+ 		return -EINVAL;
+
+ 	port = -1;
+ 	switch(sock->family){
+ 	default:
+ 		return -EINVAL;
+
+ 	case AF_UNIX:
+ 		break;
+ 	case AF_INET:
+ 		if(addrlen < 4)
+ 			return -EINVAL;
+ 		port = (int)(((ulong)addr[2]<<8)|(ulong)addr[3]);
+ 		break;
+ 	case AF_INET6:
+ 		/* TODO */
+ 		return -EINVAL;
+ 	}
+
+ 	if(port >= 0){
+ 		snprint(buf, sizeof(buf), "%s/ctl", sock->name);
+ 		if((cfd = open(buf, ORDWR)) < 0)
+ 			return mkerror();
+ 		if((fprint(cfd, "announce %d", port) < 0) || (fprint(cfd, "bind %d", port) < 0)){
+ 			close(cfd);
+ 			return mkerror();
+ 		}
+ 		close(cfd);
+ 	}
+
+ 	if(addrlen > sizeof(sock->addr))
+ 		addrlen = sizeof(sock->addr);
+ 	sock->naddr = addrlen;
+ 	memmove(sock->addr, addr, addrlen);
+
+ 	return 0;
+ }
+
+ /*
+  * Parse a textual address made of '.'-separated decimal bytes and/or
+  * ':'-separated hexadecimal 16-bit groups into ip (iplen bytes, cleared
+  * first).  The string is processed one character at a time:
+  *   i   is the index in ip of the group currently being filled,
+  *   v6  is 1 while that group is a two byte hex group, 0 for a one byte
+  *       decimal group.
+  * Returns i + v6 + 1, i.e. the index just past the last group, or -1 on
+  * a bad character or when a group would fall outside ip.
+  */
+ static int
+ strtoip(char *str, uchar *ip, int iplen)
+ {
+ int i, d, v6;
+ char *p, *k;
+
+ i = 0;
+ v6 = 1;
+ memset(ip, 0, iplen);
+ for(p = str; *p; p++){
+ if(*p == ':'){
+ if(p[1] == ':'){
+ /*
+ * "::" gap: position i so that the groups still to come end exactly
+ * at iplen.  Start at the end and step back 2 bytes for every ':'
+ * and 1 byte for every '.' in the rest, plus the final group.
+ */
+ p++;
+ i = iplen;
+ for(k = p+1; *k; k++){
+ if(*k == ':'){
+ v6 = 1;
+ i -= 2;
+ }
+ if(*k == '.'){
+ v6 = 0;
+ i -= 1;
+ }
+ }
+ i -= v6+1;
+ } else {
+ /* single ':' ends a two byte group */
+ i += 2;
+ }
+ continue;
+ } else if(*p == '.'){
+ /* '.' ends a one byte group */
+ i++;
+ continue;
+ }
+
+ /* look ahead to the next separator to learn the kind of this group */
+ for(k = p; *k && *k != '.' && *k != ':'; k++)
+ ;
+ if(*k == '.'){
+ v6 = 0;
+ } else if(*k == ':'){
+ v6 = 1;
+ }
+
+ /* the whole group (1 or 2 bytes) has to fit into ip */
+ if(i < 0 || i + v6+1 > iplen)
+ return -1;
+
+ /* hex letters are only accepted inside a hex group */
+ if(*p >= '0' && *p <= '9'){
+ d = *p - '0';
+ } else if(v6 && (*p >= 'a' && *p <= 'f')){
+ d = 0x0A + *p - 'a';
+ } else if(v6 && (*p >= 'A' && *p <= 'F')){
+ d = 0x0A + *p - 'A';
+ } else {
+ return -1;
+ }
+
+ if(v6){
+ /* shift the 16-bit value stored so far left by one hex digit and add d */
+ d |= ((int)ip[i]<<12 | (int)ip[i+1]<<4);
+ ip[i] = (d>>8) & 0xFF;
+ ip[i+1] = d & 0xFF;
+ } else {
+ /* accumulate one more decimal digit */
+ ip[i] = ip[i]*10 + d;
+ }
+ }
+
+ return i + v6+1;
+ }
+
+ /*
+  * Store the local (remote == 0) or remote address of sock as a raw
+  * sockaddr in addr, which has room for len bytes.
+  *   AF_UNIX: the address remembered in sock->addr is copied.
+  *   AF_INET: the "local"/"remote" file of the connection directory is
+  *            read ("ip!port") and converted.
+  * Returns the number of bytes stored, -EINVAL when the buffer is too
+  * small or the address cannot be parsed (AF_INET6 is not implemented),
+  * or the value of mkerror() when the file cannot be read.
+  */
+ static int
+ getsockaddr(Socket *sock, int remote, uchar *addr, int len)
+ {
+ 	char buf[Ctlsize];
+ 	char *p;
+ 	uchar *a;
+ 	int fd;
+ 	int n, port;
+
+ 	a = addr;
+ 	switch(sock->family){
+ 	case AF_UNIX:
+ 		/*
+ 		 * Return right here when the buffer is too small: buf has not
+ 		 * been filled for this family, so the text parsing below must
+ 		 * not be reached.
+ 		 */
+ 		if(len < sock->naddr)
+ 			return -EINVAL;
+ 		memmove(a, sock->addr, sock->naddr);
+ 		return sock->naddr;
+ 	case AF_INET:
+ 	case AF_INET6:
+ 		snprint(buf, sizeof(buf), "%s/%s", sock->name, remote?"remote":"local");
+ 		if((fd = open(buf, OREAD)) < 0)
+ 			return mkerror();
+ 		if((n = read(fd, buf, sizeof(buf)-1)) < 0){
+ 			close(fd);
+ 			return mkerror();
+ 		}
+ 		close(fd);
+ 		/* strip the trailing newline */
+ 		if(n > 0 && buf[n-1] == '\n')
+ 			n--;
+ 		buf[n] = 0;
+ 		break;
+ 	default:
+ 		return -EINVAL;
+ 	}
+
+ 	/* split "ip!port" */
+ 	if((p = strrchr(buf, '!')) == nil)
+ 		return -EINVAL;
+ 	*p++ = 0;
+ 	port = atoi(p);
+
+ 	trace("getsockaddr(): ip=%s port=%d", buf, port);
+
+ 	switch(sock->family){
+ 	case AF_INET:
+ 		if(len < 8)
+ 			break;
+ 		if(len > 16)
+ 			len = 16;
+ 		memset(a, 0, len);
+ 		/* family in the first two bytes, low byte first; port high byte first */
+ 		a[0] = sock->family & 0xFF;
+ 		a[1] = (sock->family>>8) & 0xFF;
+ 		a[2] = (port >> 8) & 0xFF;
+ 		a[3] = port & 0xFF;
+ 		if(strtoip(buf, &a[4], 4) < 0)
+ 			break;
+ 		return len;
+
+ 	case AF_INET6:
+ 		/* TODO */
+ 		break;
+ 	}
+
+ 	return -EINVAL;
+ }
+
+ /*
+  * Helper process forked by listensock().  It waits for incoming
+  * connections on lp->sock, wraps each one in a new connected Socket,
+  * queues it on lp->q and wakes the sleepers on lp->wq.  The loop ends
+  * when a read fails or the listening socket has gone away; the helper
+  * then drops its reference to lp.
+  *
+  * The Listenproc lock is released around the blocking calls.  After it
+  * has been re-acquired the listening socket may be gone, so lp->sock is
+  * re-read before sock is used again, and the address family is cached in
+  * a local so that the cleanup after the loop never dereferences a nil
+  * or stale socket pointer.
+  */
+ static void
+ listenproc(void *aux)
+ {
+ 	Listenproc *lp;
+ 	Socket *sock, *q;
+ 	char buf[Ctlsize], tmp[8+Ctlsize+1];
+ 	int cfd, fd, n, family;
+
+ 	lp = (Listenproc*)aux;
+ 	qlock(lp);
+ 	if((sock = lp->sock) == nil)
+ 		goto out;
+ 	family = sock->family;
+
+ 	snprint(buf, sizeof(buf), "listenproc() %s", lp->str);
+ 	setprocname(buf);
+
+ 	for(;;){
+ 		n = 0;
+ 		cfd = -1;
+ 		switch(family){
+ 		case AF_UNIX:
+ 			/*
+ 			 * post the write end of our pipe under the bound name (a
+ 			 * no-op once sock->other is -1), then wait for a client to
+ 			 * write the name of its own srv file.
+ 			 */
+ 			srvunixsock(sock->other, lp->str);
+ 			close(sock->other);
+ 			sock->other = -1;
+ 			fd = sock->fd;
+ 			qunlock(lp);
+ 			n = read(fd, buf, sizeof(buf)-1);
+ 			qlock(lp);
+ 			break;
+
+ 		default:
+ 			/* reading the listen file yields the number of the new connection */
+ 			snprint(buf, sizeof(buf), "%s/listen", sock->name);
+ 			qunlock(lp);
+ 			if((cfd = open(buf, ORDWR)) >= 0)
+ 				n = read(cfd, buf, sizeof(buf)-1);
+ 			qlock(lp);
+ 			if(n <= 0)
+ 				close(cfd);
+ 		}
+ 		if(n <= 0)
+ 			break;
+ 		buf[n] = 0;
+
+ 		/* the socket may have been closed while we were unlocked */
+ 		if((sock = lp->sock) == nil){
+ 			close(cfd);
+ 			break;
+ 		}
+
+ 		switch(family){
+ 		case AF_UNIX:
+ 			/* open the client's srv file and echo its name back as acknowledgement */
+ 			srvname(tmp, buf, sizeof(tmp));
+ 			if((fd = open(tmp, ORDWR)) < 0)
+ 				break;
+ 			unsrvunixsock(buf);
+ 			if(write(fd, buf, strlen(buf)) != strlen(buf)){
+ 				close(fd);
+ 				fd = -1;
+ 			}
+ 			buf[0] = 0;
+ 			break;
+
+ 		default:
+ 			n = atoi(buf);
+ 			snprint(buf, sizeof(buf), "%s/%d", sock->net, n);
+ 			snprint(tmp, sizeof(tmp), "%s/data", buf);
+ 			fd = open(tmp, ORDWR);
+ 			close(cfd);
+ 			break;
+ 		}
+
+ 		if(fd < 0)
+ 			continue;
+
+ 		/* build the socket handed out by acceptsock() */
+ 		q = allocsock(sock->family, sock->stype, sock->protocol);
+ 		strncpy(q->net, sock->net, sizeof(q->net));
+ 		strncpy(q->name, buf, sizeof(q->name));
+
+ 		if(family == AF_UNIX){
+ 			memmove(q->addr, sock->addr, q->naddr = sock->naddr);
+ 		} else {
+ 			q->naddr = getsockaddr(q, 0, q->addr, sizeof(q->addr));
+ 		}
+
+ 		q->fd = fd;
+ 		q->connected = 1;
+ 		q->next = lp->q;
+ 		lp->q = q;
+ 		wakeq(&lp->wq, MAXPROC);
+ 	}
+
+ 	/* sock may be nil or stale here, so only the cached family is used */
+ 	if(family == AF_UNIX)
+ 		unsrvunixsock(lp->str);
+ out:
+ 	wakeq(&lp->wq, MAXPROC);
+ 	qunlock(lp);
+ 	freelistenproc(lp);
+ }
+
+
+ /*
+  * listen(): fork a listenproc() helper for sock unless one already
+  * exists.  Returns 0 on success, the sockaddr2str() error when the bound
+  * address cannot be converted, or the value of mkerror() when the fork
+  * fails.
+  */
+ static int
+ listensock(Socket *sock)
+ {
+ 	char path[Ctlsize];
+ 	Listenproc *lp;
+ 	int n, pid;
+
+ 	trace("listensock()");
+
+ 	if(sock->listenproc)
+ 		return 0;
+
+ 	n = sockaddr2str(sock, sock->addr, sock->naddr, path, sizeof(path));
+ 	if(n < 0)
+ 		return n;
+
+ 	/* two references: one for the socket, one for the helper */
+ 	lp = kmallocz(sizeof(*lp), 1);
+ 	lp->ref = 2;
+ 	lp->sock = sock;
+ 	strncpy(lp->str, path, sizeof(lp->str));
+
+ 	/* keep the helper waiting on the lock until the setup is complete */
+ 	qlock(lp);
+ 	pid = procfork(listenproc, lp, 0);
+ 	if(pid < 0){
+ 		qunlock(lp);
+ 		free(lp);
+ 		return mkerror();
+ 	}
+ 	snprint(path, sizeof(path), "/proc/%d/note", pid);
+ 	lp->notefd = open(path, OWRITE);
+ 	sock->listenproc = lp;
+ 	qunlock(lp);
+
+ 	return 0;
+ }
+
+ /*
+  * getsockname(): copy the address remembered in sock->addr to addr.
+  * On entry *paddrlen is the size of the caller's buffer; at most that
+  * many bytes are copied.  On return *paddrlen holds the full length of
+  * the stored address, which may exceed what was copied.
+  * Returns that length, or -EINVAL for nil arguments or a negative size.
+  * NOTE(review): Linux returns 0 on success here; the existing return
+  * value is kept unchanged for callers.
+  */
+ static int
+ getsockname(Socket *sock, uchar *addr, int *paddrlen)
+ {
+ 	int ret, n;
+
+ 	trace("getsockname(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+ 	if(addr == nil || paddrlen == nil)
+ 		return -EINVAL;
+ 	if(*paddrlen < 0)
+ 		return -EINVAL;
+
+ 	ret = sock->naddr;
+ 	/* never write past the buffer the caller told us about */
+ 	n = ret;
+ 	if(n > *paddrlen)
+ 		n = *paddrlen;
+ 	memmove(addr, sock->addr, n);
+ 	*paddrlen = ret;
+
+ 	return ret;
+ }
+
+ /*
+  * getpeername(): fetch the remote address with getsockaddr().  A
+  * positive result is also stored in *paddrlen; the getsockaddr() result
+  * is returned either way.  -EINVAL for nil arguments.
+  */
+ static int
+ getpeername(Socket *sock, uchar *addr, int *paddrlen)
+ {
+ 	int n;
+
+ 	trace("getpeername(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+ 	if(addr == nil || paddrlen == nil)
+ 		return -EINVAL;
+
+ 	n = getsockaddr(sock, 1, addr, *paddrlen);
+ 	if(n > 0)
+ 		*paddrlen = n;
+ 	return n;
+ }
+
+ /*
+  * accept(): take the next socket queued by listenproc() and install it
+  * in the descriptor table.  Sleeps on the Listenproc wait queue while
+  * the queue is empty unless the socket is non-blocking (-EAGAIN).
+  * When addr and paddrlen are given, the peer address is stored and
+  * *paddrlen set to its length (0 if it cannot be obtained).
+  * Returns the result of newfd(), -EINVAL when the socket is not
+  * listening, or a negative sleepq() result.
+  */
+ static int
+ acceptsock(Socket *sock, uchar *addr, int *paddrlen)
+ {
+ 	Listenproc *lp;
+ 	Socket *ns;
+ 	int n;
+
+ 	trace("acceptsock(%p, %p, %p (%x))", sock, addr, paddrlen, paddrlen ? *paddrlen : 0);
+
+ 	lp = sock->listenproc;
+ 	if(lp == nil)
+ 		return -EINVAL;
+
+ 	qlock(lp);
+ 	while((ns = lp->q) == nil){
+ 		if(sock->mode & O_NONBLOCK){
+ 			qunlock(lp);
+ 			return -EAGAIN;
+ 		}
+ 		n = sleepq(&lp->wq, lp, 1);
+ 		if(n < 0){
+ 			qunlock(lp);
+ 			return n;
+ 		}
+ 	}
+
+ 	/* unlink the head of the queue */
+ 	lp->q = ns->next;
+ 	ns->next = nil;
+ 	qunlock(lp);
+
+ 	if(addr != nil && paddrlen != nil){
+ 		n = getsockaddr(ns, 1, addr, *paddrlen);
+ 		*paddrlen = n < 0 ? 0 : n;
+ 	}
+ 	return newfd(ns, FD_CLOEXEC);
+ }
+
+ /*
+  * socketpair(): only AF_UNIX is supported (-EAFNOSUPPORT otherwise).
+  * Both ends of a pipe() are wrapped in connected sockets and their
+  * descriptors stored in sv[0] and sv[1].  Returns 0, the value of
+  * mkerror() when pipe() fails, or the failing newfd() result.
+  */
+ static int
+ socketpair(int family, int stype, int protocol, int sv[2])
+ {
+ 	Socket *end;
+ 	int pfd[2];
+ 	int k, nfd;
+
+ 	trace("socketpair(%d, %d, %d, %p)", family, stype, protocol, sv);
+
+ 	if(family != AF_UNIX)
+ 		return -EAFNOSUPPORT;
+ 	if(pipe(pfd) < 0)
+ 		return mkerror();
+
+ 	for(k = 0; k < 2; k++){
+ 		end = allocsock(family, stype, protocol);
+ 		end->fd = pfd[k];
+ 		end->connected = 1;
+ 		nfd = newfd(end, FD_CLOEXEC);
+ 		if(nfd >= 0){
+ 			sv[k] = nfd;
+ 			continue;
+ 		}
+
+ 		/* undo: drop the first descriptor if it was installed, then the pipe */
+ 		if(k > 0)
+ 			sys_close(sv[0]);
+ 		close(pfd[0]);
+ 		close(pfd[1]);
+ 		return nfd;
+ 	}
+ 	return 0;
+ }
+
+ /* Return the buffer helper of sock, creating it on first use. */
+ static void*
+ bufprocsock(Socket *sock)
+ {
+ 	if(sock->bufproc != nil)
+ 		return sock->bufproc;
+ 	sock->bufproc = newbufproc(sock->fd);
+ 	return sock->bufproc;
+ }
+
+ /*
+  * poll handler of the socket device.
+  * Connected sockets are delegated to pollbufproc().  Otherwise a
+  * listening socket reports POLLIN when a connection is queued and a
+  * connecting socket reports POLLOUT when sock->error is negative; in
+  * both cases the caller is registered on the helper's wait queue.
+  */
+ static int
+ pollsock(Ufile *file, void *tab)
+ {
+ 	Socket *s;
+ 	Listenproc *lp;
+ 	Connectproc *cp;
+ 	int ready;
+
+ 	s = (Socket*)file;
+ 	if(s->connected)
+ 		return pollbufproc(bufprocsock(s), s, tab);
+
+ 	lp = s->listenproc;
+ 	if(lp != nil){
+ 		qlock(lp);
+ 		pollwait(file, &lp->wq, tab);
+ 		ready = lp->q != nil;
+ 		qunlock(lp);
+ 		if(ready)
+ 			return POLLIN;
+ 	}
+
+ 	cp = s->connectproc;
+ 	if(cp != nil){
+ 		qlock(cp);
+ 		pollwait(file, &cp->wq, tab);
+ 		ready = s->error < 0;
+ 		qunlock(cp);
+ 		if(ready)
+ 			return POLLOUT;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * read handler of the socket device (the offset argument is unused).
+  * Non-blocking sockets, and sockets that already have a buffer helper,
+  * read through readbufproc(); all others read the descriptor directly
+  * between notifyme(1)/notifyme(0).
+  * Returns the byte count, -ENOTCONN, -ERESTART or the value of mkerror().
+  */
+ static int
+ readsock(Ufile *file, void *buf, int len, vlong)
+ {
+ 	Socket *s;
+ 	int n;
+
+ 	s = (Socket*)file;
+ 	if(!s->connected)
+ 		return -ENOTCONN;
+
+ 	if((s->mode & O_NONBLOCK) || (s->bufproc != nil))
+ 		return readbufproc(bufprocsock(s), buf, len, 0, (s->mode & O_NONBLOCK));
+
+ 	if(notifyme(1))
+ 		return -ERESTART;
+ 	n = read(s->fd, buf, len);
+ 	notifyme(0);
+ 	if(n < 0)
+ 		return mkerror();
+ 	return n;
+ }
+
+ extern int pipewrite(int fd, void *buf, int len);
+
+ /*
+  * write handler of the socket device (the offset argument is unused).
+  * AF_UNIX sockets are written with pipewrite(); all others write the
+  * descriptor directly between notifyme(1)/notifyme(0).
+  * Returns the byte count, -ENOTCONN, -ERESTART or the value of mkerror().
+  */
+ static int
+ writesock(Ufile *file, void *buf, int len, vlong)
+ {
+ 	Socket *s;
+ 	int n;
+
+ 	s = (Socket*)file;
+ 	if(!s->connected)
+ 		return -ENOTCONN;
+ 	if(s->family == AF_UNIX)
+ 		return pipewrite(s->fd, buf, len);
+
+ 	if(notifyme(1))
+ 		return -ERESTART;
+ 	n = write(s->fd, buf, len);
+ 	notifyme(0);
+ 	return n < 0 ? mkerror() : n;
+ }
+
+ /*
+  * ioctl handler of the socket device.  Only request 0x541B (FIONREAD on
+  * Linux) is handled: the number of readable bytes reported by
+  * nreadablebufproc() is stored in *arg, or 0 together with the error
+  * when that call fails.  Any other request returns -ENOTTY.
+  */
+ static int
+ ioctlsock(Ufile *file, int cmd, void *arg)
+ {
+ 	Socket *s;
+ 	int avail;
+
+ 	s = (Socket*)file;
+ 	if(cmd != 0x541B)
+ 		return -ENOTTY;
+ 	if(arg == nil)
+ 		return -EINVAL;
+
+ 	avail = nreadablebufproc(bufprocsock(s));
+ 	if(avail < 0){
+ 		*((int*)arg) = 0;
+ 		return avail;
+ 	}
+ 	*((int*)arg) = avail;
+ 	return 0;
+ }
+
+ /*
+  * send()/sendto(): flags and destination address are ignored; the data
+  * is written with writesock().
+  */
+ static int
+ sendto(Socket *sock, void *data, int len, int, uchar *, int)
+ {
+ 	int n;
+
+ 	trace("sendto(%p, %p, %d, ...)", sock, data, len);
+
+ 	n = writesock(sock, data, len, sock->off);
+ 	return n;
+ }
+
+ /*
+  * recv()/recvfrom(): with flag bit 2 set the data is fetched through
+  * readbufproc() with its last two arguments set to 1 (-ENOTCONN when
+  * the socket is not connected); otherwise readsock() is used.  When
+  * addr is given, the address stored in the socket is copied to it;
+  * addrlen is only traced.
+  */
+ static int
+ recvfrom(Socket *sock, void *data, int len, int flags, uchar *addr, int addrlen)
+ {
+ 	int n;
+
+ 	trace("recvfrom(%p, %p, %d, %x, %p, %d)", sock, data, len, flags, addr, addrlen);
+
+ 	if((flags & 2) == 0){
+ 		n = readsock(sock, data, len, sock->off);
+ 	} else {
+ 		if(!sock->connected)
+ 			return -ENOTCONN;
+ 		n = readbufproc(bufprocsock(sock), data, len, 1, 1);
+ 	}
+
+ 	if(addr != nil)
+ 		memmove(addr, sock->addr, sock->naddr);
+ 	return n;
+ }
+
+ /* option level and option numbers looked at by getoptsock() */
+ enum {
+ SOL_SOCKET = 1,
+
+ /* the options count up from 1: SO_REUSEADDR=2, SO_TYPE=3, SO_ERROR=4 */
+ SO_DEBUG = 1,
+ SO_REUSEADDR,
+ SO_TYPE,
+ SO_ERROR,
+ };
+
+ /*
+  * getsockopt(): only SOL_SOCKET/SO_ERROR is implemented; every other
+  * level or option yields -EINVAL.
+  *
+  * sock->error holds the result of the last connect as a negative error
+  * code (see connectproc()), whereas a Linux program expects SO_ERROR to
+  * deliver a positive errno value, so the sign is flipped here.
+  * Returns 0 on success and -EFAULT when ov or ol is nil.
+  */
+ static int
+ getoptsock(Socket *sock, int lvl, int opt, char *ov, int *ol)
+ {
+ 	trace("getoptsock(%p, %d, %d, %p, %p)", sock, lvl, opt, ov, ol);
+
+ 	switch(lvl){
+ 	default:
+ 	Default:
+ 		return -EINVAL;
+
+ 	case SOL_SOCKET:
+ 		switch(opt){
+ 		default:
+ 			goto Default;
+ 		case SO_ERROR:
+ 			if(ov == nil || ol == nil)
+ 				return -EFAULT;
+ 			*ol = sizeof(int);
+ 			*((int*)ov) = -sock->error;
+ 			break;
+ 		}
+ 		break;
+ 	}
+
+ 	return 0;
+ }
+
+ /* sub-call numbers dispatched by sys_linux_socketcall(), counting up from 1 */
+ enum {
+ SYS_SOCKET=1,
+ SYS_BIND,
+ SYS_CONNECT,
+ SYS_LISTEN,
+ SYS_ACCEPT,
+ SYS_GETSOCKNAME,
+ SYS_GETPEERNAME,
+ SYS_SOCKETPAIR,
+ SYS_SEND,
+ SYS_RECV,
+ SYS_SENDTO,
+ SYS_RECVFROM,
+ SYS_SHUTDOWN,
+ SYS_SETSOCKOPT,
+ SYS_GETSOCKOPT,
+ SYS_SENDMSG,
+ SYS_RECVMSG,
+ };
+
+ /*
+  * socketcall(2) multiplexer: call selects the operation, arg points to
+  * its argument words.  SYS_SOCKET and SYS_SOCKETPAIR create sockets; all
+  * other calls look up the socket behind descriptor arg[0] first
+  * (-EBADF when there is none, -ENOTSOCK when it is not a socket).
+  * SYS_SETSOCKOPT is accepted and ignored; SYS_SENDMSG, SYS_RECVMSG and
+  * unknown calls are traced and return -1.
+  */
+ int sys_linux_socketcall(int call, int *arg)
+ {
+ 	Socket *s;
+ 	int r;
+
+ 	trace("sys_linux_socketcall(%d, %p)", call, arg);
+
+ 	/* the creating calls have no existing socket to look up */
+ 	if(call == SYS_SOCKET)
+ 		return newsock(arg[0], arg[1], arg[2]);
+ 	if(call == SYS_SOCKETPAIR)
+ 		return socketpair(arg[0], arg[1], arg[2], (int*)arg[3]);
+
+ 	s = (Socket*)fdgetfile(arg[0]);
+ 	if(s == nil)
+ 		return -EBADF;
+ 	if(s->dev != SOCKDEV){
+ 		putfile(s);
+ 		return -ENOTSOCK;
+ 	}
+
+ 	/* cases follow the order of the SYS_* numbers */
+ 	r = -1;
+ 	switch(call){
+ 	case SYS_BIND:
+ 		r = bindsock(s, (void*)arg[1], arg[2]);
+ 		break;
+ 	case SYS_CONNECT:
+ 		r = connectsock(s, (void*)arg[1], arg[2]);
+ 		break;
+ 	case SYS_LISTEN:
+ 		r = listensock(s);
+ 		break;
+ 	case SYS_ACCEPT:
+ 		r = acceptsock(s, (void*)arg[1], (void*)arg[2]);
+ 		break;
+ 	case SYS_GETSOCKNAME:
+ 		r = getsockname(s, (void*)arg[1], (void*)arg[2]);
+ 		break;
+ 	case SYS_GETPEERNAME:
+ 		r = getpeername(s, (void*)arg[1], (void*)arg[2]);
+ 		break;
+ 	case SYS_SEND:
+ 		r = sendto(s, (void*)arg[1], arg[2], arg[3], nil, 0);
+ 		break;
+ 	case SYS_RECV:
+ 		r = recvfrom(s, (void*)arg[1], arg[2], arg[3], nil, 0);
+ 		break;
+ 	case SYS_SENDTO:
+ 		r = sendto(s, (void*)arg[1], arg[2], arg[3], (void*)arg[4], arg[5]);
+ 		break;
+ 	case SYS_RECVFROM:
+ 		r = recvfrom(s, (void*)arg[1], arg[2], arg[3], (void*)arg[4], arg[5]);
+ 		break;
+ 	case SYS_SHUTDOWN:
+ 		r = shutdownsock(s, arg[1]);
+ 		break;
+ 	case SYS_SETSOCKOPT:
+ 		r = 0;
+ 		break;
+ 	case SYS_GETSOCKOPT:
+ 		r = getoptsock(s, (int)arg[1], (int)arg[2], (char*)arg[3], (int*)arg[4]);
+ 		break;
+ 	case SYS_SENDMSG:
+ 	case SYS_RECVMSG:
+ 	default:
+ 		trace("socketcall(): call %d not implemented", call);
+ 	}
+
+ 	putfile(s);
+
+ 	return r;
+ }
+
+ /*
+  * Fill in the stat fields reported for every socket: a zero sized
+  * S_IFSOCK node with mode 0666 owned by the current process.
+  */
+ static void
+ fillstat(Ustat *s)
+ {
+ 	s->size = 0;
+ 	s->uid = current->uid;
+ 	s->gid = current->gid;
+ 	s->mode = 0666 | S_IFSOCK;
+ }
+
+ /* fstat handler of the socket device; the file itself is not consulted. */
+ static int
+ fstatsock(Ufile *, Ustat *s)
+ {
+ 	fillstat(s);
+ 	return 0;
+ }
+
+ /* operations of the socket device; registered by sockdevinit() */
+ static Udev sockdev =
+ {
+ .read = readsock,
+ .write = writesock,
+ .poll = pollsock,
+ .close = closesock,
+ .ioctl = ioctlsock,
+ .fstat = fstatsock,
+ };
+
+ /* Install the socket device in slot SOCKDEV of the device table. */
+ void sockdevinit(void)
+ {
+ devtab[SOCKDEV] = &sockdev;
+ }
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+ /*
+  * Stat the open file behind descriptor fd into *ps using the fstat
+  * operation of its device.  *ps is cleared before the device is called.
+  * Returns the device's result, -EBADF when fd is not open, or -EPERM
+  * when the device has no fstat operation.
+  */
+ int
+ ufstat(int fd, Ustat *ps)
+ {
+ 	Ufile *f;
+ 	int err;
+
+ 	/*
+ 	 * Return directly for a bad descriptor: there is no file reference
+ 	 * to give back, so putfile() must not be called (the other users of
+ 	 * fdgetfile() in this file do the same).
+ 	 */
+ 	if((f = fdgetfile(fd)) == nil)
+ 		return -EBADF;
+
+ 	err = -EPERM;
+ 	if(devtab[f->dev]->fstat != nil){
+ 		memset(ps, 0, sizeof(Ustat));
+ 		err = devtab[f->dev]->fstat(f, ps);
+ 	}
+ 	putfile(f);
+ 	return err;
+ }
+
+ /*
+  * Layout of the structure filled in for the stat/lstat/fstat calls;
+  * written by ustat2linuxstat().
+  */
+ struct linux_stat {
+ ushort st_dev;
+ ushort __pad1;
+ ulong st_ino;
+ ushort st_mode;
+ ushort st_nlink;
+ ushort st_uid;
+ ushort st_gid;
+ ushort st_rdev;
+ ushort __pad2;
+ ulong st_size;
+ ulong st_blksize;
+ ulong st_blocks;
+ ulong st_atime;
+ ulong __unused1;
+ ulong st_mtime;
+ ulong __unused2;
+ ulong st_ctime;
+ ulong __unused3;
+ ulong __unused4;
+ ulong __unused5;
+ };
+
+ /*
+  * Convert the emulator's Ustat into a struct linux_stat.  The link
+  * count is always 1, the block size is fixed at 4096 and the block
+  * count is the size rounded up to whole blocks.
+  */
+ static void
+ ustat2linuxstat(Ustat *x, struct linux_stat *s)
+ {
+ 	memset(s, 0, sizeof(*s));
+
+ 	/* identity */
+ 	s->st_dev = x->dev;
+ 	s->st_rdev = x->rdev;
+ 	s->st_ino = x->ino;
+ 	s->st_nlink = 1;
+
+ 	/* ownership and permissions */
+ 	s->st_mode = x->mode;
+ 	s->st_uid = x->uid;
+ 	s->st_gid = x->gid;
+
+ 	/* size; st_blksize must be set before st_blocks is derived from it */
+ 	s->st_size = x->size;
+ 	s->st_blksize = 4096;
+ 	s->st_blocks = (x->size+s->st_blksize-1) / s->st_blksize;
+
+ 	/* times */
+ 	s->st_atime = x->atime;
+ 	s->st_mtime = x->mtime;
+ 	s->st_ctime = x->ctime;
+ }
+
+
+ /*
+  * Layout of the structure filled in for the stat64/lstat64/fstat64
+  * calls; written by ustat2linuxstat64().
+  */
+ struct linux_stat64 {
+ uvlong lst_dev;
+ uint __pad1;
+ /* low 32 bits of the inode number; the full value is in lst_ino */
+ uint __lst_ino;
+ uint lst_mode;
+ uint lst_nlink;
+ uint lst_uid;
+ uint lst_gid;
+ uvlong lst_rdev;
+ uint __pad2;
+ vlong lst_size;
+ uint lst_blksize;
+ uvlong lst_blocks;
+ uint lst_atime;
+ uint lst_atime_nsec;
+ uint lst_mtime;
+ uint lst_mtime_nsec;
+ uint lst_ctime;
+ uint lst_ctime_nsec;
+ uvlong lst_ino;
+ };
+
+ /*
+  * Convert the emulator's Ustat into a struct linux_stat64.  The link
+  * count is always 1, the block size is fixed at 4096, the block count is
+  * the size rounded up to whole blocks and the nanosecond fields stay 0.
+  */
+ static void
+ ustat2linuxstat64(Ustat *x, struct linux_stat64 *s)
+ {
+ 	memset(s, 0, sizeof(*s));
+
+ 	/* identity; the inode number is stored in full and truncated */
+ 	s->lst_dev = x->dev;
+ 	s->lst_rdev = x->rdev;
+ 	s->lst_ino = x->ino;
+ 	s->__lst_ino = x->ino & 0xFFFFFFFF;
+ 	s->lst_nlink = 1;
+
+ 	/* ownership and permissions */
+ 	s->lst_mode = x->mode;
+ 	s->lst_uid = x->uid;
+ 	s->lst_gid = x->gid;
+
+ 	/* size; lst_blksize must be set before lst_blocks is derived from it */
+ 	s->lst_size = x->size;
+ 	s->lst_blksize = 4096;	// good as any
+ 	s->lst_blocks = (x->size+s->lst_blksize-1) / s->lst_blksize;
+
+ 	/* times */
+ 	s->lst_atime = x->atime;
+ 	s->lst_mtime = x->mtime;
+ 	s->lst_ctime = x->ctime;
+ }
+
+ /*
+  * stat(): stat path with the second fsstat() argument set to 0 and
+  * store the result as a struct linux_stat in st.
+  * The path is expanded with fsfullpath() first, exactly as the lstat,
+  * stat64 and lstat64 variants in this file do.
+  * Returns the fsstat() result, or -EFAULT when the path cannot be
+  * expanded.
+  */
+ int sys_linux_stat(char *path, void *st)
+ {
+ 	int err;
+ 	Ustat x;
+
+ 	trace("sys_linux_stat(%s, %p)", path, st);
+
+ 	if((path = fsfullpath(path)) == nil)
+ 		return -EFAULT;
+ 	err = fsstat(path, 0, &x);
+ 	free(path);
+
+ 	if(err < 0)
+ 		return err;
+ 	ustat2linuxstat(&x, (struct linux_stat*)st);
+ 	return err;
+ }
+
+ /*
+  * lstat(): like stat() but calls fsstat() with its second argument set
+  * to 1.  The result is stored as a struct linux_stat in st.
+  * Returns the fsstat() result, or -EFAULT when the path cannot be
+  * expanded.
+  */
+ int sys_linux_lstat(char *path, void *st)
+ {
+ 	char *full;
+ 	Ustat x;
+ 	int err;
+
+ 	trace("sys_linux_lstat(%s, %p)", path, st);
+
+ 	full = fsfullpath(path);
+ 	if(full == nil)
+ 		return -EFAULT;
+ 	err = fsstat(full, 1, &x);
+ 	free(full);
+
+ 	if(err >= 0)
+ 		ustat2linuxstat(&x, (struct linux_stat*)st);
+ 	return err;
+ }
+
+ /*
+  * stat64(): stat path with the second fsstat() argument set to 0 and
+  * store the result as a struct linux_stat64 in st.
+  * Returns the fsstat() result, or -EFAULT when the path cannot be
+  * expanded.
+  */
+ int sys_linux_stat64(char *path, void *st)
+ {
+ 	char *full;
+ 	Ustat x;
+ 	int err;
+
+ 	trace("sys_linux_stat64(%s, %p)", path, st);
+
+ 	full = fsfullpath(path);
+ 	if(full == nil)
+ 		return -EFAULT;
+ 	err = fsstat(full, 0, &x);
+ 	free(full);
+
+ 	if(err >= 0)
+ 		ustat2linuxstat64(&x, (struct linux_stat64*)st);
+ 	return err;
+ }
+
+ /*
+  * lstat64(): like stat64() but calls fsstat() with its second argument
+  * set to 1.  The result is stored as a struct linux_stat64 in st.
+  * Returns the fsstat() result, or -EFAULT when the path cannot be
+  * expanded.
+  */
+ int sys_linux_lstat64(char *path, void *st)
+ {
+ 	char *full;
+ 	Ustat x;
+ 	int err;
+
+ 	trace("sys_linux_lstat64(%s, %p)", path, st);
+
+ 	full = fsfullpath(path);
+ 	if(full == nil)
+ 		return -EFAULT;
+ 	err = fsstat(full, 1, &x);
+ 	free(full);
+
+ 	if(err >= 0)
+ 		ustat2linuxstat64(&x, (struct linux_stat64*)st);
+ 	return err;
+ }
+
+ /*
+  * fstat(): stat the open descriptor fd with ufstat() and store the
+  * result as a struct linux_stat in st.  Returns the ufstat() result.
+  */
+ int sys_linux_fstat(int fd, void *st)
+ {
+ 	Ustat x;
+ 	int err;
+
+ 	trace("sys_linux_fstat(%d, %p)", fd, st);
+
+ 	err = ufstat(fd, &x);
+ 	if(err >= 0)
+ 		ustat2linuxstat(&x, (struct linux_stat*)st);
+ 	return err;
+ }
+
+ /*
+  * fstat64(): stat the open descriptor fd with ufstat() and store the
+  * result as a struct linux_stat64 in st.  Returns the ufstat() result.
+  */
+ int sys_linux_fstat64(int fd, void *st)
+ {
+ 	Ustat x;
+ 	int err;
+
+ 	trace("sys_linux_fstat64(%d, %p)", fd, st);
+
+ 	err = ufstat(fd, &x);
+ 	if(err >= 0)
+ 		ustat2linuxstat64(&x, (struct linux_stat64*)st);
+ 	return err;
+ }
+
+ /*
+  * Common implementation of getdents/getdents64.
+  * The directory listing is kept as a linked list in f->rdaux and is
+  * (re)read through the device's readdir operation when there is no list
+  * yet or the file offset is 0.  fconv converts one entry; called with a
+  * nil destination it only reports the entry's encoded size (0 when it
+  * does not fit in the space that is left).  f->off counts encoded bytes,
+  * so entries lying before f->off have already been returned and are
+  * skipped.
+  * Returns the number of bytes stored in buf, -EBADF for a bad
+  * descriptor, or the readdir result when that is <= 0.
+  */
+ static int
+ getdents(int fd, void *buf, int len, int (*fconv)(Udirent *, void *, int, int))
+ {
+ Ufile *f;
+ Udirent *t, *x;
+ uchar *p, *e;
+ int o, r, err;
+
+ if((f = fdgetfile(fd)) == nil)
+ return -EBADF;
+ o = 0;
+ p = buf;
+ e = p + len;
+ t = f->rdaux;
+ if(t == nil || f->off == 0){
+ /* throw away the old listing, if any, and read a fresh one */
+ f->rdaux = nil;
+ while(x = t){
+ t = t->next;
+ free(x);
+ }
+ if((err = devtab[f->dev]->readdir(f, &t)) <= 0){
+ putfile(f);
+ return err;
+ }
+ f->rdaux = t;
+ }
+ /* o is the encoded offset of the entry t within the whole listing */
+ for(; t; t=t->next){
+ /* just calculate size */
+ r = fconv(t, nil, 0, e - p);
+ if(r <= 0)
+ break;
+ if(o >= f->off){
+ /* convert */
+ f->off = o + r;
+ /* the offset handed to fconv is 0 for the last entry of the list */
+ r = fconv(t, p, t->next ? f->off : 0, e - p);
+ p += r;
+ }
+ o += r;
+ }
+ putfile(f);
+ return p - (uchar*)buf;
+ }
+
+ /*
+  * Allocate a directory entry for name with the given mode.  The name is
+  * stored inline after the structure and the inode number is the hash of
+  * the path built from path and name.
+  */
+ Udirent*
+ newdirent(char *path, char *name, int mode)
+ {
+ 	Udirent *ent;
+ 	char *full;
+ 	int len;
+
+ 	len = strlen(name);
+ 	ent = kmallocz(sizeof(*ent) + len + 1, 1);
+ 	strcpy(ent->name, name);
+ 	ent->mode = mode;
+
+ 	/* the full path is only needed to derive the inode number */
+ 	full = allocpath(path, nil, ent->name);
+ 	ent->ino = hashpath(full);
+ 	free(full);
+
+ 	return ent;
+ }
+
+ /* record written by udirent2linux(); d_name is stored inline, NUL terminated */
+ struct linux_dirent {
+ long d_ino;
+ long d_off;
+ /* total size of this record including the name */
+ ushort d_reclen;
+ char d_name[];
+ };
+
+ /*
+  * Encode u as a struct linux_dirent at d.  Returns the record size, or
+  * 0 when it does not fit into left bytes.  With d == nil only the size
+  * is computed and nothing is written.
+  */
+ static int
+ udirent2linux(Udirent *u, void *d, int off, int left)
+ {
+ 	struct linux_dirent *rec = d;
+ 	int size;
+
+ 	size = sizeof(*rec) + strlen(u->name) + 1;
+ 	if(size > left)
+ 		return 0;
+ 	if(rec == nil)
+ 		return size;
+
+ 	rec->d_ino = u->ino & 0xFFFFFFFF;
+ 	rec->d_off = off;
+ 	rec->d_reclen = size;
+ 	strcpy(rec->d_name, u->name);
+ 	return size;
+ }
+
+ /* record written by udirent2linux64(); d_name is stored inline, NUL terminated */
+ struct linux_dirent64 {
+ uvlong d_ino;
+ vlong d_off;
+ /* total size of this record including the name */
+ ushort d_reclen;
+ /* bits 12..15 of the entry's mode */
+ uchar d_type;
+ char d_name[];
+ };
+
+ /*
+  * Encode u as a struct linux_dirent64 at d.  Returns the record size,
+  * or 0 when it does not fit into left bytes.  With d == nil only the
+  * size is computed and nothing is written.
+  */
+ static int
+ udirent2linux64(Udirent *u, void *d, int off, int left)
+ {
+ 	struct linux_dirent64 *rec = d;
+ 	int size;
+
+ 	size = sizeof(*rec) + strlen(u->name) + 1;
+ 	if(size > left)
+ 		return 0;
+ 	if(rec == nil)
+ 		return size;
+
+ 	rec->d_ino = u->ino;
+ 	rec->d_off = off;
+ 	rec->d_reclen = size;
+ 	/* the file type is taken from the top four bits of the 16-bit mode */
+ 	rec->d_type = (u->mode>>12)&15;
+ 	strcpy(rec->d_name, u->name);
+ 	return size;
+ }
+
+ /* getdents(): read directory entries of fd into buf as struct linux_dirent records. */
+ int sys_linux_getdents(int fd, void *buf, int nbuf)
+ {
+ trace("sys_linux_getdents(%d, %p, %x)", fd, buf, nbuf);
+
+ return getdents(fd, buf, nbuf, udirent2linux);
+ }
+
+ /* getdents64(): read directory entries of fd into buf as struct linux_dirent64 records. */
+ int sys_linux_getdents64(int fd, void *buf, int nbuf)
+ {
+ trace("sys_linux_getdents64(%d, %p, %x)", fd, buf, nbuf);
+
+ return getdents(fd, buf, nbuf, udirent2linux64);
+ }
+
+ /* structure filled in by sys_statfs() */
+ struct linux_statfs {
+ long f_type;
+ long f_bsize;
+ long f_blocks;
+ long f_bfree;
+ long f_bavail;
+ long f_files;
+ long f_ffree;
+ long f_fsid[2];
+ long f_namelen;
+ long f_frsize;
+ long f_spare[5];
+ };
+
+ /*
+  * statfs(): report fixed, made-up file system statistics.  Names that
+  * start with "/dev/pts" get f_type 0x1cd1; everything else gets the
+  * bytes "PLN9" as type and "PLAN9_FS" as file system id.
+  * Returns 0, or -EINVAL when either argument is nil.
+  */
+ int sys_statfs(char *name, void *pstatfs)
+ {
+ 	struct linux_statfs *st = pstatfs;
+
+ 	trace("sys_statfs(%s, %p)", name, st);
+
+ 	if(st == nil || name == nil)
+ 		return -EINVAL;
+
+ 	memset(st, 0, sizeof(*st));
+
+ 	/* the same numbers are reported for every path */
+ 	st->f_bsize = 4096;
+ 	st->f_namelen = 256;
+ 	st->f_blocks = 0x80000000;
+ 	st->f_bfree = 0x80000000;
+ 	st->f_bavail = st->f_bfree;
+ 	st->f_ffree = 0x40000000;
+ 	st->f_files = st->f_ffree;
+
+ 	if(strncmp(name, "/dev/pts", 8) == 0){
+ 		st->f_type = 0x1cd1;
+ 	} else {
+ 		memmove(&st->f_type, "PLN9", 4);
+ 		memmove(st->f_fsid, "PLAN9_FS", 8);
+ 	}
+
+ 	return 0;
+ }
+
+ /* getxattr(): extended attributes are not supported; always -EOPNOTSUPP. */
+ int
+ sys_getxattr(char *path, char *name, void *value, int size)
+ {
+ trace("sys_getxattr(%s, %s, %p, %x)", path, name, value, size);
+
+ return -EOPNOTSUPP;
+ }
+
+ /* lgetxattr(): extended attributes are not supported; always -EOPNOTSUPP. */
+ int
+ sys_lgetxattr(char *path, char *name, void *value, int size)
+ {
+ trace("sys_lgetxattr(%s, %s, %p, %x)", path, name, value, size);
+
+ return -EOPNOTSUPP;
+ }
+
+ /*
+  * fgetxattr(): extended attributes are not supported.  Returns -EBADF
+  * when fd is not open and -EOPNOTSUPP otherwise.
+  */
+ int
+ sys_fgetxattr(int fd, char *name, void *value, int size)
+ {
+ 	Ufile *f;
+
+ 	trace("sys_fgetxattr(%d, %s, %p, %x)", fd, name, value, size);
+
+ 	/* the descriptor is only looked up to validate it */
+ 	f = fdgetfile(fd);
+ 	if(f == nil)
+ 		return -EBADF;
+ 	putfile(f);
+
+ 	return -EOPNOTSUPP;
+ }
+
+ /* setxattr(): extended attributes are not supported; always -EOPNOTSUPP. */
+ int
+ sys_setxattr(char *path, char *name, void *value, int flags, int size)
+ {
+ trace("sys_setxattr(%s, %s, %p, %x, %x)", path, name, value, flags, size);
+
+ return -EOPNOTSUPP;
+ }
+
+ /* lsetxattr(): extended attributes are not supported; always -EOPNOTSUPP. */
+ int
+ sys_lsetxattr(char *path, char *name, void *value, int flags, int size)
+ {
+ trace("sys_lsetxattr(%s, %s, %p, %x, %x)", path, name, value, flags, size);
+
+ return -EOPNOTSUPP;
+ }
+
+ /*
+  * fsetxattr(): extended attributes are not supported.  Returns -EBADF
+  * when fd is not open and -EOPNOTSUPP otherwise.
+  */
+ int
+ sys_fsetxattr(int fd, char *name, void *value, int size, int flags)
+ {
+ 	Ufile *f;
+ 	int err;
+
+ 	/* trace the arguments in the order in which they are declared */
+ 	trace("sys_fsetxattr(%d, %s, %p, %x, %x)", fd, name, value, size, flags);
+
+ 	if((f = fdgetfile(fd)) == nil)
+ 		return -EBADF;
+ 	err = -EOPNOTSUPP;
+ 	putfile(f);
+ 	return err;
+ }
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include <tos.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+ /* time zone structure returned by sys_gettimeofday() */
+ struct linux_timezone
+ {
+ int tz_minuteswest;
+ int tz_dsttime;
+ };
+
+ /* time zone computed once by inittime() */
+ static struct linux_timezone systz;
+
+ /*
+  * Record the start time of the emulator in boottime and derive the time
+  * zone handed out by sys_gettimeofday() from the local time zone.
+  */
+ void
+ inittime(void)
+ {
+ 	Tm *t;
+
+ 	boottime = nsec();
+
+ 	systz.tz_minuteswest = 0;
+ 	systz.tz_dsttime = 0;
+
+ 	/*
+ 	 * Tm.tzoff is the offset of local time from GMT in seconds and is
+ 	 * negative west of Greenwich, while tz_minuteswest counts minutes
+ 	 * west of Greenwich as positive: the sign has to be flipped.
+ 	 */
+ 	if(t = localtime(time(nil)))
+ 		systz.tz_minuteswest = -t->tzoff / 60;
+ }
+
+ /* time(): forwards p to the native time() and returns its result. */
+ int sys_time(long *p)
+ {
+ return time(p);
+ }
+
+ /*
+  * clock_gettime(): store the value of nsec() split into seconds and
+  * nanoseconds in *t.  The clock argument is only traced; every clock id
+  * gets the same time.  Always returns 0.
+  */
+ int sys_clock_gettime(int clock, void *t)
+ {
+ 	struct linux_timespec *out;
+ 	vlong ns;
+
+ 	trace("sys_clock_gettime(%d, %p)", clock, t);
+
+ 	out = t;
+ 	ns = nsec();
+ 	out->tv_sec = (long)(ns/1000000000LL);
+ 	out->tv_nsec = (long)(ns%1000000000LL);
+ 	return 0;
+ }
+
+ /*
+  * gettimeofday(): store the value of nsec() as seconds and microseconds
+  * in *tvp and the time zone computed by inittime() in *tzp.  Either
+  * pointer may be nil, in which case that part is skipped.
+  * Always returns 0.
+  */
+ int sys_gettimeofday(void *tvp, void *tzp)
+ {
+ 	struct linux_timeval *tv = tvp;
+ 	struct linux_timezone *tz = tzp;
+ 	vlong t;
+
+ 	trace("sys_gettimeofday(%p, %p)", tvp, tzp);
+
+ 	/* callers may legitimately ask for the time zone only */
+ 	if(tv != nil){
+ 		t = nsec();
+ 		tv->tv_sec = (long)(t/1000000000LL);
+ 		tv->tv_usec = (long)((t%1000000000LL)/1000);
+ 	}
+
+ 	if(tz != nil)
+ 		*tz = systz;
+
+ 	return 0;
+ }
+
+ /*
+  * nanosleep(): sleep until the requested interval has passed.
+  * The deadline is an absolute nsec() value; when the call is being
+  * restarted (current->restart->syscall is set) the deadline saved by the
+  * interrupted call is reused instead of computing a new one.
+  * Returns 0 when the deadline has passed, -EFAULT for a nil request,
+  * -EINVAL for an invalid request, and -ERESTART when the sleep was
+  * interrupted early (the remaining time is stored in *rmp if given).
+  */
+ int sys_nanosleep(void *rqp, void *rmp)
+ {
+ struct linux_timespec *req = rqp;
+ struct linux_timespec *rem = rmp;
+ vlong t, now;
+ int err;
+
+ trace("sys_nanosleep(%p, %p)", rqp, rmp);
+
+ if(req == nil)
+ return -EFAULT;
+ if(req->tv_sec < 0 || req->tv_nsec < 0 || req->tv_nsec >= 1000000000LL)
+ return -EINVAL;
+
+ now = nsec();
+ if(current->restart->syscall){
+ /* restarted call: continue towards the deadline saved earlier */
+ t = current->restart->nanosleep.timeout;
+ } else {
+ t = now + req->tv_sec*1000000000LL + req->tv_nsec;
+ }
+
+ if(now < t){
+ if(notifyme(1))
+ err = -1;
+ else {
+ /* the remaining nanoseconds are divided down to milliseconds for sleep() */
+ err = sleep((t - now) / 1000000LL);
+ notifyme(0);
+ }
+ if(err < 0){
+ now = nsec();
+ if(now < t){
+ /* interrupted before the deadline: save it for the restart */
+ current->restart->nanosleep.timeout = t;
+ if(rem != nil){
+ t -= now;
+ rem->tv_sec = (long)(t/1000000000LL);
+ rem->tv_nsec = (long)(t%1000000000LL);
+ }
+ return -ERESTART;
+ }
+ }
+ }
+
+ return 0;
+ }
+
+ /*
+  * Read /proc/<kpid>/status of p and store four times from it in
+  * t[0..3].  The values are taken from fields 2..5 of the status line,
+  * treated as milliseconds and converted to clock ticks (HZ per second).
+  * t[] is zeroed first, so it is well defined on failure.
+  * Returns 0, the value of mkerror() when the file cannot be opened or
+  * read, or -EIO when it is empty or has fewer than 12 fields.
+  */
+ int proctimes(Uproc *p, ulong *t)
+ {
+ 	char buf[1024], *f[12];
+ 	int fd, n, i;
+
+ 	t[0] = t[1] = t[2] = t[3] = 0;
+ 	snprint(buf, sizeof(buf), "/proc/%d/status", p->kpid);
+ 	if((fd = open(buf, OREAD)) < 0)
+ 		return mkerror();
+ 	n = read(fd, buf, sizeof(buf)-1);
+ 	if(n < 0){
+ 		close(fd);
+ 		return mkerror();
+ 	}
+ 	close(fd);
+ 	/* an empty read is not a system call failure: no error string to convert */
+ 	if(n == 0)
+ 		return -EIO;
+ 	buf[n] = 0;
+ 	if(getfields(buf, f, 12, 1, "\t ") != 12)
+ 		return -EIO;
+ 	/*
+ 	 * multiply in 64 bits: milliseconds times HZ overflows an int after
+ 	 * only a few hours of accumulated time.
+ 	 */
+ 	for(i = 0; i < 4; i++)
+ 		t[i] = (ulong)((vlong)atoi(f[2+i])*HZ / 1000);
+ 	return 0;
+ }
+
+ /* structure filled in by sys_times() from the four values of proctimes() */
+ struct linux_tms
+ {
+ long tms_utime;
+ long tms_stime;
+ long tms_cutime;
+ long tms_cstime;
+ };
+
+ /*
+  * times(): when m is not nil, fill it with the four values reported by
+  * proctimes() for the current process.  Returns the number of clock
+  * ticks (HZ per second) since inittime() ran, or the proctimes() error.
+  */
+ int sys_times(void *m)
+ {
+ 	struct linux_tms *out = m;
+ 	ulong ticks[4];
+ 	int rc;
+
+ 	trace("sys_times(%p)", m);
+
+ 	if(out != nil){
+ 		rc = proctimes(current, ticks);
+ 		if(rc < 0)
+ 			return rc;
+ 		out->tms_utime = ticks[0];
+ 		out->tms_stime = ticks[1];
+ 		out->tms_cutime = ticks[2];
+ 		out->tms_cstime = ticks[3];
+ 	}
+ 	return (HZ*(nsec() - boottime)) / 1000000000LL;
+ }
\ No newline at end of file
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+enum { /* positions of the space-separated fields in one /dev/gdt line */
+ Index, /* descriptor index, read and written in hex */
+ Type, /* "code" or "data" */
+ Flags, /* flag letters (P G U C D R E B W), or "-" when none apply */
+ DPL, /* always written as 3 here; presumably the descriptor privilege level */
+ Base, /* segment base address, hex */
+ Limit, /* segment limit, hex */
+ Nfields, /* number of fields expected per line; sizes the getfields() array */
+};
+
+/*
+ * Return 1 if *info holds exactly the values that cleardesc() writes
+ * (zero base, limit and contents, read_exec_only and seg_not_present
+ * set, every other flag clear), 0 otherwise.
+ */
+static int
+descempty(struct linux_user_desc *info)
+{
+	if(info->base_addr != 0 || info->limit != 0 || info->contents != 0)
+		return 0;
+	if(info->read_exec_only != 1 || info->seg_not_present != 1)
+		return 0;
+	if(info->seg_32bit != 0 || info->limit_in_pages != 0 || info->useable != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * set_thread_area emulation on top of /dev/gdt.
+ *
+ * pinfo points to a struct linux_user_desc.  If entry_number is -1,
+ * the /dev/gdt listing is scanned for the first index whose bit is
+ * clear in current->tlsmask and that index is used; otherwise the
+ * given index is used as is.  The descriptor is then written to
+ * /dev/gdt as one text line (index, type, flags, 3, base, limit).
+ * On success the chosen index is stored back in info->entry_number
+ * and the matching bit of current->tlsmask is set, or cleared when
+ * the descriptor is empty (see descempty()).
+ *
+ * Returns 0 on success, -EFAULT for a nil pinfo, -ENOSYS if /dev/gdt
+ * cannot be opened, -ESRCH if no free entry is found (or the listing
+ * cannot be read or parsed), -EINVAL if the index is out of range or
+ * the write fails.
+ */
+int sys_set_thread_area(void *pinfo)
+{
+	struct linux_user_desc *info = pinfo;
+	char buf[1024];
+	char *p, *e, *f[Nfields];
+	int n, fd, idx, err;
+
+	trace("sys_set_thread_area(%p)", pinfo);
+
+	/* reject a nil descriptor instead of faulting on it below */
+	if(info == nil)
+		return -EFAULT;
+
+	err = -ENOSYS;
+	if((fd = open("/dev/gdt", ORDWR)) < 0)
+		goto out;
+
+	idx = info->entry_number;
+	if(idx == -1){
+		/* caller wants us to pick a free entry */
+		err = -ESRCH;
+		if((n = read(fd, buf, sizeof(buf)-1)) <= 0)
+			goto out;
+		buf[n] = 0;
+		p = buf;
+		while((e = strchr(p, '\n')) != nil){
+			*e = 0;
+			if(getfields(p, f, nelem(f), 1, " ") != nelem(f))
+				goto out;
+			idx = strtoul(f[Index], nil, 16);
+			/* indices beyond the mask cannot be tracked: give up */
+			if(idx >= 8*sizeof(current->tlsmask))
+				break;
+			if((current->tlsmask & (1<<idx)) == 0)
+				goto found;
+			p = e+1;
+		}
+		goto out;
+	}
+
+found:
+	err = -EINVAL;
+	if(idx < 0 || idx >= 8*sizeof(current->tlsmask))
+		goto out;
+
+	/* build the flag letters; buf is reused as scratch space */
+	buf[0] = 0;
+	if(!info->seg_not_present)
+		strcat(buf, "P");
+	if(info->limit_in_pages)
+		strcat(buf, "G");
+	if(info->useable)
+		strcat(buf, "U");
+	if(info->contents & 2){
+		/* code segment */
+		if(info->contents & 1)
+			strcat(buf, "C");
+		if(info->seg_32bit)
+			strcat(buf, "D");
+		if(!info->read_exec_only)
+			strcat(buf, "R");
+		if(buf[0] == 0)
+			strcat(buf, "-");
+
+		if(fprint(fd, "%x code %s 3 %lux %lux\n",
+			idx, buf, (ulong)info->base_addr, (ulong)info->limit) < 0)
+			goto out;
+	} else {
+		/* data segment */
+		if(info->contents & 1)
+			strcat(buf, "E");
+		if(info->seg_32bit)
+			strcat(buf, "B");
+		if(!info->read_exec_only)
+			strcat(buf, "W");
+		if(buf[0] == 0)
+			strcat(buf, "-");
+
+		if(fprint(fd, "%x data %s 3 %lux %lux\n",
+			idx, buf, (ulong)info->base_addr, (ulong)info->limit) < 0)
+			goto out;
+	}
+
+	err = 0;
+	info->entry_number = idx;
+	/* remember which entries are in use; an empty descriptor frees one */
+	if(!descempty(info)){
+		current->tlsmask |= 1<<idx;
+	} else {
+		current->tlsmask &= ~(1<<idx);
+	}
+
+out:
+	if(fd >= 0)
+		close(fd);
+	return err;
+}
+
+/*
+ * get_thread_area emulation on top of /dev/gdt.
+ *
+ * Scans the /dev/gdt listing for the line whose index equals
+ * info->entry_number and decodes its type, flag letters, base and
+ * limit back into *info.
+ *
+ * Returns 0 on success, -EFAULT for a nil pinfo, -ENOSYS if /dev/gdt
+ * cannot be opened, -EINVAL if the listing cannot be read or parsed
+ * or no matching entry is found.
+ */
+int sys_get_thread_area(void *pinfo)
+{
+	struct linux_user_desc *info = pinfo;
+	int err, n, fd, idx;
+	char buf[1024];
+	char *p, *e, *f[Nfields];
+
+	trace("sys_get_thread_area(%p)", pinfo);
+
+	/* reject a nil descriptor instead of faulting on it below */
+	if(info == nil)
+		return -EFAULT;
+
+	err = -ENOSYS;
+	if((fd = open("/dev/gdt", OREAD)) < 0)
+		goto out;
+
+	err = -EINVAL;
+	if((n = read(fd, buf, sizeof(buf)-1)) <= 0)
+		goto out;
+	buf[n] = 0;
+	p = buf;
+	while((e = strchr(p, '\n')) != nil){
+		*e = 0;
+		if(getfields(p, f, nelem(f), 1, " ") != nelem(f))
+			goto out;
+		idx = strtoul(f[Index], nil, 16);
+		/* stop at the first index beyond what tlsmask can track */
+		if(idx >= 8*sizeof(current->tlsmask))
+			break;
+		if(idx == info->entry_number)
+			goto found;
+		p = e+1;
+	}
+	goto out;
+
+found:
+	/* start from the "empty" settings, then apply what the line says */
+	info->contents = 0;
+	if(strcmp(f[Type], "code") == 0)
+		info->contents |= 2;
+	info->seg_not_present = 1;
+	info->limit_in_pages = 0;
+	info->seg_32bit = 0;
+	info->read_exec_only = 1;
+	info->useable = 0;
+	for(p = f[Flags]; *p; p++){
+		switch(*p){
+		case 'P':
+			info->seg_not_present = 0;
+			break;
+		case 'G':
+			info->limit_in_pages = 1;
+			break;
+		case 'B':
+		case 'D':
+			info->seg_32bit = 1;
+			break;
+		case 'W':
+		case 'R':
+			info->read_exec_only = 0;
+			break;
+		case 'U':
+			info->useable = 1;
+			break;
+		case 'E':
+		case 'C':
+			info->contents |= 1;
+			break;
+		}
+	}
+
+	info->base_addr = strtoul(f[Base], nil, 16);
+	info->limit = strtoul(f[Limit], nil, 16);
+
+	err = 0;
+
+out:
+	if(fd >= 0)
+		close(fd);
+	return err;
+}
+
+/*
+ * Put *info into the state descempty() recognises as empty.
+ * entry_number is left untouched.
+ */
+static void
+cleardesc(struct linux_user_desc *info)
+{
+	/* the two flags that are set in an empty descriptor */
+	info->read_exec_only = 1;
+	info->seg_not_present = 1;
+
+	/* everything else is zero */
+	info->base_addr = 0;
+	info->limit = 0;
+	info->contents = 0;
+	info->seg_32bit = 0;
+	info->limit_in_pages = 0;
+	info->useable = 0;
+}
+
+/*
+ * Release every descriptor recorded in current->tlsmask by writing an
+ * empty descriptor to it through sys_set_thread_area(), then reset the
+ * mask.  The result of sys_set_thread_area() is not checked.
+ */
+void inittls(void)
+{
+	struct linux_user_desc empty;
+	int slot;
+
+	for(slot=0; slot<8*sizeof(current->tlsmask); slot++){
+		if(current->tlsmask & (1 << slot)){
+			cleardesc(&empty);
+			empty.entry_number = slot;
+			sys_set_thread_area(&empty);
+		}
+	}
+	current->tlsmask = 0;
+}
+
+/*
+ * Give the new process the same set of in-use descriptor bits as the
+ * current one.
+ */
+void
+clonetls(Uproc *new)
+{
+	new->tlsmask = current->tlsmask;
+}
+
+/*
+ * modify_ldt is not implemented: the call is traced and always fails
+ * with -ENOSYS.
+ */
+int
+sys_modify_ldt(int func, void *data, int count)
+{
+	trace("sys_modify_ldt(%d, %p, %x)", func, data, count);
+	return -ENOSYS;
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+
+#undef trace
+
+static char magic[] = "TRACEBUF"; /* marker copied into every Tracebuf and verified by checktrace() */
+
+typedef struct Tracebuf Tracebuf;
+struct Tracebuf /* ring of formatted trace lines, hung off a Uproc's trace pointer */
+{
+ char magic[8]; /* holds the 8 characters of magic[], without a terminating NUL */
+ int wp; /* lines written so far; vputtrace() uses wp modulo the slot count as the next slot */
+ char lines[256][80]; /* 256 slots of 80 bytes each, filled by vsnprint() */
+};
+
+/*
+ * Allocate a trace buffer with kmallocz() and stamp it with the magic
+ * marker that checktrace() looks for.
+ */
+static void*
+alloctrace(void)
+{
+	Tracebuf *tb;
+
+	tb = kmallocz(sizeof(Tracebuf), 1);
+	memmove(tb->magic, magic, sizeof(tb->magic));
+	return tb;
+}
+
+/*
+ * Panic unless the buffer still starts with the magic marker.
+ */
+static void
+checktrace(Tracebuf *t)
+{
+	if(memcmp(t->magic, magic, sizeof(t->magic)) == 0)
+		return;
+	panic("tracebuffer corrupted");
+}
+
+/*
+ * Release a trace buffer; nil is accepted and ignored.  The buffer is
+ * verified first and wiped (marker included) before it is freed.
+ */
+static void
+freetrace(Tracebuf *t)
+{
+	if(t != nil){
+		checktrace(t);
+		memset(t, 0, sizeof(Tracebuf));
+		free(t);
+	}
+}
+
+/*
+ * Format one message into the next slot of the ring and advance the
+ * write position.  With debug > 1 the line is also printed on fd 2,
+ * prefixed with the tid of the current process (0 if there is none).
+ */
+static void
+vputtrace(Tracebuf *t, char *fmt, va_list a)
+{
+	char *line;
+	int tid;
+
+	checktrace(t);
+	line = t->lines[t->wp++ % nelem(t->lines)];
+	vsnprint(line, sizeof(t->lines[0]), fmt, a);
+	if(debug <= 1)
+		return;
+
+	if(current != nil)
+		tid = current->tid;
+	else
+		tid = 0;
+	fprint(2, "%d\t%s\n", tid, line);
+}
+
+/*
+ * Give the current process a fresh trace buffer, but only when
+ * debugging is enabled (debug > 0).
+ */
+void inittrace(void)
+{
+	if(debug <= 0)
+		return;
+	current->trace = alloctrace();
+}
+
+/*
+ * Detach and free the trace buffer of proc, if it has one.  The
+ * pointer is cleared before the buffer is released.
+ */
+void exittrace(Uproc *proc)
+{
+	Tracebuf *tb;
+
+	tb = proc->trace;
+	if(tb == nil)
+		return;
+	proc->trace = nil;
+	freetrace(tb);
+}
+
+/*
+ * Set up tracing for a new process.  If the current process has no
+ * trace buffer, neither does the new one.  Otherwise the new process
+ * gets a byte-for-byte duplicate of the current buffer when copy is
+ * nonzero, or a fresh empty buffer when copy is zero.
+ */
+void clonetrace(Uproc *new, int copy)
+{
+	Tracebuf *src, *dup;
+
+	src = current->trace;
+	if(src == nil){
+		new->trace = nil;
+		return;
+	}
+
+	if(!copy){
+		new->trace = alloctrace();
+		return;
+	}
+
+	dup = kmalloc(sizeof(*src));
+	memmove(dup, src, sizeof(*src));
+	new->trace = dup;
+}
+
+/*
+ * printf-style entry point for tracing: appends the formatted message
+ * to the trace buffer of the current process.  Does nothing when there
+ * is no current process or it has no trace buffer.
+ */
+void tprint(char *fmt, ...)
+{
+	va_list args;
+	Uproc *up;
+
+	up = current;
+	if(up == nil || up->trace == nil)
+		return;
+
+	va_start(args, fmt);
+	vputtrace((Tracebuf*)up->trace, fmt, args);
+	va_end(args);
+}
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+#include "dat.h"
+#include "fns.h"
+#include "linux.h"
+
+#pragma profile off
+
+void /* retuser: resume the user context saved in current->ureg.
+ * The saved pointer is taken and current->ureg is cleared.  Outside a
+ * note handler (innote == 0) the registers are loaded with jumpureg();
+ * inside one, innote is cleared and noted(NCONT) is called.
+ * NOTE(review): jumpureg() presumably does not return -- confirm.
+ */
+retuser(void)
+{
+ Uproc *p;
+ Ureg *u;
+
+ p = current;
+ u = p->ureg;
+ p->ureg = nil; /* the saved registers are consumed exactly once */
+ if(p->innote == 0)
+ jumpureg(u); /* not inside a note handler: load the registers directly */
+ p->innote = 0;
+ noted(NCONT); /* inside a note handler: leave it via noted() */
+}
+
+static void /* handletrap: note handler installed by inittrap() via notify().
+ * v is stored as the process's saved registers and m is the note text.
+ * The note is matched by prefix and turned into an emulated syscall,
+ * a signal for the current process, or process termination; execution
+ * then continues through handlesignals() and retuser().
+ */
+handletrap(void *v, char *m)
+{
+ Uproc *p;
+ Usiginfo si;
+
+ p = current;
+ p->innote = 1; /* tells retuser() to leave through noted() */
+ p->ureg = v;
+
+ if(strncmp(m, "interrupt", 9) == 0){
+ if(p->notified){
+ p->notified = 0; /* flag was set: just clear it, no signal is sent */
+ } else {
+ memset(&si, 0, sizeof(si));
+ si.signo = SIGINT;
+ sendsignal(p, &si, 0);
+ }
+ goto handled;
+ }
+
+ if(p->traceproc) /* a trace hook is set: skip decoding and call it */
+ goto traced;
+
+ if(strncmp(m, "sys: trap: general protection violation", 39) == 0)
+ if(linuxcall() == 0) /* NOTE(review): presumably how Linux INT 0x80 syscalls arrive -- confirm */
+ goto handled;
+
+ if(strncmp(m, "sys: write on closed pipe", 25) == 0) /* ignored: no signal is sent here */
+ goto handled;
+
+ if(strncmp(m, "sys: trap: invalid opcode", 25) == 0){
+ memset(&si, 0, sizeof(si));
+ si.signo = SIGILL;
+ si.code = ILL_ILLOPC;
+ si.fault.addr = (void*)p->ureg->pc; /* report the saved pc as fault address */
+ sendsignal(p, &si, 0);
+ goto handled;
+ }
+
+ if(strncmp(m, "sys: trap: divide error", 23) == 0){
+ memset(&si, 0, sizeof(si));
+ si.signo = SIGFPE;
+ si.code = FPE_INTDIV;
+ si.fault.addr = (void*)p->ureg->pc;
+ sendsignal(p, &si, 0);
+ goto handled;
+ }
+
+ if(strncmp(m, "sys: trap: overflow", 19) == 0){
+ memset(&si, 0, sizeof(si));
+ si.signo = SIGFPE;
+ si.code = FPE_INTOVF;
+ si.fault.addr = (void*)p->ureg->pc;
+ sendsignal(p, &si, 0);
+ goto handled;
+ }
+
+ trace("handletrap: %s", m); /* anything else is an unhandled note */
+ if(debug)
+ noted(NDFLT); /* when debugging, hand the note to the default handling */
+
+ exitproc(p, SIGKILL, 1);
+
+handled:
+ if(p->traceproc) /* can only be set here on the "interrupt" path, which jumps past the earlier check */
+traced: p->traceproc(p->tracearg);
+
+ handlesignals();
+ retuser();
+}
+
+#pragma profile on
+
+
+/*
+ * Prepare trap handling: clear the FPINEX, FPOVFL, FPUNFL, FPZDIV and
+ * FPINVAL bits in the floating point control register, then install
+ * handletrap() as the note handler.
+ */
+void inittrap(void)
+{
+	/* disable FPU faults */
+	setfcr(getfcr() & ~(FPINEX|FPOVFL|FPUNFL|FPZDIV|FPINVAL));
+
+	notify(handletrap);
+}