// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
package net
import (
"context"
"io"
"os"
"runtime"
"sync/atomic"
"syscall"
)
// Network file descriptor.
type netFD struct {
// locking/lifetime of sysfd + serialize access to Read and Write methods
fdmu fdMutex
// immutable until Close
sysfd int
family int
sotype int
isStream bool
isConnected bool
net string
laddr Addr
raddr Addr
// writev cache.
iovecs *[]syscall.Iovec
// wait server
pd pollDesc
}
func sysInit() {
}
func newFD(sysfd, family, sotype int, net string) (*netFD, error) {
return &netFD{sysfd: sysfd, family: family, sotype: sotype, net: net, isStream: sotype == syscall.SOCK_STREAM}, nil
}
func (fd *netFD) init() error {
if err := fd.pd.init(fd); err != nil {
return err
}
return nil
}
func (fd *netFD) setAddr(laddr, raddr Addr) {
fd.laddr = laddr
fd.raddr = raddr
runtime.SetFinalizer(fd, (*netFD).Close)
}
func (fd *netFD) name() string {
var ls, rs string
if fd.laddr != nil {
ls = fd.laddr.String()
}
if fd.raddr != nil {
rs = fd.raddr.String()
}
return fd.net + ":" + ls + "->" + rs
}
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret error) {
// Do not need to call fd.writeLock here,
// because fd is not yet accessible to user,
// so no concurrent operations are possible.
switch err := connectFunc(fd.sysfd, ra); err {
case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
case nil, syscall.EISCONN:
select {
case <-ctx.Done():
return mapErr(ctx.Err())
default:
}
if err := fd.init(); err != nil {
return err
}
return nil
case syscall.EINVAL:
// On Solaris we can see EINVAL if the socket has
// already been accepted and closed by the server.
// Treat this as a successful connection--writes to
// the socket will see EOF. For details and a test
// case in C see https://golang.org/issue/6828.
if runtime.GOOS == "solaris" {
return nil
}
fallthrough
default:
return os.NewSyscallError("connect", err)
}
if err := fd.init(); err != nil {
return err
}
if deadline, _ := ctx.Deadline(); !deadline.IsZero() {
fd.setWriteDeadline(deadline)
defer fd.setWriteDeadline(noDeadline)
}
// Start the "interrupter" goroutine, if this context might be canceled.
// (The background context cannot)
//
// The interrupter goroutine waits for the context to be done and
// interrupts the dial (by altering the fd's write deadline, which
// wakes up waitWrite).
if ctx != context.Background() {
// Wait for the interrupter goroutine to exit before returning
// from connect.
done := make(chan struct{})
interruptRes := make(chan error)
defer func() {
close(done)
if ctxErr := <-interruptRes; ctxErr != nil && ret == nil {
// The interrupter goroutine called setWriteDeadline,
// but the connect code below had returned from
// waitWrite already and did a successful connect (ret
// == nil). Because we've now poisoned the connection
// by making it unwritable, don't return a successful
// dial. This was issue 16523.
ret = ctxErr
fd.Close() // prevent a leak
}
}()
go func() {
select {
case <-ctx.Done():
// Force the runtime's poller to immediately give up
// waiting for writability, unblocking waitWrite
// below.
fd.setWriteDeadline(aLongTimeAgo)
testHookCanceledDial()
interruptRes <- ctx.Err()
case <-done:
interruptRes <- nil
}
}()
}
for {
// Performing multiple connect system calls on a
// non-blocking socket under Unix variants does not
// necessarily result in earlier errors being
// returned. Instead, once runtime-integrated network
// poller tells us that the socket is ready, get the
// SO_ERROR socket option to see if the connection
// succeeded or failed. See issue 7474 for further
// details.
if err := fd.pd.waitWrite(); err != nil {
select {
case <-ctx.Done():
return mapErr(ctx.Err())
default:
}
return err
}
nerr, err := getsockoptIntFunc(fd.sysfd, syscall.SOL_SOCKET, syscall.SO_ERROR)
if err != nil {
return os.NewSyscallError("getsockopt", err)
}
switch err := syscall.Errno(nerr); err {
case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
case syscall.Errno(0), syscall.EISCONN:
if runtime.GOOS != "darwin" {
return nil
}
// See golang.org/issue/14548.
// On Darwin, multiple connect system calls on
// a non-blocking socket never harm SO_ERROR.
switch err := connectFunc(fd.sysfd, ra); err {
case nil, syscall.EISCONN:
return nil
}
default:
return os.NewSyscallError("getsockopt", err)
}
}
}
func (fd *netFD) destroy() {
// Poller may want to unregister fd in readiness notification mechanism,
// so this must be executed before closeFunc.
fd.pd.close()
closeFunc(fd.sysfd)
fd.sysfd = -1
runtime.SetFinalizer(fd, nil)
}
func (fd *netFD) Close() error {
if !fd.fdmu.increfAndClose() {
return errClosing
}
// Unblock any I/O. Once it all unblocks and returns,
// so that it cannot be referring to fd.sysfd anymore,
// the final decref will close fd.sysfd. This should happen
// fairly quickly, since all the I/O is non-blocking, and any
// attempts to block in the pollDesc will return errClosing.
fd.pd.evict()
fd.decref()
return nil
}
func (fd *netFD) shutdown(how int) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return os.NewSyscallError("shutdown", syscall.Shutdown(fd.sysfd, how))
}
func (fd *netFD) closeRead() error {
return fd.shutdown(syscall.SHUT_RD)
}
func (fd *netFD) closeWrite() error {
return fd.shutdown(syscall.SHUT_WR)
}
func (fd *netFD) Read(p []byte) (n int, err error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if len(p) == 0 {
// If the caller wanted a zero byte read, return immediately
// without trying. (But after acquiring the readLock.) Otherwise
// syscall.Read returns 0, nil and eofError turns that into
// io.EOF.
// TODO(bradfitz): make it wait for readability? (Issue 15735)
return 0, nil
}
if err := fd.pd.prepareRead(); err != nil {
return 0, err
}
if fd.isStream && len(p) > 1<<30 {
p = p[:1<<30]
}
for {
n, err = syscall.Read(fd.sysfd, p)
if err != nil {
n = 0
if err == syscall.EAGAIN {
if err = fd.pd.waitRead(); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
break
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("read", err)
}
return
}
func (fd *netFD) readFrom(p []byte) (n int, sa syscall.Sockaddr, err error) {
if err := fd.readLock(); err != nil {
return 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(); err != nil {
return 0, nil, err
}
for {
n, sa, err = syscall.Recvfrom(fd.sysfd, p, 0)
if err != nil {
n = 0
if err == syscall.EAGAIN {
if err = fd.pd.waitRead(); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
break
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("recvfrom", err)
}
return
}
func (fd *netFD) readMsg(p []byte, oob []byte) (n, oobn, flags int, sa syscall.Sockaddr, err error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(); err != nil {
return 0, 0, 0, nil, err
}
for {
n, oobn, flags, sa, err = syscall.Recvmsg(fd.sysfd, p, oob, 0)
if err != nil {
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN {
if err = fd.pd.waitRead(); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
break
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("recvmsg", err)
}
return
}
func (fd *netFD) Write(p []byte) (nn int, err error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(); err != nil {
return 0, err
}
for {
var n int
max := len(p)
if fd.isStream && max-nn > 1<<30 {
max = nn + 1<<30
}
n, err = syscall.Write(fd.sysfd, p[nn:max])
if n > 0 {
nn += n
}
if nn == len(p) {
break
}
if err == syscall.EAGAIN {
if err = fd.pd.waitWrite(); err == nil {
continue
}
}
if err != nil {
break
}
if n == 0 {
err = io.ErrUnexpectedEOF
break
}
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("write", err)
}
return nn, err
}
func (fd *netFD) writeTo(p []byte, sa syscall.Sockaddr) (n int, err error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(); err != nil {
return 0, err
}
for {
err = syscall.Sendto(fd.sysfd, p, 0, sa)
if err == syscall.EAGAIN {
if err = fd.pd.waitWrite(); err == nil {
continue
}
}
break
}
if err == nil {
n = len(p)
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("sendto", err)
}
return
}
func (fd *netFD) writeMsg(p []byte, oob []byte, sa syscall.Sockaddr) (n int, oobn int, err error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(); err != nil {
return 0, 0, err
}
for {
n, err = syscall.SendmsgN(fd.sysfd, p, oob, sa, 0)
if err == syscall.EAGAIN {
if err = fd.pd.waitWrite(); err == nil {
continue
}
}
break
}
if err == nil {
oobn = len(oob)
}
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("sendmsg", err)
}
return
}
func (fd *netFD) accept() (netfd *netFD, err error) {
if err := fd.readLock(); err != nil {
return nil, err
}
defer fd.readUnlock()
var s int
var rsa syscall.Sockaddr
if err = fd.pd.prepareRead(); err != nil {
return nil, err
}
for {
s, rsa, err = accept(fd.sysfd)
if err != nil {
nerr, ok := err.(*os.SyscallError)
if !ok {
return nil, err
}
switch nerr.Err {
case syscall.EAGAIN:
if err = fd.pd.waitRead(); err == nil {
continue
}
case syscall.ECONNABORTED:
// This means that a socket on the
// listen queue was closed before we
// Accept()ed it; it's a silly error,
// so try again.
continue
}
return nil, err
}
break
}
if netfd, err = newFD(s, fd.family, fd.sotype, fd.net); err != nil {
closeFunc(s)
return nil, err
}
if err = netfd.init(); err != nil {
fd.Close()
return nil, err
}
lsa, _ := syscall.Getsockname(netfd.sysfd)
netfd.setAddr(netfd.addrFunc()(lsa), netfd.addrFunc()(rsa))
return netfd, nil
}
// tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
// If the kernel doesn't support it, this is set to 0.
var tryDupCloexec = int32(1)
func dupCloseOnExec(fd int) (newfd int, err error) {
if atomic.LoadInt32(&tryDupCloexec) == 1 {
r0, _, e1 := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_DUPFD_CLOEXEC, 0)
if runtime.GOOS == "darwin" && e1 == syscall.EBADF {
// On OS X 10.6 and below (but we only support
// >= 10.6), F_DUPFD_CLOEXEC is unsupported
// and fcntl there falls back (undocumented)
// to doing an ioctl instead, returning EBADF
// in this case because fd is not of the
// expected device fd type. Treat it as
// EINVAL instead, so we fall back to the
// normal dup path.
// TODO: only do this on 10.6 if we can detect 10.6
// cheaply.
e1 = syscall.EINVAL
}
switch e1 {
case 0:
return int(r0), nil
case syscall.EINVAL:
// Old kernel. Fall back to the portable way
// from now on.
atomic.StoreInt32(&tryDupCloexec, 0)
default:
return -1, os.NewSyscallError("fcntl", e1)
}
}
return dupCloseOnExecOld(fd)
}
// dupCloseOnExecUnixOld is the traditional way to dup an fd and
// set its O_CLOEXEC bit, using two system calls.
func dupCloseOnExecOld(fd int) (newfd int, err error) {
syscall.ForkLock.RLock()
defer syscall.ForkLock.RUnlock()
newfd, err = syscall.Dup(fd)
if err != nil {
return -1, os.NewSyscallError("dup", err)
}
syscall.CloseOnExec(newfd)
return
}
func (fd *netFD) dup() (f *os.File, err error) {
ns, err := dupCloseOnExec(fd.sysfd)
if err != nil {
return nil, err
}
// We want blocking mode for the new fd, hence the double negative.
// This also puts the old fd into blocking mode, meaning that
// I/O will block the thread instead of letting us use the epoll server.
// Everything will still work, just with more threads.
if err = syscall.SetNonblock(ns, false); err != nil {
return nil, os.NewSyscallError("setnonblock", err)
}
return os.NewFile(uintptr(ns), fd.name()), nil
}