Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/sentry/fsimpl/fuse/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ go_library(
"directory.go",
"file.go",
"fusefs.go",
"host_connection.go",
"inode.go",
"inode_connection.go",
"inode_refs.go",
Expand Down Expand Up @@ -110,12 +111,15 @@ go_test(
srcs = [
"connection_test.go",
"dev_test.go",
"host_connection_integration_test.go",
"host_connection_test.go",
"utils_test.go",
],
library = ":fuse",
deps = [
"//pkg/abi/linux",
"//pkg/errors/linuxerr",
"//pkg/hostarch",
"//pkg/marshal/primitive",
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel",
Expand Down
51 changes: 40 additions & 11 deletions pkg/sentry/fsimpl/fuse/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ const (
fuseDefaultMaxPagesPerReq = 32
)

// fuseConn abstracts the FUSE request/response transport. The connection
// struct delegates call dispatch to its fuseConn implementation.
//
// Implementations are stored in connection.fuseConn; connection.Call forwards
// each request to call, and filesystem.Release invokes release.
type fuseConn interface {
// call dispatches the request r over the transport and returns the
// server's response, or an error if the request could not be completed.
call(ctx context.Context, r *Request) (*Response, error)
// release is invoked when the owning filesystem is released, giving the
// transport a chance to drop any resources it holds.
release(ctx context.Context)
}

// deviceConn implements fuseConn for the in-sandbox /dev/fuse path.
// It uses the queue-based mechanism where the FUSE daemon reads requests
// from and writes responses to the DeviceFD.
//
// newFUSEConnectionOpts installs a deviceConn as the default transport of
// every connection it creates.
type deviceConn struct {
// conn is the connection whose callFuture method is used to issue
// requests; it is the same connection that holds this deviceConn.
conn *connection
}

// call implements fuseConn.call. It hands r to the connection via callFuture
// and then waits for the resulting future to resolve. Any error from either
// step is converted with linuxError before being returned; a failure while
// resolving still returns whatever response resolve produced alongside the
// error.
func (dc *deviceConn) call(ctx context.Context, r *Request) (*Response, error) {
	future, queueErr := dc.conn.callFuture(ctx, r)
	if queueErr != nil {
		return nil, linuxError(queueErr)
	}

	resp, waitErr := future.resolve(ctx)
	if waitErr == nil {
		return resp, nil
	}
	return resp, linuxError(waitErr)
}

// release implements fuseConn.release. It is a no-op for deviceConn.
func (dc *deviceConn) release(ctx context.Context) {
	// Intentionally empty: deviceConn only holds a back-pointer to its
	// connection and has nothing of its own to tear down here.
}

// connection is the struct by which the sentry communicates with the FUSE server daemon.
//
// Lock order:
Expand All @@ -54,6 +82,10 @@ const (
type connection struct {
connectionRefs

// fuseConn is the transport implementation. For the DeviceFD path this
// is a *deviceConn; for host passthrough this is a *hostConnection.
fuseConn fuseConn `state:"nosave"`

// We target FUSE 7.23.
// The following FUSE_INIT flags are currently unsupported by this implementation:
// - FUSE_EXPORT_SUPPORT
Expand Down Expand Up @@ -309,7 +341,12 @@ func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOpti
// synchronization and without checking if fuseFD has already been used to
// mount another filesystem.

// Create the writeBuf for the header to be stored in.
return newFUSEConnectionOpts(opts)
}

// newFUSEConnectionOpts creates a FUSE connection with the given options.
// This is used by both the DeviceFD path and the host FD passthrough path.
func newFUSEConnectionOpts(opts *filesystemOptions) (*connection, error) {
conn := &connection{
completions: make(map[linux.FUSEOpID]*futureResponse),
fullQueueCh: make(chan struct{}, opts.maxActiveRequests),
Expand All @@ -321,6 +358,7 @@ func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOpti
initializedChan: make(chan struct{}),
connected: true,
}
conn.fuseConn = &deviceConn{conn: conn}
conn.InitRefs()
return conn, nil
}
Expand Down Expand Up @@ -379,16 +417,7 @@ func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error)
return nil, linuxerr.ECONNREFUSED
}

fut, err := conn.callFuture(ctx, r)
if err != nil {
return nil, linuxError(err)
}

res, err := fut.resolve(ctx)
if err != nil {
return res, linuxError(err)
}
return res, nil
return conn.fuseConn.call(ctx, r)
}

// callFuture makes a request to the server and returns a future response.
Expand Down
24 changes: 12 additions & 12 deletions pkg/sentry/fsimpl/fuse/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ func (fd *fileDescription) statusFlags() uint32 {
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *fileDescription) Release(ctx context.Context) {
// no need to release if FUSE server doesn't implement Open.
conn := fd.inode().fs.conn
if conn.noOpen {
fs := fd.inode().fs
if fs.conn.noOpen {
return
}

Expand All @@ -89,19 +89,19 @@ func (fd *fileDescription) Release(ctx context.Context) {
opcode = linux.FUSE_RELEASE
}
// Ignoring errors and FUSE server replies is analogous to Linux's behavior.
req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, opcode, &in)
req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, opcode, &in)
// The reply will be ignored since no callback is defined in asyncCallBack().
conn.Call(ctx, req)
fs.conn.Call(ctx, req)
}

// OnClose implements vfs.FileDescriptionImpl.OnClose.
func (fd *fileDescription) OnClose(ctx context.Context) error {
inode := fd.inode()
conn := inode.fs.conn
fs := inode.fs
inode.attrMu.Lock()
defer inode.attrMu.Unlock()

if conn.noOpen {
if fs.conn.noOpen {
return nil
}
if fd.OpenFlag&linux.FOPEN_NOFLUSH != 0 {
Expand All @@ -112,8 +112,8 @@ func (fd *fileDescription) OnClose(ctx context.Context) error {
Fh: fd.Fh,
LockOwner: 0, // TODO(gvisor.dev/issue/3245): file lock
}
req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FLUSH, &in)
res, err := conn.Call(ctx, req)
req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FLUSH, &in)
res, err := fs.conn.Call(ctx, req)
if err != nil {
return err
}
Expand Down Expand Up @@ -170,9 +170,9 @@ func (fd *fileDescription) Sync(ctx context.Context) error {
inode := fd.inode()
inode.attrMu.Lock()
defer inode.attrMu.Unlock()
conn := inode.fs.conn
fs := inode.fs
// no need to proceed if FUSE server doesn't implement Open.
if conn.noOpen {
if fs.conn.noOpen {
return linuxerr.EINVAL
}

Expand All @@ -181,8 +181,8 @@ func (fd *fileDescription) Sync(ctx context.Context) error {
FsyncFlags: fd.statusFlags(),
}
// Ignoring errors and FUSE server replies is analogous to Linux's behavior.
req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FSYNC, &in)
req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FSYNC, &in)
// The reply will be ignored since no callback is defined in asyncCallBack().
conn.CallAsync(ctx, req)
fs.conn.CallAsync(ctx, req)
return nil
}
82 changes: 77 additions & 5 deletions pkg/sentry/fsimpl/fuse/fusefs.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"math"
"strconv"

"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
Expand Down Expand Up @@ -88,7 +89,8 @@ type filesystem struct {
devMinor uint32

// conn is used for communication between the FUSE server
// daemon and the sentry fusefs.
// daemon and the sentry fusefs. It holds shared protocol state and
// delegates call dispatch to its internal fuseConn transport.
conn *connection

// opts is the options the fusefs is initialized with.
Expand Down Expand Up @@ -130,14 +132,36 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
defer fuseFDGeneric.DecRef(ctx)
fuseFD, ok := fuseFDGeneric.Impl().(*DeviceFD)
if !ok {
log.Warningf("%s.GetFilesystem: device FD is %T, not a FUSE device", fsType.Name(), fuseFDGeneric)
if ok {
return fsType.getFilesystemDeviceFD(ctx, vfsObj, creds, kernelTask, fuseFD, devMinor, fsopts)
}

// Check if this is a host FD. Try the file description first (for
// regular files, pipes), then the dentry inode (for sockets, which
// have a different file description type but the same host inode).
rawHostFD := -1
if hfd, ok := fuseFDGeneric.Impl().(vfs.HostFDProvider); ok {
rawHostFD = hfd.HostFD()
} else if d := fuseFDGeneric.Dentry(); d != nil {
if kd, ok := d.Impl().(*kernfs.Dentry); ok {
if hfd, ok := kd.Inode().(vfs.HostFDProvider); ok {
rawHostFD = hfd.HostFD()
}
}
}
if rawHostFD == -1 {
log.Warningf("%s.GetFilesystem: fd is %T, not a FUSE device or host FD", fsType.Name(), fuseFDGeneric.Impl())
return nil, nil, linuxerr.EINVAL
}

return fsType.getFilesystemHostFD(ctx, vfsObj, creds, kernelTask, int32(rawHostFD), devMinor, fsopts)
}

// getFilesystemDeviceFD creates a FUSE filesystem backed by an in-sandbox
// /dev/fuse DeviceFD.
func (fsType FilesystemType) getFilesystemDeviceFD(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, kernelTask *kernel.Task, fuseFD *DeviceFD, devMinor uint32, fsopts *filesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
fuseFD.mu.Lock()
connected := fuseFD.connected()
// Create a new FUSE filesystem.
fs, err := newFUSEFilesystem(ctx, vfsObj, &fsType, fuseFD, devMinor, fsopts)
if err != nil {
log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
Expand All @@ -155,9 +179,56 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
}

// root is the fusefs root directory.
root := fs.newRoot(ctx, creds, fsopts.rootMode)
return fs.VFSFilesystem(), root.VFSDentry(), nil
}

// getFilesystemHostFD creates a FUSE filesystem that communicates with a FUSE
// server running on the host via a host file descriptor.
//
// hostFD is duplicated and never closed here, so the caller keeps ownership of
// the descriptor it passed in. The duplicate is handed to the hostConnection
// created by newHostConnection. On success the returned filesystem and root
// dentry are ready for use; on failure every resource acquired by this
// function is released before returning (nil, nil, err).
func (fsType FilesystemType) getFilesystemHostFD(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, kernelTask *kernel.Task, hostFD int32, devMinor uint32, fsopts *filesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
	// Dup the host FD so that the FUSE connection owns its own copy.
	// The original may be shared with or closed by the host import path
	// (e.g. socket endpoints take ownership of the FD).
	dupFD, err := unix.Dup(int(hostFD))
	if err != nil {
		log.Warningf("%s.getFilesystemHostFD: dup failed: %v", fsType.Name(), err)
		return nil, nil, err
	}
	// The host import path sets the FD to non-blocking for epoll-based I/O.
	// The FUSE passthrough connection uses synchronous blocking I/O, so
	// clear the non-blocking flag.
	//
	// NOTE(review): O_NONBLOCK is a file status flag, and descriptors created
	// by dup(2) share file status flags with the original. Clearing it here
	// therefore also makes hostFD blocking. Confirm that nothing else keeps
	// doing non-blocking I/O on the original descriptor.
	if err := unix.SetNonblock(dupFD, false); err != nil {
		unix.Close(dupFD)
		log.Warningf("%s.getFilesystemHostFD: SetNonblock failed: %v", fsType.Name(), err)
		return nil, nil, err
	}

	conn, err := newFUSEConnectionOpts(fsopts)
	if err != nil {
		unix.Close(dupFD)
		log.Warningf("%s.getFilesystemHostFD: newFUSEConnection failed: %v", fsType.Name(), err)
		return nil, nil, err
	}

	// Replace the default deviceConn transport with the host passthrough
	// transport. From here on dupFD belongs to hostConn.
	hostConn := newHostConnection(conn, int32(dupFD))
	conn.fuseConn = hostConn

	fs := &filesystem{
		devMinor: devMinor,
		opts:     fsopts,
		conn:     conn,
		clock:    ktime.RealtimeClockFromContext(ctx),
	}
	fs.VFSFilesystem().Init(vfsObj, &fsType, fs)

	rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace()
	hasSysAdmin := creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs)
	if err := hostConn.InitSend(creds, uint32(kernelTask.ThreadID()), hasSysAdmin); err != nil {
		log.Warningf("%s.getFilesystemHostFD: InitSend failed: %v", fsType.Name(), err)
		// Drop the reference taken by Init above. This runs
		// filesystem.Release, which releases the transport (expected to
		// close dupFD, which hostConn now owns; verify against
		// hostConnection.release) and returns devMinor. Without this the
		// filesystem and the duplicated host FD would leak.
		fs.VFSFilesystem().DecRef(ctx)
		return nil, nil, err
	}

	root := fs.newRoot(ctx, creds, fsopts.rootMode)
	return fs.VFSFilesystem(), root.VFSDentry(), nil
}

Expand Down Expand Up @@ -295,6 +366,7 @@ func newFUSEFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, fsTyp

// Release implements vfs.FilesystemImpl.Release.
//
// It releases the connection's transport, returns the anonymous block device
// minor number to the VirtualFilesystem, and then releases the embedded
// Filesystem.
func (fs *filesystem) Release(ctx context.Context) {
	// connection.fuseConn is tagged state:"nosave", so it may be nil on a
	// filesystem that went through checkpoint/restore unless something
	// re-establishes it on load. Guard the call so that releasing such a
	// filesystem does not panic on a nil interface.
	if transport := fs.conn.fuseConn; transport != nil {
		transport.release(ctx)
	}
	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
	fs.Filesystem.Release(ctx)
}
Expand Down
Loading
Loading