diff --git a/.gitignore b/.gitignore index 06c263d..ccd002a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /build/ *.pyc /gridfs_fuse.egg-info/ +/dist +/attr diff --git a/CHANGES b/CHANGES index aa2f37d..a372b95 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +* 2017-02-06 -- 0.1.2 + * Added mount.gridfs script which behaves similarly to other mount programs + * Fixed bug during initial mount when creating a new gridfs collection + * Allow database and collection to be specified in --mongodb-uri argument + * Changed setup.py to read from supporting files (LICENSE, README.md, etc.) + * 2016-01-13 -- 0.1.1 * 'stat' failed on other nodes while a file is created/written. * Better error handling if a file is read while it's still written. diff --git a/README.md b/README.md index 7c11d36..3976a41 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,36 @@ A FUSE wrapper around MongoDB gridfs using python and llfuse. ## Usage + ```bash -gridfs_fuse --mongodb-uri="mongodb://127.0.0.1:27017" --database="gridfs_fuse" --mount-point="/mnt/gridfs_fuse" +gridfs_fuse --mongodb-uri="mongodb://127.0.0.1:27017" --database="gridfs_fuse" --mount-point="/mnt/gridfs_fuse" # --options=allow_other +``` + +### fstab example +```fstab +mongodb://127.0.0.1:27017/gridfs_fuse.fs /mnt/gridfs_fuse gridfs defaults,allow_other 0 0 ``` +Note: this assumes that you have the `mount.gridfs` program (or `mount_gridfs` on macOS) symlinked +into `/sbin/`, e.g. `sudo ln -s $(which mount.gridfs) /sbin/` ## Requirements * pymongo * llfuse +## Install +Ubuntu 16.04: +```bash +sudo apt-get install libfuse python-llfuse +sudo -H pip install py_gridfs_fuse +``` + +macOS: +```bash +brew install osxfuse +sudo -H pip install py_gridfs_fuse +``` + + ## Operations supported * create/list/delete directories => folder support. * read files. 
diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..d917d3e --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.2 diff --git a/gridfs_fuse/__main__.py b/gridfs_fuse/__main__.py new file mode 100644 index 0000000..5d6a810 --- /dev/null +++ b/gridfs_fuse/__main__.py @@ -0,0 +1,3 @@ +from .main import main + +main() diff --git a/gridfs_fuse/main.py b/gridfs_fuse/main.py index ef0f6dd..edcfcc6 100644 --- a/gridfs_fuse/main.py +++ b/gridfs_fuse/main.py @@ -1,61 +1,250 @@ +''' +Mounts a GridFS filesystem using FUSE in Python +''' import logging -import optparse +import argparse import llfuse +import os +import sys + +from pymongo.uri_parser import parse_uri from .operations import operations_factory +FUSE_OPTIONS_HELP=''' +FUSE options for mount (comma-separated) [default: %(default)s]. + debug - turn on detailed debugging. + workers=N - number of workers [default: 1]. + single - equivalent to workers=1 for llfuse compatibility. + log_level=LEVEL - specifies the logging level. + log_file=FILE - specifies path for loging to file. + foreground - run process in foreground rather than as daemon process. -def configure_optparse(parser): - parser.add_option( - '--mongodb-uri', - dest='mongodb_uri', - default="mongodb://127.0.0.1:27017", - help="Connection string for MongoClient. 
class HelpFormatter(argparse.HelpFormatter):
    '''Help formatter that reorders the usage line and hides private actions.

    Positionals are listed right after the first optional (normally ``-h``)
    instead of at the end of the usage line, and every action whose ``dest``
    starts with ``_`` is left out of both the usage line and the argument
    listing.
    '''

    def _format_usage(self, usage, actions, groups, prefix):
        '''Return the (unwrapped) usage line.

        :param usage: explicit usage template or None to generate one
        :param actions: parser actions in declaration order
        :param groups: mutually exclusive groups, passed through unchanged
        :param prefix: text placed before the usage, defaults to ``usage: ``
        '''
        if prefix is None:
            prefix = 'usage: '

        if usage is not None:
            # an explicitly supplied usage template always wins
            usage = usage % dict(prog=self._prog)
        elif not actions:
            # no optionals or positionals: usage is just the program name
            usage = '%(prog)s' % dict(prog=self._prog)
        else:
            prog = '%(prog)s' % dict(prog=self._prog)

            leading, positionals, optionals = [], [], []
            optional_first = False
            for action in actions:
                if action.option_strings:
                    if not positionals and not optionals:
                        optional_first = True
                    optionals.append(action)
                elif action.dest == 'help':
                    leading.append(action)
                elif not action.dest.startswith('_'):
                    # kept in declaration order; inserting each one at a
                    # fixed index would list them back to front
                    positionals.append(action)

            # positionals follow the first entry of the line, which is the
            # first optional unless something else already leads
            pivot = 1 if (optional_first and not leading) else 0
            ordered = (leading + optionals[:pivot] + positionals
                       + optionals[pivot:])

            action_usage = self._format_actions_usage(ordered, groups)
            usage = ' '.join([s for s in [prog, action_usage] if s])
            # the long-line wrapping of the stock formatter is omitted

        return '%s%s\n\n' % (prefix, usage)

    def _format_action(self, action):
        '''Format one entry of the argument listing.

        Returns None for hidden (``_``-prefixed) actions so that they are
        dropped from the help output.
        '''
        if action.dest.startswith('_'):
            return None
        # name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this formatter is subclassed
        return super(HelpFormatter, self)._format_action(action)


class OrderedNamespace(argparse.Namespace):
    '''Namespace that records the order in which attributes were assigned.

    The order is kept in ``_arg_order``.  The first time an attribute that
    is already recorded gets assigned again, the recorded order is reset;
    from then on every assignment is appended.
    NOTE(review): presumably this discards the defaults argparse assigns
    before the parsed values - confirm against the argparse version in use.
    '''

    def __init__(self, **kwargs):
        # write through __dict__ so the bookkeeping attributes themselves
        # do not pass through __setattr__
        self.__dict__["_arg_order"] = []
        self.__dict__["_arg_order_first_time_through"] = True
        argparse.Namespace.__init__(self, **kwargs)

    def __setattr__(self, name, value):
        self.__dict__[name] = value
        if name in self._arg_order and hasattr(self, "_arg_order_first_time_through"):
            # first re-assignment: forget everything recorded so far
            self.__dict__["_arg_order"] = []
            delattr(self, "_arg_order_first_time_through")
        self.__dict__["_arg_order"].append(name)

    def _finalize(self):
        '''Reset the recorded order if no attribute was ever re-assigned.'''
        if hasattr(self, "_arg_order_first_time_through"):
            self.__dict__["_arg_order"] = []
            delattr(self, "_arg_order_first_time_through")

    def _latest_of(self, k1, k2):
        '''Return whichever of ``k1``/``k2`` was recorded later.

        ``k2`` is returned when neither name was recorded, or when ``k1``
        does not come after ``k2``.
        '''
        try:
            if self._arg_order.index(k1) > self._arg_order.index(k2):
                return k1
        except ValueError:
            # at least one of the names was never recorded
            if k1 in self._arg_order:
                return k1
        return k2


def configure_parser(parser):
    '''Configures CLI options of the ``gridfs_fuse`` command.

    :param parser: the argparse parser to add the options to
    :returns: the same parser, for chaining
    '''
    parser.add_argument(
        '-m', '--mount-point',
        dest='mount_point',
        help="Path where to mount fuse/gridfs wrapper")

    parser.add_argument(
        '-u', '--mongodb-uri',
        dest='mongodb_uri',
        default="mongodb://127.0.0.1:27017/gridfs_fuse.fs",
        # one plain string: the previous triple-quoted literal leaked its
        # inner quote characters into the help output
        help="Connection string for MongoClient. http://goo.gl/abqY9 "
             "[default: %(default)s]")

    parser.add_argument(
        '-d', '--database',
        dest='database',
        default='gridfs_fuse',
        help="Name of the database where the filesystem goes [default: %(default)s]")

    parser.add_argument(
        '-c', '--collection', dest='collection', default='fs',
        help='Database collection for GridFS [default: %(default)s]')

    parser.add_argument(
        '-o', '--options', dest='mount_opts', action='append',
        default=['default_permissions'],
        help=FUSE_OPTIONS_HELP)

    parser.add_argument(
        '-l', '--log', dest='logfile', default=os.devnull,
        const='gridfs_fuse.log', nargs='?',
        help='Log actions to file [default: %(default)s]')

    return parser


def fuse_configurator(parser):
    '''Configure parser for the mount-style CLI.

    Expected form: ``<mongodb_uri> <mount_point> [-o <options>]``.

    :param parser: the argparse parser to add the arguments to
    :returns: the same parser, for chaining
    '''
    # init() is handed the complete argv, so the first positional soaks up
    # argv[0]; the leading underscore keeps it out of the help output
    parser.add_argument('_script_path')

    parser.add_argument('mongodb_uri',
        help="MongoDB connection URI in form "
             "'mongodb://[user:password@]hostname[:port]/db.collection'")

    parser.add_argument('mount_point',
        help="Path to mount fuse gridfs filesystem")

    parser.add_argument(
        '-o', dest='mount_opts', action='append',
        default=['default_permissions'], help=FUSE_OPTIONS_HELP)

    return parser


def validate_options(options):
    '''Validates parser arguments.

    A database / collection named in the MongoDB URI takes precedence over
    the values already present on ``options``.
    NOTE(review): the default --mongodb-uri itself names gridfs_fuse.fs, so
    --database/--collection only take effect together with a URI that omits
    them - confirm this is intended.

    :raises Exception: if no mount point was given
    '''
    uri = parse_uri(options.mongodb_uri)
    # parse_uri() reports both keys even when the URI omits them (as None),
    # so dict.get(key, default) would wipe out the configured values
    options.database = uri.get('database') or options.database
    options.collection = uri.get('collection') or options.collection

    if not options.mount_point:
        raise Exception("mount_point is mandatory")


def _mount_opts_to_dict(mount_opts):
    '''Map ``key=value`` / ``key`` option strings to a dict (bare keys -> None).'''
    parsed = {}
    for opt in mount_opts:
        key, sep, value = opt.partition('=')
        parsed[key] = value if sep else None
    return parsed


def fuse_validator(options):
    '''Validates parser arguments using mount interface.

    Seeds the database/collection defaults the mount-style parser does not
    define, then takes the log file from the ``log_file`` mount option
    (None when that option is absent).
    '''
    options.database = 'gridfs_fuse'
    options.collection = 'fs'
    validate_options(options)
    opts = _mount_opts_to_dict(options.mount_opts)
    options.logfile = opts.get('log_file', None)


# adapted from the borg collective (see the borgbackup project)
def daemonize():
    """Detach process from controlling terminal and run in background

    Forks twice with a setsid() in between; both intermediate parents exit
    immediately.

    Returns: tuple of (pid before the forks, pid of the surviving process)
    """
    old_id = os.getpid()
    if os.fork():
        os._exit(0)
    os.setsid()
    if os.fork():
        os._exit(0)
    new_id = os.getpid()
    return old_id, new_id


def _resolve_log_level(value):
    '''Translate a ``log_level`` option value into a numeric logging level.

    Accepts a number ("10") or a level name in any case ("debug").

    :raises ValueError: if the value is missing or not a known level
    '''
    if value is None:
        raise ValueError('no level given')
    try:
        return int(value)
    except ValueError:
        pass
    level = getattr(logging, value.upper(), None)
    if not isinstance(level, int):
        raise ValueError('unknown level name')
    return level


def _resolve_workers(opts):
    '''Pick the argument for llfuse.main() from the parsed mount options.'''
    if 'workers' in opts:
        try:
            # option values arrive as strings
            return int(opts['workers'])
        except (TypeError, ValueError):
            logging.warning('Invalid workers value {!r}, using 1'.format(opts['workers']))
            return 1
    # 'single' is documented as equivalent to workers=1; True serves both as
    # a flag and as the number one
    return True if 'single' in opts else 1


def run_fuse_mount(ops, options, mount_opts):
    '''Performs FUSE mount and blocks until the filesystem is unmounted.

    :param ops: the llfuse operations object
    :param options: parsed options; only ``mount_point`` is read here
    :param mount_opts: flat list of ``key`` / ``key=value`` mount options
    '''
    opts = _mount_opts_to_dict(['fsname=gridfs'] + mount_opts)

    # these are interpreted here and must not be passed on to FUSE
    handled_here = ('debug', 'foreground', 'log_level', 'log_file', 'workers', 'single')
    fuse_opts = [key if value is None else '='.join([key, value])
                 for key, value in opts.items() if key not in handled_here]

    if 'log_level' in opts:
        try:
            logging.getLogger().setLevel(_resolve_log_level(opts['log_level']))
        except ValueError as error:
            logging.warning('Unable to set log_level to {}: {}'.format(opts['log_level'], error))

    # debug clobbers other log settings such as log_level
    if 'debug' in opts:
        logging.basicConfig(
            format='[%(asctime)s] pid=%(process)s {%(module)s:%(funcName)s():%(lineno)d} %(levelname)s - %(message)s',
            level=logging.DEBUG)
        # basicConfig() does nothing once the root logger has handlers
        # (init() has already configured it), so set the level explicitly
        logging.getLogger().setLevel(logging.DEBUG)

    # resolved before llfuse.init() so that nothing can fail between init
    # and the try/finally that closes the mount again
    workers = _resolve_workers(opts)

    # start gridfs bindings and run fuse process
    llfuse.init(ops, options.mount_point, fuse_opts)

    # TODO: Find way of capturing CTRL+C and calling llfuse.close() when in foreground
    # Note: This maybe a bug in llfuse
    try:
        # positional on purpose: maintains compatibility with both the
        # single and the workers kwarg of llfuse.main()
        llfuse.main(workers)
    except KeyboardInterrupt:
        pass
    finally:
        llfuse.close()


def init(args, configure=configure_parser, validate=validate_options):
    '''Initialise using specified parser config and validation, then mount.

    :param args: argument list to parse; unrecognised entries are ignored
    :param configure: callable adding the arguments to the parser
    :param validate: callable checking / completing the parsed options
    '''
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO)

    parser = argparse.ArgumentParser(formatter_class=HelpFormatter)
    configure(parser)
    options, _ = parser.parse_known_args(args, namespace=OrderedNamespace())

    # every -o value may itself be a comma separated list: flatten them
    options.mount_opts = [item
                          for entry in options.mount_opts
                          for item in entry.split(',')]

    validate(options)

    # have to fork process before creating MongoClient object otherwise safety warnings
    if 'foreground' not in options.mount_opts:
        pids = daemonize()  # make the program run as non-blocking process
        logging.debug('Daemonized parent process {} with child process {}'.format(*pids))

    ops = operations_factory(options)

    # NOTE(review): one unchanged source line sits between the two diff hunks
    # at this point and is not visible - presumably a comment; confirm.
    # 'allow_other' Regardless who mounts it, all other users can access it
    # 'default_permissions' Let the kernel do the permission checks
    # 'nonempty' Allow mount on non empty directory
    mount_opts = options.mount_opts
    run_fuse_mount(ops, options, mount_opts)


def main(args=None):
    '''Default interface (``gridfs_fuse`` command).

    :param args: argument list, defaults to ``sys.argv`` at call time
    '''
    if args is None:
        # looked up per call: a sys.argv default would be bound at import
        args = sys.argv
    init(args, configure=configure_parser, validate=validate_options)  # defaults


def _mount_fuse_main(args=None):
    '''Interface for mount.fuse (``mount.gridfs`` / ``mount_gridfs`` command).

    :param args: argument list, defaults to ``sys.argv`` at call time
    '''
    if args is None:
        args = sys.argv
    init(args, configure=fuse_configurator, validate=fuse_validator)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        pass
from setuptools import setup
from setuptools import find_packages
import os
import sys

# directory containing this setup.py (with trailing slash)
here = os.path.abspath(os.path.dirname(__file__)) + '/'

# name of the installed mount helper: 'mount_gridfs' on darwin,
# 'mount.gridfs' everywhere else
mount_script = 'mount.gridfs' if sys.platform != 'darwin' else 'mount_gridfs'


def _read_lines(filename):
    '''Return the lines of a file next to setup.py, without their newlines.

    The file is closed again before returning.
    '''
    with open(os.path.join(here, filename)) as handle:
        return [line.rstrip('\n') for line in handle]


def _read_requirements(filename):
    '''Return the requirement specifiers listed in ``filename``.

    Surrounding whitespace is stripped; blank lines and ``#`` comment lines
    are skipped so that only real specifiers reach ``install_requires``.
    '''
    stripped = (line.strip() for line in _read_lines(filename))
    return [line for line in stripped if line and not line.startswith('#')]


setup(
    name="gridfs_fuse",
    url='https://github.com/axiros/py_gridfs_fuse',
    # second line of the README / first line of the license file
    description=_read_lines('README.md')[1],
    license=_read_lines('LICENSE.md')[0],
    # fall back to the last hard-coded release if VERSION is empty
    version='\n'.join(_read_lines('VERSION')).strip('\n') or '0.1.1',
    install_requires=_read_requirements('requirements.txt'),
    include_package_data=True,
    package_dir={'gridfs_fuse': 'gridfs_fuse'},
    packages=find_packages('.'),
    entry_points={
        'console_scripts': [
            'gridfs_fuse = gridfs_fuse.main:main',
            '%s = gridfs_fuse.main:_mount_fuse_main' % (mount_script),
        ]
    }
)