Skip to content

Commit

Permalink
Merge pull request #5 from mcg1969/better-pip
Browse files: browse the repository at this point in the history
Better pip support
  • Loading branch information
mcg1969 authored May 1, 2019
2 parents 0b5818e + ac7d77a commit e65436d
Show file tree
Hide file tree
Showing 33 changed files with 777 additions and 607 deletions.
1 change: 1 addition & 0 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ requirements:
run:
- python {{ python }}
- pandas
- setuptools

test:
source_files:
Expand Down
173 changes: 77 additions & 96 deletions project_inspect/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .utils import load_file

import logging
import pkg_resources
logger = logging.getLogger(__name__)

__all__ = ['environment_by_prefix', 'kernel_name_to_prefix']
Expand All @@ -32,6 +33,7 @@ def get_python_builtins(pybin):
Returns:
set: a set of module names.
'''

try:
cmd = [pybin, '-c', 'import sys, json; print(json.dumps(sys.builtin_module_names))']
pycall = subprocess.check_output(cmd)
Expand All @@ -42,78 +44,23 @@ def get_python_builtins(pybin):
return set(sys.builtin_module_names)


def parse_egg_info(path):
def get_python_path(prefix):
'''
Returns the name, version, and key file list for a pip package.
Determines the standard python path for a given prefix.
Args:
path (str): path to the egg file or directory
prefix (str): path to the base of the Python environment.
Returns:
name (str): the name of the package.
version (str): the version of the package.
files (list of str): a list of the Python files found in the
manifest file (SOURCES.txt, RECORD), if such a file is found.
If the manifest is not found, an empty list is returned.
results: a list of paths.
'''
name = basename(path).rsplit('.', 1)[0]
if path.endswith('.egg-link'):
data = load_file(path)
if data is not None:
spdir = fp.read().splitlines()[0].strip()
name = name.replace('-', '_')
if data is not None:
path = join(spdir, name + '.egg-info')
version = '<dev>'
else:
spdir = dirname(path)
name, version = name.rsplit('-', 2)[:2]
pdata = {'name': name.lower(),
'version': version,
'build': '<pip>',
'depends': set(),
'modules': {'python': set(), 'r': set()}}
if path.endswith('.egg'):
pdata['modules']['python'].update(get_python_importables(path))
path = join(path, 'EGG-INFO')
else:
tops = [name]
tname = name.split('.', 1)[0]
tlpath = join(path, 'top_level.txt')
if exists(tlpath):
tldata = load_file(tlpath)
if tldata is not None:
tops.extend(line for line in map(str.rstrip, tldata.splitlines())
if line and line != tname)
for top in tops:
mparts = top.split('.')
level = len(mparts)
mpath = join(spdir, *mparts)
pdata['modules']['python'].update(get_python_importables(mpath, level=level))
fname = 'METADATA' if path.endswith('.dist-info') else 'PKG-INFO'
fpath = join(path, fname)
info = {}
if isfile(fpath):
data = load_file(fpath) or ''
for line in data.splitlines():
m = re.match(r'(\w+):\s*(\S+)', line, re.I)
if m:
key = m.group(1).lower()
info.setdefault(key, []).append(m.group(2))
break
if 'Requires-Dist' in info:
pdata['depends'].update(x.split(' ', 1)[0].lower() for x in info['requires-dist'])
else:
req_txt = join(path, 'requires.txt')
if exists(req_txt):
data = load_file(req_txt) or ''
for dep in data.splitlines():
m = re.match(r'^([\w_-]+)', dep)
if not m:
break
pdata['depends'].add(m.groups()[0].lower())
return pdata


results = (glob(join(prefix, 'lib', 'python*.zip')) +
glob(join(prefix, 'lib', 'python?.?')) +
glob(join(prefix, 'lib', 'python?.?', 'lib-dynload')) +
glob(join(prefix, 'lib', 'python?.?', 'site-packages')))
return results


def parse_conda_meta(mpath):
mdata = load_file(mpath) or {}
fname, fversion, fbuild = basename(mpath).rsplit('.', 1)[0].rsplit('-', 2)
Expand Down Expand Up @@ -164,22 +111,51 @@ def get_eggs(sp_dir):
Returns:
list: a list of the egg files/dirs found in that directory.
'''
results = []
results = {}
for fn in os.listdir(sp_dir):
if not fn.endswith(('.egg', '.egg-info', '.dist-info', '.egg-link')):
if not fn.endswith(('.egg-info', '.dist-info', '.egg', '.egg-link')):
continue
path = join(sp_dir, fn)
if fn.endswith('.egg-link'):
data = load_file(path)
if data is not None:
sp_dir = data.splitlines()[0].strip()
name = fn.rsplit('.', 1)[0].replace('-', '_')
path = join(sp_dir, name + '.egg-info')
if (isfile(path) or exists(join(path, 'METADATA')) or
exists(join(path, 'PKG-INFO')) or
exists(join(path, 'METADATA'))):
results.append(fn)
return set(results)
fullpath = os.path.join(sp_dir, fn)
factory = pkg_resources.dist_factory(sp_dir, fn, False)
try:
dists = list(factory(fullpath))
except Exception as e:
logger.warning('Error reading eggs in {}:\n{}'.format(fullpath, e))
dists = []
pdata = {'name': None,
'version': None,
'build': '<pip>',
'depends': set(),
'modules': {'python': set(), 'r': set()}}
results[fn] = pdata
for dist in dists:
if pdata['name'] is None:
pdata['name'] = dist.project_name
pdata['version'] = dist.version or '<dev>'
pdata['depends'].update(r.name for r in dist.requires())
sources = 'RECORD' if dist.has_metadata('RECORD') else 'SOURCES.txt'
if dist.has_metadata(sources) and dist.has_metadata('top_level.txt'):
sources = list(map(str.strip, dist.get_metadata(sources).splitlines()))
top_level = list(map(str.strip, dist.get_metadata('top_level.txt').splitlines()))
for top in top_level:
top_s = top + '/'
for src in sources:
src = src.split(',', 1)[0]
if src.endswith('__init__.py'):
src = dirname(src)
elif src.endswith(('.py', '.so')):
src = src[:-3]
else:
continue
pdata['modules']['python'].add(src.replace('/', '.'))
if not pdata['name']:
name, version = fn.rsplit('.', 1)[0], '<dev>'
if fn.endswith('.dist-info'):
name, version = fn.rsplit('-', 1)
elif fn.endswith('.egg-info'):
name, version, _ = fn.rsplit('-', 2)
pdata['name'], pdata['version'] = name, version
return results


@functools.lru_cache()
Expand All @@ -194,6 +170,8 @@ def get_python_importables(path, level=0):
gen = [(dirname(path), [], [basename(path) + sfx])]
path = dirname(path)
level -= 1
else:
return modules
root_path = path.rstrip('/')
while level > 0:
root_path = dirname(root_path)
Expand All @@ -207,13 +185,7 @@ def get_python_importables(path, level=0):
if file.startswith('.'):
continue
fpath = join(root, file)
if file.endswith('.pth'):
data = load_file(fpath) or ''
for npath in map(str.strip, data.splitlines()):
if npath:
npath = abspath(join(root, npath.strip()))
modules.update(get_python_importables(npath))
elif file == '__init__.py':
if file == '__init__.py':
modules[base_module] = fpath
elif file.endswith(('.so', '.py')):
file = file.rsplit('.', 1)[0]
Expand All @@ -234,12 +206,20 @@ def _create(bname):
'modules': {'python': set(), 'r': set()},
'imports': {'python': set(), 'r': set()}}
return packages[bname]
all_modules = {}
for pdata in get_eggs(path).values():
packages[pdata['name']] = pdata
pdata['build'] = '<local>'
pdata['imports'] = {'python': set(), 'r': set()}
all_modules.update((k, pdata['name']) for k in pdata['modules']['python'])
for module, fpath in get_python_importables(path).items():
bname = './' + module.split('.', 1)[0]
if exists(join(path, bname) + '.py'):
bname += '.py'
pdata = _create(bname)
bname = all_modules.get(module)
if bname is None:
bname = './' + module.split('.', 1)[0]
if exists(join(path, bname) + '.py'):
bname += '.py'
imports, _ = find_file_imports(fpath, submodules=True)
pdata = _create(bname)
pdata['modules']['python'].add(module)
pdata['imports']['python'].update(imports)
for fpath in glob(join(path, '*.R')) + glob(join(path, '*.ipynb')):
Expand Down Expand Up @@ -295,13 +275,14 @@ def environment_by_prefix(envdir, local=None):

# Find all non-conda egg directories and determine package name and version
# If a manifest exists, use that to remove imports from the unmanaged list
eggfiles = set()
eggfiles = {}
for spdir in glob(join(envdir, 'lib', 'python*', 'site-packages')):
eggfiles = get_eggs(spdir)
eggfiles.update(get_eggs(spdir))
for pdata in packages.values():
eggfiles -= pdata['eggs']
for eggfile in eggfiles:
pdata = parse_egg_info(join(spdir, eggfile))
for egg in pdata['eggs']:
if egg in eggfiles:
del eggfiles[egg]
for eggfile, pdata in eggfiles.items():
pname = pdata['name']
packages[pdata['name']] = pdata
for language, mdata in pdata['modules'].items():
Expand Down
2 changes: 1 addition & 1 deletion tests/darwin/all_all.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
n_owners,n_projects,n_environments,n_required,n_requested,n_python,n_r
2,4,6,468,35,6,4
2,4,6,468,36,6,4
2 changes: 1 addition & 1 deletion tests/darwin/all_package.csv
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ scikit-learn,2,2,2,0,0
scipy,2,3,3,1,0
seaborn,2,2,2,0,0
send2trash,2,4,6,0,0
setuptools,2,4,6,6,0
setuptools,2,4,6,6,1
simplegeneric,2,3,3,1,0
singledispatch,2,3,3,1,0
sip,2,3,3,0,0
Expand Down
Loading

0 comments on commit e65436d

Please sign in to comment.