reimplement requirement and version parsing

This commit is contained in:
DavHau 2020-11-21 00:08:20 +07:00
parent cb04f10167
commit 3ff3501551
11 changed files with 195 additions and 126 deletions

View file

@ -5,13 +5,14 @@ import re
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from operator import itemgetter
from typing import List, Tuple, Iterable
import distlib.markers
from pkg_resources import RequirementParseError
from mach_nix.requirements import filter_reqs_by_eval_marker, Requirement, parse_reqs, context
from mach_nix.versions import PyVer, ver_sort_key, filter_versions, parse_ver, Version
from mach_nix.requirements import filter_reqs_by_eval_marker, Requirement, parse_reqs, context, filter_versions
from mach_nix.versions import PyVer, ver_sort_key, parse_ver, Version
from .bucket_dict import LazyBucketDict
from .nixpkgs import NixpkgsIndex
from ..cache import cached
@ -89,7 +90,7 @@ class DependencyProviderBase(ABC):
@cached()
def find_matches(self, req) -> List[Candidate]:
all = list(self.all_candidates_sorted(req.key, req.extras, req.build))
matching_versions = set(filter_versions([c.ver for c in all], req.specs))
matching_versions = set(filter_versions([c.ver for c in all], req))
matching_candidates = [c for c in all if c.ver in matching_versions]
return matching_candidates
@ -204,7 +205,6 @@ class CombinedDependencyProvider(DependencyProviderBase):
f"If it still doesn't work, there was probably a problem while crawling pypi.\n" \
f"Please open an issue at: https://github.com/DavHau/mach-nix/issues/new\n"
print(error_text, file=sys.stderr)
exit(1)
@cached()
def all_candidates_sorted(self, pkg_name, extras=None, build=None) -> Iterable[Candidate]:
@ -213,9 +213,9 @@ class CombinedDependencyProvider(DependencyProviderBase):
# order by reversed preference expected
for provider in reversed(tuple(self.allowed_providers_for_pkg(pkg_name).values())):
candidates += list(provider.all_candidates_sorted(pkg_name, extras, build))
if candidates:
return tuple(candidates)
self.print_error_no_versions_available(pkg_name, extras, build)
if not candidates:
self.print_error_no_versions_available(pkg_name, extras, build)
return tuple(candidates)
def all_candidates(self, name, extras=None, build=None) -> Iterable[Candidate]:
return self.all_candidates_sorted(name, extras, build)
@ -397,7 +397,7 @@ class WheelDependencyProvider(DependencyProviderBase):
ver = parse_ver('.'.join(self.py_ver_digits))
try:
parsed_py_requires = list(parse_reqs(f"python{wheel.requires_python}"))
return bool(filter_versions([ver], parsed_py_requires[0].specs))
return bool(filter_versions([ver], parsed_py_requires[0]))
except RequirementParseError:
print(f"WARNING: `requires_python` attribute of wheel {wheel.name}:{wheel.ver} could not be parsed")
return False
@ -503,7 +503,8 @@ class CondaDependencyProvider(DependencyProviderBase):
for file in files:
with open(file) as f:
content = json.load(f)
for fname, p in content['packages'].items():
for i, fname in enumerate(content['packages'].keys()):
p = content['packages'][fname]
name = p['name'].replace('_', '-').lower()
ver = p['version']
build = p['build']
@ -514,7 +515,7 @@ class CondaDependencyProvider(DependencyProviderBase):
if build in self.pkgs[name][ver]:
if 'collisions' not in self.pkgs[name][ver][build]:
self.pkgs[name][ver][build]['collisions'] = []
self.pkgs[name][ver][build]['collisions'].append(p['subdir'])
self.pkgs[name][ver][build]['collisions'].append((p['name'], p['subdir']))
continue
self.pkgs[name][ver][build] = p
self.pkgs[name][ver][build]['fname'] = fname
@ -529,7 +530,8 @@ class CondaDependencyProvider(DependencyProviderBase):
deviated_ver = self.deviated_version(name, c.ver, c.build)
candidate = self.pkgs[name][deviated_ver][c.build]
depends = list(filter(
lambda d: d.split()[0] not in self.ignored_pkgs and not d.startswith('_'),
lambda d: d.split()[0] not in self.ignored_pkgs,
# lambda d: d.split()[0] not in self.ignored_pkgs and not d.startswith('_'),
candidate['depends']
# always add optional dependencies to ensure constraints are applied
+ (candidate['constrains'] if 'constrains' in candidate else [])
@ -563,7 +565,9 @@ class CondaDependencyProvider(DependencyProviderBase):
))
if 'collisions' in p:
print(
f"WARNING: Colliding conda package in {self.channel}. Ignoring {p['name']} from {p['collisions']} "
f"WARNING: Colliding conda package in channel '{self.channel}' "
f"Ignoring {list(map(itemgetter(0), p['collisions']))} "
f"from {list(map(itemgetter(1), p['collisions']))} "
f"in favor of {p['name']} from '{p['subdir']}'")
return candidates
@ -576,9 +580,11 @@ class CondaDependencyProvider(DependencyProviderBase):
def python_ok(self, build):
for dep in build['depends']:
if dep == "pypy" or dep.startswith("pypy "):
return False
if dep.startswith("python "):
req = next(iter(parse_reqs([dep])))
if not filter_versions([self.py_ver_parsed], req.specs):
if not filter_versions([self.py_ver_parsed], req):
return False
return True

View file

@ -79,7 +79,8 @@ def handle_resolution_impossible(exc: ResolutionImpossible, reqs_str, providers_
causes_str += f"\n {ri.requirement}"
if ri.parent:
causes_str += \
f" - parent: {ri.parent.name}{ri.parent.extras if ri.parent.extras else ''}:{ri.parent.ver}"
f" - parent: {ri.parent.name}" \
f"{ri.parent.selected_extras if ri.parent.selected_extras else ''}:{ri.parent.ver}"
nl = '\n'
print(
f"\nSome requirements could not be resolved.\n"

View file

@ -1,7 +1,8 @@
{
"url": "https://github.com/davhau/conda-channels",
"rev": "366d2706c153488c01ceb882674b04be5ea1d3d4",
"date": "2020-11-19T02:00:42+00:00",
"sha256": "1n8ggw709lk4qfynhcjpgz9l6wkrplhaays5f7588ikw9mmk6j8h",
"fetchSubmodules": false
"url": "https://github.com/davhau/conda-channels",
"rev": "25d1eb5b42719e51821a043a6504c792400f23b4",
"date": "2020-11-20T14:00:42+00:00",
"sha256": "10jvzmbqkcy4706zsb4gva1d1z7vin7azarjia1jrxfxappw6gng",
"fetchSubmodules": false,
"indexSha256": "41f29befeb5e263e808eaf98985c7a2f4d5190ca6172e9cef759f068f72662e5"
}

View file

@ -1,7 +1,7 @@
{
"url": "https://github.com/davhau/pypi-deps-db",
"rev": "ae8b4b93e71245f954f73f5d1dc4597872be4e14",
"date": "2020-11-18T20:12:07+00:00",
"sha256": "085y8nnnk713h54cfix87sdzinf0vy6wy0d6dk2724b3m2drjsrx",
"rev": "d1685e891fc43c56c6fcb2effaa1eebad9ff92e1",
"date": "2020-11-20T08:09:48+00:00",
"sha256": "1jhi3bh77i2d759m153f52sj1sx9dw4fazzm8jlj018lhnlhih9w",
"fetchSubmodules": false
}

View file

@ -10,7 +10,7 @@
# Hash obtained using `nix-prefetch-url --unpack https://github.com/DavHau/pypi-deps-db/tarball/<pypi_deps_db_commit>`
pypiDataSha256 ? (builtins.fromJSON (builtins.readFile ./PYPI_DEPS_DB.json)).sha256,
condaDataRev ? (builtins.fromJSON (builtins.readFile ./CONDA_CHANNELS.json)).rev,
condaDataSha256 ? (builtins.fromJSON (builtins.readFile ./CONDA_CHANNELS.json)).sha256,
condaDataSha256 ? (builtins.fromJSON (builtins.readFile ./CONDA_CHANNELS.json)).indexSha256,
_providerDefaults ? with builtins; fromTOML (readFile ../provider_defaults.toml)
}:
@ -61,7 +61,7 @@ let
inherit nixpkgs_json;
inherit (db_and_fetcher) pypi_deps_db_src pypi_fetcher_commit pypi_fetcher_sha256;
conda_channels_json = (import ./conda-channels.nix {
inherit condaChannelsExtra pkgs;
inherit condaChannelsExtra condaDataRev condaDataSha256 pkgs;
providers = _providers;
}).condaChannelsJson;
disable_checks = ! tests;

View file

@ -7,8 +7,8 @@
# conda-channels index
repoName ? "conda-channels",
repoOwner ? "DavHau",
rev ? "e742cc6152473ddffb33e91181ff5d1b23222fc8",
sha256 ? "1dqxni9yjk1g327blmz3n9fmnp7vs9syr3hf7xzhnramkng1fb30",
condaDataRev ? (builtins.fromJSON (builtins.readFile ./CONDA_CHANNELS.json)).rev,
condaDataSha256 ? (builtins.fromJSON (builtins.readFile ./CONDA_CHANNELS.json)).indexSha256
}:
with builtins;
with pkgs.lib;
@ -28,8 +28,8 @@ let
channelRegistry = fromJSON (readFile (fetchurl {
name = "conda-channels-index";
url = "https://raw.githubusercontent.com/${repoOwner}/${repoName}/${rev}/sha256.json";
inherit sha256;
url = "https://raw.githubusercontent.com/${repoOwner}/${repoName}/${condaDataRev}/sha256.json";
sha256 = condaDataSha256;
}));
registryChannels = mapAttrs' (filepath: hash:
@ -41,7 +41,7 @@ let
nameValuePair
chan
(map (sys: (builtins.fetchurl {
url = "https://raw.githubusercontent.com/${repoOwner}/${repoName}/${rev}/${chan}/${sys}.json";
url = "https://raw.githubusercontent.com/${repoOwner}/${repoName}/${condaDataRev}/${chan}/${sys}.json";
sha256 = channelRegistry."./${chan}/${sys}.json";
})) [ systemMap."${system}" "noarch" ])
) channelRegistry;

View file

@ -3,3 +3,4 @@ set -e
nix-shell -p nix-prefetch-git --run "nix-prefetch-git --url https://github.com/davhau/pypi-deps-db --rev refs/heads/master --no-deepClone" | python -m json.tool - PYPI_DEPS_DB.json
nix-shell -p nix-prefetch-git --run "nix-prefetch-git --url https://github.com/nixos/nixpkgs --rev refs/heads/nixpkgs-unstable --no-deepClone" | python -m json.tool - NIXPKGS.json
nix-shell -p nix-prefetch-git --run "nix-prefetch-git --url https://github.com/davhau/conda-channels --rev refs/heads/master --no-deepClone" | python -m json.tool - CONDA_CHANNELS.json
# Record the sha256 of the conda-channels index file so nix can verify it.
# curl -f: fail on HTTP errors instead of silently hashing an error page;
# -sS: no progress bar but real errors still shown. The substitution is
# quoted so an empty/failed download cannot word-split the jq arguments.
hash="$(curl -fsSL https://raw.githubusercontent.com/DavHau/conda-channels/master/sha256.json | sha256sum | awk '{ print $1 }')"
jq --arg hash "$hash" '. + {indexSha256: $hash}' CONDA_CHANNELS.json > C.json && mv C.json CONDA_CHANNELS.json

View file

@ -1,13 +1,14 @@
import re
from typing import Iterable
from typing import Iterable, Tuple, List
import distlib.markers
import pkg_resources
from conda.models.version import ver_eval
from distlib.markers import DEFAULT_CONTEXT
from pkg_resources._vendor.packaging.specifiers import SpecifierSet
from mach_nix.cache import cached
from mach_nix.versions import PyVer
from mach_nix.versions import PyVer, Version, parse_ver
def context(py_ver: PyVer, platform: str, system: str):
@ -23,7 +24,26 @@ def context(py_ver: PyVer, platform: str, system: str):
return context
class Requirement(pkg_resources.Requirement):
class Requirement:
    """A parsed requirement: normalized package name, selected extras,
    version specifier groups, optional conda build string, and optional
    environment marker.

    `specs` is a tuple of OR-alternatives; each alternative is a tuple of
    (operator, version) pairs that are ANDed together.
    """

    def __init__(self, name, extras, specs: Tuple[Tuple[Tuple[str, str]]], build=None, marker=None):
        # normalize the name PEP 503-style: case-insensitive, '_' -> '-'
        self.name = name.lower().replace('_', '-')
        self.extras = extras or tuple()
        self.specs = specs or tuple()
        self.build = build
        self.marker = marker

    def __repr__(self):
        parts = (self.name, self.extras, self.specs, self.build, self.marker)
        return ' '.join(str(part) for part in parts if part)

    @property
    def key(self):
        # alias for `name`, kept for compatibility with code that used
        # pkg_resources.Requirement.key
        return self.name

    def __hash__(self):
        # identity used for caching: name, specs and build
        # (extras and marker are deliberately not part of the hash here)
        return hash((self.name, self.specs, self.build))
class RequirementOld(pkg_resources.Requirement):
def __init__(self, line, build=None):
self.build = build
super(Requirement, self).__init__(line)
@ -68,35 +88,97 @@ def parse_reqs(strs):
yield Requirement(*parse_reqs_line(line))
re_specs = re.compile(r"(==|!=|>=|<=|>|<|~=)(.*)")


def parse_specs(spec_str):
    """Parse a comma separated specifier string like ">=1.10.5,<1.10.6.0a0"
    into a tuple of (operator, version) pairs.

    Every chunk must begin with one of the operators in `re_specs`;
    whitespace around chunks and versions is discarded.
    """
    chunks = (chunk.strip() for chunk in spec_str.split(','))
    matched = (re.fullmatch(re_specs, chunk).groups() for chunk in chunks)
    return tuple((op, ver.strip()) for op, ver in matched)
extra_name = r"([a-z]|[A-Z]|-|_|\d)+"
re_marker_extras = re.compile(rf"extra *== *'?({extra_name})'?")


def extras_from_marker(marker):
    """Collect extras names referenced via `extra == '<name>'` clauses in
    an environment marker string; returns an empty tuple when none occur."""
    found = re.findall(re_marker_extras, marker)
    # findall yields one tuple of groups per match; group 0 holds the
    # complete extras name (group 1 is only the last repeated character)
    return tuple(groups[0] for groups in found)
re_reqs = re.compile(
r"^(([a-z]|[A-Z]|-|_|\d|\.)+)" # name
r"("
rf"(\[({extra_name},?)+\])?" # extras
r" *\(?(([,\|]? *(==|!=|>=|<=|>|<|~=|=)? *(\* |\d(\d|\.|\*|[a-z])*))+(?![_\d]))\)?" # specs
r"( *([a-z]|\d|_|\*)+)?" # build
r")?"
r"( *[:;] *(.*))?$") # marker
def parse_reqs_line(line):
build = None
line = line.strip(' ,')
splitted = line.split(' ')
match = re.fullmatch(re_reqs, line)
if not match:
raise Exception(f"couldn't parse: '{line}'")
groups = list(match.groups())
name = groups[0]
# conda spec with build like "tensorflow-base 2.0.0 gpu_py36h0ec5d1f_0"
# or "hdf5 >=1.10.5,<1.10.6.0a0 mpi_mpich_*"
if len(splitted) == 3 \
and not splitted[1] in all_ops \
and not any(op in splitted[0]+splitted[2] for op in all_ops) \
and (
splitted[-1].isdigit()
or (len(splitted[-1]) > 1 and splitted[-1][-2] == '_')
or '*' in splitted[-1]
or not any(op in splitted[1] for op in all_ops)
):
name, ver_spec, build = splitted
if not any(op in ver_spec for op in all_ops):
ver_spec = f"=={ver_spec}"
line = f"{name}{ver_spec}"
extras = groups[3]
if extras:
extras = tuple(extras.strip('[]').split(','))
else:
extras = tuple()
# parse conda specifiers without operator like "requests 2.24.*"
elif len(splitted) == 2:
name, ver_spec = splitted
if not any(op in name + ver_spec for op in all_ops):
ver_spec = f"=={ver_spec}"
line = f"{name}{ver_spec}"
all_specs = groups[6]
if all_specs:
all_specs = all_specs.split('|')
for i, specs in enumerate(all_specs):
if not re.search(r"==|!=|>=|<=|>|<|~=|=", specs):
all_specs[i] = '==' + specs
continue
if re.fullmatch(r"=\d(\d|\.|\*|[a-z])*", specs):
all_specs[i] = '=' + specs
all_specs = tuple(map(parse_specs, all_specs))
if build == "*":
build = None
build = groups[11]
if build:
build = build.strip()
return line, build
marker = groups[14]
if marker:
extras_marker = extras_from_marker(marker)
extras = extras + extras_marker
return name, extras, all_specs, build, marker
@cached(keyfunc=lambda args: hash((tuple(args[0]), args[1])))
def filter_versions(
        versions: List[Version],
        req: Requirement) -> List[Version]:
    """
    Reduces a given list of versions to contain only versions
    which are allowed according to the given requirement.

    `req.specs` is a tuple of OR-alternatives, each alternative being a
    tuple of ANDed (operator, version) pairs. A version is kept if it
    satisfies every pair of at least one alternative. No specs at all
    (or a bare `== *`) allows every version.
    """
    assert isinstance(versions, list)
    versions = list(versions)
    if not req.specs:
        return versions
    all_versions = []
    for specs in req.specs:
        # BUG FIX: every OR-alternative must be evaluated against the full
        # input list. Previously the list already narrowed by the preceding
        # alternative was re-filtered, so versions permitted only by a later
        # alternative were wrongly dropped (e.g. for
        # ">=0.12.4,<0.16|0.16.5.*" no 0.16.5.x version could ever survive),
        # and versions passing consecutive alternatives were added twice.
        candidates = versions
        for op, ver in specs:
            if op == '==':
                if str(ver) == "*":
                    # wildcard: everything matches
                    return versions
                elif '*' in str(ver):
                    # conda-style fuzzy match, e.g. "==2.24.*" -> "=2.24.*"
                    op = '='
            ver = parse_ver(ver)
            candidates = [v for v in candidates if ver_eval(v, f"{op}{ver}")]
        all_versions += candidates
    # NOTE: a version matching several alternatives appears once per match;
    # callers de-duplicate where it matters (e.g. via set() in find_matches).
    return all_versions

View file

@ -5,9 +5,8 @@ import resolvelib
from mach_nix.data.nixpkgs import NixpkgsIndex
from mach_nix.data.providers import DependencyProviderBase, Candidate
from mach_nix.deptree import remove_circles_and_print
from mach_nix.requirements import Requirement
from mach_nix.requirements import Requirement, filter_versions
from mach_nix.resolver import Resolver, ResolvedPkg
from mach_nix.versions import filter_versions
# Implement logic so the resolver understands the requirement format.
@ -33,10 +32,7 @@ class Provider:
return self.provider.find_matches(req)
def is_satisfied_by(self, requirement, candidate: Candidate):
res = None
if not set(requirement.extras).issubset(set(candidate.selected_extras)):
res = False
res = bool(len(list(filter_versions([candidate.ver], requirement.specs))))
res = bool(len(list(filter_versions([candidate.ver], requirement))))
return res
def get_dependencies(self, candidate):

View file

@ -3,40 +3,47 @@ import pytest
from mach_nix.requirements import parse_reqs_line
@pytest.mark.parametrize("exp_build, exp_line, line", [
(None, 'requests==2.24.0', 'requests 2.24.0'),
(None, 'requests == 2.24.0', 'requests == 2.24.0'),
(None, 'requests==2.24.0', 'requests 2.24.0'),
(None, 'requests==2.24.0', 'requests 2.24.0 '),
(None, 'requests==2.24.0', ' requests 2.24.0 '),
(None, 'pdfminer.six == 20200726', 'pdfminer.six == 20200726'),
# multiple specs
("mpi_mpich_*", 'hdf5>=1.10.5,<1.10.6.0a0', 'hdf5 >=1.10.5,<1.10.6.0a0 mpi_mpich_*'),
# asterisk
('openblas', 'blas==1.*', 'blas 1.* openblas'),
('openblas', 'blas==*', 'blas * openblas'),
('openblas', 'blas==1.1', 'blas 1.1 openblas'),
('build123*', 'requests>=2.24.0', 'requests >=2.24.0 build123*'),
('build123*', 'requests==2.24.*', 'requests ==2.24.* build123*'),
('build123*', 'requests==2.24.*', 'requests 2.24.* build123*'),
('build123*', 'requests==2.24.0', 'requests 2.24.0 build123*'),
(None, 'requests==2.24.0', 'requests 2.24.0 *'),
# stripping
('build123*', 'requests==2.24.0', ' requests 2.24.0 build123*'),
('build123*', 'requests==2.24.0', 'requests 2.24.0 build123* '),
('build123*', 'requests==2.24.0', ' requests 2.24.0 build123* '),
# spacing
(None, 'python>=3.5', 'python>= 3.5'),
(None, 'python>=3.5', 'python >=3.5'),
# test 3 parts non-conda
(None, 'python >=2.6, !=3.0.*', 'python >=2.6, !=3.0.*'),
@pytest.mark.parametrize("input, exp_output", [
('requests', ('requests', (), None, None, None))
, ('requests[socks] ==2.24.0', ('requests', ('socks',), ((('==', '2.24.0'),),), None, None))
, ('requests[socks,test] 2.24.0', ('requests', ('socks', 'test'), ((('==', '2.24.0'),),), None, None))
, ('python >=2.7,<2.8.0a0', ('python', (), ((('>=', '2.7'), ('<', '2.8.0a0')),), None, None))
, ('requests == 2.24.0', ('requests', (), ((('==', '2.24.0'),),), None, None))
, ('pdfminer.six == 20200726', ('pdfminer.six', (), ((('==', '20200726'),),), None, None))
, ('python>= 3.5', ('python', (), ((('>=', '3.5'),),), None, None))
, ('python >=3.5', ('python', (), ((('>=', '3.5'),),), None, None))
, ('python >=2.6, !=3.0.*', ('python', (), ((('>=', '2.6'), ('!=', '3.0.*')),), None, None))
, ("unittest2 >=2.0,<3.0 ; python_version == '2.4' or python_version == '2.5'",
('unittest2', (), ((('>=', '2.0'), ('<', '3.0')),), None, "python_version == '2.4' or python_version == '2.5'"))
, ("pywin32 > 1.0 : sys.platform == 'win32'", ('pywin32', (), ((('>', '1.0'),),), None, "sys.platform == 'win32'"))
, ("certifi (==2016.9.26) ; extra == 'certs'",
('certifi', ('certs',), ((('==', '2016.9.26'),),), None, "extra == 'certs'"))
, ("sphinx ; extra == 'docs'", ('sphinx', ('docs',), None, None, "extra == 'docs'"))
, ('requests 2.24.0', ('requests', (), ((('==', '2.24.0'),),), None, None))
, ('requests 2.24.0', ('requests', (), ((('==', '2.24.0'),),), None, None))
, ('requests 2.24.0', ('requests', (), ((('==', '2.24.0'),),), None, None))
, ('requests 2.24.0', ('requests', (), ((('==', '2.24.0'),),), None, None))
, ('hdf5 >=1.10.5,<1.10.6.0a0 mpi_mpich_*',
('hdf5', (), ((('>=', '1.10.5'), ('<', '1.10.6.0a0')),), 'mpi_mpich_*', None))
, ('blas 1.* openblas', ('blas', (), ((('==', '1.*'),),), 'openblas', None))
, ('blas * openblas', ('blas', (), ((('==', '*'),),), 'openblas', None))
, ('blas 1.1 openblas', ('blas', (), ((('==', '1.1'),),), 'openblas', None))
, ('requests >=2.24.0 build123*', ('requests', (), ((('>=', '2.24.0'),),), 'build123*', None))
, ('requests ==2.24.* build123*', ('requests', (), ((('==', '2.24.*'),),), 'build123*', None))
, ('requests 2.24.* build123*', ('requests', (), ((('==', '2.24.*'),),), 'build123*', None))
, ('requests 2.24.0 build123*', ('requests', (), ((('==', '2.24.0'),),), 'build123*', None))
, ('requests 2.24.0 *bla', ('requests', (), ((('==', '2.24.0'),),), '*bla', None))
, ('requests 2.24.0 *', ('requests', (), ((('==', '2.24.0'),),), '*', None))
, ('requests * *bla', ('requests', (), ((('==', '*'),),), '*bla', None))
, ('requests * *', ('requests', (), ((('==', '*'),),), '*', None))
, ('requests 2.24.0 build123*', ('requests', (), ((('==', '2.24.0'),),), 'build123*', None))
, ('requests 2.24.0 build123*', ('requests', (), ((('==', '2.24.0'),),), 'build123*', None))
, ('requests 2.24.0 build123*', ('requests', (), ((('==', '2.24.0'),),), 'build123*', None))
, ('ruamel.yaml >=0.12.4,<0.16|0.16.5.*',
('ruamel.yaml', (), ((('>=', '0.12.4'), ('<', '0.16')), (('==', '0.16.5.*'),)), None, None))
, ('openjdk =8|11', ('openjdk', (), ((('==', '8'),), (('==', '11'),)), None, None))
, ('python 3.6.9 ab_73_pypy', ('python', (), ((('==', '3.6.9'),),), 'ab_73_pypy', None))
])
def test_parse_requirements(exp_build, exp_line, line):
new_line, build = parse_reqs_line(line)
assert (build, new_line) == (exp_build, exp_line)
def test_parse_requirements(input, exp_output):
assert parse_reqs_line(input) == exp_output

View file

@ -1,13 +1,10 @@
import sys
import traceback
from typing import Iterable, Tuple, List
from typing import Iterable
import packaging.version
from conda.common.compat import with_metaclass
from conda.models.version import ver_eval, VersionOrder, SingleStrArgCachingType
from packaging.version import LegacyVersion
from mach_nix.cache import cached
@with_metaclass(SingleStrArgCachingType)
@ -72,25 +69,3 @@ def ver_sort_key(ver: Version):
def best_version(versions: Iterable[Version]) -> Version:
return sorted(versions)[-1]
@cached(keyfunc=lambda args: hash(tuple(args[0]) + tuple(args[1])))
def filter_versions(
versions: List[Version],
specs: List[Tuple[str, str]]) -> List[Version]:
"""
Reduces a given list of versions to contain only versions
which are allowed according to the given specifiers
"""
versions = list(versions)
assert len(versions) > 0
for op, ver in specs:
if op == '==':
if str(ver) == "*":
return versions
elif '*' in str(ver):
op = '='
ver = parse_ver(ver)
versions = list(filter(lambda v: ver_eval(v, f"{op}{ver}"), versions))
return list(versions)