pip_resolve.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. import io
  2. import sys
  3. import os
  4. import json
  5. import re
  6. import argparse
  7. import utils
  8. import requirements
  9. import pipfile
  10. import setup_file
  11. import codecs
  12. from operator import le, lt, gt, ge, eq, ne
  13. try:
  14. import pkg_resources
  15. except ImportError:
  16. # try using the version vendored by pip
  17. try:
  18. import pip._vendor.pkg_resources as pkg_resources
  19. except ImportError:
  20. raise ImportError(
  21. "Could not import pkg_resources; please install setuptools or pip.")
  22. PYTHON_MARKER_REGEX = re.compile(r'python_version\s*(?P<operator>==|<=|>=|>|<)\s*[\'"](?P<python_version>.+?)[\'"]')
  23. SYSTEM_MARKER_REGEX = re.compile(r'sys_platform\s*==\s*[\'"](.+)[\'"]')
  24. def format_provenance_label(prov_tuple):
  25. fn, ln1, ln2 = prov_tuple
  26. if ln1 == ln2:
  27. return fn + ':' + str(ln1)
  28. else:
  29. return fn + ':' + str(ln1) + '-' + str(ln2)
  30. def create_tree_of_packages_dependencies(
  31. dist_tree,
  32. top_level_requirements,
  33. req_file_path,
  34. allow_missing=False,
  35. only_provenance=False
  36. ):
  37. """Create packages dependencies tree
  38. :param dict tree: the package tree
  39. :param set packages_names: set of select packages to be shown in the output.
  40. :param req_file_path: the path to the dependencies file
  41. (e.g. requirements.txt)
  42. :rtype: dict
  43. """
  44. DEPENDENCIES = 'dependencies'
  45. VERSION = 'version'
  46. NAME = 'name'
  47. DIR_VERSION = '0.0.0'
  48. PACKAGE_FORMAT_VERSION = 'packageFormatVersion'
  49. LABELS = 'labels'
  50. PROVENANCE = 'provenance'
  51. tree = utils.sorted_tree(dist_tree)
  52. nodes = tree.keys()
  53. key_tree = dict((k.key, v) for k, v in tree.items())
  54. lowercase_pkgs_names = [p.name.lower() for p in top_level_requirements]
  55. tlr_by_key = dict((tlr.name.lower(), tlr) for tlr in top_level_requirements)
  56. packages_as_dist_obj = [
  57. p for p in nodes if
  58. p.key.lower() in lowercase_pkgs_names or
  59. (p.project_name and p.project_name.lower()) in lowercase_pkgs_names]
  60. def create_children_recursive(root_package, key_tree, ancestors, all_packages_map):
  61. root_name = root_package[NAME].lower()
  62. if root_name not in key_tree:
  63. msg = 'Required packages missing: ' + root_name
  64. if allow_missing:
  65. sys.stderr.write(msg + "\n")
  66. return
  67. else:
  68. sys.exit(msg)
  69. ancestors = ancestors.copy()
  70. ancestors.add(root_name)
  71. children_packages_as_dist = key_tree[root_name]
  72. for child_dist in children_packages_as_dist:
  73. child_project_name = child_dist.project_name.lower()
  74. if child_project_name in ancestors:
  75. continue
  76. if DEPENDENCIES not in root_package:
  77. root_package[DEPENDENCIES] = {}
  78. if child_project_name in root_package[DEPENDENCIES]:
  79. continue
  80. if child_project_name in all_packages_map and child_project_name not in root_package[DEPENDENCIES]:
  81. root_package[DEPENDENCIES][child_project_name] = 'true'
  82. continue
  83. child_package = {
  84. NAME: child_project_name,
  85. VERSION: child_dist.installed_version,
  86. }
  87. create_children_recursive(child_package, key_tree, ancestors, all_packages_map)
  88. root_package[DEPENDENCIES][child_project_name] = child_package
  89. all_packages_map[child_project_name] = 'true'
  90. return root_package
  91. def create_dir_as_root():
  92. name, version = None, None
  93. if os.path.basename(req_file_path) == 'setup.py':
  94. with open(req_file_path, "r") as setup_py_file:
  95. name, version = setup_file.parse_name_and_version(setup_py_file.read())
  96. dir_as_root = {
  97. NAME: name or os.path.basename(os.path.dirname(os.path.abspath(req_file_path))),
  98. VERSION: version or DIR_VERSION,
  99. DEPENDENCIES: {},
  100. PACKAGE_FORMAT_VERSION: 'pip:0.0.1'
  101. }
  102. return dir_as_root
  103. def create_package_as_root(package, dir_as_root):
  104. package_as_root = {
  105. NAME: package.project_name.lower(),
  106. # Note: _version is a private field.
  107. VERSION: package._obj._version,
  108. }
  109. return package_as_root
  110. dir_as_root = create_dir_as_root()
  111. all_packages_map = {}
  112. for package in packages_as_dist_obj:
  113. package_as_root = create_package_as_root(package, dir_as_root)
  114. if only_provenance:
  115. package_as_root[LABELS] = {PROVENANCE: format_provenance_label(tlr_by_key[package_as_root[NAME]].provenance)}
  116. dir_as_root[DEPENDENCIES][package_as_root[NAME]] = package_as_root
  117. else:
  118. package_tree = create_children_recursive(package_as_root, key_tree, set([]), all_packages_map)
  119. dir_as_root[DEPENDENCIES][package_as_root[NAME]] = package_tree
  120. return dir_as_root
  121. def satisfies_python_requirement(parsed_operator, py_version_str):
  122. # TODO: use python semver library to compare versions
  123. operator_func = {
  124. ">": gt,
  125. "==": eq,
  126. "<": lt,
  127. "<=": le,
  128. ">=": ge,
  129. '!=': ne,
  130. }[parsed_operator]
  131. system_py_version_tuple = (sys.version_info[0], sys.version_info[1])
  132. py_version_tuple = tuple(py_version_str.split('.')) # string tuple
  133. if py_version_tuple[-1] == '*':
  134. system_py_version_tuple = system_py_version_tuple[0]
  135. py_version_tuple = int(py_version_tuple[0]) # int tuple
  136. else:
  137. py_version_tuple = tuple(int(x) for x in py_version_tuple) # int tuple
  138. return operator_func(system_py_version_tuple, py_version_tuple)
  139. def get_markers_text(requirement):
  140. if isinstance(requirement, pipfile.PipfileRequirement):
  141. return requirement.markers
  142. return requirement.line
  143. def matches_python_version(requirement):
  144. """Filter out requirements that should not be installed
  145. in this Python version.
  146. See: https://www.python.org/dev/peps/pep-0508/#environment-markers
  147. """
  148. markers_text = get_markers_text(requirement)
  149. if not (markers_text and re.match(".*;.*python_version", markers_text)):
  150. return True
  151. cond_text = markers_text.split(";", 1)[1]
  152. # Gloss over the 'and' case and return true on the first matching python version
  153. for sub_exp in re.split("\s*(?:and|or)\s*", cond_text):
  154. match = PYTHON_MARKER_REGEX.search(sub_exp)
  155. if match:
  156. match_dict = match.groupdict()
  157. if len(match_dict) == 2 and satisfies_python_requirement(
  158. match_dict['operator'],
  159. match_dict['python_version']
  160. ):
  161. return True
  162. return False
  163. def matches_environment(requirement):
  164. """Filter out requirements that should not be installed
  165. in this environment. Only sys_platform is inspected right now.
  166. This should be expanded to include other environment markers.
  167. See: https://www.python.org/dev/peps/pep-0508/#environment-markers
  168. """
  169. sys_platform = sys.platform.lower()
  170. markers_text = get_markers_text(requirement)
  171. if markers_text and 'sys_platform' in markers_text:
  172. match = SYSTEM_MARKER_REGEX.findall(markers_text)
  173. if len(match) > 0:
  174. return match[0].lower() == sys_platform
  175. return True
  176. def is_testable(requirement):
  177. return requirement.editable == False and requirement.vcs is None
  178. def detect_encoding_by_bom(path):
  179. with open(path, 'rb') as f:
  180. raw = f.read(4) # will read less if the file is smaller
  181. # BOM_UTF32_LE's start is equal to BOM_UTF16_LE so need to try the former first
  182. for enc, boms in \
  183. ('utf-8-sig', (codecs.BOM_UTF8,)), \
  184. ('utf-32', (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)), \
  185. ('utf-16', (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
  186. if any(raw.startswith(bom) for bom in boms):
  187. return enc
  188. return None
  189. def get_requirements_list(requirements_file_path, dev_deps=False):
  190. # TODO: refactor recognizing the dependency manager to a single place
  191. if os.path.basename(requirements_file_path) == 'Pipfile':
  192. with io.open(requirements_file_path, 'r', encoding='utf-8') as f:
  193. requirements_data = f.read()
  194. parsed_reqs = pipfile.parse(requirements_data)
  195. req_list = list(parsed_reqs.get('packages', []))
  196. if dev_deps:
  197. req_list.extend(parsed_reqs.get('dev-packages', []))
  198. if not req_list:
  199. return []
  200. else:
  201. for r in req_list:
  202. r.provenance = (requirements_file_path, r.provenance[1], r.provenance[2])
  203. elif os.path.basename(requirements_file_path) == 'setup.py':
  204. with open(requirements_file_path, 'r') as f:
  205. setup_py_file_content = f.read()
  206. requirements_data = setup_file.parse_requirements(setup_py_file_content)
  207. req_list = list(requirements.parse(requirements_data))
  208. provenance = setup_file.get_provenance(setup_py_file_content)
  209. for req in req_list:
  210. req.provenance = (
  211. os.path.basename(requirements_file_path),
  212. provenance,
  213. provenance
  214. )
  215. else:
  216. # assume this is a requirements.txt formatted file
  217. # Note: requirements.txt files are unicode and can be in any encoding.
  218. encoding = detect_encoding_by_bom(requirements_file_path)
  219. with io.open(requirements_file_path, 'r', encoding=encoding) as f:
  220. req_list = list(requirements.parse(f))
  221. req_list = filter(matches_environment, req_list)
  222. req_list = filter(is_testable, req_list)
  223. req_list = filter(matches_python_version, req_list)
  224. req_list = [r for r in req_list if r.name]
  225. for req in req_list:
  226. req.name = req.name.lower().replace('_', '-')
  227. return req_list
  228. def canonicalize_package_name(name):
  229. # https://packaging.python.org/guides/distributing-packages-using-setuptools/#name
  230. name = name.lower().replace('-', '.').replace('_', '.')
  231. name = re.sub(r'\.+', '.', name)
  232. return name
  233. def create_dependencies_tree_by_req_file_path(requirements_file_path,
  234. allow_missing=False,
  235. dev_deps=False,
  236. only_provenance=False,
  237. allow_empty=False):
  238. # get all installed packages
  239. pkgs = list(pkg_resources.working_set)
  240. # get all installed packages's distribution object
  241. dist_index = utils.build_dist_index(pkgs)
  242. # get all installed distributions tree
  243. dist_tree = utils.construct_tree(dist_index)
  244. # create a list of dependencies from the dependencies file
  245. required = get_requirements_list(requirements_file_path, dev_deps=dev_deps)
  246. if not required and not allow_empty:
  247. msg = 'No dependencies detected in manifest.'
  248. sys.exit(msg)
  249. else:
  250. installed = [canonicalize_package_name(p) for p in dist_index]
  251. top_level_requirements = []
  252. missing_package_names = []
  253. for r in required:
  254. if canonicalize_package_name(r.name) not in installed:
  255. missing_package_names.append(r.name)
  256. else:
  257. top_level_requirements.append(r)
  258. if missing_package_names:
  259. msg = 'Required packages missing: ' + (', '.join(missing_package_names))
  260. if allow_missing:
  261. sys.stderr.write(msg + "\n")
  262. else:
  263. sys.exit(msg)
  264. # build a tree of dependencies
  265. package_tree = create_tree_of_packages_dependencies(
  266. dist_tree, top_level_requirements, requirements_file_path, allow_missing, only_provenance)
  267. print(json.dumps(package_tree))
  268. def main():
  269. """Builds the dependency tree from the manifest file (Pipfile or requirements.txt) and
  270. prints it as JSON. The tree nodes are:
  271. interface DepTree {
  272. name: string;
  273. version?: string;
  274. dependencies?: {[n: string]: DepTree};
  275. labels: { provenance?: string };
  276. }
  277. The `provenance` label only present for the top-level nodes, indicates the position of the dependency
  278. version in the original file and is in the format "filename:lineNum" or "filename:lineFrom-lineTo",
  279. where line numbers are 1-based.
  280. """
  281. parser = argparse.ArgumentParser()
  282. parser.add_argument("requirements",
  283. help="dependencies file path (requirements.txt or Pipfile)")
  284. parser.add_argument("--allow-missing",
  285. action="store_true",
  286. help="don't fail if some packages listed in the dependencies file " +
  287. "are not installed")
  288. parser.add_argument("--dev-deps",
  289. action="store_true",
  290. help="resolve dev dependencies")
  291. parser.add_argument("--only-provenance",
  292. action="store_true",
  293. help="only return top level deps with provenance information")
  294. parser.add_argument("--allow-empty",
  295. action="store_true",
  296. help="return empty dep tree instead of throwing")
  297. args = parser.parse_args()
  298. create_dependencies_tree_by_req_file_path(
  299. args.requirements,
  300. allow_missing=args.allow_missing,
  301. dev_deps=args.dev_deps,
  302. only_provenance=args.only_provenance,
  303. allow_empty=args.allow_empty,
  304. )
# Run the command-line entry point only when this file is executed as a
# script. main() has no return statement, so sys.exit() receives None and a
# run that completes exits with status 0.
if __name__ == '__main__':
    sys.exit(main())