"""Cache Management
"""
import hashlib
import json
import logging
import os
from pathlib
import Path
from typing
import Any, Dict, List, Optional
from pip._vendor.packaging.tags
import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils
import canonicalize_name
from pip._internal.exceptions
import InvalidWheelFilename
from pip._internal.models.direct_url
import DirectUrl
from pip._internal.models.link
import Link
from pip._internal.models.wheel
import Wheel
from pip._internal.utils.temp_dir
import TempDirectory, tempdir_kinds
from pip._internal.utils.urls
import path_to_url
logger = logging.getLogger(__name__)
ORIGIN_JSON_NAME =
"origin.json"
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=
True, separators=(
",",
":"), ensure_ascii=
True)
return hashlib.sha224(s.encode(
"ascii")).hexdigest()
class Cache:
"""An abstract class - provides cache directories for data from links
:param cache_dir: The root of the cache.
"""
def __init__(self, cache_dir: str) ->
None:
super().__init__()
assert not cache_dir
or os.path.isabs(cache_dir)
self.cache_dir = cache_dir
or None
def _get_cache_path_parts(self, link: Link) -> List[str]:
"""Get parts of part that must be os.path.joined with cache_dir"""
# We want to generate an url to use as our cache key, we don't want to
# just re-use the URL because it might have other items in the fragment
# and we don't care about those.
key_parts = {
"url": link.url_without_fragment}
if link.hash_name
is not None and link.hash
is not None:
key_parts[link.hash_name] = link.hash
if link.subdirectory_fragment:
key_parts[
"subdirectory"] = link.subdirectory_fragment
# Include interpreter name, major and minor version in cache key
# to cope with ill-behaved sdists that build a different wheel
# depending on the python version their setup.py is being run on,
# and don't encode the difference in compatibility tags.
# https://github.com/pypa/pip/issues/7296
key_parts[
"interpreter_name"] = interpreter_name()
key_parts[
"interpreter_version"] = interpreter_version()
# Encode our key url with sha224, we'll use this because it has similar
# security properties to sha256, but with a shorter total output (and
# thus less secure). However the differences don't make a lot of
# difference for our use case here.
hashed = _hash_dict(key_parts)
# We want to nest the directories some to prevent having a ton of top
# level directories where we might run out of sub directories on some
# FS.
parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
return parts
def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
can_not_cache =
not self.cache_dir
or not canonical_package_name
or not link
if can_not_cache:
return []
path = self.get_path_for_link(link)
if os.path.isdir(path):
return [(candidate, path)
for candidate
in os.listdir(path)]
return []
def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached items in for link."""
raise NotImplementedError()
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
"""Returns a link to a cached item if it exists, otherwise returns the
passed link.
"""
raise NotImplementedError()
class SimpleWheelCache(Cache):
"""A cache of wheels for future installs."""
def __init__(self, cache_dir: str) ->
None:
super().__init__(cache_dir)
def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached wheels for link
Because there are M wheels
for any one sdist, we provide a directory
to cache them
in,
and then consult that directory when looking up
cache hits.
We only insert things into the cache
if they have plausible version
numbers, so that we don
't contaminate the cache with things that were
not unique. E.g. ./package might have dozens of installs done
for it
and build a version of 0.0...
and if we built
and cached a wheel, we
'd
end up using the same wheel even
if the source has been edited.
:param link: The link of the sdist
for which this will cache wheels.
"""
parts = self._get_cache_path_parts(link)
assert self.cache_dir
# Store wheels within the root cache_dir
return os.path.join(self.cache_dir,
"wheels", *parts)
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
candidates = []
if not package_name:
return link
canonical_package_name = canonicalize_name(package_name)
for wheel_name, wheel_dir
in self._get_candidates(link, canonical_package_name):
try:
wheel = Wheel(wheel_name)
except InvalidWheelFilename:
continue
if canonicalize_name(wheel.name) != canonical_package_name:
logger.debug(
"Ignoring cached wheel %s for %s as it "
"does not match the expected distribution name %s.",
wheel_name,
link,
package_name,
)
continue
if not wheel.supported(supported_tags):
# Built for a different python/arch/etc
continue
candidates.append(
(
wheel.support_index_min(supported_tags),
wheel_name,
wheel_dir,
)
)
if not candidates:
return link
_, wheel_name, wheel_dir = min(candidates)
return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
class EphemWheelCache(SimpleWheelCache):
"""A SimpleWheelCache that creates it's own temporary cache directory"""
def __init__(self) ->
None:
self._temp_dir = TempDirectory(
kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
globally_managed=
True,
)
super().__init__(self._temp_dir.path)
class CacheEntry:
def __init__(
self,
link: Link,
persistent: bool,
):
self.link = link
self.persistent = persistent
self.origin: Optional[DirectUrl] =
None
origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
if origin_direct_url_path.exists():
try:
self.origin = DirectUrl.from_json(
origin_direct_url_path.read_text(encoding=
"utf-8")
)
except Exception
as e:
logger.warning(
"Ignoring invalid cache entry origin file %s for %s (%s)",
origin_direct_url_path,
link.filename,
e,
)
class WheelCache(Cache):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache
This Cache allows
for gracefully degradation, using the ephem wheel cache
when a certain link
is not found
in the simple wheel cache first.
"""
def __init__(self, cache_dir: str) ->
None:
super().__init__(cache_dir)
self._wheel_cache = SimpleWheelCache(cache_dir)
self._ephem_cache = EphemWheelCache()
def get_path_for_link(self, link: Link) -> str:
return self._wheel_cache.get_path_for_link(link)
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
cache_entry = self.get_cache_entry(link, package_name, supported_tags)
if cache_entry
is None:
return link
return cache_entry.link
def get_cache_entry(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Optional[CacheEntry]:
"""Returns a CacheEntry with a link to a cached item if it exists or
None. The cache entry indicates
if the item was found
in the persistent
or ephemeral cache.
"""
retval = self._wheel_cache.get(
link=link,
package_name=package_name,
supported_tags=supported_tags,
)
if retval
is not link:
return CacheEntry(retval, persistent=
True)
retval = self._ephem_cache.get(
link=link,
package_name=package_name,
supported_tags=supported_tags,
)
if retval
is not link:
return CacheEntry(retval, persistent=
False)
return None
@staticmethod
def record_download_origin(cache_dir: str, download_info: DirectUrl) ->
None:
origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
if origin_path.exists():
try:
origin = DirectUrl.from_json(origin_path.read_text(encoding=
"utf-8"))
except Exception
as e:
logger.warning(
"Could not read origin file %s in cache entry (%s). "
"Will attempt to overwrite it.",
origin_path,
e,
)
else:
# TODO: use DirectUrl.equivalent when
# https://github.com/pypa/pip/pull/10564 is merged.
if origin.url != download_info.url:
logger.warning(
"Origin URL %s in cache entry %s does not match download URL "
"%s. This is likely a pip bug or a cache corruption issue. "
"Will overwrite it with the new value.",
origin.url,
cache_dir,
download_info.url,
)
origin_path.write_text(download_info.to_json(), encoding=
"utf-8")