#-----------------------------------------------------------------------------
|
# Copyright (c) 2013-2023, PyInstaller Development Team.
|
#
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
# you may not use this file except in compliance with the License.
|
#
|
# The full license is in the file COPYING.txt, distributed with this software.
|
#
|
# SPDX-License-Identifier: Apache-2.0
|
#-----------------------------------------------------------------------------
|
|
import os
|
import pathlib
|
import sys
|
|
import pkg_resources
|
from pyimod02_importers import FrozenImporter
|
|
SYS_PREFIX = pathlib.PurePath(sys._MEIPASS)
|
|
# To make pkg_resources work with frozen modules we need to set the 'Provider' class for FrozenImporter. This class
|
# decides where to look for resources and other stuff. 'pkg_resources.NullProvider' is dedicated to PEP302 import hooks
|
# like FrozenImporter is. It uses method __loader__.get_data() in methods pkg_resources.resource_string() and
|
# pkg_resources.resource_stream()
|
#
|
# We provide PyiFrozenProvider, which subclasses the NullProvider and implements _has(), _isdir(), and _listdir()
|
# methods, which are needed for pkg_resources.resource_exists(), resource_isdir(), and resource_listdir() to work. We
|
# cannot use the DefaultProvider, because it provides filesystem-only implementations (and overrides _get() with a
|
# filesystem-only one), whereas our provider needs to also support embedded resources.
|
#
|
# The PyiFrozenProvider allows querying/listing both PYZ-embedded and on-filesystem resources in a frozen package. The
|
# results are typically combined for both types of resources (e.g., when listing a directory or checking whether a
|
# resource exists). When the order of precedence matters, the PYZ-embedded resources take precedence over the
|
# on-filesystem ones, to keep the behavior consistent with the actual file content retrieval via _get() method (which in
|
# turn uses FrozenImporter's get_data() method). For example, when checking whether a resource is a directory via
|
# _isdir(), a PYZ-embedded file will take precedence over a potential on-filesystem directory. Also, in contrast to
|
# unfrozen packages, the frozen ones do not contain source .py files, which are therefore absent from content listings.
|
|
|
class _TocFilesystem:
|
"""
|
A prefix tree implementation for embedded filesystem reconstruction.
|
"""
|
def __init__(self, toc_files, toc_dirs=None):
|
toc_dirs = toc_dirs or []
|
# Reconstruct the filesystem hierarchy by building a prefix tree from the given file and directory paths.
|
self._tree = dict()
|
|
# Data files
|
for path in toc_files:
|
path = pathlib.PurePath(path)
|
current = self._tree
|
for component in path.parts[:-1]:
|
current = current.setdefault(component, {})
|
current[path.parts[-1]] = ''
|
|
# Extra directories
|
for path in toc_dirs:
|
path = pathlib.PurePath(path)
|
current = self._tree
|
for component in path.parts:
|
current = current.setdefault(component, {})
|
|
def _get_tree_node(self, path):
|
path = pathlib.PurePath(path)
|
current = self._tree
|
for component in path.parts:
|
if component not in current:
|
return None
|
current = current[component]
|
return current
|
|
def path_exists(self, path):
|
node = self._get_tree_node(path)
|
return node is not None # File or directory
|
|
def path_isdir(self, path):
|
node = self._get_tree_node(path)
|
if node is None:
|
return False # Non-existent
|
if isinstance(node, str):
|
return False # File
|
return True
|
|
def path_listdir(self, path):
|
node = self._get_tree_node(path)
|
if not isinstance(node, dict):
|
return [] # Non-existent or file
|
return list(node.keys())
|
|
|
# Cache for reconstructed embedded trees
|
_toc_tree_cache = {}
|
|
|
class PyiFrozenProvider(pkg_resources.NullProvider):
|
"""
|
Custom pkg_resources provider for FrozenImporter.
|
"""
|
def __init__(self, module):
|
super().__init__(module)
|
|
# Get top-level path; if "module" corresponds to a package, we need the path to the package itself. If "module"
|
# is a submodule in a package, we need the path to the parent package.
|
self._pkg_path = pathlib.PurePath(module.__file__).parent
|
|
# Defer initialization of PYZ-embedded resources tree to the first access.
|
self._embedded_tree = None
|
|
def _init_embedded_tree(self, rel_pkg_path, pkg_name):
|
# Collect relevant entries from TOC. We are interested in either files that are located in the package/module's
|
# directory (data files) or in packages that are prefixed with package/module's name (to reconstruct subpackage
|
# directories).
|
data_files = []
|
package_dirs = []
|
for entry in self.loader.toc:
|
entry_path = pathlib.PurePath(entry)
|
if rel_pkg_path in entry_path.parents:
|
# Data file path
|
data_files.append(entry_path)
|
elif entry.startswith(pkg_name) and self.loader.is_package(entry):
|
# Package or subpackage; convert the name to directory path
|
package_dir = pathlib.PurePath(*entry.split('.'))
|
package_dirs.append(package_dir)
|
|
# Reconstruct the filesystem
|
return _TocFilesystem(data_files, package_dirs)
|
|
@property
|
def embedded_tree(self):
|
if self._embedded_tree is None:
|
# Construct a path relative to _MEIPASS directory for searching the TOC.
|
rel_pkg_path = self._pkg_path.relative_to(SYS_PREFIX)
|
|
# Reconstruct package name prefix (use package path to obtain correct prefix in case of a module).
|
pkg_name = '.'.join(rel_pkg_path.parts)
|
|
# Initialize and cache the tree, if necessary.
|
if pkg_name not in _toc_tree_cache:
|
_toc_tree_cache[pkg_name] = \
|
self._init_embedded_tree(rel_pkg_path, pkg_name)
|
self._embedded_tree = _toc_tree_cache[pkg_name]
|
return self._embedded_tree
|
|
def _normalize_path(self, path):
|
# Avoid using Path.resolve(), because it resolves symlinks. This is undesirable, because the pure path in
|
# self._pkg_path does not have symlinks resolved, so comparison between the two would be faulty. So use
|
# os.path.abspath() instead to normalize the path.
|
return pathlib.Path(os.path.abspath(path))
|
|
def _is_relative_to_package(self, path):
|
return path == self._pkg_path or self._pkg_path in path.parents
|
|
def _has(self, path):
|
# Prevent access outside the package.
|
path = self._normalize_path(path)
|
if not self._is_relative_to_package(path):
|
return False
|
|
# Check the filesystem first to avoid unnecessarily computing the relative path...
|
if path.exists():
|
return True
|
rel_path = path.relative_to(SYS_PREFIX)
|
return self.embedded_tree.path_exists(rel_path)
|
|
def _isdir(self, path):
|
# Prevent access outside the package.
|
path = self._normalize_path(path)
|
if not self._is_relative_to_package(path):
|
return False
|
|
# Embedded resources have precedence over filesystem...
|
rel_path = path.relative_to(SYS_PREFIX)
|
node = self.embedded_tree._get_tree_node(rel_path)
|
if node is None:
|
return path.is_dir() # No match found; try the filesystem.
|
else:
|
# str = file, dict = directory
|
return not isinstance(node, str)
|
|
def _listdir(self, path):
|
# Prevent access outside the package.
|
path = self._normalize_path(path)
|
if not self._is_relative_to_package(path):
|
return []
|
|
# Relative path for searching embedded resources.
|
rel_path = path.relative_to(SYS_PREFIX)
|
# List content from embedded filesystem...
|
content = self.embedded_tree.path_listdir(rel_path)
|
# ... as well as the actual one.
|
if path.is_dir():
|
# Use os.listdir() to avoid having to convert Path objects to strings... Also make sure to de-duplicate the
|
# results.
|
path = str(path) # not is_py36
|
content = list(set(content + os.listdir(path)))
|
return content
|
|
|
pkg_resources.register_loader_type(FrozenImporter, PyiFrozenProvider)
|