Source code for stubalyzer.analyze

import re
import sys
from argparse import ArgumentParser, Namespace, RawTextHelpFormatter
from collections import defaultdict
from enum import Enum
from importlib.util import find_spec
from io import StringIO
from json import loads as json_loads
from json.decoder import JSONDecodeError
from os import linesep, scandir
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from traceback import format_exception
from typing import (
    Callable,
    Dict,
    Generator,
    Iterable,
    List,
    NamedTuple,
    Optional,
    Set,
    Tuple,
)
from xml.etree.ElementTree import Element, ElementTree, SubElement

from mypy.nodes import TypeAlias, TypeVarExpr, Var
from mypy.stubgen import generate_stubs, parse_options
from schema import Or, Schema, SchemaError, Use

from .collect import get_stub_types
from .compare import ComparisonResult, MatchResult, compare_symbols
from .lookup import lookup_symbol
from .types import RelevantSymbolNode

EXPECTED_MISMATCH_SCHEMA = Schema(Or({}, {str: Use(MatchResult.declare_mismatch)}))
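
# Illustrative sketch (not part of stubalyzer's public API): the schema above accepts
# either an empty JSON object or a mapping from fully qualified symbol names to match
# results such as "mismatch" or "not_found"; each value is converted with
# MatchResult.declare_mismatch during validation. Example document:
#
#     {
#         "my.module.function": "mismatch",
#         "another.module.Class": "not_found"
#     }
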
CHECK_FILE_ERROR = 'Check "{file_path}" to fix.'
MATCH_FOUND_ERROR = (
    'Expected "{symbol}" to be "{mismatch_type}" but it matched.'
    f"{linesep}{CHECK_FILE_ERROR}"
)
WRONG_MISMATCH_ERROR = (
    'Expected "{symbol}" to be "{expected}" but it was "{received}".'
    f"{linesep}{CHECK_FILE_ERROR}"
)
UNUSED_DEFINITION_ERROR = (
    "Expected the following symbols to fail, "
    f"but they were not processed:{linesep}"
    "{symbols}"
    f"{linesep}{CHECK_FILE_ERROR}"
)
FILE_NOT_FOUND_WARNING = (
    'WARNING: Provided file for expected mismatches ("{file_path}") not found.'
)
SUCCESS_MESSAGE = "Successfully validated {total} stubs."
FAIL_MESSAGE = "Failure: {failed} of {total} stubs seem not to be valid."
IGNORE_MESSAGE = (
    "{ignored} fail(s) were ignored, "
    "because they were defined to be expected mismatches."
)


class EvaluationResult(Enum):
    SUCCESS = "success"
    FAILURE = "failure"
    EXPECTED_FAILURE = "expected_failure"


def write_error(
    *messages: str, sep: str = "", symbol: Optional[RelevantSymbolNode] = None
) -> None:
    sys.stderr.write(sep.join(messages))
    sys.stderr.write(linesep)


def parse_command_line() -> Namespace:
    parser = ArgumentParser(
        description=dedent(
            """\
            Analyze a set of (handcrafted) mypy stubs by comparing them to
            (generated) reference stubs
            """
        ),
        formatter_class=RawTextHelpFormatter,
    )
    required_group = parser.add_argument_group("required arguments")
    required_group.add_argument(
        "-c", "--config", required=True, help="Mypy config file"
    )
    parser.add_argument(
        "-e",
        "--expected-mismatches",
        required=False,
        default=None,
        help=dedent(
            """\
            A JSON file, which defines expected mismatching symbols and their
            match results.
            If any symbol is declared in an expected_mismatches JSON file,
            %(prog)s will count it as an expected failure, and ignore this
            inconsistency.

            Example contents:
            {
                "my.module.function": "mismatch",
                "another.module.Class": "not_found"
            }

            According to the example above, we expect the signature of
            my.module.function to mismatch, and another.module.Class to be
            missing in the generated stubs.
            %(prog)s will ignore these inconsistencies.
            """
        ),
    )
    parser.add_argument(
        dest="stubs_handwritten",
        metavar="STUBS_HANDWRITTEN",
        help=dedent(
            """\
            Directory of handwritten stubs that need to be analyzed
            """
        ),
    )
    parser.add_argument(
        "-r",
        "--reference",
        required=False,
        default=None,
        metavar="REFERENCE_STUBS",
        help=dedent(
            """
            Directory of reference stubs to compare against.
            If not specified, stubgen will be used to generate the reference stubs.
            """
        ),
    )
    parser.add_argument(
        "-x",
        "--checkstyle-report",
        required=False,
        default=None,
        help=dedent(
            """
            Write an xml report in checkstyle format to the given file.
            """
        ),
    )
    parser.add_argument(
        "-s",
        "--silent",
        required=False,
        default=False,
        action="store_true",
        help=dedent(
            """
            Suppress all non-error output.
            """
        ),
    )
    parser.add_argument(
        "-p",
        "--include-private",
        required=False,
        default=False,
        action="store_true",
        help=dedent(
            """
            Include definitions stubgen would otherwise consider private,
            when generating the reference stubs.
            (e.g. names with a single leading underscore, like "_foo")
            """
        ),
    )
    return parser.parse_args()


def compare(
    hand_written: Iterable[RelevantSymbolNode], generated: Iterable[RelevantSymbolNode]
) -> Generator[ComparisonResult, None, None]:
    """Compare hand written to generated stubs."""
    gen_map: Dict[str, RelevantSymbolNode] = {
        symbol.fullname: symbol for symbol in generated
    }
    for symbol in hand_written:
        name = symbol.fullname
        if name in gen_map:
            yield compare_symbols(symbol, gen_map[name])
        elif isinstance(symbol, (TypeAlias, TypeVarExpr, Var)) and re.match(
            r"_[^_].*", name.split(".")[-1]
        ):
            # Ignore symbols that begin with (exactly) one _,
            # since we assume they are private
            continue
        else:
            lookup_result = lookup_symbol(gen_map, symbol)
            generated_symbol = lookup_result.symbol
            if generated_symbol:
                yield ComparisonResult.create_mislocated_symbol(
                    symbol=symbol,
                    reference=generated_symbol,
                    data={"containing_class": lookup_result.containing_class},
                )
            else:
                yield ComparisonResult.create_not_found(symbol)
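
# Usage sketch (illustrative, not from stubalyzer itself): compare() pairs symbols by
# their full name, so it is typically driven with the nodes produced by
# get_stub_types, as analyze_stubs does further below. The paths here are made up for
# the example:
#
#     hand_written = (symbol for symbol, _path in get_stub_types("stubs", "mypy.ini"))
#     reference = (symbol for symbol, _path in get_stub_types("ref_stubs", "mypy.ini"))
#     for result in compare(hand_written, reference):
#         print(result.symbol_name, result.match_result.value)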


def setup_expected_mismatches(
    file_path: Optional[str] = None,
) -> Tuple[Dict[str, MatchResult], Set[str]]:
    if not file_path:
        return dict(), set()

    mismatches: Dict[str, MatchResult] = {}
    mismatches_file = Path(file_path)

    if not mismatches_file.exists():
        write_error(FILE_NOT_FOUND_WARNING.format(file_path=file_path))
        return dict(), set()

    mismatches = EXPECTED_MISMATCH_SCHEMA.validate(
        json_loads(mismatches_file.read_text())
    )
    unused_mismatches = set(mismatches.keys())
    return mismatches, unused_mismatches
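
# Illustrative sketch: given a file "expected_mismatches.json" (the name is assumed
# for this example) containing {"my.module.function": "mismatch"},
# setup_expected_mismatches returns the parsed mapping plus the set of declared
# names that have not been matched against a comparison result yet:
#
#     mismatches, unused = setup_expected_mismatches("expected_mismatches.json")
#     # mismatches == {"my.module.function": MatchResult.declare_mismatch("mismatch")}
#     # unused == {"my.module.function"}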


def evaluate_compare_result(
    compare_result: ComparisonResult,
    mismatches: Dict[str, MatchResult],
    mismatches_left: Set[str],
    expected_mismatches_path: Optional[str] = None,
    *,
    loggers: List[Callable[..., None]],
) -> EvaluationResult:
    symbol = compare_result.symbol
    symbol_name = compare_result.symbol_name
    match_result = compare_result.match_result
    evaluation_result = EvaluationResult.SUCCESS
    expected_mismatch = mismatches.get(symbol_name)

    if expected_mismatch is None:
        if match_result is not MatchResult.MATCH:
            evaluation_result = EvaluationResult.FAILURE
            for logger in loggers:
                logger(
                    linesep,
                    compare_result.message,
                    symbol=symbol,
                )
    else:
        mismatches_left.remove(symbol_name)
        if match_result is MatchResult.MATCH:
            evaluation_result = EvaluationResult.FAILURE
            assert expected_mismatches_path
            for logger in loggers:
                logger(
                    linesep,
                    MATCH_FOUND_ERROR.format(
                        symbol=symbol_name,
                        mismatch_type=mismatches[symbol_name].value,
                        file_path=expected_mismatches_path,
                    ),
                    symbol=symbol,
                )
        elif match_result is not expected_mismatch:
            evaluation_result = EvaluationResult.FAILURE
            assert expected_mismatches_path
            for logger in loggers:
                logger(
                    linesep,
                    WRONG_MISMATCH_ERROR.format(
                        symbol=symbol_name,
                        expected=expected_mismatch.value,
                        received=match_result.value,
                        file_path=expected_mismatches_path,
                    ),
                    symbol=symbol,
                )
        else:
            evaluation_result = EvaluationResult.EXPECTED_FAILURE

    return evaluation_result
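
# Decision summary for evaluate_compare_result, derived from the logic above:
#
#     declared as expected mismatch? | actual match result     | evaluation
#     -------------------------------+-------------------------+-------------------
#     no                             | MATCH                   | SUCCESS
#     no                             | anything else           | FAILURE (logged)
#     yes                            | MATCH                   | FAILURE (logged)
#     yes                            | the declared mismatch   | EXPECTED_FAILURE
#     yes                            | a different mismatch    | FAILURE (logged)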


def call_stubgen(command_line_args: List[str]) -> None:
    """
    Call stubgen like the command line tool.

    :param command_line_args: list of command line args
    """
    generate_stubs(parse_options(command_line_args))
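
# Illustrative sketch: call_stubgen takes the same arguments as the stubgen command
# line tool. generate_stub_types below invokes it roughly like this (the package name
# and output directory are assumptions made for the example):
#
#     call_stubgen(
#         ["--ignore-errors", "-q", "-p", "some_package", "-o", "/tmp/reference-stubs"]
#     )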


def silence_output() -> Tuple[StringIO, StringIO]:
    """Redirect all output to in-memory buffers instead of stdout and stderr."""
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    return (sys.stdout, sys.stderr)


def restore_output() -> None:
    """Restore output of stdout and stderr to defaults."""
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__


def generate_stub_types(
    base_stubs_path: str,
    mypy_conf_path: str,
    silent: bool = False,
    include_private: bool = False,
) -> Iterable[Tuple[RelevantSymbolNode, str]]:
    """
    Use stubgen to generate reference stub types of the modules stubbed in
    base_stubs_path. For this to work the modules need to be installed.

    :param base_stubs_path: path to directory with (handwritten) stubs
    :param mypy_conf_path: path to mypy.ini
    :param silent: suppress stubgen's output while generating the stubs
    :param include_private: call stubgen with --include-private
    :return: returns the reference stub types
    """
    with TemporaryDirectory() as reference_stubs_path:
        packages = [
            entry.name.replace(".pyi", "")
            for entry in scandir(base_stubs_path)
            if entry.is_dir() or entry.name.endswith(".pyi")
        ]
        for package in packages:
            if find_spec(package) is None:
                print(
                    f'Error: The package "{package}" is not installed. Therefore no '
                    f"reference stubs can be generated for it automatically. Use the "
                    f"option -r to provide the reference stubs manually, or install "
                    f"the package."
                )
                sys.exit(1)
            try:
                if silent:
                    silence_output()
                default_parameters = [
                    "--ignore-errors",
                    "-q",
                    "-p",
                    package,
                    "-o",
                    reference_stubs_path,
                ]
                call_stubgen(
                    ["--include-private"] + default_parameters
                    if include_private
                    else default_parameters
                )
                if silent:
                    restore_output()
            except Exception as ex:
                restore_output()
                write_error(
                    f'Error: Generating stubs for the package "{package}" failed:',
                    linesep,
                    *format_exception(type(ex), ex, ex.__traceback__),
                )

        return list(get_stub_types(reference_stubs_path, mypy_conf_path))


class ErrorEntry(NamedTuple):
    symbol: RelevantSymbolNode
    message: str


class CheckStyleWriter:
    def __init__(self, path_map: Dict[RelevantSymbolNode, str]):
        self.path_map = path_map
        self.errors_by_file: Dict[str, List[ErrorEntry]] = defaultdict(list)

    def collect_error(
        self, *messages: str, sep: str = "", symbol: RelevantSymbolNode
    ) -> None:
        message = sep.join(messages)
        path = self.path_map[symbol]
        self.errors_by_file[path].append(ErrorEntry(symbol=symbol, message=message))

    def build_tree(self) -> ElementTree:
        root = Element("checkstyle", {"version": "4.3"})
        for filename in sorted(self.errors_by_file.keys()):
            errors = self.errors_by_file[filename]
            file = SubElement(root, "file", {"name": filename})
            for error in sorted(
                errors, key=lambda x: (x.symbol.line, x.symbol.column)
            ):
                SubElement(
                    file,
                    "error",
                    {
                        "line": str(error.symbol.line),
                        "column": str(error.symbol.column),
                        "severity": "error",
                        "message": error.message,
                    },
                )
        return ElementTree(root)
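
# Illustrative sketch of the checkstyle report produced by CheckStyleWriter.build_tree
# and written by analyze_stubs for a single error; the file path and message are made
# up for this example, and the exact XML declaration may differ slightly:
#
#     <?xml version='1.0' encoding='UTF-8'?>
#     <checkstyle version="4.3">
#         <file name="stubs/my/module.pyi">
#             <error line="3" column="0" severity="error" message="..." />
#         </file>
#     </checkstyle>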


def analyze_stubs(
    mypy_conf_path: str,
    base_stubs_path: str,
    reference_stubs_path: Optional[str] = None,
    expected_mismatches_path: Optional[str] = None,
    checkstyle_report: Optional[str] = None,
    silent: bool = False,
    include_private: bool = False,
) -> bool:
    """
    Determine if the (presumably) handwritten stubs in base_stubs_path are correct,
    i.e. if they match the API of the modules that they are stubbing.

    For this they are compared to reference stubs, which by default are generated
    with mypy's stubgen tool.

    For each type mismatch (e.g. different function signature, missing class member)
    a message will be printed to stdout. The function will return False if any
    mismatches are found, unless they have been declared as expected.

    :param mypy_conf_path: path to mypy.ini
    :param base_stubs_path: path to the directory that contains the stubs to analyze
    :param reference_stubs_path: Path to the folder that contains the reference stubs.
        If not provided, mypy's stubgen tool will be used to generate them.
    :param expected_mismatches_path: Path to JSON file that defines expected
        mismatches. Example:

        .. code-block:: json

            {
                "my.module.function": "mismatch",
                "another.module.Class": "not_found"
            }

    :param checkstyle_report: if this path is given, an xml report in checkstyle
        format will be written
    :param silent: Suppress all non-error output.
    :param include_private: Call stubgen with --include-private.
    :return: True if the stubs in base_stubs_path are considered correct
    """
    success = True
    failed_count = 0
    total_count = 0
    expected_count = 0

    try:
        mismatches, unused_mismatches = setup_expected_mismatches(
            expected_mismatches_path
        )
    except (JSONDecodeError, SchemaError) as ex:
        write_error(
            str(ex),
            linesep,
            CHECK_FILE_ERROR.format(file_path=expected_mismatches_path),
        )
        success = False

    if success:
        # Prevent overloaded function definitions from appearing multiple times
        stub_types_base_map = {
            symbol: path
            for (symbol, path) in get_stub_types(base_stubs_path, mypy_conf_path)
        }
        if reference_stubs_path:
            stub_types_reference = set(
                stub
                for stub, _ in get_stub_types(reference_stubs_path, mypy_conf_path)
            )
        else:
            stub_types_reference = set(
                stub
                for stub, _ in generate_stub_types(
                    base_stubs_path, mypy_conf_path, silent, include_private
                )
            )

        checkstyle_writer = CheckStyleWriter(stub_types_base_map)

        for res in compare(stub_types_base_map.keys(), stub_types_reference):
            total_count += 1
            evaluation_result = evaluate_compare_result(
                res,
                mismatches,
                unused_mismatches,
                expected_mismatches_path,
                loggers=[write_error, checkstyle_writer.collect_error],
            )
            if evaluation_result is EvaluationResult.FAILURE:
                failed_count += 1
            elif evaluation_result is EvaluationResult.EXPECTED_FAILURE:
                expected_count += 1

        success = failed_count == 0

        if checkstyle_report:
            checkstyle_tree = checkstyle_writer.build_tree()
            checkstyle_tree.write(
                checkstyle_report, encoding="UTF-8", xml_declaration=True
            )

        if unused_mismatches:
            success = False
            symbols = linesep.join([f" - {mm}" for mm in unused_mismatches])
            write_error(
                linesep,
                UNUSED_DEFINITION_ERROR.format(
                    symbols=symbols, file_path=expected_mismatches_path
                ),
            )

    ignore_message = IGNORE_MESSAGE.format(ignored=expected_count)
    if success:
        summary = SUCCESS_MESSAGE.format(total=total_count)
        if not silent:
            print(
                "",
                summary,
                (ignore_message if expected_count > 0 else ""),
                sep=linesep,
            )
    else:
        summary = FAIL_MESSAGE.format(total=total_count, failed=failed_count)
        write_error(
            "", summary, (ignore_message if expected_count > 0 else ""), sep=linesep
        )

    return success
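
# Usage sketch (illustrative only): calling analyze_stubs programmatically instead of
# via the command line. All paths below are assumptions made for this example:
#
#     ok = analyze_stubs(
#         mypy_conf_path="mypy.ini",
#         base_stubs_path="stubs",
#         expected_mismatches_path="expected_mismatches.json",
#         checkstyle_report="stubalyzer-report.xml",
#     )
#     # ok is True when every handwritten stub matched or was an expected mismatch.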


def main() -> None:
    args = parse_command_line()
    success = analyze_stubs(
        args.config,
        args.stubs_handwritten,
        args.reference,
        args.expected_mismatches,
        args.checkstyle_report,
        args.silent,
        args.include_private,
    )
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
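

# Command line sketch (illustrative; the paths are assumptions, and the module is run
# with "python -m", which the __main__ guard above supports when the package is
# importable):
#
#     python -m stubalyzer.analyze -c mypy.ini -e expected_mismatches.json \
#         -x stubalyzer-report.xml -r stubs/reference stubs/handwritten
#
# The process exits with code 0 when the handwritten stubs were validated
# successfully, and with code 1 when at least one unexpected mismatch was found.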