diff --git a/.gitignore b/.gitignore index 98038a18f..46cc7aed5 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ dist/ .coverage.* .tox .vscode +.idea diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 31c6cdd0f..6569c8c51 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -16,6 +16,13 @@ # we need types_or support minimum_pre_commit_version: 2.9.0 +- id: check-yaml-schema-modelines + name: Validate YAML files with schema modelines + description: 'Validate YAML files against schemas declared in YAML modeline comments' + entry: check-jsonschema --schema-from-modeline + language: python + types: [yaml] + # --AUTOGEN_HOOKS_START-- # # this hook is autogenerated from a script diff --git a/README.md b/README.md index f8c2424ac..19faeb568 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,11 @@ Then run, as in check-jsonschema --schemafile schema.json instance.json +YAML files can also declare their schema with a modeline comment and be checked +without repeating schema paths in the command: + + check-jsonschema --schema-from-modeline config/*.yaml + ## Documentation Full documentation can be found at https://check-jsonschema.readthedocs.io/ diff --git a/docs/precommit_usage.rst b/docs/precommit_usage.rst index e2cf03a5e..51c872ccb 100644 --- a/docs/precommit_usage.rst +++ b/docs/precommit_usage.rst @@ -23,6 +23,25 @@ You must specify a schema using pre-commit ``args`` configuration. files: ^data/.*\.json$ args: ["--schemafile", "schemas/foo.json"] +The ``check-jsonschema`` hook can also validate YAML files against schemas +declared in YAML modeline comments. Files without a modeline are skipped, +which lets one hook cover YAML files that use different schemas. + +.. code-block:: yaml + :caption: example config + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.37.2 + hooks: + - id: check-yaml-schema-modelines + +Supported modeline examples: + +.. 
code-block:: yaml + + # yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + # $schema: ../schemas/service.json + ``check-metaschema`` ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/usage.rst b/docs/usage.rst index 3031c9921..eef31f0e4 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -24,6 +24,9 @@ Detailed helptext is always available interactively via - Description * - ``--schemafile`` - The path or URL for a file containing a schema to use. + * - ``--schema-from-modeline`` + - Validate YAML files using schemas declared in YAML modeline comments. + Files without a modeline are skipped. * - ``-v``, ``--verbose`` - Request more output. * - ``-q``, ``--quiet`` @@ -77,6 +80,10 @@ These options are mutually exclusive, so exactly one must be used. * - ``--check-metaschema`` - Validate each instancefile as a JSON Schema, using the relevant metaschema defined in ``"$schema"``. + * - ``--schema-from-modeline`` + - Validate YAML files using the schema declared in a modeline comment such as + ``# yaml-language-server: $schema=../schemas/foo.json``. Relative schema + paths are resolved relative to the YAML file. ``--builtin-schema`` Choices ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/check_jsonschema/checker.py b/src/check_jsonschema/checker.py index bd628770c..ec76bc4c6 100644 --- a/src/check_jsonschema/checker.py +++ b/src/check_jsonschema/checker.py @@ -9,8 +9,7 @@ from . 
import format_errors from .formats import FormatOptions -from .instance_loader import InstanceLoader -from .parsers import ParseError +from .instance_loader import InstanceLoader, InstanceParseError from .regex_variants import RegexImplementation from .reporter import Reporter from .result import CheckResult @@ -50,12 +49,29 @@ def _fail(self, msg: str, err: Exception | None = None) -> t.NoReturn: format_errors.print_error(err, mode=self._traceback_mode) raise _Exit(1) + def _fail_ref_resolution(self, err: Exception) -> t.NoReturn: + click.echo("Failure resolving $ref within schema", err=True) + if self._traceback_mode == "full": + format_errors.print_error(err, mode=self._traceback_mode) + else: + click.echo(f" {_format_ref_resolution_error(err)}", err=True) + raise _Exit(1) + def get_validator( - self, path: pathlib.Path | str, doc: dict[str, t.Any] + self, + path: pathlib.Path | str, + doc: t.Any, + *, + schemafile: str | None = None, ) -> jsonschema.protocols.Validator: try: return self._schema_loader.get_validator( - path, doc, self._format_opts, self._regex_impl, self._fill_defaults + path, + doc, + self._format_opts, + self._regex_impl, + self._fill_defaults, + schemafile=schemafile, ) except SchemaParseError as e: self._fail("Error: schemafile could not be parsed as JSON", e) @@ -68,17 +84,32 @@ def get_validator( def _build_result(self) -> CheckResult: result = CheckResult() - for path, data in self._instance_loader.iter_files(): - if isinstance(data, ParseError): - result.record_parse_error(path, data) + for instance in self._instance_loader.iter_documents(): + if isinstance(instance, InstanceParseError): + result.record_parse_error(instance.filename, instance.error) else: - validator = self.get_validator(path, data) + validator = self.get_validator( + instance.filename, + instance.data, + schemafile=instance.schemafile, + ) passing = True - for err in validator.iter_errors(data): - result.record_validation_error(path, err) + try: + validation_errors = 
validator.iter_errors(instance.data) + for err in validation_errors: + result.record_validation_error(instance.label, err) + passing = False + except ( + referencing.exceptions.NoSuchResource, + referencing.exceptions.Unretrievable, + referencing.exceptions.Unresolvable, + ) as err: + result.record_validation_error( + instance.label, _make_ref_resolution_error(err) + ) passing = False if passing: - result.record_validation_success(path) + result.record_validation_success(instance.label) return result def _run(self) -> None: @@ -89,7 +120,7 @@ def _run(self) -> None: referencing.exceptions.Unretrievable, referencing.exceptions.Unresolvable, ) as e: - self._fail("Failure resolving $ref within schema\n", e) + self._fail_ref_resolution(e) self._reporter.report_result(result) if not result.success: @@ -101,3 +132,28 @@ def run(self) -> int: except _Exit as e: return e.code return 0 + + +def _make_ref_resolution_error(err: Exception) -> jsonschema.ValidationError: + return jsonschema.ValidationError( + f"A $ref in the schema could not be resolved: " + f"{_format_ref_resolution_error(err)}" + ) + + +def _format_ref_resolution_error(err: Exception) -> str: + cause = err.__cause__ or err.__context__ or err + if isinstance(cause, referencing.exceptions.PointerToNowhere): + return ( + f"{type(cause).__name__}: {cause.ref!r} does not exist within " + "the loaded schema." + ) + if isinstance(cause, referencing.exceptions.NoSuchResource): + return f"{type(cause).__name__}: could not retrieve {cause.ref!r}." + if isinstance(cause, referencing.exceptions.Unretrievable): + return f"{type(cause).__name__}: could not retrieve {cause.ref!r}." + if isinstance(cause, referencing.exceptions.Unresolvable): + ref = getattr(cause, "ref", None) + if ref is not None: + return f"{type(cause).__name__}: could not resolve {ref!r}." 
+ return format_errors.format_error_message(cause) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 62d79bb35..cdca791db 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -17,6 +17,7 @@ from ..schema_loader import ( BuiltinSchemaLoader, MetaSchemaLoader, + ModelineSchemaLoader, SchemaLoader, SchemaLoaderBase, ) @@ -62,7 +63,8 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: help="""\ Check JSON and YAML files against a JSON Schema. -The schema is specified either with '--schemafile' or with '--builtin-schema'. +The schema is specified with '--schemafile', '--builtin-schema', or +'--schema-from-modeline'. 'check-jsonschema' supports format checks with appropriate libraries installed, including the following formats by default: @@ -112,6 +114,14 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: type=click.Choice(BUILTIN_SCHEMA_CHOICES, case_sensitive=False), metavar="BUILTIN_SCHEMA_NAME", ) +@click.option( + "--schema-from-modeline", + is_flag=True, + help=( + "Validate YAML files using the schema declared in a YAML modeline " + "comment. Files without a schema modeline are skipped." 
+ ), +) @click.option( "--check-metaschema", is_flag=True, @@ -240,6 +250,7 @@ def main( schemafile: str | None, builtin_schema: str | None, base_uri: str | None, + schema_from_modeline: bool, check_metaschema: bool, no_cache: bool, cache_filename: str | None, @@ -261,7 +272,7 @@ def main( args.set_regex_variant(regex_variant, legacy_opt=format_regex) - args.set_schema(schemafile, builtin_schema, check_metaschema) + args.set_schema(schemafile, builtin_schema, check_metaschema, schema_from_modeline) args.set_validator(validator_class) args.base_uri = base_uri @@ -299,6 +310,12 @@ def main( def build_schema_loader(args: ParseResult) -> SchemaLoaderBase: if args.schema_mode == SchemaLoadingMode.metaschema: return MetaSchemaLoader(base_uri=args.base_uri) + elif args.schema_mode == SchemaLoadingMode.modeline: + if args.base_uri is not None: + raise click.UsageError( + "--base-uri cannot be used with --schema-from-modeline" + ) + return ModelineSchemaLoader(disable_cache=args.disable_cache) elif args.schema_mode == SchemaLoadingMode.builtin: assert args.schema_path is not None return BuiltinSchemaLoader(args.schema_path, base_uri=args.base_uri) @@ -320,6 +337,7 @@ def build_instance_loader(args: ParseResult) -> InstanceLoader: default_filetype=args.default_filetype, force_filetype=args.force_filetype, data_transform=args.data_transform, + schema_from_modeline=args.schema_mode == SchemaLoadingMode.modeline, ) diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index dd03a3768..482c67995 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -15,6 +15,7 @@ class SchemaLoadingMode(enum.Enum): filepath = "filepath" builtin = "builtin" metaschema = "metaschema" + modeline = "modeline" class ParseResult: @@ -57,20 +58,30 @@ def set_regex_variant( self.regex_variant = RegexVariantName(variant_name) def set_schema( - self, schemafile: str | None, builtin_schema: str | None, 
check_metaschema: bool + self, + schemafile: str | None, + builtin_schema: str | None, + check_metaschema: bool, + schema_from_modeline: bool = False, ) -> None: mutex_arg_count = sum( - 1 if x else 0 for x in (schemafile, builtin_schema, check_metaschema) + 1 if x else 0 + for x in ( + schemafile, + builtin_schema, + check_metaschema, + schema_from_modeline, + ) ) if mutex_arg_count == 0: raise click.UsageError( - "Either --schemafile, --builtin-schema, or --check-metaschema " - "must be provided" + "Either --schemafile, --builtin-schema, --check-metaschema, " + "or --schema-from-modeline must be provided" ) if mutex_arg_count > 1: raise click.UsageError( - "--schemafile, --builtin-schema, and --check-metaschema " - "are mutually exclusive" + "--schemafile, --builtin-schema, --check-metaschema, and " + "--schema-from-modeline are mutually exclusive" ) if schemafile: @@ -79,8 +90,10 @@ def set_schema( elif builtin_schema: self.schema_mode = SchemaLoadingMode.builtin self.schema_path = builtin_schema - else: + elif check_metaschema: self.schema_mode = SchemaLoadingMode.metaschema + else: + self.schema_mode = SchemaLoadingMode.modeline def set_validator( self, validator_class: type[jsonschema.protocols.Validator] | None diff --git a/src/check_jsonschema/instance_loader.py b/src/check_jsonschema/instance_loader.py index 5d76bbfe7..9512c8def 100644 --- a/src/check_jsonschema/instance_loader.py +++ b/src/check_jsonschema/instance_loader.py @@ -2,13 +2,43 @@ import io import typing as t +from dataclasses import dataclass from check_jsonschema.cli.param_types import CustomLazyFile +from .modeline import extract_yaml_modeline_schema, resolve_modeline_schema_location from .parsers import ParseError, ParserSet +from .parsers.metadata import MultiDocumentData, ParsedDocument from .transforms import Transform +@dataclass(frozen=True) +class InstanceDocument: + filename: str + data: t.Any + line: int | None = None + schemafile: str | None = None + + @property + def label(self) 
-> str: + if self.line is None: + return self.filename + return f"{self.filename}:{self.line}" + + +@dataclass(frozen=True) +class InstanceParseError: + filename: str + error: ParseError + + +@dataclass(frozen=True) +class LoadedFile: + filename: str + data: ParseError | t.Any + schemafile: str | None = None + + class InstanceLoader: def __init__( self, @@ -16,10 +46,12 @@ def __init__( default_filetype: str = "json", force_filetype: str | None = None, data_transform: Transform | None = None, + schema_from_modeline: bool = False, ) -> None: self._files = files self._default_filetype = default_filetype self._force_filetype = force_filetype + self._schema_from_modeline = schema_from_modeline self._data_transform = ( data_transform if data_transform is not None else Transform() ) @@ -28,7 +60,20 @@ def __init__( modify_yaml_implementation=self._data_transform.modify_yaml_implementation ) - def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]: + def _apply_data_transform(self, data: t.Any) -> t.Any: + if isinstance(data, MultiDocumentData): + return MultiDocumentData( + tuple( + ParsedDocument( + data=self._data_transform(document.data), + line=document.line, + ) + for document in data.documents + ) + ) + return self._data_transform(data) + + def _iter_loaded_files(self) -> t.Iterator[LoadedFile]: for file in self._files: if hasattr(file, "name"): name = file.name @@ -46,14 +91,52 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]: else: stream = file + stream_bytes = stream.read() + schemafile = None + if self._schema_from_modeline: + raw_schemafile = extract_yaml_modeline_schema(stream_bytes) + if raw_schemafile is None: + continue + try: + schemafile = resolve_modeline_schema_location( + raw_schemafile, name + ) + except ValueError as err: + data = ParseError(str(err)) + yield LoadedFile(name, data, schemafile=None) + continue + try: - data: t.Any = self._parsers.parse_data_with_path( - stream, name, self._default_filetype, 
self._force_filetype + data = self._parsers.parse_data_with_path( + stream_bytes, name, self._default_filetype, self._force_filetype ) except ParseError as err: data = err else: - data = self._data_transform(data) + data = self._apply_data_transform(data) finally: file.close() - yield (name, data) + yield LoadedFile(name, data, schemafile=schemafile) + + def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]: + for loaded_file in self._iter_loaded_files(): + yield (loaded_file.filename, loaded_file.data) + + def iter_documents(self) -> t.Iterator[InstanceDocument | InstanceParseError]: + for loaded_file in self._iter_loaded_files(): + if isinstance(loaded_file.data, ParseError): + yield InstanceParseError(loaded_file.filename, loaded_file.data) + elif isinstance(loaded_file.data, MultiDocumentData): + for document in loaded_file.data.documents: + yield InstanceDocument( + filename=loaded_file.filename, + data=document.data, + line=document.line, + schemafile=loaded_file.schemafile, + ) + else: + yield InstanceDocument( + filename=loaded_file.filename, + data=loaded_file.data, + schemafile=loaded_file.schemafile, + ) diff --git a/src/check_jsonschema/modeline.py b/src/check_jsonschema/modeline.py new file mode 100644 index 000000000..ca3f88982 --- /dev/null +++ b/src/check_jsonschema/modeline.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import pathlib +import re + +from .utils import filename2path, is_url_ish + +_YAML_SCHEMA_MODELINE_RE = re.compile( + r"^[ \t]*#[ \t]*" + r"(?:(?:yaml-language-server)[ \t]*:[ \t]*)?" 
+ r"\$schema[ \t]*(?:=|:)[ \t]*" + r"(?P<schema>\S+)" + ) + + + def extract_yaml_modeline_schema(data: bytes) -> str | None: + text = data.decode("utf-8-sig", errors="replace") + for line in text.splitlines(): + match = _YAML_SCHEMA_MODELINE_RE.match(line) + if match: + return match.group("schema") + return None + + + def resolve_modeline_schema_location(schema_location: str, filename: str) -> str: + if is_url_ish(schema_location) or pathlib.Path(schema_location).is_absolute(): + return schema_location + + if filename in ("-", ""): + raise ValueError( + "relative schema paths in YAML modelines cannot be resolved for stdin" + ) + + return str(filename2path(filename).parent.joinpath(schema_location).resolve()) diff --git a/src/check_jsonschema/parsers/metadata.py b/src/check_jsonschema/parsers/metadata.py new file mode 100644 index 000000000..931258d00 --- /dev/null +++ b/src/check_jsonschema/parsers/metadata.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +import typing as t +from dataclasses import dataclass + + +@dataclass(frozen=True) +class ParsedDocument: + data: t.Any + line: int | None = None + + +@dataclass(frozen=True) +class MultiDocumentData: + documents: tuple[ParsedDocument, ...]
diff --git a/src/check_jsonschema/parsers/yaml.py b/src/check_jsonschema/parsers/yaml.py index d2780d77c..f88fc30ee 100644 --- a/src/check_jsonschema/parsers/yaml.py +++ b/src/check_jsonschema/parsers/yaml.py @@ -5,6 +5,8 @@ import ruamel.yaml +from .metadata import MultiDocumentData, ParsedDocument + ParseError = ruamel.yaml.YAMLError @@ -51,6 +53,38 @@ def _normalize(data: t.Any) -> t.Any: _data_sentinel = object() +def _is_multidoc_error(err: ruamel.yaml.YAMLError) -> bool: + return isinstance(err, ruamel.yaml.composer.ComposerError) and ( + "expected a single document in the stream" in str(err) + ) + + +def _document_start_lines( + implementation: ruamel.yaml.YAML, stream_bytes: bytes, num_docs: int +) -> tuple[int | None, ...]: + try: + nodes = list(implementation.compose_all(stream_bytes)) + except ruamel.yaml.YAMLError: + return (None,) * num_docs + return tuple( + node.start_mark.line + 1 if node.start_mark is not None else None + for node in nodes + ) + + +def _load_all_documents( + implementation: ruamel.yaml.YAML, stream_bytes: bytes +) -> MultiDocumentData: + documents = list(implementation.load_all(stream_bytes)) + lines = _document_start_lines(implementation, stream_bytes, len(documents)) + return MultiDocumentData( + tuple( + ParsedDocument(data=_normalize(doc), line=line) + for doc, line in zip(documents, lines) + ) + ) + + def impl2loader( primary: ruamel.yaml.YAML, *fallbacks: ruamel.yaml.YAML ) -> t.Callable[[t.IO[bytes]], t.Any]: @@ -63,12 +97,22 @@ def load(stream: t.IO[bytes]) -> t.Any: for impl in [primary] + list(fallbacks): try: data = impl.load(stream_bytes) - except ruamel.yaml.YAMLError as e: - lasterr = e + except ruamel.yaml.YAMLError as err: + if _is_multidoc_error(err): + try: + data = _load_all_documents(impl, stream_bytes) + except ruamel.yaml.YAMLError as multidoc_err: + lasterr = multidoc_err + continue + else: + break + lasterr = err else: break if data is _data_sentinel and lasterr is not None: raise lasterr + if 
isinstance(data, MultiDocumentData): + return data return _normalize(data) return load diff --git a/src/check_jsonschema/schema_loader/__init__.py b/src/check_jsonschema/schema_loader/__init__.py index 0a55e06bc..0feb6e847 100644 --- a/src/check_jsonschema/schema_loader/__init__.py +++ b/src/check_jsonschema/schema_loader/__init__.py @@ -1,11 +1,18 @@ from .errors import SchemaParseError, UnsupportedUrlScheme -from .main import BuiltinSchemaLoader, MetaSchemaLoader, SchemaLoader, SchemaLoaderBase +from .main import ( + BuiltinSchemaLoader, + MetaSchemaLoader, + ModelineSchemaLoader, + SchemaLoader, + SchemaLoaderBase, +) __all__ = ( "SchemaParseError", "UnsupportedUrlScheme", "BuiltinSchemaLoader", "MetaSchemaLoader", + "ModelineSchemaLoader", "SchemaLoader", "SchemaLoaderBase", ) diff --git a/src/check_jsonschema/schema_loader/main.py b/src/check_jsonschema/schema_loader/main.py index ef808becd..c8b7d1cfd 100644 --- a/src/check_jsonschema/schema_loader/main.py +++ b/src/check_jsonschema/schema_loader/main.py @@ -64,10 +64,12 @@ class SchemaLoaderBase: def get_validator( self, path: pathlib.Path | str, - instance_doc: dict[str, t.Any], + instance_doc: t.Any, format_opts: FormatOptions, regex_impl: RegexImplementation, fill_defaults: bool, + *, + schemafile: str | None = None, ) -> jsonschema.protocols.Validator: raise NotImplementedError @@ -138,10 +140,12 @@ def get_schema(self) -> dict[str, t.Any]: def get_validator( self, path: pathlib.Path | str, - instance_doc: dict[str, t.Any], + instance_doc: t.Any, format_opts: FormatOptions, regex_impl: RegexImplementation, fill_defaults: bool, + *, + schemafile: str | None = None, ) -> jsonschema.protocols.Validator: return self._get_validator(format_opts, regex_impl, fill_defaults) @@ -256,6 +260,39 @@ def get_schema(self) -> dict[str, t.Any]: return data +class ModelineSchemaLoader(SchemaLoaderBase): + def __init__(self, *, disable_cache: bool = False) -> None: + self.disable_cache = disable_cache + 
self._loader_by_schemafile: dict[str, SchemaLoader] = {} + + def _get_loader(self, schemafile: str) -> SchemaLoader: + if schemafile not in self._loader_by_schemafile: + self._loader_by_schemafile[schemafile] = SchemaLoader( + schemafile, disable_cache=self.disable_cache + ) + return self._loader_by_schemafile[schemafile] + + def get_validator( + self, + path: pathlib.Path | str, + instance_doc: t.Any, + format_opts: FormatOptions, + regex_impl: RegexImplementation, + fill_defaults: bool, + *, + schemafile: str | None = None, + ) -> jsonschema.protocols.Validator: + if schemafile is None: + raise RuntimeError(f"No YAML modeline schema was found for {path}") + return self._get_loader(schemafile).get_validator( + path, + instance_doc, + format_opts, + regex_impl, + fill_defaults, + ) + + class MetaSchemaLoader(SchemaLoaderBase): def __init__(self, *, base_uri: str | None = None) -> None: if base_uri is not None: @@ -267,10 +304,12 @@ def __init__(self, *, base_uri: str | None = None) -> None: def get_validator( self, path: pathlib.Path | str, - instance_doc: dict[str, t.Any], + instance_doc: t.Any, format_opts: FormatOptions, regex_impl: RegexImplementation, fill_defaults: bool, + *, + schemafile: str | None = None, ) -> jsonschema.protocols.Validator: schema_validator = jsonschema.validators.validator_for(instance_doc) meta_validator_class = jsonschema.validators.validator_for( diff --git a/tests/acceptance/test_invalid_schema_files.py b/tests/acceptance/test_invalid_schema_files.py index 71efda024..ea74e24e4 100644 --- a/tests/acceptance/test_invalid_schema_files.py +++ b/tests/acceptance/test_invalid_schema_files.py @@ -1,6 +1,21 @@ import pytest +def test_checker_unresolvable_schema_ref_has_concise_error(run_line, tmp_path): + foo = tmp_path / "foo.json" + bar = tmp_path / "bar.json" + foo.write_text('{"properties": {"foo": {"$ref": "#/definitions/missing"}}}') + bar.write_text('{"foo": "bar"}') + + res = run_line(["check-jsonschema", "--schemafile", str(foo), 
str(bar)]) + + assert res.exit_code == 1 + assert "Schema validation errors were encountered." in res.stdout + assert f"{bar}::$: A $ref in the schema could not be resolved" in res.stdout + assert "PointerToNowhere: '/definitions/missing' does not exist" in res.stdout + assert "'properties':" not in res.stdout + + def test_checker_non_json_schemafile(run_line, tmp_path): foo = tmp_path / "foo.json" bar = tmp_path / "bar.json" diff --git a/tests/acceptance/test_schema_from_modeline.py b/tests/acceptance/test_schema_from_modeline.py new file mode 100644 index 000000000..6e488a914 --- /dev/null +++ b/tests/acceptance/test_schema_from_modeline.py @@ -0,0 +1,116 @@ +import json + +import responses + + +def test_schema_from_modeline_validates_files_against_different_schemas( + run_line_simple, tmp_path +): + schemas = tmp_path / "schemas" + configs = tmp_path / "configs" + schemas.mkdir() + configs.mkdir() + + (schemas / "name.json").write_text( + json.dumps({"type": "object", "required": ["name"]}) + ) + (schemas / "count.json").write_text( + json.dumps({"type": "object", "required": ["count"]}) + ) + + name_config = configs / "name.yaml" + count_config = configs / "count.yaml" + name_config.write_text("""\ +# yaml-language-server: $schema=../schemas/name.json +name: example +""") + count_config.write_text("""\ +# $schema: ../schemas/count.json +count: 1 +""") + + run_line_simple(["--schema-from-modeline", str(name_config), str(count_config)]) + + +def test_schema_from_modeline_reports_validation_errors(run_line, tmp_path): + schema = tmp_path / "schema.json" + schema.write_text(json.dumps({"type": "object", "required": ["name"]})) + + config = tmp_path / "config.yaml" + config.write_text("""\ +# yaml-language-server: $schema=schema.json +count: 1 +""") + + result = run_line(["check-jsonschema", "--schema-from-modeline", str(config)]) + + assert result.exit_code == 1 + assert f"{config}::$: 'name' is a required property" in result.stdout + + +def 
test_schema_from_modeline_skips_unannotated_files(run_line_simple, tmp_path): + config = tmp_path / "config.yaml" + config.write_text("this: is not validated") + + run_line_simple(["--schema-from-modeline", str(config)]) + + +def test_schema_from_modeline_skips_unannotated_malformed_files( + run_line_simple, tmp_path +): + config = tmp_path / "config.yaml" + config.write_text("a: {b") + + run_line_simple(["--schema-from-modeline", str(config)]) + + +def test_schema_from_modeline_supports_remote_schemas(run_line_simple, tmp_path): + schema_url = "https://example.com/schema.json" + responses.add( + "GET", + schema_url, + headers={"Last-Modified": "Sun, 01 Jan 2000 00:00:01 GMT"}, + json={"type": "object", "required": ["name"]}, + match_querystring=None, + ) + + config = tmp_path / "config.yaml" + config.write_text(f"""\ +# yaml-language-server: $schema={schema_url} +name: example +""") + + run_line_simple(["--schema-from-modeline", str(config)]) + + +def test_schema_from_modeline_validates_each_yaml_document(run_line, tmp_path): + schema = tmp_path / "schema.json" + schema.write_text(json.dumps({"type": "object", "required": ["name"]})) + + config = tmp_path / "config.yaml" + config.write_text("""\ +# yaml-language-server: $schema=schema.json +--- +name: ok +--- +count: 1 +""") + + result = run_line(["check-jsonschema", "--schema-from-modeline", str(config)]) + + assert result.exit_code == 1 + assert f"{config}:5::$: 'name' is a required property" in result.stdout + + +def test_schema_from_modeline_preserves_top_level_yaml_lists(run_line_simple, tmp_path): + schema = tmp_path / "schema.json" + schema.write_text(json.dumps({"type": "array", "items": {"type": "string"}})) + + config = tmp_path / "config.yaml" + config.write_text("""\ +# yaml-language-server: $schema=schema.json +- first +- second +""") + + run_line_simple(["--schema-from-modeline", str(config)]) diff --git a/tests/unit/cli/test_parse.py b/tests/unit/cli/test_parse.py index e7846220c..8bec4ef29 100644 
--- a/tests/unit/cli/test_parse.py +++ b/tests/unit/cli/test_parse.py @@ -43,21 +43,22 @@ def get_ctx(*args): @pytest.mark.parametrize( - "schemafile,builtin_schema,check_metaschema,expect_mode", + "schemafile,builtin_schema,check_metaschema,schema_from_modeline,expect_mode", [ - ("foo.json", None, False, SchemaLoadingMode.filepath), - (None, "foo", False, SchemaLoadingMode.builtin), - (None, None, True, SchemaLoadingMode.metaschema), + ("foo.json", None, False, False, SchemaLoadingMode.filepath), + (None, "foo", False, False, SchemaLoadingMode.builtin), + (None, None, True, False, SchemaLoadingMode.metaschema), + (None, None, False, True, SchemaLoadingMode.modeline), ], ) def test_parse_result_set_schema( - schemafile, builtin_schema, check_metaschema, expect_mode + schemafile, builtin_schema, check_metaschema, schema_from_modeline, expect_mode ): args = ParseResult() # starts as None (always) assert args.schema_path is None - args.set_schema(schemafile, builtin_schema, check_metaschema) + args.set_schema(schemafile, builtin_schema, check_metaschema, schema_from_modeline) assert args.schema_mode == expect_mode if schemafile: assert args.schema_path == schemafile @@ -65,6 +66,8 @@ def test_parse_result_set_schema( assert args.schema_path == builtin_schema if check_metaschema: assert args.schema_path is None + if schema_from_modeline: + assert args.schema_path is None def test_requires_some_args(cli_runner): @@ -135,6 +138,20 @@ def test_no_cache_flag_is_true(cli_runner, mock_parse_result, in_tmp_dir, tmp_pa "vendor.travis", "--check-metaschema", ], + [ + "--schemafile", + "x.json", + "--schema-from-modeline", + ], + [ + "--builtin-schema", + "vendor.travis", + "--schema-from-modeline", + ], + [ + "--check-metaschema", + "--schema-from-modeline", + ], ], ) def test_mutex_schema_opts(cli_runner, cmd_args, in_tmp_dir, tmp_path): @@ -144,6 +161,17 @@ def test_mutex_schema_opts(cli_runner, cmd_args, in_tmp_dir, tmp_path): assert "are mutually exclusive" in result.stderr 
+def test_schema_from_modeline_and_instancefile( + cli_runner, mock_parse_result, in_tmp_dir, tmp_path +): + touch_files(tmp_path, "foo.yaml") + cli_runner.invoke(cli_main, ["--schema-from-modeline", "foo.yaml"]) + assert mock_parse_result.schema_mode == SchemaLoadingMode.modeline + assert mock_parse_result.schema_path is None + assert isinstance(mock_parse_result.instancefiles, tuple) + assert tuple(f.name for f in mock_parse_result.instancefiles) == ("foo.yaml",) + + @pytest.mark.parametrize( "cmd_args", [ diff --git a/tests/unit/test_instance_loader.py b/tests/unit/test_instance_loader.py index 4835b814a..8ae7ffdab 100644 --- a/tests/unit/test_instance_loader.py +++ b/tests/unit/test_instance_loader.py @@ -3,6 +3,7 @@ from check_jsonschema.instance_loader import InstanceLoader from check_jsonschema.parsers import BadFileTypeError, FailedFileLoadError from check_jsonschema.parsers.json5 import ENABLED as JSON5_ENABLED +from check_jsonschema.parsers.metadata import MultiDocumentData # handy helper for opening multiple files for InstanceLoader @@ -188,6 +189,54 @@ def test_instanceloader_yaml_dup_anchor(tmp_path, open_wide): assert data == [(str(f), {"a": {"b": [1, 2], "c": "d"}})] +def test_instanceloader_yaml_multi_document_data(tmp_path, open_wide): + f = tmp_path / "foo.yaml" + f.write_text("""\ +--- +a: 1 +--- +- b +- c +""") + loader = InstanceLoader(open_wide(f)) + + data = list(loader.iter_files()) + + assert len(data) == 1 + assert data[0][0] == str(f) + assert isinstance(data[0][1], MultiDocumentData) + assert [doc.data for doc in data[0][1].documents] == [{"a": 1}, ["b", "c"]] + assert [doc.line for doc in data[0][1].documents] == [2, 4] + + +def test_instanceloader_iter_documents_expands_multi_document_data(tmp_path, open_wide): + f = tmp_path / "foo.yaml" + f.write_text("""\ +first: true +--- +second: true +""") + loader = InstanceLoader(open_wide(f)) + + documents = list(loader.iter_documents()) + + assert [document.label for document in documents] == 
[f"{f}:1", f"{f}:3"] + assert [document.data for document in documents] == [ + {"first": True}, + {"second": True}, + ] + + +def test_instanceloader_schema_from_modeline_skips_unannotated_files( + tmp_path, open_wide +): + f = tmp_path / "foo.yaml" + f.write_text("a: b") + loader = InstanceLoader(open_wide(f), schema_from_modeline=True) + + assert list(loader.iter_documents()) == [] + + @pytest.mark.parametrize( "file_format, filename, content", [ diff --git a/tests/unit/test_modeline.py b/tests/unit/test_modeline.py new file mode 100644 index 000000000..3ca7af5fe --- /dev/null +++ b/tests/unit/test_modeline.py @@ -0,0 +1,62 @@ +import pytest + +from check_jsonschema.modeline import ( + extract_yaml_modeline_schema, + resolve_modeline_schema_location, +) + + +@pytest.mark.parametrize( + "line", + [ + "# yaml-language-server: $schema=https://example.com/schema.json", + " # yaml-language-server : $schema = https://example.com/schema.json", + "# yaml-language-server: $schema: https://example.com/schema.json", + "# $schema=https://example.com/schema.json", + "# $schema: https://example.com/schema.json", + ], +) +def test_extract_yaml_modeline_schema_supported_forms(line): + data = f"---\n{line}\nfoo: bar\n".encode() + assert extract_yaml_modeline_schema(data) == "https://example.com/schema.json" + + +def test_extract_yaml_modeline_schema_returns_first_match(): + data = b"""\ +# yaml-language-server: $schema=https://example.com/first.json +# yaml-language-server: $schema=https://example.com/second.json +""" + assert extract_yaml_modeline_schema(data) == "https://example.com/first.json" + + +def test_extract_yaml_modeline_schema_requires_full_line_comment(): + assert ( + extract_yaml_modeline_schema( + b"foo: bar # yaml-language-server: $schema=https://example.com/schema.json" + ) + is None + ) + + +def test_resolve_modeline_schema_location_relative_to_instance(tmp_path): + instance = tmp_path / "configs" / "foo.yaml" + instance.parent.mkdir() + instance.write_text("") + 
+ schema_location = resolve_modeline_schema_location( + "../schemas/foo.json", str(instance) + ) + + assert schema_location == str(tmp_path / "schemas" / "foo.json") + + +def test_resolve_modeline_schema_location_remote_unchanged(): + assert ( + resolve_modeline_schema_location("https://example.com/schema.json", "foo.yaml") + == "https://example.com/schema.json" + ) + + +def test_resolve_modeline_schema_location_relative_stdin_fails(): + with pytest.raises(ValueError, match="cannot be resolved for stdin"): + resolve_modeline_schema_location("schemas/foo.json", "-")