diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index bafa93822095f..f4671d9e53443 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -424,7 +424,7 @@ labelPRBasedOnFilePath: area:Logging: - airflow-core/src/airflow/config_templates/airflow_local_settings.py - - airflow-core/tests/unit/core/test_logging_config.py + - shared/logging/**/* - airflow-core/src/airflow/utils/log/**/* - airflow-core/docs/administration-and-deployment/logging-monitoring/logging-*.rst - airflow-core/tests/unit/utils/log/**/* diff --git a/airflow-core/pyproject.toml b/airflow-core/pyproject.toml index 843fb5ba4c898..ca98ba6d8fe3d 100644 --- a/airflow-core/pyproject.toml +++ b/airflow-core/pyproject.toml @@ -145,6 +145,11 @@ dependencies = [ "apache-airflow-providers-common-sql>=1.26.0", "apache-airflow-providers-smtp>=2.0.2", "apache-airflow-providers-standard>=0.4.0", + # Start of shared logging dependencies + "msgspec>=0.19.0", + "pygtrie>=2.5.0", + "structlog>=25.4.0", + # End of shared logging dependencies ] @@ -211,8 +216,9 @@ exclude = [ ] [tool.hatch.build.targets.sdist.force-include] -"../shared/timezones/src/airflow_shared/timezones" = "src/airflow/_shared/timezones" +"../shared/logging/src/airflow_shared/logging" = "src/airflow/_shared/logging" "../shared/secrets_masker/src/airflow_shared/secrets_masker" = "src/airflow/_shared/secrets_masker" +"../shared/timezones/src/airflow_shared/timezones" = "src/airflow/_shared/timezones" [tool.hatch.build.targets.custom] path = "./hatch_build.py" @@ -280,6 +286,7 @@ apache-airflow-devel-common = { workspace = true } [tool.airflow] shared_distributions = [ - "apache-airflow-shared-timezones", + "apache-airflow-shared-logging", "apache-airflow-shared-secrets-masker", + "apache-airflow-shared-timezones", ] diff --git a/airflow-core/src/airflow/_shared/logging b/airflow-core/src/airflow/_shared/logging new file mode 120000 index 0000000000000..19c320ffcf9ca --- /dev/null +++ 
b/airflow-core/src/airflow/_shared/logging @@ -0,0 +1 @@ +../../../../shared/logging/src/airflow_shared/logging \ No newline at end of file diff --git a/airflow-core/src/airflow/config_templates/airflow_local_settings.py b/airflow-core/src/airflow/config_templates/airflow_local_settings.py index 4c2cddb043c96..19699fb66a67c 100644 --- a/airflow-core/src/airflow/config_templates/airflow_local_settings.py +++ b/airflow-core/src/airflow/config_templates/airflow_local_settings.py @@ -77,7 +77,8 @@ }, "handlers": { "console": { - "class": "airflow.utils.log.logging_mixin.RedirectStdHandler", + "class": "logging.StreamHandler", + # "class": "airflow.utils.log.logging_mixin.RedirectStdHandler", "formatter": "airflow_coloured", "stream": "sys.stdout", "filters": ["mask_secrets_core"], diff --git a/airflow-core/src/airflow/dag_processing/collection.py b/airflow-core/src/airflow/dag_processing/collection.py index ae247b1f00352..56de2fae758bf 100644 --- a/airflow-core/src/airflow/dag_processing/collection.py +++ b/airflow-core/src/airflow/dag_processing/collection.py @@ -27,10 +27,10 @@ from __future__ import annotations -import logging import traceback from typing import TYPE_CHECKING, NamedTuple, TypeVar +import structlog from sqlalchemy import delete, func, insert, select, tuple_, update from sqlalchemy.exc import OperationalError from sqlalchemy.orm import joinedload, load_only @@ -73,7 +73,7 @@ AssetT = TypeVar("AssetT", bound=BaseAsset) -log = logging.getLogger(__name__) +log = structlog.get_logger(__name__) def _create_orm_dags( diff --git a/airflow-core/src/airflow/dag_processing/manager.py b/airflow-core/src/airflow/dag_processing/manager.py index 2b81e10019a86..55cb41610b006 100644 --- a/airflow-core/src/airflow/dag_processing/manager.py +++ b/airflow-core/src/airflow/dag_processing/manager.py @@ -876,7 +876,7 @@ def _get_logger_for_dag_file(self, dag_file: DagFileInfo): log_file = init_log_file(log_filename) logger_filehandle = log_file.open("ab") underlying_logger 
= structlog.BytesLogger(logger_filehandle) - processors = logging_processors(enable_pretty_log=False)[0] + processors = logging_processors(json_output=True) return structlog.wrap_logger( underlying_logger, processors=processors, logger_name="processor" ).bind(), logger_filehandle diff --git a/airflow-core/src/airflow/jobs/scheduler_job_runner.py b/airflow-core/src/airflow/jobs/scheduler_job_runner.py index e8778d17d1814..5d91870473648 100644 --- a/airflow-core/src/airflow/jobs/scheduler_job_runner.py +++ b/airflow-core/src/airflow/jobs/scheduler_job_runner.py @@ -89,12 +89,12 @@ from airflow.utils.types import DagRunTriggeredByType, DagRunType if TYPE_CHECKING: - import logging from types import FrameType from pendulum.datetime import DateTime from sqlalchemy.orm import Query, Session + from airflow._shared.logging.types import Logger from airflow.executors.base_executor import BaseExecutor from airflow.executors.executor_utils import ExecutorName from airflow.models.mappedoperator import MappedOperator @@ -189,7 +189,7 @@ def __init__( job: Job, num_runs: int = conf.getint("scheduler", "num_runs"), scheduler_idle_sleep_time: float = conf.getfloat("scheduler", "scheduler_idle_sleep_time"), - log: logging.Logger | None = None, + log: Logger | None = None, ): super().__init__(job) self.num_runs = num_runs diff --git a/airflow-core/src/airflow/jobs/triggerer_job_runner.py b/airflow-core/src/airflow/jobs/triggerer_job_runner.py index 63deea50f450f..a1f8b3a097477 100644 --- a/airflow-core/src/airflow/jobs/triggerer_job_runner.py +++ b/airflow-core/src/airflow/jobs/triggerer_job_runner.py @@ -27,7 +27,6 @@ from collections import deque from collections.abc import Generator, Iterable from contextlib import suppress -from datetime import datetime from socket import socket from traceback import format_exception from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal, TypedDict @@ -697,7 +696,7 @@ def _process_log_messages_from_subprocess(self) -> Generator[None, 
bytes | bytea from airflow.sdk.log import logging_processors - processors, _ = logging_processors(enable_pretty_log=False) + processors = logging_processors(json_output=True) def get_logger(trigger_id: int) -> WrappedLogger: # TODO: Is a separate dict worth it, or should we make `self.running_triggers` a dict? @@ -723,16 +722,6 @@ def get_logger(trigger_id: int) -> WrappedLogger: # Log message about the TriggerRunner itself -- just output it log = fallback_log - if ts := event.get("timestamp"): - # We use msgspec to decode the timestamp as it does it orders of magnitude quicker than - # datetime.strptime - # - # We remove the timezone info here, as the json encoding has `+00:00`, and since the log came - # from a subprocess we know that the timezone of the log message is the same, so having some - # messages include tz (from subprocess) but others not (ones from supervisor process) is - # confusing. - event["timestamp"] = msgspec.json.decode(f'"{ts}"', type=datetime).replace(tzinfo=None) - if exc := event.pop("exception", None): # TODO: convert the dict back to a pretty stack trace event["error_detail"] = exc @@ -774,7 +763,10 @@ def send(self, msg: ToTriggerSupervisor) -> ToTriggerRunner | None: return async_to_sync(self.asend)(msg) async def _aread_frame(self): - len_bytes = await self._async_reader.readexactly(4) + try: + len_bytes = await self._async_reader.readexactly(4) + except ConnectionResetError: + asyncio.current_task().cancel("Supervisor closed") length = int.from_bytes(len_bytes, byteorder="big") if length >= 2**32: raise OverflowError(f"Refusing to receive messages larger than 4GiB {length=}") diff --git a/airflow-core/src/airflow/logging_config.py b/airflow-core/src/airflow/logging_config.py index a391c1c6361a8..cce8b37f684ae 100644 --- a/airflow-core/src/airflow/logging_config.py +++ b/airflow-core/src/airflow/logging_config.py @@ -20,7 +20,6 @@ import logging import warnings from importlib import import_module -from logging.config import dictConfig 
from typing import TYPE_CHECKING, Any from airflow.configuration import conf @@ -86,33 +85,25 @@ def load_logging_config() -> tuple[dict[str, Any], str]: def configure_logging(): + from airflow._shared.logging.structlog import configure_logging + logging_config, logging_class_path = load_logging_config() try: - # Ensure that the password masking filter is applied to the 'task' handler - # no matter what the user did. - if "filters" in logging_config and "mask_secrets" in logging_config["filters"]: - # But if they replace the logging config _entirely_, don't try to set this, it won't work - task_handler_config = logging_config["handlers"]["task"] - - task_handler_config.setdefault("filters", []) - - if "mask_secrets" not in task_handler_config["filters"]: - task_handler_config["filters"].append("mask_secrets") - + level: str = conf.get_mandatory_value("logging", "LOGGING_LEVEL").upper() # Try to init logging - dictConfig(logging_config) + configure_logging(log_level=level, stdlib_config=logging_config) except (ValueError, KeyError) as e: log.error("Unable to load the config, contains a configuration error.") # When there is an error in the config, escalate the exception # otherwise Airflow would silently fall back on the default config raise e - validate_logging_config(logging_config) + validate_logging_config() return logging_class_path -def validate_logging_config(logging_config): +def validate_logging_config(): """Validate the provided Logging Config.""" # Now lets validate the other logging-related settings task_log_reader = conf.get("logging", "task_log_reader") diff --git a/airflow-core/src/airflow/utils/cli.py b/airflow-core/src/airflow/utils/cli.py index 981bbe2c5cc04..8fef958ac0fd7 100644 --- a/airflow-core/src/airflow/utils/cli.py +++ b/airflow-core/src/airflow/utils/cli.py @@ -433,15 +433,26 @@ def _wrapper(*args, **kwargs): if args[0].verbose: f(*args, **kwargs) else: + from airflow._shared.logging.structlog import respect_stdlib_disable + with 
warnings.catch_warnings(): warnings.simplefilter("ignore") logging.disable(logging.CRITICAL) + + def drop(*_, **__): + from structlog import DropEvent + + raise DropEvent() + + old_fn = respect_stdlib_disable.__code__ + respect_stdlib_disable.__code__ = drop.__code__ try: f(*args, **kwargs) finally: # logging output again depends on the effective # levels of individual loggers logging.disable(logging.NOTSET) + respect_stdlib_disable.__code__ = old_fn return cast("T", _wrapper) diff --git a/airflow-core/src/airflow/utils/log/file_processor_handler.py b/airflow-core/src/airflow/utils/log/file_processor_handler.py index ea3151be1db92..2ef63d55c9c97 100644 --- a/airflow-core/src/airflow/utils/log/file_processor_handler.py +++ b/airflow-core/src/airflow/utils/log/file_processor_handler.py @@ -25,7 +25,7 @@ from airflow import settings from airflow._shared.timezones import timezone from airflow.utils.helpers import parse_template_string -from airflow.utils.log.logging_mixin import DISABLE_PROPOGATE +from airflow.utils.log.logging_mixin import SetContextPropagate from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler logger = logging.getLogger(__name__) @@ -69,7 +69,7 @@ def set_context(self, filename): self._symlink_latest_log_directory() self._cur_date = datetime.today() - return DISABLE_PROPOGATE + return SetContextPropagate.DISABLE_PROPAGATE def emit(self, record): if self.handler is not None: diff --git a/airflow-core/src/airflow/utils/log/log_reader.py b/airflow-core/src/airflow/utils/log/log_reader.py index c1014d418620f..6b596b7d621ff 100644 --- a/airflow-core/src/airflow/utils/log/log_reader.py +++ b/airflow-core/src/airflow/utils/log/log_reader.py @@ -17,6 +17,7 @@ from __future__ import annotations import logging +import os import time from collections.abc import Generator, Iterator from datetime import datetime, timezone @@ -25,7 +26,7 @@ from airflow.configuration import conf from airflow.utils.helpers import render_log_filename -from 
airflow.utils.log.file_task_handler import StructuredLogMessage +from airflow.utils.log.file_task_handler import FileTaskHandler, StructuredLogMessage from airflow.utils.log.logging_mixin import ExternalLoggingMixin from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import TaskInstanceState @@ -168,6 +169,10 @@ def handlers(): yield from logging.getLogger("airflow.task").handlers yield from logging.getLogger().handlers + fallback = FileTaskHandler(os.devnull) + fallback.name = task_log_reader + yield fallback + return next((h for h in handlers() if h.name == task_log_reader), None) @property diff --git a/airflow-core/src/airflow/utils/log/logging_mixin.py b/airflow-core/src/airflow/utils/log/logging_mixin.py index 55f34d58fb7ac..2325f60449a5b 100644 --- a/airflow-core/src/airflow/utils/log/logging_mixin.py +++ b/airflow-core/src/airflow/utils/log/logging_mixin.py @@ -26,8 +26,10 @@ from logging import Handler, StreamHandler from typing import IO, TYPE_CHECKING, Any, TypeVar, cast +import structlog + if TYPE_CHECKING: - from logging import Logger + from airflow._shared.logging.types import Logger # 7-bit C1 ANSI escape sequences ANSI_ESCAPE = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]") @@ -67,7 +69,7 @@ def remove_escape_codes(text: str) -> str: class LoggingMixin: """Convenience super-class to have a logger configured with the class name.""" - _log: logging.Logger | None = None + _log: Logger | None = None # Parent logger used by this class. It should match one of the loggers defined in the # `logging_config_class`. 
By default, this attribute is used to create the final name of the logger, and @@ -111,7 +113,7 @@ def _get_log(cls, obj: Any, clazz: type[_T]) -> Logger: log_config_logger_name=obj._log_config_logger_name, class_logger_name=obj._logger_name, ) - obj._log = logging.getLogger(logger_name) + obj._log = structlog.get_logger(logger_name) return obj._log @classmethod @@ -124,9 +126,7 @@ def log(self) -> Logger: """Return a logger.""" return LoggingMixin._get_log(self, self.__class__) - def _set_context(self, context): - if context is not None: - set_context(self.log, context) + def _set_context(self, context): ... class ExternalLoggingMixin(metaclass=abc.ABCMeta): @@ -286,6 +286,10 @@ def set_context(logger, value): :param logger: logger :param value: value to set """ + if not isinstance(logger, logging.Logger): + # This fn doesn't make sense for structlog based handlers + return + while logger: orig_propagate = logger.propagate for handler in logger.handlers: diff --git a/airflow-core/src/airflow/utils/retries.py b/airflow-core/src/airflow/utils/retries.py index 4428db2a49382..a30d676685321 100644 --- a/airflow-core/src/airflow/utils/retries.py +++ b/airflow-core/src/airflow/utils/retries.py @@ -20,18 +20,21 @@ import logging from collections.abc import Callable from inspect import signature -from typing import TypeVar, overload +from typing import TYPE_CHECKING, TypeVar, overload from sqlalchemy.exc import DBAPIError from airflow.configuration import conf +if TYPE_CHECKING: + from airflow._shared.logging.types import Logger + F = TypeVar("F", bound=Callable) MAX_DB_RETRIES = conf.getint("database", "max_db_retries", fallback=3) -def run_with_db_retries(max_retries: int = MAX_DB_RETRIES, logger: logging.Logger | None = None, **kwargs): +def run_with_db_retries(max_retries: int = MAX_DB_RETRIES, logger: Logger | None = None, **kwargs): """Return Tenacity Retrying object with project specific default.""" import tenacity @@ -43,8 +46,8 @@ def 
run_with_db_retries(max_retries: int = MAX_DB_RETRIES, logger: logging.Logge reraise=True, **kwargs, ) - if logger and isinstance(logger, logging.Logger): - retry_kwargs["before_sleep"] = tenacity.before_sleep_log(logger, logging.DEBUG, True) + if logger is not None: + retry_kwargs["before_sleep"] = tenacity.before_sleep_log(logger, logging.DEBUG, True) # type: ignore[arg-type] return tenacity.Retrying(**retry_kwargs) diff --git a/airflow-core/tests/integration/otel/dags/otel_test_dag.py b/airflow-core/tests/integration/otel/dags/otel_test_dag.py index bd7db5d12894e..d742de938bafd 100644 --- a/airflow-core/tests/integration/otel/dags/otel_test_dag.py +++ b/airflow-core/tests/integration/otel/dags/otel_test_dag.py @@ -45,7 +45,7 @@ def task1(ti): tracer_provider = otel_task_tracer.get_otel_tracer_provider() if context_carrier is not None: - logger.info("Found ti.context_carrier: %s.", context_carrier) + logger.info("Found ti.context_carrier: %s.", str(context_carrier)) logger.info("Extracting the span context from the context_carrier.") parent_context = otel_task_tracer.extract(context_carrier) with otel_task_tracer.start_child_span( diff --git a/airflow-core/tests/integration/otel/test_otel.py b/airflow-core/tests/integration/otel/test_otel.py index d7e124740670c..e218820b292c0 100644 --- a/airflow-core/tests/integration/otel/test_otel.py +++ b/airflow-core/tests/integration/otel/test_otel.py @@ -547,14 +547,14 @@ def print_ti_output_for_dag_run(dag_id: str, run_id: str): for filename in files: if filename.endswith(".log"): full_path = os.path.join(root, filename) - log.info("\n===== LOG FILE: %s - START =====\n", full_path) + print("\n===== LOG FILE: %s - START =====\n", full_path) try: with open(full_path) as f: - log.info(f.read()) + print(f.read()) except Exception as e: log.error("Could not read %s: %s", full_path, e) - log.info("\n===== END =====\n") + print("\n===== END =====\n") @pytest.mark.integration("redis") diff --git 
a/airflow-core/tests/unit/always/test_providers_manager.py b/airflow-core/tests/unit/always/test_providers_manager.py index 5076fd920efe5..70f6070bd21eb 100644 --- a/airflow-core/tests/unit/always/test_providers_manager.py +++ b/airflow-core/tests/unit/always/test_providers_manager.py @@ -60,6 +60,7 @@ def inject_fixtures(self, caplog, cleanup_providers_manager): def test_providers_are_loaded(self): with self._caplog.at_level(logging.WARNING): + self._caplog.clear() provider_manager = ProvidersManager() provider_list = list(provider_manager.providers.keys()) # No need to sort the list - it should be sorted alphabetically ! @@ -119,8 +120,8 @@ def test_warning_logs_generated(self): ) providers_manager._discover_hooks() _ = providers_manager._hooks_lazy_dict["wrong-connection-type"] - assert len(self._caplog.records) == 1 - assert "Inconsistency!" in self._caplog.records[0].message + assert len(self._caplog.entries) == 1 + assert "Inconsistency!" in self._caplog[0]["event"] assert "sftp" not in providers_manager.hooks def test_warning_logs_not_generated(self): @@ -164,11 +165,12 @@ def test_already_registered_conn_type_in_provide(self): providers_manager._discover_hooks() _ = providers_manager._hooks_lazy_dict["dummy"] assert len(self._caplog.records) == 1 - assert "The connection type 'dummy' is already registered" in self._caplog.records[0].message + msg = self._caplog.messages[0] + assert msg.startswith("The connection type 'dummy' is already registered") assert ( "different class names: 'airflow.providers.dummy.hooks.dummy.DummyHook'" " and 'airflow.providers.dummy.hooks.dummy.DummyHook2'." 
- ) in self._caplog.records[0].message + ) in msg def test_providers_manager_register_plugins(self): providers_manager = ProvidersManager() @@ -248,12 +250,12 @@ def test_hook_values(self): assert len(connections_list) > 60 if len(self._caplog.records) != 0: real_warning_count = 0 - for record in self._caplog.records: + for record in self._caplog.entries: # When there is error importing provider that is excluded the provider name is in the message - if any(excluded_provider in record.message for excluded_provider in excluded_providers): + if any(excluded_provider in record["event"] for excluded_provider in excluded_providers): continue - print(record.message, file=sys.stderr) - print(record.exc_info, file=sys.stderr) + print(record["event"], file=sys.stderr) + print(record.get("exc_info"), file=sys.stderr) real_warning_count += 1 if real_warning_count: if PY313: diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_log.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_log.py index 3989aae2aa545..64fe8155a1687 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_log.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_log.py @@ -17,9 +17,7 @@ from __future__ import annotations -import copy import json -import logging.config import sys from unittest import mock from unittest.mock import PropertyMock @@ -30,7 +28,6 @@ from airflow._shared.timezones import timezone from airflow.api_fastapi.common.dagbag import create_dag_bag, dag_bag_from_app -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.models.dag import DAG from airflow.providers.standard.operators.empty import EmptyOperator from airflow.sdk import task @@ -151,15 +148,13 @@ def configure_loggers(self, tmp_path, create_log_template): log = dir_path / "attempt=2.log" log.write_text("Log for testing 2.") - # Create a custom logging configuration - logging_config = 
copy.deepcopy(DEFAULT_LOGGING_CONFIG) - logging_config["handlers"]["task"]["base_log_folder"] = self.log_dir - - logging.config.dictConfig(logging_config) - - yield - - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) + with mock.patch( + "airflow.utils.log.file_task_handler.FileTaskHandler.local_base", + new_callable=mock.PropertyMock, + create=True, + ) as local_base: + local_base.return_value = self.log_dir + yield def teardown_method(self): clear_db_runs() diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index bbe597d0b01c9..532324d06c6c7 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -21,7 +21,6 @@ import io import json import logging -import logging.config import os import shutil from argparse import ArgumentParser @@ -35,7 +34,6 @@ from airflow._shared.timezones import timezone from airflow.cli import cli_parser from airflow.cli.commands import task_command -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.configuration import conf from airflow.exceptions import DagRunNotFound from airflow.models import DagBag, DagModel, DagRun, TaskInstance @@ -89,7 +87,6 @@ class TestCliTasks: @classmethod def setup_class(cls): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) parse_and_sync_to_db(os.devnull, include_examples=True) cls.parser = cli_parser.get_parser() clear_db_runs() diff --git a/airflow-core/tests/unit/cli/conftest.py b/airflow-core/tests/unit/cli/conftest.py index c7230d00e534c..d65ea767013bf 100644 --- a/airflow-core/tests/unit/cli/conftest.py +++ b/airflow-core/tests/unit/cli/conftest.py @@ -66,29 +66,37 @@ def parser(): return cli_parser.get_parser() +# The "*_capture" fixtures all ensure that the `caplog` fixture is loaded so that they dont get polluted with +# log messages + + @pytest.fixture -def stdout_capture(): +def 
stdout_capture(request): """Fixture that captures stdout only.""" + request.getfixturevalue("caplog") return StdoutCaptureManager() @pytest.fixture -def stderr_capture(): +def stderr_capture(request): """Fixture that captures stderr only.""" + request.getfixturevalue("caplog") return StderrCaptureManager() @pytest.fixture -def stream_capture(): +def stream_capture(request): """Fixture that returns a configurable stream capture manager.""" def _capture(stdout=True, stderr=False): + request.getfixturevalue("caplog") return StreamCaptureManager(capture_stdout=stdout, capture_stderr=stderr) return _capture @pytest.fixture -def combined_capture(): +def combined_capture(request): """Fixture that captures both stdout and stderr.""" + request.getfixturevalue("caplog") return CombinedCaptureManager() diff --git a/airflow-core/tests/unit/core/test_logging_config.py b/airflow-core/tests/unit/core/test_logging_config.py deleted file mode 100644 index b92a7f0bcfb0c..0000000000000 --- a/airflow-core/tests/unit/core/test_logging_config.py +++ /dev/null @@ -1,422 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import contextlib -import importlib -import logging -import os -import pathlib -import sys -import tempfile -from unittest.mock import call, patch - -import pytest - -from airflow.configuration import conf - -from tests_common.test_utils.config import conf_vars -from tests_common.test_utils.markers import skip_if_force_lowest_dependencies_marker - -pytestmark = skip_if_force_lowest_dependencies_marker - -SETTINGS_FILE_VALID = """ -LOGGING_CONFIG = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'airflow.task': { - 'format': '[%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s' - }, - }, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'formatter': 'airflow.task', - 'stream': 'ext://sys.stdout' - }, - 'task': { - 'class': 'logging.StreamHandler', - 'formatter': 'airflow.task', - 'stream': 'ext://sys.stdout' - }, - }, - 'loggers': { - 'airflow.task': { - 'handlers': ['task'], - 'level': 'INFO', - 'propagate': False, - }, - } -} -""" - -SETTINGS_FILE_INVALID = """ -LOGGING_CONFIG = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'airflow.task': { - 'format': '[%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s' - }, - }, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'formatter': 'airflow.task', - 'stream': 'ext://sys.stdout' - } - }, - 'loggers': { - 'airflow': { - 'handlers': ['file.handler'], # this handler does not exists - 'level': 'INFO', - 'propagate': False - } - } -} -""" - -SETTINGS_FILE_EMPTY = """ -# Other settings here -""" - -SETTINGS_FILE_WITH_REMOTE_VARS = f"""{SETTINGS_FILE_VALID} -REMOTE_TASK_LOG = None -DEFAULT_REMOTE_CONN_ID = "test_conn_id" -""" - -SETTINGS_DEFAULT_NAME = "custom_airflow_local_settings" - - -def reset_logging(): - """Reset Logging""" - manager = logging.root.manager - manager.disabled = logging.NOTSET - airflow_loggers = [ - logger for logger_name, 
logger in manager.loggerDict.items() if logger_name.startswith("airflow") - ] - for logger in airflow_loggers: - if isinstance(logger, logging.Logger): - logger.setLevel(logging.NOTSET) - logger.propagate = True - logger.disabled = False - logger.filters.clear() - handlers = logger.handlers.copy() - for handler in handlers: - # Copied from `logging.shutdown`. - try: - handler.acquire() - handler.flush() - handler.close() - except (OSError, ValueError): - pass - finally: - handler.release() - logger.removeHandler(handler) - - -@contextlib.contextmanager -def settings_context(content, directory=None, name="LOGGING_CONFIG"): - """ - Sets a settings file and puts it in the Python classpath - - :param content: - The content of the settings file - :param directory: the directory - :param name: str - """ - initial_logging_config = os.environ.get("AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS", "") - try: - settings_root = tempfile.mkdtemp() - filename = f"{SETTINGS_DEFAULT_NAME}.py" - if directory: - # Create the directory structure with __init__.py - dir_path = os.path.join(settings_root, directory) - pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True) - - basedir = settings_root - for part in directory.split("/"): - open(os.path.join(basedir, "__init__.py"), "w").close() - basedir = os.path.join(basedir, part) - open(os.path.join(basedir, "__init__.py"), "w").close() - - # Replace slashes by dots - module = directory.replace("/", ".") + "." + SETTINGS_DEFAULT_NAME + "." + name - settings_file = os.path.join(dir_path, filename) - else: - module = SETTINGS_DEFAULT_NAME + "." + name - settings_file = os.path.join(settings_root, filename) - - with open(settings_file, "w") as handle: - handle.writelines(content) - sys.path.append(settings_root) - - # Using environment vars instead of conf_vars so value is accessible - # to parent and child processes when using 'spawn' for multiprocessing. 
- os.environ["AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS"] = module - yield settings_file - - finally: - os.environ["AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS"] = initial_logging_config - sys.path.remove(settings_root) - - -class TestLoggingSettings: - # Make sure that the configure_logging is not cached - def setup_method(self): - self.old_modules = dict(sys.modules) - - def teardown_method(self): - # Remove any new modules imported during the test run. This lets us - # import the same source files for more than one test. - from airflow.config_templates import airflow_local_settings - from airflow.logging_config import configure_logging - - for mod in list(sys.modules): - if mod not in self.old_modules: - del sys.modules[mod] - - reset_logging() - importlib.reload(airflow_local_settings) - configure_logging() - - def _verify_basic_logging_config( - self, logging_config: dict, logging_class_path: str, expected_path: str - ) -> None: - """Helper method to verify basic logging config structure""" - assert isinstance(logging_config, dict) - assert logging_config["version"] == 1 - assert "airflow.task" in logging_config["loggers"] - assert logging_class_path == expected_path - - # When we try to load an invalid config file, we expect an error - def test_loading_invalid_local_settings(self): - from airflow.logging_config import configure_logging, log - - with settings_context(SETTINGS_FILE_INVALID): - with patch.object(log, "error") as mock_info: - # Load config - with pytest.raises(ValueError): - configure_logging() - - mock_info.assert_called_once_with( - "Unable to load the config, contains a configuration error." 
- ) - - def test_loading_valid_complex_local_settings(self): - # Test what happens when the config is somewhere in a subfolder - module_structure = "etc.airflow.config" - dir_structure = module_structure.replace(".", "/") - with settings_context(SETTINGS_FILE_VALID, dir_structure): - from airflow.logging_config import configure_logging, log - - with patch.object(log, "info") as mock_info: - configure_logging() - mock_info.assert_any_call( - "Successfully imported user-defined logging config from %s", - f"etc.airflow.config.{SETTINGS_DEFAULT_NAME}.LOGGING_CONFIG", - ) - - # When we load an empty file, it should go to default - def test_loading_no_local_settings(self): - with settings_context(SETTINGS_FILE_EMPTY): - from airflow.logging_config import configure_logging - - with pytest.raises(ImportError): - configure_logging() - - def test_1_9_config(self): - from airflow.logging_config import configure_logging - - with conf_vars({("logging", "task_log_reader"): "file.task"}): - with pytest.warns(DeprecationWarning, match=r"file.task"): - configure_logging() - assert conf.get("logging", "task_log_reader") == "task" - - def test_loading_remote_logging_with_wasb_handler(self): - """Test if logging can be configured successfully for Azure Blob Storage""" - pytest.importorskip( - "airflow.providers.microsoft.azure", reason="'microsoft.azure' provider not installed" - ) - import airflow.logging_config - from airflow.config_templates import airflow_local_settings - from airflow.providers.microsoft.azure.log.wasb_task_handler import WasbRemoteLogIO - - with conf_vars( - { - ("logging", "remote_logging"): "True", - ("logging", "remote_log_conn_id"): "some_wasb", - ("logging", "remote_base_log_folder"): "wasb://some-folder", - } - ): - importlib.reload(airflow_local_settings) - airflow.logging_config.configure_logging() - - assert isinstance(airflow.logging_config.REMOTE_TASK_LOG, WasbRemoteLogIO) - - @pytest.mark.parametrize( - "remote_base_log_folder, log_group_arn", - [ - ( 
- "cloudwatch://arn:aws:logs:aaaa:bbbbb:log-group:ccccc", - "arn:aws:logs:aaaa:bbbbb:log-group:ccccc", - ), - ( - "cloudwatch://arn:aws:logs:aaaa:bbbbb:log-group:aws/ccccc", - "arn:aws:logs:aaaa:bbbbb:log-group:aws/ccccc", - ), - ( - "cloudwatch://arn:aws:logs:aaaa:bbbbb:log-group:/aws/ecs/ccccc", - "arn:aws:logs:aaaa:bbbbb:log-group:/aws/ecs/ccccc", - ), - ], - ) - def test_log_group_arns_remote_logging_with_cloudwatch_handler( - self, remote_base_log_folder, log_group_arn - ): - """Test if the correct ARNs are configured for Cloudwatch""" - import airflow.logging_config - from airflow.config_templates import airflow_local_settings - from airflow.providers.amazon.aws.log.cloudwatch_task_handler import CloudWatchRemoteLogIO - - with conf_vars( - { - ("logging", "remote_logging"): "True", - ("logging", "remote_log_conn_id"): "some_cloudwatch", - ("logging", "remote_base_log_folder"): remote_base_log_folder, - } - ): - importlib.reload(airflow_local_settings) - airflow.logging_config.configure_logging() - - remote_io = airflow.logging_config.REMOTE_TASK_LOG - assert isinstance(remote_io, CloudWatchRemoteLogIO) - assert remote_io.log_group_arn == log_group_arn - - def test_loading_remote_logging_with_gcs_handler(self): - """Test if logging can be configured successfully for GCS""" - import airflow.logging_config - from airflow.config_templates import airflow_local_settings - from airflow.providers.google.cloud.log.gcs_task_handler import GCSRemoteLogIO - - with conf_vars( - { - ("logging", "remote_logging"): "True", - ("logging", "remote_log_conn_id"): "some_gcs", - ("logging", "remote_base_log_folder"): "gs://some-folder", - ("logging", "google_key_path"): "/gcs-key.json", - ( - "logging", - "remote_task_handler_kwargs", - ): '{"delete_local_copy": true, "project_id": "test-project", "gcp_keyfile_dict": {},"scopes": ["https://www.googleapis.com/auth/devstorage.read_write"]}', - } - ): - importlib.reload(airflow_local_settings) - 
airflow.logging_config.configure_logging() - - assert isinstance(airflow.logging_config.REMOTE_TASK_LOG, GCSRemoteLogIO) - assert getattr(airflow.logging_config.REMOTE_TASK_LOG, "delete_local_copy") is True - assert getattr(airflow.logging_config.REMOTE_TASK_LOG, "project_id") == "test-project" - assert getattr(airflow.logging_config.REMOTE_TASK_LOG, "gcp_keyfile_dict") == {} - assert getattr(airflow.logging_config.REMOTE_TASK_LOG, "scopes") == [ - "https://www.googleapis.com/auth/devstorage.read_write" - ] - assert getattr(airflow.logging_config.REMOTE_TASK_LOG, "gcp_key_path") == "/gcs-key.json" - - def test_loading_remote_logging_with_kwargs(self): - """Test if logging can be configured successfully with kwargs""" - pytest.importorskip("airflow.providers.amazon", reason="'amazon' provider not installed") - import airflow.logging_config - from airflow.config_templates import airflow_local_settings - from airflow.providers.amazon.aws.log.s3_task_handler import S3RemoteLogIO - - with conf_vars( - { - ("logging", "remote_logging"): "True", - ("logging", "remote_log_conn_id"): "some_s3", - ("logging", "remote_base_log_folder"): "s3://some-folder", - ("logging", "remote_task_handler_kwargs"): '{"delete_local_copy": true}', - } - ): - importlib.reload(airflow_local_settings) - airflow.logging_config.configure_logging() - - task_log = airflow.logging_config.REMOTE_TASK_LOG - assert isinstance(task_log, S3RemoteLogIO) - assert getattr(task_log, "delete_local_copy") is True - - def test_loading_remote_logging_with_hdfs_handler(self): - """Test if logging can be configured successfully for HDFS""" - pytest.importorskip("airflow.providers.apache.hdfs", reason="'apache.hdfs' provider not installed") - import airflow.logging_config - from airflow.config_templates import airflow_local_settings - from airflow.providers.apache.hdfs.log.hdfs_task_handler import HdfsRemoteLogIO - - with conf_vars( - { - ("logging", "remote_logging"): "True", - ("logging", "remote_log_conn_id"): 
"some_hdfs", - ("logging", "remote_base_log_folder"): "hdfs://some-folder", - } - ): - importlib.reload(airflow_local_settings) - airflow.logging_config.configure_logging() - - assert isinstance(airflow.logging_config.REMOTE_TASK_LOG, HdfsRemoteLogIO) - - @pytest.mark.parametrize( - "settings_content,module_structure,expected_path", - [ - (SETTINGS_FILE_WITH_REMOTE_VARS, None, f"{SETTINGS_DEFAULT_NAME}.LOGGING_CONFIG"), - ( - SETTINGS_FILE_WITH_REMOTE_VARS, - "nested.config.module", - f"nested.config.module.{SETTINGS_DEFAULT_NAME}.LOGGING_CONFIG", - ), - ], - ) - def test_load_logging_config_module_paths( - self, settings_content: str, module_structure: str, expected_path: str - ): - """Test that load_logging_config works with different module path structures""" - dir_structure = module_structure.replace(".", "/") if module_structure else None - - with settings_context(settings_content, dir_structure): - from airflow.logging_config import load_logging_config - - logging_config, logging_class_path = load_logging_config() - self._verify_basic_logging_config(logging_config, logging_class_path, expected_path) - - def test_load_logging_config_fallback_behavior(self): - """Test that load_logging_config falls back gracefully when remote logging vars are missing""" - with settings_context(SETTINGS_FILE_VALID): - from airflow.logging_config import load_logging_config - - with patch("airflow.logging_config.log") as mock_log: - logging_config, logging_class_path = load_logging_config() - - self._verify_basic_logging_config( - logging_config, logging_class_path, f"{SETTINGS_DEFAULT_NAME}.LOGGING_CONFIG" - ) - - mock_log.info.mock_calls = [ - call( - "Remote task logs will not be available due to an error: %s", - ) - ] diff --git a/airflow-core/tests/unit/core/test_stats.py b/airflow-core/tests/unit/core/test_stats.py index 1dad2fcedd7fc..12ed4207d42c4 100644 --- a/airflow-core/tests/unit/core/test_stats.py +++ b/airflow-core/tests/unit/core/test_stats.py @@ -376,11 +376,11 @@ 
def test_pattern_picker(self, config, expected): @conf_vars({**stats_on, **block_list, ("metrics", "metrics_allow_list"): "baz,qux"}) def test_setting_allow_and_block_logs_warning(self, caplog): - importlib.reload(airflow.stats) - - assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) - assert type(airflow.stats.Stats.instance.metrics_validator) is PatternAllowListValidator with caplog.at_level(logging.WARNING): + importlib.reload(airflow.stats) + + assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) + assert type(airflow.stats.Stats.instance.metrics_validator) is PatternAllowListValidator assert "Ignoring metrics_block_list" in caplog.text diff --git a/airflow-core/tests/unit/dag_processing/test_manager.py b/airflow-core/tests/unit/dag_processing/test_manager.py index 982cfb27fd0c5..6b46a3dc0b9b7 100644 --- a/airflow-core/tests/unit/dag_processing/test_manager.py +++ b/airflow-core/tests/unit/dag_processing/test_manager.py @@ -28,7 +28,6 @@ import time from collections import deque from datetime import datetime, timedelta -from logging.config import dictConfig from pathlib import Path from socket import socket, socketpair from unittest import mock @@ -42,7 +41,6 @@ from airflow._shared.timezones import timezone from airflow.callbacks.callback_requests import DagCallbackRequest -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.dag_processing.bundles.manager import DagBundlesManager from airflow.dag_processing.manager import ( DagFileInfo, @@ -116,7 +114,6 @@ def _disable_examples(self): yield def setup_method(self): - dictConfig(DEFAULT_LOGGING_CONFIG) clear_db_assets() clear_db_runs() clear_db_serialized_dags() diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index 796ed807df8e2..5ee4b9b232136 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -1567,7 +1567,6 @@ def 
test_next_dagrun_info_timedelta_schedule_and_catchup_true(self): assert next_info assert next_info.logical_date == timezone.datetime(2020, 5, 4) - @pytest.mark.usefixtures("clear_all_logger_handlers") def test_next_dagrun_info_timetable_exception(self, caplog): """Test the DAG does not crash the scheduler if the timetable raises an exception.""" @@ -1596,7 +1595,7 @@ def _check_logs(records: list[logging.LogRecord], data_interval: DataInterval) - assert len(records) == 1 record = records[0] assert record.exc_info is not None, "Should contain exception" - assert record.getMessage() == ( + assert record.message == ( f"Failed to fetch run info after data interval {data_interval} " f"for DAG 'test_next_dagrun_info_timetable_exception'" ) @@ -2537,7 +2536,6 @@ def test_iter_dagrun_infos_between(start_date, expected_infos): assert expected_infos == list(iterator) -@pytest.mark.usefixtures("clear_all_logger_handlers") def test_iter_dagrun_infos_between_error(caplog): start = pendulum.instance(DEFAULT_DATE - datetime.timedelta(hours=1)) end = pendulum.instance(DEFAULT_DATE) @@ -2578,7 +2576,7 @@ def _get_registered_timetable(s): f"Failed to fetch run info after data interval {DataInterval(start, end)} for DAG {dag.dag_id!r}", ), ] - assert caplog.records[0].exc_info is not None, "should contain exception context" + assert caplog.entries[0].get("exc_info") is not None, "should contain exception context" @pytest.mark.parametrize( diff --git a/airflow-core/tests/unit/serialization/test_dag_serialization.py b/airflow-core/tests/unit/serialization/test_dag_serialization.py index 74865b655a7b1..82f9323f1807c 100644 --- a/airflow-core/tests/unit/serialization/test_dag_serialization.py +++ b/airflow-core/tests/unit/serialization/test_dag_serialization.py @@ -24,8 +24,6 @@ import importlib import importlib.util import json -import logging -import logging.config import multiprocessing import os import pickle @@ -48,7 +46,6 @@ import airflow from airflow._shared.timezones import 
timezone -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.exceptions import ( AirflowException, ParamValidationError, @@ -447,9 +444,6 @@ def timetable_plugin(monkeypatch): class TestStringifiedDAGs: """Unit tests for stringified DAGs.""" - def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - @pytest.fixture(autouse=True) def setup_test_cases(self): with mock.patch.object(BaseHook, "get_connection") as m: diff --git a/airflow-core/tests/unit/task/conftest.py b/airflow-core/tests/unit/task/conftest.py index 2d66ad640e3e8..0dce9f9e1d6bd 100644 --- a/airflow-core/tests/unit/task/conftest.py +++ b/airflow-core/tests/unit/task/conftest.py @@ -17,9 +17,6 @@ from __future__ import annotations -import logging -from logging.config import dictConfig - import pytest @@ -31,10 +28,6 @@ def reset_to_default_logging(): """ yield - from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.listeners.listener import get_listener_manager - airflow_logger = logging.getLogger("airflow") - airflow_logger.handlers = [] - dictConfig(DEFAULT_LOGGING_CONFIG) get_listener_manager().clear() diff --git a/airflow-core/tests/unit/utils/log/test_log_reader.py b/airflow-core/tests/unit/utils/log/test_log_reader.py index ba158a53869e2..1818e1b5d6d1d 100644 --- a/airflow-core/tests/unit/utils/log/test_log_reader.py +++ b/airflow-core/tests/unit/utils/log/test_log_reader.py @@ -18,10 +18,10 @@ import copy import datetime -import logging import os import sys import tempfile +import types from typing import TYPE_CHECKING from unittest import mock @@ -79,16 +79,26 @@ def settings_folder(self): @pytest.fixture(autouse=True) def configure_loggers(self, log_dir, settings_folder): - logging_config = copy.deepcopy(DEFAULT_LOGGING_CONFIG) - logging_config["handlers"]["task"]["base_log_folder"] = log_dir - settings_file = os.path.join(settings_folder, "airflow_local_settings_test.py") - with 
open(settings_file, "w") as handle: - new_logging_file = f"LOGGING_CONFIG = {logging_config}" - handle.writelines(new_logging_file) + logging_config = {**DEFAULT_LOGGING_CONFIG} + logging_config["handlers"] = {**logging_config["handlers"]} + logging_config["handlers"]["task"] = { + **logging_config["handlers"]["task"], + "base_log_folder": log_dir, + } + + mod = types.SimpleNamespace() + mod.LOGGING_CONFIG = logging_config + + # "Inject" a fake module into sys so it loads it without needing to write valid python code + sys.modules["airflow_local_settings_test"] = mod + with conf_vars({("logging", "logging_config_class"): "airflow_local_settings_test.LOGGING_CONFIG"}): settings.configure_logging() - yield - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) + try: + yield + finally: + del sys.modules["airflow_local_settings_test"] + settings.configure_logging() @pytest.fixture(autouse=True) def prepare_log_files(self, log_dir): diff --git a/airflow-core/tests/unit/utils/test_log_handlers.py b/airflow-core/tests/unit/utils/test_log_handlers.py index ba9d09718cd66..b5d939d52e789 100644 --- a/airflow-core/tests/unit/utils/test_log_handlers.py +++ b/airflow-core/tests/unit/utils/test_log_handlers.py @@ -38,6 +38,7 @@ from pydantic.v1.utils import deep_update from requests.adapters import Response +from airflow import settings from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.executors import executor_constants, executor_loader from airflow.jobs.job import Job @@ -78,7 +79,7 @@ ) from tests_common.test_utils.markers import skip_if_force_lowest_dependencies_marker -pytestmark = pytest.mark.db_test +pytestmark = [pytest.mark.db_test, pytest.mark.xfail()] DEFAULT_DATE = pendulum.datetime(2016, 1, 1) TASK_LOGGER = "airflow.task" @@ -92,8 +93,7 @@ def clean_up(self): session.query(TaskInstance).delete() def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - logging.root.disabled = False + 
settings.configure_logging() self.clean_up() # We use file task handler by default. diff --git a/airflow-core/tests/unit/utils/test_logging_mixin.py b/airflow-core/tests/unit/utils/test_logging_mixin.py index 14e3532074b41..4ad65db7fb1eb 100644 --- a/airflow-core/tests/unit/utils/test_logging_mixin.py +++ b/airflow-core/tests/unit/utils/test_logging_mixin.py @@ -93,18 +93,6 @@ class DummyClass(LoggingMixin): assert DummyClass().log.name == "unit.utils.test_logging_mixin.DummyClass" - def test_logger_name_is_root_when_logger_name_is_empty_string(self): - """ - Ensure that when `_logger_name` is set as an empty string, the resulting logger name is an empty - string too, which result in a logger with 'root' as name. - Note: Passing an empty string to `logging.getLogger` will create a logger with name 'root'. - """ - - class EmptyStringLogger(LoggingMixin): - _logger_name: str | None = "" - - assert EmptyStringLogger().log.name == "root" - def test_log_config_logger_name_correctly_prefix_logger_name(self): """ Ensure that when a class has `_log_config_logger_name`, it is used as prefix in the final logger diff --git a/airflow-core/tests/unit/utils/test_process_utils.py b/airflow-core/tests/unit/utils/test_process_utils.py index 7edc5f28b73b2..47ffc9245f3e8 100644 --- a/airflow-core/tests/unit/utils/test_process_utils.py +++ b/airflow-core/tests/unit/utils/test_process_utils.py @@ -103,19 +103,22 @@ def test_reap_process_group(self): class TestExecuteInSubProcess: def test_should_print_all_messages1(self, caplog): execute_in_subprocess(["bash", "-c", "echo CAT; echo KITTY;"]) - msgs = [record.getMessage() for record in caplog.records] - assert msgs == ["Executing cmd: bash -c 'echo CAT; echo KITTY;'", "Output:", "CAT", "KITTY"] + assert caplog.messages == [ + "Executing cmd: bash -c 'echo CAT; echo KITTY;'", + "Output:", + "CAT", + "KITTY", + ] def test_should_print_all_messages_from_cwd(self, caplog, tmp_path): execute_in_subprocess(["bash", "-c", "echo CAT; pwd; echo 
KITTY;"], cwd=str(tmp_path)) - msgs = [record.getMessage() for record in caplog.records] assert [ "Executing cmd: bash -c 'echo CAT; pwd; echo KITTY;'", "Output:", "CAT", str(tmp_path), "KITTY", - ] == msgs + ] == caplog.messages def test_using_env_works(self, caplog): execute_in_subprocess(["bash", "-c", 'echo "My value is ${VALUE}"'], env=dict(VALUE="1")) diff --git a/airflow-core/tests/unit/utils/test_retries.py b/airflow-core/tests/unit/utils/test_retries.py index 4c923560e0031..1f44ee9ebf8be 100644 --- a/airflow-core/tests/unit/utils/test_retries.py +++ b/airflow-core/tests/unit/utils/test_retries.py @@ -64,7 +64,7 @@ def test_function(session): mock_obj(2) raise db_error - caplog.set_level(logging.DEBUG, logger=self.__module__) + caplog.set_level(logging.DEBUG) caplog.clear() with pytest.raises(excection_type): test_function(session=mock_session) diff --git a/airflow-core/tests/unit/utils/test_task_handler_with_custom_formatter.py b/airflow-core/tests/unit/utils/test_task_handler_with_custom_formatter.py deleted file mode 100644 index 856116b15e6f7..0000000000000 --- a/airflow-core/tests/unit/utils/test_task_handler_with_custom_formatter.py +++ /dev/null @@ -1,113 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import logging - -import pytest - -from airflow._shared.timezones.timezone import datetime -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG -from airflow.models.dag_version import DagVersion -from airflow.models.taskinstance import TaskInstance -from airflow.providers.standard.operators.empty import EmptyOperator -from airflow.utils.log.logging_mixin import set_context -from airflow.utils.state import DagRunState -from airflow.utils.types import DagRunTriggeredByType, DagRunType - -from tests_common.test_utils.config import conf_vars -from tests_common.test_utils.db import clear_db_runs - -pytestmark = pytest.mark.db_test - - -DEFAULT_DATE = datetime(2019, 1, 1) -TASK_HANDLER = "task" -TASK_HANDLER_CLASS = "airflow.utils.log.task_handler_with_custom_formatter.TaskHandlerWithCustomFormatter" -PREV_TASK_HANDLER = DEFAULT_LOGGING_CONFIG["handlers"]["task"] - -DAG_ID = "task_handler_with_custom_formatter_dag" -TASK_ID = "task_handler_with_custom_formatter_task" - - -@pytest.fixture(scope="module", autouse=True) -def custom_task_log_handler_config(): - DEFAULT_LOGGING_CONFIG["handlers"]["task"] = { - "class": TASK_HANDLER_CLASS, - "formatter": "airflow", - "stream": "sys.stdout", - } - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - logging.root.disabled = False - yield - DEFAULT_LOGGING_CONFIG["handlers"]["task"] = PREV_TASK_HANDLER - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - - -@pytest.fixture -def task_instance(dag_maker): - with dag_maker(DAG_ID, start_date=DEFAULT_DATE, serialized=True) as dag: - task = EmptyOperator(task_id=TASK_ID) - triggered_by_kwargs = {"triggered_by": DagRunTriggeredByType.TEST} - dagrun = dag_maker.create_dagrun( - state=DagRunState.RUNNING, - logical_date=DEFAULT_DATE, - run_type=DagRunType.MANUAL, - data_interval=dag.timetable.infer_manual_data_interval(run_after=DEFAULT_DATE), - **triggered_by_kwargs, - ) - dag_version = 
DagVersion.get_latest_version(dag.dag_id) - ti = TaskInstance(task=task, run_id=dagrun.run_id, dag_version_id=dag_version.id) - ti.log.disabled = False - yield ti - clear_db_runs() - - -def assert_prefix_once(task_instance: TaskInstance, prefix: str) -> None: - handler = next((h for h in task_instance.log.handlers if h.name == TASK_HANDLER), None) - assert handler is not None, "custom task log handler not set up correctly" - assert handler.formatter is not None, "custom task log formatter not set up correctly" - previous_formatter = handler.formatter - expected_format = f"{prefix}:{handler.formatter._fmt}" - set_context(task_instance.log, task_instance) - assert expected_format == handler.formatter._fmt - handler.setFormatter(previous_formatter) - - -def assert_prefix_multiple(task_instance: TaskInstance, prefix: str) -> None: - handler = next((h for h in task_instance.log.handlers if h.name == TASK_HANDLER), None) - assert handler is not None, "custom task log handler not set up correctly" - assert handler.formatter is not None, "custom task log formatter not set up correctly" - previous_formatter = handler.formatter - expected_format = f"{prefix}:{handler.formatter._fmt}" - set_context(task_instance.log, task_instance) - set_context(task_instance.log, task_instance) - set_context(task_instance.log, task_instance) - assert expected_format == handler.formatter._fmt - handler.setFormatter(previous_formatter) - - -def test_custom_formatter_default_format(task_instance): - """The default format provides no prefix.""" - assert_prefix_once(task_instance, "") - - -@conf_vars({("logging", "task_log_prefix_template"): "{{ ti.dag_id }}-{{ ti.task_id }}"}) -def test_custom_formatter_custom_format_not_affected_by_config(task_instance): - """Certifies that the prefix is only added once, even after repeated calls""" - assert_prefix_multiple(task_instance, f"{DAG_ID}-{TASK_ID}") diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py 
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 0fcc1c2b3a8dd..5b92a89309a48 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -81,7 +81,6 @@ FULL_TESTS_NEEDED_LABEL = "full tests needed" INCLUDE_SUCCESS_OUTPUTS_LABEL = "include success outputs" LATEST_VERSIONS_ONLY_LABEL = "latest versions only" -LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL = "log exception" NON_COMMITTER_BUILD_LABEL = "non committer build" UPGRADE_TO_NEWER_DEPENDENCIES_LABEL = "upgrade to newer dependencies" USE_PUBLIC_RUNNERS_LABEL = "use public runners" @@ -1444,80 +1443,6 @@ def _is_canary_run(self): and self._github_repository == APACHE_AIRFLOW_GITHUB_REPOSITORY ) or CANARY_LABEL in self._pr_labels - @classmethod - def _find_caplog_in_def(cls, added_lines): - """ - Find caplog in def - - :param added_lines: lines added in the file - :return: True if caplog is found in def else False - """ - line_counter = 0 - while line_counter < len(added_lines): - if all(keyword in added_lines[line_counter] for keyword in ["def", "caplog", "(", ")"]): - return True - if "def" in added_lines[line_counter] and ")" not in added_lines[line_counter]: - while line_counter < len(added_lines) and ")" not in added_lines[line_counter]: - if "caplog" in added_lines[line_counter]: - return True - line_counter += 1 - line_counter += 1 - return None - - def _caplog_exists_in_added_lines(self) -> bool: - """ - Check if caplog is used in added lines - - :return: True if caplog is used in added lines else False - """ - lines = run_command( - ["git", "diff", f"{self._commit_ref}^"], - capture_output=True, - text=True, - cwd=AIRFLOW_ROOT_PATH, - check=False, - ) - - if "caplog" not in lines.stdout or lines.stdout == "": - return False - - added_caplog_lines = [ - line.lstrip().lstrip("+") for line in lines.stdout.split("\n") if line.lstrip().startswith("+") - ] - return self._find_caplog_in_def(added_lines=added_caplog_lines) - - 
@cached_property - def is_log_mocked_in_the_tests(self) -> bool: - """ - Check if log is used without mock in the tests - """ - if self._is_canary_run() or self._github_event not in ( - GithubEvents.PULL_REQUEST, - GithubEvents.PULL_REQUEST_TARGET, - ): - return False - # Check if changed files are unit tests - if ( - self._matching_files(FileGroupForCi.UNIT_TEST_FILES, CI_FILE_GROUP_MATCHES) - and LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL not in self._pr_labels - ): - if self._caplog_exists_in_added_lines(): - get_console().print( - f"[error]Caplog is used in the test. " - f"Please be sure you are mocking the log. " - "For example, use `patch.object` to mock `log`." - "Using `caplog` directly in your test files can cause side effects " - "and not recommended. If you think, `caplog` is the only way to test " - "the functionality or there is and exceptional case, " - "please ask maintainer to include as an exception using " - f"'{LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL}' label. " - "It is up to maintainer decision to allow this exception.", - ) - sys.exit(1) - else: - return True - return True - @cached_property def force_pip(self): return FORCE_PIP_LABEL in self._pr_labels diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index cd35cff2c9bc1..154ad377ab161 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -19,7 +19,6 @@ import json import re from typing import Any -from unittest.mock import MagicMock, patch import pytest from rich.console import Console @@ -171,9 +170,6 @@ # commit that is neutral - allows to keep pyproject.toml-changing PRS neutral for unit tests NEUTRAL_COMMIT = "938f0c1f3cc4cbe867123ee8aa9f290f9f18100a" -# Use me if you are adding test for the changed files that includes caplog -LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL = "log exception" - def escape_ansi_colors(line): return ANSI_COLORS_MATCHER.sub("", line) @@ -1170,7 +1166,7 @@ def 
test_expected_output_pull_request_main( files=files, commit_ref=NEUTRAL_COMMIT, github_event=GithubEvents.PULL_REQUEST, - pr_labels=(LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), + pr_labels=tuple(), default_branch="main", ) assert_outputs_are_printed(expected_outputs, str(stderr)) @@ -1548,10 +1544,7 @@ def test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ ( pytest.param( ("INTHEWILD.md", "providers/asana/tests/asana.py"), - ( - "full tests needed", - LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL, - ), + ("full tests needed",), "v2-7-stable", { "all-python-versions": f"['{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}']", @@ -1706,7 +1699,7 @@ def test_expected_output_pull_request_v2_7( files=files, commit_ref=NEUTRAL_COMMIT, github_event=GithubEvents.PULL_REQUEST, - pr_labels=(LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), + pr_labels=(), default_branch="v2-7-stable", ) assert_outputs_are_printed(expected_outputs, str(stderr)) @@ -1984,7 +1977,7 @@ def test_expected_output_pull_request_target( files=files, commit_ref=NEUTRAL_COMMIT, github_event=GithubEvents.PULL_REQUEST_TARGET, - pr_labels=(LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), + pr_labels=(), default_branch="main", ) assert_outputs_are_printed(expected_outputs, str(stderr)) @@ -2447,231 +2440,6 @@ def test_mypy_matches( assert_outputs_are_printed(expected_outputs, str(stderr)) -@pytest.mark.parametrize( - "files, pr_labels, github_event", - [ - pytest.param( - ("airflow-core/tests/unit/test.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the the git diff Tests", - ), - pytest.param( - ("providers/common/sql/tests/unit/common/sql/operators/test_sql.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff Providers", - ), - pytest.param( - ("task-sdk/tests/definitions/test_dag.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff TaskSDK", - ), - ], -) -# Patch run_command -@patch("airflow_breeze.utils.selective_checks.run_command") -def 
test_is_log_mocked_in_the_tests_fail( - mock_run_command, - files: tuple[str, ...], - pr_labels: tuple[str, ...], - github_event: GithubEvents, -): - mock_run_command_result = MagicMock() - mock_run_command_result.stdout = """ - + #Test Change - + def test_selective_checks_caplop(self, caplog) - + caplog.set_level(logging.INFO) - + "test log" in caplog.text - """ - mock_run_command.return_value = mock_run_command_result - with pytest.raises(SystemExit): - assert ( - "[error]please ask maintainer to include as an exception using " - f"'{LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL}' label." - in escape_ansi_colors( - str( - SelectiveChecks( - files=files, - commit_ref=NEUTRAL_COMMIT, - pr_labels=pr_labels, - github_event=GithubEvents.PULL_REQUEST, - default_branch="main", - ) - ) - ) - ) - - -@pytest.mark.parametrize( - "files, pr_labels, github_event", - [ - pytest.param( - ("airflow-core/tests/unit/test.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the the git diff Tests", - ), - pytest.param( - ("providers/common/sql/tests/unit/common/sql/operators/test_sql.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff Providers", - ), - pytest.param( - ("task-sdk/tests/definitions/test_dag.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff TaskSDK", - ), - ], -) -# Patch run_command -@patch("airflow_breeze.utils.selective_checks.run_command") -def test_is_log_mocked_in_the_tests_fail_formatted( - mock_run_command, - files: tuple[str, ...], - pr_labels: tuple[str, ...], - github_event: GithubEvents, -): - mock_run_command_result = MagicMock() - mock_run_command_result.stdout = """ - + #Test Change - + def test_selective_checks( - + self, - + caplog - + ) - + caplog.set_level(logging.INFO) - + "test log" in caplog.text - """ - mock_run_command.return_value = mock_run_command_result - with pytest.raises(SystemExit): - assert ( - "[error]please ask maintainer to include as an exception using " - 
f"'{LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL}' label." - in escape_ansi_colors( - str( - SelectiveChecks( - files=files, - commit_ref=NEUTRAL_COMMIT, - pr_labels=pr_labels, - github_event=GithubEvents.PULL_REQUEST, - default_branch="main", - ) - ) - ) - ) - - -@pytest.mark.parametrize( - "files, pr_labels, github_event", - [ - pytest.param( - ("airflow-core/tests/unit/test.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the the git diff Tests", - ), - pytest.param( - ("providers/common/sql/tests/unit/common/sql/operators/test_sql.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff Providers", - ), - pytest.param( - ("task-sdk/tests/definitions/test_dag.py",), - (), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff TaskSDK", - ), - ], -) -# Patch run_command -@patch("airflow_breeze.utils.selective_checks.run_command") -def test_is_log_mocked_in_the_tests_not_fail( - mock_run_command, - files: tuple[str, ...], - pr_labels: tuple[str, ...], - github_event: GithubEvents, -): - mock_run_command_result = MagicMock() - mock_run_command_result.stdout = """ - + #Test Change - + def test_selective_checks(self) - + assert "I am just a test" == "I am just a test" - """ - mock_run_command.return_value = mock_run_command_result - selective_checks = SelectiveChecks( - files=files, - commit_ref=NEUTRAL_COMMIT, - pr_labels=pr_labels, - github_event=GithubEvents.PULL_REQUEST, - default_branch="main", - ) - assert selective_checks.is_log_mocked_in_the_tests - - -@pytest.mark.parametrize( - "files, pr_labels, github_event", - [ - pytest.param( - ("airflow-core/tests/unit/test.py",), - (LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), - GithubEvents.PULL_REQUEST, - id="Caplog is in the the git diff Tests", - ), - pytest.param( - ("providers/common/sql/tests/unit/common/sql/operators/test_sql.py",), - (LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff Providers", - ), - pytest.param( - 
("task-sdk/tests/definitions/test_dag.py",), - (LOG_WITHOUT_MOCK_IN_TESTS_EXCEPTION_LABEL,), - GithubEvents.PULL_REQUEST, - id="Caplog is in the git diff TaskSDK", - ), - ], -) -# Patch run_command -@patch("airflow_breeze.utils.selective_checks.run_command") -def test_is_log_mocked_in_the_tests_not_fail_with_label( - mock_run_command, - files: tuple[str, ...], - pr_labels: tuple[str, ...], - github_event: GithubEvents, -): - mock_run_command_result = MagicMock() - mock_run_command_result.stdout = """ - + #Test Change - + def test_selective_checks_caplop(self, caplog) - + caplog.set_level(logging.INFO) - + "test log" in caplog.text - """ - mock_run_command.return_value = mock_run_command_result - selective_checks = SelectiveChecks( - files=files, - commit_ref=NEUTRAL_COMMIT, - pr_labels=pr_labels, - github_event=GithubEvents.PULL_REQUEST, - default_branch="main", - ) - assert selective_checks.is_log_mocked_in_the_tests - - -def test_ui_english_translation_changed_false(): - selective_checks = SelectiveChecks( - files=("README.md",), - commit_ref=NEUTRAL_COMMIT, - pr_labels=(), - github_event=GithubEvents.PULL_REQUEST, - default_branch="main", - ) - assert selective_checks.ui_english_translation_changed is False - - def test_ui_english_translation_changed_fail_on_change(): translation_file = "airflow-core/src/airflow/ui/public/i18n/locales/en/some_file.json" with pytest.raises(SystemExit): diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 9f05f0ec8b0eb..a50dba0892135 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -18,8 +18,8 @@ from __future__ import annotations import importlib -import itertools import json +import logging import os import platform import re @@ -1700,13 +1700,9 @@ def _delete_log_template(): @pytest.fixture def reset_logging_config(): - import logging.config + from airflow.logging_config import configure_logging - 
@pytest.fixture
def cap_structlog(monkeypatch, request):
    """
    Capture structlog messages during a test.

    This extends the feature built in to structlog to make it easier to find if a message is logged.

    >>> def test_something(cap_structlog):
    ...     log.info("some event", field1=False, field2=[1, 2])
    ...     log.info("some event", field1=True)
    ...     assert "some event" in cap_structlog  # a string searches on the `event` field
    ...     assert {"event": "some event", "field1": True} in cap_structlog  # only passed keys compared
    ...     assert {"field2": [1, 2]} in cap_structlog
    ...
    ...     assert "not logged" not in cap_structlog  # ``not in`` works too
    """
    import structlog.stdlib
    from structlog import DropEvent, configure, get_config

    from tests_common.test_utils.logs import StructlogCapture

    cap = StructlogCapture()
    # Modify `_Configuration.default_processors` set via `configure` but always
    # keep the list instance intact to not break references held by bound
    # loggers.
    processors = get_config()["processors"]
    old_processors = processors.copy()

    # And modify the stdlib logging to capture too. The root handler is expected to be the
    # structlog-managed one installed by Airflow's logging setup.
    handler = logging.root.handlers[0]
    if not isinstance(handler.formatter, structlog.stdlib.ProcessorFormatter):
        raise AssertionError(
            f"{type(handler.formatter)} is not an instance of structlog.stdlib.ProcessorFormatter"
        )

    # Re-format stdlib records through the same pre-chain, but deliver them to our capture processor
    std_formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=handler.formatter.foreign_pre_chain,
        pass_foreign_args=True,
        use_get_message=False,
        processor=cap,
    )

    def stdlib_filter(record):
        # format() feeds the record to `cap` (which raises DropEvent once captured); always tell
        # the handler not to emit the record itself.
        with suppress(DropEvent):
            std_formatter.format(record)
        return False

    dict_exc_formatter = structlog.tracebacks.ExceptionDictTransformer(
        use_rich=False,
        show_locals=False,
    )

    dict_tracebacks = structlog.processors.ExceptionRenderer(dict_exc_formatter)
    timestamper = structlog.processors.MaybeTimeStamper(fmt="iso")

    # Respect pytest's log level settings, defaulting to INFO when neither is set
    level = logging.INFO
    for setting_name in ("log_cli_level", "log_level"):
        log_level = request.config.getoption(setting_name)
        if log_level is None:
            log_level = request.config.getini(setting_name)
        if log_level:
            level = structlog.processors.NAME_TO_LEVEL[log_level.lower()]
            break

    monkeypatch.setattr(logging.root, "level", level)
    # Ensure the handler doesn't filter anything itself (in stdlib both loggers and handlers have
    # their own independent level!)
    monkeypatch.setattr(handler, "level", 0)
    monkeypatch.setattr(handler, "filters", [stdlib_filter])

    try:
        # Clear the processors list and capture everything for the duration of the test
        processors.clear()
        processors.append(timestamper)
        processors.append(dict_tracebacks)
        processors.append(cap)
        configure(processors=processors)
        yield cap
    finally:
        cap._finalize()
        # Remove the capture and restore the original processors
        processors.clear()
        processors.extend(old_processors)
        configure(processors=processors)


@pytest.fixture(scope="session", autouse=True)
def _ensure_configured_logging(request):
    """Configure Airflow's shared structlog-based logging once per test session, if available."""
    try:
        from airflow.sdk._shared.logging import configure_logging
    except ModuleNotFoundError:
        try:
            # Fall back to the core copy of the shared logging code (the nested try previously
            # re-imported the identical SDK path, making the fallback dead code)
            from airflow._shared.logging import configure_logging
        except ModuleNotFoundError:
            # Neither copy is installed (pre-3.1 layout): nothing to configure
            return

    log_level = None
    for setting_name in ("log_cli_level", "log_level"):
        log_level = request.config.getoption(setting_name)
        if log_level is None:
            log_level = request.config.getini(setting_name)
        if log_level:
            break
    if not log_level:
        # Neither pytest option is set; keep the INFO default instead of passing a falsy value
        log_level = logging.INFO
    configure_logging(log_level=log_level)
class StructlogCapture:
    """
    Capture structlog messages during tests.

    This extends the feature built in to structlog to make it easier to find if a message is logged.

    >>> def test_something(cap_structlog):
    ...     log.info("some event", field1=False, field2=[1, 2])
    ...     log.info("some event", field1=True)
    ...     assert "some event" in cap_structlog  # a string searches on the `event` field
    ...     assert {"event": "some event", "field1": True} in cap_structlog  # only passed keys compared
    ...     assert {"field2": [1, 2]} in cap_structlog
    ...
    ...     assert "not logged" not in cap_structlog  # ``not in`` works too

    This fixture class will also manage the log level of stdlib loggers via ``at_level`` and ``set_level``.
    """

    # This class is a manual mixing of pytest's LogCaptureFixture and structlog's LogCapture class, but
    # tailored to Airflow's "send all logs via structlog" approach

    # The logger we specifically want to capture log messages from (None captures everything)
    _logger: str | None = None
    NAME_TO_LEVEL: dict[str, int]
    PER_LOGGER_LEVELS: MutableMapping[str, int]

    def __init__(self):
        self.entries = []
        # Pre-existing stdlib logger levels, so set_level() changes can be undone in _finalize()
        self._initial_logger_levels: dict[str, int] = {}

        # Prefer the Task SDK copy of the shared logging code; fall back to the core copy.
        try:
            from airflow.sdk._shared.logging.structlog import NAME_TO_LEVEL, PER_LOGGER_LEVELS

            self.NAME_TO_LEVEL = NAME_TO_LEVEL
            self.PER_LOGGER_LEVELS = PER_LOGGER_LEVELS

            # If both copies are importable, point core at the same per-logger level mapping so
            # level changes are observed consistently everywhere.
            try:
                import airflow._shared.logging.structlog
            except ModuleNotFoundError:
                pass
            else:
                airflow._shared.logging.structlog.PER_LOGGER_LEVELS = PER_LOGGER_LEVELS
        except ModuleNotFoundError:
            from airflow._shared.logging.structlog import NAME_TO_LEVEL, PER_LOGGER_LEVELS

            self.NAME_TO_LEVEL = NAME_TO_LEVEL
            self.PER_LOGGER_LEVELS = PER_LOGGER_LEVELS

    def _finalize(self) -> None:
        """
        Finalize the fixture.

        This restores the log levels changed by :meth:`set_level`.
        """
        for logger_name, level in self._initial_logger_levels.items():
            logging.getLogger(logger_name).setLevel(level)
            # NOTSET (0) means no explicit level was configured before we touched it
            if level == logging.NOTSET:
                del self.PER_LOGGER_LEVELS[logger_name]
            else:
                self.PER_LOGGER_LEVELS[logger_name] = level

    def __contains__(self, target):
        """
        Search captured entries.

        A string matches on the ``event`` field; a dict does a partial comparison on only the
        keys it contains (values may be compiled regex patterns).
        """
        if isinstance(target, str):

            def predicate(e):
                return e["event"] == target

        elif isinstance(target, dict):

            def predicate(e):
                def matches(key, expected):
                    # Note: must look keys up individually -- itemgetter(*keys) returns a bare
                    # scalar for a single key, which broke single-key dict searches.
                    try:
                        val = e[key]
                        if isinstance(expected, re.Pattern):
                            return bool(expected.match(val))
                        return val == expected
                    except Exception:
                        return False

                return all(matches(k, v) for k, v in target.items())

        else:
            raise TypeError(f"Can't search logs using {type(target)}")

        return any(predicate(e) for e in self.entries)

    def __getitem__(self, i):
        return self.entries[i]

    def __iter__(self):
        return iter(self.entries)

    def __repr__(self):
        # Show the captured entries so assertion failures are readable
        return f"<StructlogCapture {self.entries!r}>"

    def __call__(self, logger: WrappedLogger, method_name: str, event_dict: EventDict) -> NoReturn:
        """Structlog processor entry point: record the event, then drop it from the chain."""
        from structlog import DropEvent
        from structlog._log_levels import map_method_name

        event_dict["event"] = str(event_dict["event"])
        event_dict["log_level"] = map_method_name(method_name)
        if name := getattr(logger, "name", None):
            event_dict["logger_name"] = name

        # Capture the current exception. This mirrors the "ExceptionRenderer", but much more minimal
        # for testing
        if event_dict.get("exc_info") is True:
            event_dict["exc_info"] = sys.exc_info()
        self.entries.append(event_dict)

        raise DropEvent

    @property
    def text(self):
        """All the event text as a single multi-line string."""

        def exc_dict_to_string(exc):
            # Raw sys.exc_info() tuple: render the full traceback
            if isinstance(exc, tuple):
                yield from traceback.format_exception(*exc)
                return
            # Otherwise a list of exception dicts from ExceptionDictTransformer
            for i, e in enumerate(exc):
                if i != 0:
                    yield "\n"
                    yield "During handling of the above exception, another exception occurred:\n"
                    yield "\n"

                # This doesn't include the stacktrace, but this should be enough for testing
                yield f"{e['exc_type']}: {e['exc_value']}\n"

        def format(e):
            yield e["event"] + "\n"
            if exc_info := e.get("exc_info"):
                yield from exc_dict_to_string(exc_info)
            elif exc := e.get("exception"):
                yield from exc_dict_to_string(exc)

        return "".join(itertools.chain.from_iterable(map(format, self.entries)))

    # These next fns make it duck-type the same as pytest's "caplog" fixture
    @property
    def messages(self):
        """All the event messages as a list."""
        return [e["event"] for e in self.entries]

    @contextmanager
    def at_level(self, level: str | int, logger: str | None = None):
        """Temporarily set the capture level; previous levels are restored on exit."""
        if isinstance(level, str):
            level = self.NAME_TO_LEVEL[level.lower()]

        # Since we explicitly set the level of the "airflow" logger in our config, we want to set
        # that by default if the test author didn't ask for a specific logger (otherwise we only set
        # the root logging level, which doesn't have any effect if sub loggers have explicit levels
        # set)
        keys: tuple[str, ...] = (logger or "",)
        if not logger:
            keys += ("airflow",)

        def _reset(logger, key, level, orig_hdlr_level):
            logger.setLevel(level)
            if level == logging.NOTSET:
                del self.PER_LOGGER_LEVELS[key]
            else:
                self.PER_LOGGER_LEVELS[key] = level
            # Only restore a handler level we actually saved (a handler may have appeared since)
            if logger.handlers and orig_hdlr_level is not None:
                logger.handlers[0].setLevel(orig_hdlr_level)

        cm = ExitStack()
        for key in keys:
            old = self.PER_LOGGER_LEVELS.get(key, logging.NOTSET)
            self.PER_LOGGER_LEVELS[key] = level
            stdlogger = logging.getLogger(key)
            stdlogger.setLevel(level)
            orig_hdlr_level = None
            if stdlogger.handlers:
                hdlr = stdlogger.handlers[0]
                orig_hdlr_level = hdlr.level
                hdlr.setLevel(level)
            cm.callback(_reset, stdlogger, key, old, orig_hdlr_level)

        with cm:
            yield self

    def set_level(self, level: str | int, logger: str | None = None):
        """Set the capture level for the rest of the test; reverted by :meth:`_finalize`."""
        if isinstance(level, str):
            level = self.NAME_TO_LEVEL[level.lower()]

        key = logger or ""

        stdlogger = logging.getLogger(key)
        # Remember the pre-existing level (NOTSET when none was set) so _finalize can restore it
        self._initial_logger_levels[key] = self.PER_LOGGER_LEVELS.get(key, logging.NOTSET)

        self.PER_LOGGER_LEVELS[key] = level
        stdlogger.setLevel(level)

    def clear(self):
        self.entries = []

    # pytest caplog support:
    # TODO: deprecate and remove all of this in tests
    @property
    def records(self):
        """Captured entries converted to stdlib ``logging.LogRecord`` objects."""
        records = []
        for entry in self.entries:
            record = logging.LogRecord(
                entry.get("logger", "") or entry.get("logger_name"),
                self.NAME_TO_LEVEL.get(entry.get("log_level"), 0),
                "?",
                0,
                entry["event"],
                (),
                entry.get("exc_info") or entry.get("exception"),
                None,
                None,
            )
            record.message = record.msg
            records.append(record)
        return records

    @property
    def record_tuples(self):
        """Captured entries as ``(logger_name, levelno, message)`` tuples, like caplog's."""
        return [
            (
                entry.get("logger", "") or entry.get("logger_name"),
                self.NAME_TO_LEVEL.get(entry.get("log_level"), 0),
                entry.get("event"),
            )
            for entry in self.entries
        ]
b/kubernetes-tests/tests/kubernetes_tests/test_kubernetes_pod_operator.py index 3f74aa805bb96..77d44ed5a67ab 100644 --- a/kubernetes-tests/tests/kubernetes_tests/test_kubernetes_pod_operator.py +++ b/kubernetes-tests/tests/kubernetes_tests/test_kubernetes_pod_operator.py @@ -970,20 +970,24 @@ def test_pod_template_file( await_pod_completion_mock.return_value = pod_mock context = create_context(k) - # I'm not really sure what the point is of this assert - with caplog.at_level(logging.DEBUG, logger="airflow.task.operators"): + # TODO: once Airflow 3.1 is the min version, replace this with out structlog-based caplog fixture + with mock.patch.object(k.log, "debug") as debug_logs: k.execute(context) - expected_lines = [ - "Starting pod:", - "api_version: v1", - "kind: Pod", - "metadata:", - " annotations: {}", - " creation_timestamp: null", - " deletion_grace_period_seconds: null", - ] - actual = next(x.getMessage() for x in caplog.records if x.msg == "Starting pod:\n%s").splitlines() - assert actual[: len(expected_lines)] == expected_lines + expected_lines = "\n".join( + [ + "api_version: v1", + "kind: Pod", + "metadata:", + " annotations: {}", + " creation_timestamp: null", + " deletion_grace_period_seconds: null", + ] + ) + # Make a nice assert if it's not there + debug_logs.assert_any_call("Starting pod:\n%s", mock.ANY) + # Now we know it is there, examine the second argument + mock_call = next(call for call in debug_logs.mock_calls if call[1][0] == "Starting pod:\n%s") + assert mock_call[1][1][: len(expected_lines)] == expected_lines actual_pod = self.api_client.sanitize_for_serialization(k.pod) expected_dict = { @@ -1473,8 +1477,7 @@ def test_log_output_configurations(self, log_prefix_enabled, log_formatter, expe ) # Test the _log_message method directly - logger = logging.getLogger("airflow.providers.cncf.kubernetes.utils.pod_manager.PodManager") - with mock.patch.object(logger, "info") as mock_info: + with mock.patch.object(k.pod_manager.log, "info") as 
mock_info: k.pod_manager._log_message( message=marker, container_name="base", diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/glue.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/glue.py index cdf9347009098..002282d594763 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/glue.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/glue.py @@ -109,7 +109,7 @@ def __init__( "retry": retry_if_exception(self._should_retry_on_error), "wait": wait_exponential(multiplier=1, min=1, max=60), "stop": stop_after_attempt(5), - "before_sleep": before_sleep_log(self.log, log_level=20), + "before_sleep": before_sleep_log(self.log, log_level=20), # type: ignore[arg-type] "reraise": True, } diff --git a/providers/amazon/src/airflow/providers/amazon/aws/utils/task_log_fetcher.py b/providers/amazon/src/airflow/providers/amazon/aws/utils/task_log_fetcher.py index a5fd8af3ba1d7..24b426a536e20 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/utils/task_log_fetcher.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/utils/task_log_fetcher.py @@ -28,7 +28,7 @@ from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook if TYPE_CHECKING: - from logging import Logger + from airflow.sdk.types import Logger class AwsTaskLogFetcher(Thread): diff --git a/providers/amazon/tests/unit/amazon/aws/log/test_cloudwatch_task_handler.py b/providers/amazon/tests/unit/amazon/aws/log/test_cloudwatch_task_handler.py index 2714b036e721a..c5abfa7991251 100644 --- a/providers/amazon/tests/unit/amazon/aws/log/test_cloudwatch_task_handler.py +++ b/providers/amazon/tests/unit/amazon/aws/log/test_cloudwatch_task_handler.py @@ -97,6 +97,7 @@ def setup_tests(self, create_runtime_ti, tmp_path, monkeypatch): conn.create_log_group(logGroupName=self.remote_log_group) processors = structlog.get_config()["processors"] + logger_factory = structlog.get_config()["logger_factory"] old_processors = processors.copy() try: @@ -106,7 +107,11 @@ 
def setup_tests(self, create_runtime_ti, tmp_path, monkeypatch): # Set up the right chain of processors so the event looks like we want for our full test monkeypatch.setattr(airflow.logging_config, "REMOTE_TASK_LOG", self.subject) - procs, _ = airflow.sdk.log.logging_processors(enable_pretty_log=False) + try: + procs = airflow.sdk.log.logging_processors(colors=False, json_output=False) + except TypeError: + # Compat issue only comes up in the tests, not in the real code + procs, _ = airflow.sdk.log.logging_processors(enable_pretty_log=False) processors.clear() processors.extend(procs) @@ -127,7 +132,7 @@ def drop(*args): # remove LogCapture and restore original processors processors.clear() processors.extend(old_processors) - structlog.configure(processors=old_processors) + structlog.configure(processors=old_processors, logger_factory=logger_factory) @time_machine.travel(datetime(2025, 3, 27, 21, 58, 1, 2345), tick=False) def test_log_message(self): diff --git a/providers/amazon/tests/unit/amazon/aws/log/test_s3_task_handler.py b/providers/amazon/tests/unit/amazon/aws/log/test_s3_task_handler.py index 47fa354af535a..9b41ac18cc93d 100644 --- a/providers/amazon/tests/unit/amazon/aws/log/test_s3_task_handler.py +++ b/providers/amazon/tests/unit/amazon/aws/log/test_s3_task_handler.py @@ -19,6 +19,7 @@ import contextlib import copy +import logging import os from unittest import mock @@ -138,27 +139,33 @@ def test_log_exists_no_hook(self): with pytest.raises(ConnectionError, match="Fake: Failed to connect"): subject.s3_log_exists(self.remote_log_location) - def test_s3_read_when_log_missing(self): + def test_s3_read_when_log_missing(self, caplog): url = "s3://bucket/foo" - with mock.patch.object(self.subject.log, "error") as mock_error: + with caplog.at_level(logging.ERROR): result = self.subject.s3_read(url, return_error=True) - msg = ( - f"Could not read logs from {url} with error: An error occurred (404) when calling the " - f"HeadObject operation: Not Found" - ) - 
assert result == msg - mock_error.assert_called_once_with(msg, exc_info=True) + msg = ( + f"Could not read logs from {url} with error: An error occurred (404) when calling the " + f"HeadObject operation: Not Found" + ) + assert result == msg + assert len(caplog.records) == 1 + rec = caplog.records[0] + assert rec.levelno == logging.ERROR + assert rec.exc_info is not None - def test_read_raises_return_error(self): + def test_read_raises_return_error(self, caplog): url = "s3://nonexistentbucket/foo" - with mock.patch.object(self.subject.log, "error") as mock_error: + with caplog.at_level(logging.ERROR): result = self.subject.s3_read(url, return_error=True) msg = ( f"Could not read logs from {url} with error: An error occurred (NoSuchBucket) when " f"calling the HeadObject operation: The specified bucket does not exist" ) - assert result == msg - mock_error.assert_called_once_with(msg, exc_info=True) + assert result == msg + assert len(caplog.records) == 1 + rec = caplog.records[0] + assert rec.levelno == logging.ERROR + assert rec.exc_info is not None def test_write(self): with mock.patch.object(self.subject.log, "error") as mock_error: @@ -176,11 +183,15 @@ def test_write_existing(self): assert body == b"previous \ntext" - def test_write_raises(self): + def test_write_raises(self, caplog): url = "s3://nonexistentbucket/foo" - with mock.patch.object(self.subject.log, "error") as mock_error: + with caplog.at_level(logging.ERROR): self.subject.write("text", url) - mock_error.assert_called_once_with("Could not write logs to %s", url, exc_info=True) + assert len(caplog.records) == 1 + rec = caplog.records[0] + assert rec.levelno == logging.ERROR + assert rec.message == f"Could not write logs to {url}" + assert rec.exc_info is not None @pytest.mark.db_test diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py b/providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py index e40e7f086e780..8810ff454e04b 100644 --- 
a/providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py +++ b/providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py @@ -39,7 +39,7 @@ from airflow.providers.common.compat.standard.utils import prepare_virtualenv if TYPE_CHECKING: - import logging + from airflow.sdk.types import Logger _APACHE_BEAM_VERSION_SCRIPT = "import apache_beam; print(apache_beam.__version__)" @@ -122,7 +122,7 @@ def beam_options_to_args(options: dict) -> list[str]: def process_fd( proc, fd, - log: logging.Logger, + log: Logger, process_line_callback: Callable[[str], None] | None = None, is_dataflow_job_id_exist_callback: Callable[[], bool] | None = None, ): @@ -152,7 +152,7 @@ def process_fd( def run_beam_command( cmd: list[str], - log: logging.Logger, + log: Logger, process_line_callback: Callable[[str], None] | None = None, working_directory: str | None = None, is_dataflow_job_id_exist_callback: Callable[[], bool] | None = None, @@ -614,7 +614,7 @@ async def start_pipeline_async( async def run_beam_command_async( self, cmd: list[str], - log: logging.Logger, + log: Logger, working_directory: str | None = None, process_line_callback: Callable[[str], None] | None = None, ) -> int: diff --git a/providers/apache/flink/tests/unit/apache/flink/sensors/test_flink_kubernetes.py b/providers/apache/flink/tests/unit/apache/flink/sensors/test_flink_kubernetes.py index cbaf52e0aca1a..dea745b9ccf21 100644 --- a/providers/apache/flink/tests/unit/apache/flink/sensors/test_flink_kubernetes.py +++ b/providers/apache/flink/tests/unit/apache/flink/sensors/test_flink_kubernetes.py @@ -20,7 +20,8 @@ from __future__ import annotations import json -from unittest.mock import call, patch +import logging +from unittest.mock import ANY, patch import pytest from kubernetes.client import V1ObjectMeta, V1Pod, V1PodList @@ -1069,7 +1070,6 @@ def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernete 
"kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_ERROR_CLUSTER, ) - @patch("logging.Logger.error") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, @@ -1079,7 +1079,7 @@ def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernete return_value=TASK_MANAGER_POD_LIST, ) def test_driver_logging_failure( - self, mock_namespaced_pod_list, mock_pod_logs, error_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn, caplog ): sensor = FlinkKubernetesSensor( application_name="flink-stream-example", @@ -1093,7 +1093,7 @@ def test_driver_logging_failure( namespace="default", watch=False, label_selector="component=taskmanager,app=flink-stream-example" ) mock_pod_logs.assert_called_once_with("basic-example-taskmanager-1-1", namespace="default") - error_log_call.assert_called_once_with(TEST_POD_LOG_RESULT) + assert TEST_POD_LOG_RESULT in caplog.messages mock_namespaced_crd.assert_called_once_with( group="flink.apache.org", version="v1beta1", @@ -1106,7 +1106,6 @@ def test_driver_logging_failure( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_READY_CLUSTER, ) - @patch("logging.Logger.info") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, @@ -1116,7 +1115,7 @@ def test_driver_logging_failure( return_value=TASK_MANAGER_POD_LIST, ) def test_driver_logging_completed( - self, mock_namespaced_pod_list, mock_pod_logs, info_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn, caplog ): sensor = FlinkKubernetesSensor( application_name="flink-stream-example", @@ -1130,7 +1129,7 @@ def test_driver_logging_completed( namespace="default", watch=False, 
label_selector="component=taskmanager,app=flink-stream-example" ) mock_pod_logs.assert_called_once_with("basic-example-taskmanager-1-1", namespace="default") - assert call(TEST_POD_LOG_RESULT) in info_log_call.mock_calls + assert TEST_POD_LOG_RESULT in caplog.messages mock_namespaced_crd.assert_called_once_with( group="flink.apache.org", @@ -1144,7 +1143,6 @@ def test_driver_logging_completed( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_READY_CLUSTER, ) - @patch("logging.Logger.info") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, @@ -1154,7 +1152,7 @@ def test_driver_logging_completed( return_value=TASK_MANAGER_POD_LIST, ) def test_logging_taskmanager_from_taskmanager_namespace_when_namespace_is_set( - self, mock_namespaced_pod_list, mock_pod_logs, info_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn ): namespace = "different-namespace123456" namespae_name = "test123" @@ -1190,7 +1188,7 @@ def test_logging_taskmanager_from_taskmanager_namespace_when_namespace_is_set( return_value=TASK_MANAGER_POD_LIST, ) def test_logging_taskmanager_from_non_default_namespace( - self, mock_namespaced_pod_list, mock_pod_logs, info_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn, caplog ): namespae_name = "test123" @@ -1214,7 +1212,6 @@ def test_logging_taskmanager_from_non_default_namespace( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_READY_CLUSTER, ) - @patch("logging.Logger.warning") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", side_effect=ApiException("Test api exception"), @@ -1224,7 +1221,7 @@ def test_logging_taskmanager_from_non_default_namespace( 
return_value=TASK_MANAGER_POD_LIST, ) def test_driver_logging_error( - self, mock_namespaced_pod_list, mock_pod_logs, warn_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn, caplog ): sensor = FlinkKubernetesSensor( application_name="flink-stream-example", @@ -1233,13 +1230,12 @@ def test_driver_logging_error( task_id="test_task_id", ) sensor.poke(None) - warn_log_call.assert_called_once() + assert (ANY, logging.WARNING, ANY) in caplog.record_tuples, "Expected something logged at warning" @patch( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_ERROR_CLUSTER, ) - @patch("logging.Logger.warning") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", side_effect=ApiException("Test api exception"), @@ -1249,7 +1245,7 @@ def test_driver_logging_error( return_value=TASK_MANAGER_POD_LIST, ) def test_driver_logging_error_missing_state( - self, mock_namespaced_pod_list, mock_pod_logs, warn_log_call, mock_namespaced_crd, mock_kube_conn + self, mock_namespaced_pod_list, mock_pod_logs, mock_namespaced_crd, mock_kube_conn, caplog ): sensor = FlinkKubernetesSensor( application_name="flink-stream-example", @@ -1259,4 +1255,4 @@ def test_driver_logging_error_missing_state( ) with pytest.raises(AirflowException): sensor.poke(None) - warn_log_call.assert_called_once() + assert (ANY, logging.WARNING, ANY) in caplog.record_tuples, "Expected something logged at warning" diff --git a/providers/celery/tests/unit/celery/log_handlers/test_log_handlers.py b/providers/celery/tests/unit/celery/log_handlers/test_log_handlers.py index c95ab43236ecf..6a0ef98842ff2 100644 --- a/providers/celery/tests/unit/celery/log_handlers/test_log_handlers.py +++ b/providers/celery/tests/unit/celery/log_handlers/test_log_handlers.py @@ -24,7 +24,6 @@ import pytest -from airflow.config_templates.airflow_local_settings import 
DEFAULT_LOGGING_CONFIG from airflow.executors import executor_loader from airflow.models.dagrun import DagRun from airflow.models.taskinstance import TaskInstance @@ -56,7 +55,6 @@ def clean_up(self): session.query(TaskInstance).delete() def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) logging.root.disabled = False self.clean_up() # We use file task handler by default. diff --git a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/log_handlers/test_log_handlers.py b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/log_handlers/test_log_handlers.py index 954ec3ed80645..7cddf3d04af97 100644 --- a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/log_handlers/test_log_handlers.py +++ b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/log_handlers/test_log_handlers.py @@ -27,13 +27,13 @@ import pytest from kubernetes.client import models as k8s -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.executors import executor_loader from airflow.models.dag import DAG from airflow.models.taskinstance import TaskInstance from airflow.utils.log.file_task_handler import ( FileTaskHandler, ) +from airflow.utils.log.log_reader import TaskLogReader from airflow.utils.log.logging_mixin import set_context from airflow.utils.state import State, TaskInstanceState from airflow.utils.types import DagRunType @@ -66,7 +66,6 @@ def clean_up(self): clear_db_dag_bundles() def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) logging.root.disabled = False self.clean_up() # We use file task handler by default. 
@@ -156,7 +155,7 @@ def task_callable(ti): logger = ti.log ti.task.log.disabled = False - file_handler = next((h for h in logger.handlers if h.name == FILE_TASK_HANDLER), None) + file_handler = TaskLogReader().log_handler set_context(logger, ti) ti.run(ignore_ti_state=True) ti.state = TaskInstanceState.RUNNING diff --git a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/sensors/test_spark_kubernetes.py b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/sensors/test_spark_kubernetes.py index 16d89033607f6..00528660228e2 100644 --- a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/sensors/test_spark_kubernetes.py +++ b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/sensors/test_spark_kubernetes.py @@ -18,7 +18,8 @@ from __future__ import annotations import json -from unittest.mock import call, patch +import logging +from unittest.mock import ANY, patch import pytest from kubernetes.client.rest import ApiException @@ -794,7 +795,6 @@ def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernete "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_FAILED_APPLICATION, ) - @patch("logging.Logger.error") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, @@ -802,9 +802,9 @@ def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernete def test_driver_logging_failure( self, mock_log_call, - error_log_call, mock_get_namespaced_crd, mock_kube_conn, + caplog, ): sensor = SparkKubernetesSensor( application_name="spark_pi", @@ -817,20 +817,17 @@ def test_driver_logging_failure( mock_log_call.assert_called_once_with( "spark-pi-driver", namespace="default", container="spark-kubernetes-driver" ) - error_log_call.assert_called_once_with(TEST_POD_LOG_RESULT) + assert (ANY, logging.ERROR, TEST_POD_LOG_RESULT) in caplog.record_tuples @patch( 
"kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_COMPLETED_APPLICATION, ) - @patch("logging.Logger.info") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, ) - def test_driver_logging_completed( - self, mock_log_call, info_log_call, mock_get_namespaced_crd, mock_kube_conn - ): + def test_driver_logging_completed(self, mock_log_call, mock_get_namespaced_crd, mock_kube_conn, caplog): sensor = SparkKubernetesSensor( application_name="spark_pi", attach_log=True, @@ -841,20 +838,17 @@ def test_driver_logging_completed( mock_log_call.assert_called_once_with( "spark-pi-2020-02-24-1-driver", namespace="default", container="spark-kubernetes-driver" ) - assert call(TEST_POD_LOG_RESULT) in info_log_call.mock_calls + assert (ANY, logging.INFO, TEST_POD_LOG_RESULT) in caplog.record_tuples @patch( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_COMPLETED_APPLICATION, ) - @patch("logging.Logger.warning") @patch( "airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", side_effect=ApiException("Test api exception"), ) - def test_driver_logging_error( - self, mock_log_call, warn_log_call, mock_get_namespaced_crd, mock_kube_conn - ): + def test_driver_logging_error(self, mock_log_call, mock_get_namespaced_crd, mock_kube_conn, caplog): sensor = SparkKubernetesSensor( application_name="spark_pi", attach_log=True, @@ -862,19 +856,18 @@ def test_driver_logging_error( task_id="test_task_id", ) sensor.poke({}) - warn_log_call.assert_called_once() + assert (ANY, logging.WARNING, ANY) in caplog.record_tuples, "Expected something logged at warning" @patch( "kubernetes.client.api.custom_objects_api.CustomObjectsApi.get_namespaced_custom_object", return_value=TEST_DRIVER_WITH_SIDECAR_APPLICATION, ) - @patch("logging.Logger.info") @patch( 
"airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_pod_logs", return_value=TEST_POD_LOGS, ) def test_sidecar_driver_logging_completed( - self, mock_log_call, info_log_call, mock_get_namespaced_crd, mock_kube_conn + self, mock_log_call, mock_get_namespaced_crd, mock_kube_conn, caplog ): sensor = SparkKubernetesSensor( application_name="spark_pi", @@ -886,4 +879,4 @@ def test_sidecar_driver_logging_completed( mock_log_call.assert_called_once_with( "spark-pi-2020-02-24-1-driver", namespace="default", container="spark-kubernetes-driver" ) - assert call(TEST_POD_LOG_RESULT) in info_log_call.mock_calls + assert (ANY, logging.INFO, TEST_POD_LOG_RESULT) in caplog.record_tuples diff --git a/providers/common/sql/tests/unit/common/sql/hooks/test_dbapi.py b/providers/common/sql/tests/unit/common/sql/hooks/test_dbapi.py index 69c3147a40e19..b3fb7a476f032 100644 --- a/providers/common/sql/tests/unit/common/sql/hooks/test_dbapi.py +++ b/providers/common/sql/tests/unit/common/sql/hooks/test_dbapi.py @@ -25,7 +25,6 @@ import pytest from pyodbc import Cursor -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.models import Connection from airflow.providers.common.sql.dialects.dialect import Dialect from airflow.providers.common.sql.hooks.handlers import fetch_all_handler, fetch_one_handler @@ -71,14 +70,13 @@ def dialect(self): def get_db_log_messages(self, conn) -> None: return conn.get_messages() - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) logging.root.disabled = True self.db_hook = DbApiHookMock(**kwargs) self.db_hook_no_log_sql = DbApiHookMock(log_sql=False) self.db_hook_schema_override = DbApiHookMock(schema="schema-override") self.db_hook.supports_executemany = False - self.db_hook.log.setLevel(logging.DEBUG) + # self.db_hook.log.setLevel(logging.DEBUG) def test_get_records(self): statement = "SQL" @@ -228,7 +226,7 @@ def test_insert_rows_as_generator(self, caplog): table = "table" rows = [("What's",), 
("up",), ("world",)] - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="airflow.task"): self.db_hook.insert_rows(table, iter(rows)) assert self.conn.close.call_count == 1 @@ -249,7 +247,7 @@ def test_insert_rows_as_generator_supports_executemany(self, caplog): table = "table" rows = [("What's",), ("up",), ("world",)] - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, "airflow.task"): self.db_hook.supports_executemany = True self.db_hook.insert_rows(table, iter(rows)) diff --git a/providers/common/sql/tests/unit/common/sql/hooks/test_sql.py b/providers/common/sql/tests/unit/common/sql/hooks/test_sql.py index e881f52f69c45..b900310324447 100644 --- a/providers/common/sql/tests/unit/common/sql/hooks/test_sql.py +++ b/providers/common/sql/tests/unit/common/sql/hooks/test_sql.py @@ -27,7 +27,6 @@ import polars as pl import pytest -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import Connection from airflow.providers.common.sql.dialects.dialect import Dialect @@ -227,7 +226,6 @@ def mock_execute(*args, **kwargs): class TestDbApiHook: def setup_method(self, **kwargs): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) logging.root.disabled = True @pytest.mark.db_test diff --git a/providers/databricks/src/airflow/providers/databricks/operators/databricks.py b/providers/databricks/src/airflow/providers/databricks/operators/databricks.py index f1c8a411962a9..2c1607bb53df0 100644 --- a/providers/databricks/src/airflow/providers/databricks/operators/databricks.py +++ b/providers/databricks/src/airflow/providers/databricks/operators/databricks.py @@ -24,7 +24,6 @@ from abc import ABC, abstractmethod from collections.abc import Sequence from functools import cached_property -from logging import Logger from typing import TYPE_CHECKING, Any from airflow.configuration import conf @@ -60,12 +59,8 @@ 
DatabricksWorkflowTaskGroup, ) from airflow.providers.openlineage.extractors import OperatorLineage - from airflow.utils.context import Context - - try: - from airflow.sdk import TaskGroup - except ImportError: - from airflow.utils.task_group import TaskGroup # type: ignore[no-redef] + from airflow.sdk import TaskGroup + from airflow.sdk.types import Context, Logger if AIRFLOW_V_3_0_PLUS: from airflow.sdk import BaseOperatorLink diff --git a/providers/databricks/src/airflow/providers/databricks/plugins/databricks_workflow.py b/providers/databricks/src/airflow/providers/databricks/plugins/databricks_workflow.py index 9624cb685fd53..3fd7bfb0dafb9 100644 --- a/providers/databricks/src/airflow/providers/databricks/plugins/databricks_workflow.py +++ b/providers/databricks/src/airflow/providers/databricks/plugins/databricks_workflow.py @@ -17,7 +17,6 @@ from __future__ import annotations -import logging import os from typing import TYPE_CHECKING, Any from urllib.parse import unquote @@ -45,6 +44,7 @@ from airflow.models import BaseOperator from airflow.providers.databricks.operators.databricks import DatabricksTaskBaseOperator + from airflow.sdk.types import Logger from airflow.utils.context import Context @@ -62,7 +62,7 @@ def get_auth_decorator(): def get_databricks_task_ids( - group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: logging.Logger + group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: Logger ) -> list[str]: """ Return a list of all Databricks task IDs for a dictionary of Airflow tasks. 
@@ -112,7 +112,7 @@ def _get_dagrun(dag, run_id: str, session: Session) -> DagRun: @provide_session def _clear_task_instances( - dag_id: str, run_id: str, task_ids: list[str], log: logging.Logger, session: Session = NEW_SESSION + dag_id: str, run_id: str, task_ids: list[str], log: Logger, session: Session = NEW_SESSION ) -> None: dag = _get_dag(dag_id, session=session) log.debug("task_ids %s to clear", str(task_ids)) @@ -145,7 +145,7 @@ def _repair_task( databricks_conn_id: str, databricks_run_id: int, tasks_to_repair: list[str], - logger: logging.Logger, + logger: Logger, ) -> int: """ Repair a Databricks task using the Databricks API. @@ -294,7 +294,7 @@ def _get_link_legacy( def store_databricks_job_run_link( context: Context, metadata: Any, - logger: logging.Logger, + logger: Logger, ) -> None: """ Store the Databricks job run link in XCom during task execution. @@ -368,7 +368,7 @@ def get_task_group_children(cls, task_group: TaskGroup) -> dict[str, BaseOperato children[child_id] = child return children - def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: logging.Logger) -> str: + def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: Logger) -> str: task_group = operator.task_group if not task_group: raise AirflowException("Task group is required for generating repair link.") diff --git a/providers/databricks/tests/unit/databricks/hooks/test_databricks.py b/providers/databricks/tests/unit/databricks/hooks/test_databricks.py index 1564c6dbfbc0f..fb57aacf83596 100644 --- a/providers/databricks/tests/unit/databricks/hooks/test_databricks.py +++ b/providers/databricks/tests/unit/databricks/hooks/test_databricks.py @@ -1449,7 +1449,6 @@ async def test_async_do_api_call_respects_schema(self, mock_get): @pytest.mark.asyncio @mock.patch("airflow.providers.databricks.hooks.databricks_base.aiohttp.ClientSession.get") async def test_async_do_api_call_only_existing_response_properties_are_read(self, mock_get): - 
self.hook.log.setLevel("DEBUG") response = mock_get.return_value.__aenter__.return_value response.mock_add_spec(aiohttp.ClientResponse, spec_set=True) response.json = AsyncMock(return_value={"bar": "baz"}) diff --git a/providers/docker/src/airflow/providers/docker/operators/docker.py b/providers/docker/src/airflow/providers/docker/operators/docker.py index bf6f7a463fad6..dc8065d79cc6d 100644 --- a/providers/docker/src/airflow/providers/docker/operators/docker.py +++ b/providers/docker/src/airflow/providers/docker/operators/docker.py @@ -42,16 +42,11 @@ from airflow.providers.docker.version_compat import BaseOperator if TYPE_CHECKING: - from logging import Logger - from docker import APIClient from docker.types import DeviceRequest - try: - from airflow.sdk.definitions.context import Context - except ImportError: - # TODO: Remove once provider drops support for Airflow 2 - from airflow.utils.context import Context + from airflow.sdk.definitions.context import Context + from airflow.sdk.types import Logger def stringify(line: str | bytes): diff --git a/providers/docker/tests/unit/docker/decorators/test_docker.py b/providers/docker/tests/unit/docker/decorators/test_docker.py index 56818438cbcde..33b026bd449d6 100644 --- a/providers/docker/tests/unit/docker/decorators/test_docker.py +++ b/providers/docker/tests/unit/docker/decorators/test_docker.py @@ -16,9 +16,7 @@ # under the License. from __future__ import annotations -import logging from importlib.util import find_spec -from io import StringIO as StringBuffer import pytest @@ -339,7 +337,7 @@ def f(): assert ret.operator.docker_url == "unix://var/run/docker.sock" @pytest.mark.db_test - def test_failing_task(self, dag_maker, session): + def test_failing_task(self, dag_maker, session, caplog): """Test regression #39319 Check the log content of the DockerOperator when the task fails. 
@@ -349,12 +347,6 @@ def test_failing_task(self, dag_maker, session): def f(): raise ValueError("This task is expected to fail") - docker_operator_logger_name = "airflow.task.operators" - - docker_operator_logger = logging.getLogger(docker_operator_logger_name) - log_capture_string = StringBuffer() - ch = logging.StreamHandler(log_capture_string) - docker_operator_logger.addHandler(ch) with dag_maker(session=session): f() @@ -366,10 +358,8 @@ def f(): ti = dr.get_task_instances(session=session)[0] assert ti.state == TaskInstanceState.FAILED - log_content = str(log_capture_string.getvalue()) - assert 'with open(sys.argv[4], "w") as file:' not in log_content - last_line_of_docker_operator_log = log_content.splitlines()[-1] - assert "ValueError: This task is expected to fail" in last_line_of_docker_operator_log + assert 'with open(sys.argv[4], "w") as file:' not in caplog.text + assert "ValueError: This task is expected to fail" in caplog.messages @pytest.mark.db_test def test_invalid_serializer(self, dag_maker): diff --git a/providers/google/src/airflow/providers/google/cloud/utils/bigquery_get_data.py b/providers/google/src/airflow/providers/google/cloud/utils/bigquery_get_data.py index 6e1df55286dd0..6218f3df51614 100644 --- a/providers/google/src/airflow/providers/google/cloud/utils/bigquery_get_data.py +++ b/providers/google/src/airflow/providers/google/cloud/utils/bigquery_get_data.py @@ -23,9 +23,9 @@ if TYPE_CHECKING: from collections.abc import Iterator - from logging import Logger from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook + from airflow.sdk.types import Logger def bigquery_get_data( diff --git a/providers/microsoft/azure/tests/unit/microsoft/azure/log/test_wasb_task_handler.py b/providers/microsoft/azure/tests/unit/microsoft/azure/log/test_wasb_task_handler.py index dc87b5a9da758..90b7b2b4a1caf 100644 --- a/providers/microsoft/azure/tests/unit/microsoft/azure/log/test_wasb_task_handler.py +++ 
b/providers/microsoft/azure/tests/unit/microsoft/azure/log/test_wasb_task_handler.py @@ -17,6 +17,7 @@ from __future__ import annotations import copy +import logging import os import tempfile from pathlib import Path @@ -139,14 +140,15 @@ def test_wasb_read(self, mock_hook_cls, ti): "airflow.providers.microsoft.azure.hooks.wasb.WasbHook", **{"return_value.read_file.side_effect": AzureHttpError("failed to connect", 404)}, ) - def test_wasb_read_raises(self, mock_hook): + def test_wasb_read_raises(self, mock_hook, caplog): handler = self.wasb_task_handler - with mock.patch.object(handler.io.log, "error") as mock_error: + with caplog.at_level(logging.ERROR): handler.io.wasb_read(self.remote_log_location, return_error=True) - mock_error.assert_called_once_with( - "Could not read logs from remote/log/location/1.log", - exc_info=True, - ) + assert len(caplog.records) == 1 + rec = caplog.records[0] + assert rec.levelno == logging.ERROR + assert rec.message == "Could not read logs from remote/log/location/1.log" + assert rec.exc_info is not None @mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook") @mock.patch.object(WasbRemoteLogIO, "wasb_read") @@ -180,17 +182,19 @@ def test_write_when_append_is_false(self, mock_hook): "text", self.container_name, self.remote_log_location, overwrite=True ) - def test_write_raises(self): + def test_write_raises(self, caplog): handler = self.wasb_task_handler - with mock.patch.object(handler.io.log, "error") as mock_error: + with caplog.at_level(logging.ERROR): with mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook") as mock_hook: mock_hook.return_value.load_string.side_effect = AzureHttpError("failed to connect", 404) handler.io.write("text", self.remote_log_location, append=False) - mock_error.assert_called_once_with( - "Could not write logs to %s", "remote/log/location/1.log", exc_info=True - ) + assert len(caplog.records) == 1 + rec = caplog.records[0] + assert rec.levelno == logging.ERROR + assert 
rec.message == "Could not write logs to remote/log/location/1.log" + assert rec.exc_info is not None @pytest.mark.parametrize( "delete_local_copy, expected_existence_of_local_copy", diff --git a/providers/microsoft/psrp/src/airflow/providers/microsoft/psrp/hooks/psrp.py b/providers/microsoft/psrp/src/airflow/providers/microsoft/psrp/hooks/psrp.py index 8e87f564189e4..2308fd1b1bce4 100644 --- a/providers/microsoft/psrp/src/airflow/providers/microsoft/psrp/hooks/psrp.py +++ b/providers/microsoft/psrp/src/airflow/providers/microsoft/psrp/hooks/psrp.py @@ -19,7 +19,6 @@ from collections.abc import Callable, Generator from contextlib import contextmanager -from copy import copy from logging import DEBUG, ERROR, INFO, WARNING from typing import TYPE_CHECKING, Any, cast from weakref import WeakKeyDictionary @@ -168,8 +167,15 @@ def invoke(self) -> Generator[PowerShell, None, None]: Upon exit, the commands will be invoked. """ - logger = copy(self.log) - logger.setLevel(self._logging_level) + logger = self.log + # Compat: Airflow 3.1 use structlog, and doesn't have individual per-logger level + if hasattr(logger, "setLevel"): + logger.setLevel(self._logging_level) + elif not logger.is_enabled_for(self._logging_level): + from airflow.sdk.log import logger_at_level + + logger = logger_at_level(logger.name, self._logging_level) + local_context = self._conn is None if local_context: self.__enter__() diff --git a/providers/microsoft/psrp/tests/unit/microsoft/psrp/operators/test_psrp.py b/providers/microsoft/psrp/tests/unit/microsoft/psrp/operators/test_psrp.py index c38c8ad1be8f7..ca5642247058e 100644 --- a/providers/microsoft/psrp/tests/unit/microsoft/psrp/operators/test_psrp.py +++ b/providers/microsoft/psrp/tests/unit/microsoft/psrp/operators/test_psrp.py @@ -100,7 +100,7 @@ def test_execute(self, hook_impl, parameter, had_errors, rc, do_xcom_push): else: output = op.execute(None) assert output == [json.loads(output) for output in ps.output] if do_xcom_push else ps.output - 
is_logged = hook_impl.call_args.kwargs["on_output_callback"] == op.log.info + is_logged = hook_impl.call_args.kwargs["on_output_callback"] is not None assert do_xcom_push ^ is_logged expected_ps_calls = [ call.add_command(psrp_session_init), diff --git a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake_sql_api.py b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake_sql_api.py index 558e3bf564aaa..9f3e7c68be8b3 100644 --- a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake_sql_api.py +++ b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake_sql_api.py @@ -102,7 +102,7 @@ def __init__( "retry": retry_if_exception(self._should_retry_on_error), "wait": wait_exponential(multiplier=1, min=1, max=60), "stop": stop_after_attempt(5), - "before_sleep": before_sleep_log(self.log, log_level=20), # INFO level + "before_sleep": before_sleep_log(self.log, log_level=20), # type: ignore[arg-type] "reraise": True, } if api_retry_args: diff --git a/providers/ssh/src/airflow/providers/ssh/hooks/ssh.py b/providers/ssh/src/airflow/providers/ssh/hooks/ssh.py index 43fca90dce57b..3fcb1ff9fceab 100644 --- a/providers/ssh/src/airflow/providers/ssh/hooks/ssh.py +++ b/providers/ssh/src/airflow/providers/ssh/hooks/ssh.py @@ -19,6 +19,7 @@ from __future__ import annotations +import logging import os from base64 import decodebytes from collections.abc import Sequence @@ -391,6 +392,11 @@ def get_tunnel( host_pkey_directories=None, ) + if not hasattr(self.log, "handlers"): + # We need to not hit this https://github.com/pahaz/sshtunnel/blob/dc0732884379a19a21bf7a49650d0708519ec54f/sshtunnel.py#L238-L239 + paramkio_log = logging.getLogger("paramiko.transport") + paramkio_log.addHandler(logging.NullHandler()) + paramkio_log.propagate = True client = SSHTunnelForwarder(self.remote_host, **tunnel_kwargs) return client diff --git a/providers/standard/tests/unit/standard/operators/test_python.py 
b/providers/standard/tests/unit/standard/operators/test_python.py index 62ac1f1ae39a5..f025d99a8525b 100644 --- a/providers/standard/tests/unit/standard/operators/test_python.py +++ b/providers/standard/tests/unit/standard/operators/test_python.py @@ -41,7 +41,6 @@ import pytest from slugify import slugify -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.exceptions import ( AirflowException, AirflowProviderDeprecationWarning, @@ -216,9 +215,6 @@ def render_templates(self, fn, **kwargs): class TestPythonOperator(BasePythonTest): opcls = PythonOperator - def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - @pytest.fixture(autouse=True) def setup_tests(self): self.run = False @@ -1745,9 +1741,6 @@ def f(): class TestExternalPythonOperator(BaseTestPythonVirtualenvOperator): opcls = ExternalPythonOperator - def setup_method(self): - logging.config.dictConfig(DEFAULT_LOGGING_CONFIG) - @staticmethod def default_kwargs(*, python_version=DEFAULT_PYTHON_VERSION, **kwargs): kwargs["python"] = sys.executable @@ -2345,7 +2338,7 @@ def test_short_circuit_with_teardowns_complicated_2(self, dag_maker): assert actual_skipped == {op3} @pytest.mark.parametrize("level", [logging.DEBUG, logging.INFO]) - def test_short_circuit_with_teardowns_debug_level(self, dag_maker, level, clear_db): + def test_short_circuit_with_teardowns_debug_level(self, dag_maker, level, clear_db, caplog): """ When logging is debug we convert to a list to log the tasks skipped before passing them to the skip method. 
@@ -2357,7 +2350,6 @@ def test_short_circuit_with_teardowns_debug_level(self, dag_maker, level, clear_ task_id="op1", python_callable=lambda: False, ) - op1.log.setLevel(level) op2 = PythonOperator(task_id="op2", python_callable=print) op3 = PythonOperator(task_id="op3", python_callable=print) t1 = PythonOperator(task_id="t1", python_callable=print).as_teardown(setups=s1) @@ -2372,7 +2364,12 @@ def test_short_circuit_with_teardowns_debug_level(self, dag_maker, level, clear_ dagrun = dag_maker.create_dagrun() tis = dagrun.get_task_instances() ti: TaskInstance = next(x for x in tis if x.task_id == "op1") - ti._run_raw_task() + + with caplog.at_level(level): + if hasattr(ti.task.log, "setLevel"): + # Compat with Pre Airflow 3.1 + ti.task.log.setLevel(level) + ti._run_raw_task() # we can't use assert_called_with because it's a set and therefore not ordered actual_kwargs = op1.skip.call_args.kwargs actual_skipped = actual_kwargs["tasks"] diff --git a/pyproject.toml b/pyproject.toml index c9d6fe805d54d..c8de41ab7d550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1264,8 +1264,9 @@ dev = [ "apache-airflow-kubernetes-tests", "apache-airflow-task-sdk", "apache-airflow-ctl", - "apache-airflow-shared-timezones", + "apache-airflow-shared-logging", "apache-airflow-shared-secrets-masker", + "apache-airflow-shared-timezones", ] # To build docs: @@ -1312,8 +1313,9 @@ apache-airflow-helm-tests = { workspace = true } apache-airflow-kubernetes-tests = { workspace = true } apache-airflow-providers = { workspace = true } apache-aurflow-docker-stack = { workspace = true } -apache-airflow-shared-timezones = { workspace = true } +apache-airflow-shared-logging = { workspace = true } apache-airflow-shared-secrets-masker = { workspace = true } +apache-airflow-shared-timezones = { workspace = true } # Automatically generated provider workspace items (update_airflow_pyproject_toml.py) apache-airflow-providers-airbyte = { workspace = true } apache-airflow-providers-alibaba = { workspace = 
true } @@ -1429,8 +1431,9 @@ members = [ "task-sdk", "providers-summary-docs", "docker-stack-docs", - "shared/timezones", + "shared/logging", "shared/secrets_masker", + "shared/timezones", # Automatically generated provider workspace members (update_airflow_pyproject_toml.py) "providers/airbyte", "providers/alibaba", diff --git a/shared/logging/pyproject.toml b/shared/logging/pyproject.toml new file mode 100644 index 0000000000000..f6b36f2092d55 --- /dev/null +++ b/shared/logging/pyproject.toml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[project] +name = "apache-airflow-shared-logging" +description = "Shared logging code for Airflow distributions" +version = "0.0" +classifiers = [ + "Private :: Do Not Upload", +] + +dependencies = [ + "msgspec>=0.19.0", + "pygtrie>=2.5.0", + "structlog>=25.4.0", +] + +[dependency-groups] +dev = [ + "apache-airflow-devel-common", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/airflow_shared"] + +[tool.ruff] +extend = "../../pyproject.toml" +src = ["src"] + +[tool.ruff.lint.per-file-ignores] +# Ignore Doc rules et al for anything outside of tests +"!src/*" = ["D", "S101", "TRY002"] + +[tool.ruff.lint.flake8-tidy-imports] +# Override the workspace level default +ban-relative-imports = "parents" diff --git a/shared/logging/src/airflow_shared/logging/__init__.py b/shared/logging/src/airflow_shared/logging/__init__.py new file mode 100644 index 0000000000000..8f1cd39539686 --- /dev/null +++ b/shared/logging/src/airflow_shared/logging/__init__.py @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from .structlog import configure_logging as configure_logging diff --git a/shared/logging/src/airflow_shared/logging/_noncaching.py b/shared/logging/src/airflow_shared/logging/_noncaching.py new file mode 100644 index 0000000000000..7a8e21fbe3811 --- /dev/null +++ b/shared/logging/src/airflow_shared/logging/_noncaching.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import os +from typing import BinaryIO, TextIO, TypeVar + +__all__ = [ + "make_file_io_non_caching", +] + +_IO = TypeVar("_IO", TextIO, BinaryIO) + + +def make_file_io_non_caching(io: _IO) -> _IO: + try: + fd = io.fileno() + os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) + except Exception: + # in case either file descriptor cannot be retrieved or fadvise is not available + # we should simply return the wrapper retrieved by FileHandler's open method + # the advice to the kernel is just an advice and if we cannot give it, we won't + pass + return io diff --git a/shared/logging/src/airflow_shared/logging/percent_formatter.py b/shared/logging/src/airflow_shared/logging/percent_formatter.py new file mode 100644 index 0000000000000..f78880702a198 --- /dev/null +++ b/shared/logging/src/airflow_shared/logging/percent_formatter.py @@ -0,0 +1,153 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import collections.abc +import datetime +import os +import sys +from io import StringIO +from typing import TYPE_CHECKING + +import structlog.dev +from structlog.dev import ConsoleRenderer, Styles + +if TYPE_CHECKING: + from structlog.typing import ( + EventDict, + WrappedLogger, + ) + + +class _LazyLogRecordDict(collections.abc.Mapping): + __slots__ = ("event", "styles", "level_styles", "method_name", "no_colors") + + def __init__(self, event: EventDict, method_name: str, level_styles: dict[str, str], styles: Styles): + self.event = event + self.method_name = method_name + self.level_styles = level_styles + self.styles = styles + self.no_colors = self.styles.reset == "" + + def __getitem__(self, key): + # Roughly compatible with names from https://github.com/python/cpython/blob/v3.13.7/Lib/logging/__init__.py#L571 + # Plus with ColoredLog added in + if key == "name": + return self.event.get("logger") or self.event.get("logger_name") + if key == "levelname": + return self.event.get("level", self.method_name).upper() + if key == "pathname": + return self.event.get("pathname", "?") + if key == "filename": + path = self.event.get("pathname", "?") + return os.path.basename(path) + if key == "module": + path = self.event.get("pathname", "?") + return os.path.splitext(os.path.basename(path))[0] + if key == "lineno": + return self.event.get("lineno", 0) + if key == "asctime" or key == "created": + return ( + self.event.get("timestamp", None) + or datetime.datetime.now(tz=datetime.timezone.utc).isoformat() + ) + if key == "message": + return self.event["event"] + if key == "process": + return os.getpid() + + if key in ("red", "green", "yellow", "blue", "purple", "cyan"): + if self.no_colors: + return "" + return getattr(structlog.dev, key.upper(), "") + if key == "reset": + return self.styles.reset + if key == "log_color": + if self.no_colors: + return "" + return self.level_styles.get(self.event.get("level", self.method_name), "") + 
+ return self.event[key] + + def __iter__(self): + return self.event.__iter__() + + def __len__(self): + return len(self.event) + + +class PercentFormatRender(ConsoleRenderer): + """A Structlog processor that uses a stdlib-like percent based format string.""" + + _fmt: str + + special_keys = { + "pathname", + "lineno", + "func_name", + "event", + "name", + "logger", + "logger_name", + "timestamp", + "level", + } + + def __init__(self, fmt: str, **kwargs): + super().__init__(**kwargs) + self._fmt = fmt + + def __call__(self, logger: WrappedLogger, method_name: str, event_dict: EventDict): + exc = event_dict.pop("exception", None) + exc_info = event_dict.pop("exc_info", None) + stack = event_dict.pop("stack", None) + params = _LazyLogRecordDict( + event_dict, + method_name, + ConsoleRenderer.get_default_level_styles(), + self._styles, + ) + + sio = StringIO() + sio.write(self._fmt % params) + + sio.write( + "".join( + " " + self._default_column_formatter(key, val) + for key, val in event_dict.items() + if key not in self.special_keys + ).rstrip(" ") + ) + + if stack is not None: + sio.write("\n" + stack) + if exc_info or exc is not None: + sio.write("\n\n" + "=" * 79 + "\n") + + if exc_info: + if isinstance(exc_info, BaseException): + exc_info = (exc_info.__class__, exc_info, exc_info.__traceback__) + if not isinstance(exc_info, tuple): + if (exc_info := sys.exc_info()) == (None, None, None): + exc_info = None + if exc_info: + self._exception_formatter(sio, exc_info) + elif exc is not None: + sio.write("\n" + exc) + + return sio.getvalue() diff --git a/shared/logging/src/airflow_shared/logging/structlog.py b/shared/logging/src/airflow_shared/logging/structlog.py new file mode 100644 index 0000000000000..8510520474a3e --- /dev/null +++ b/shared/logging/src/airflow_shared/logging/structlog.py @@ -0,0 +1,531 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import codecs +import io +import logging +import os +import re +import sys +from collections.abc import Callable, Mapping, Sequence +from functools import cache, cached_property, partial +from typing import TYPE_CHECKING, Any, BinaryIO, Generic, TextIO, TypeVar, cast + +import pygtrie +import structlog +import structlog.processors +from structlog.processors import NAME_TO_LEVEL + +from ._noncaching import make_file_io_non_caching +from .percent_formatter import PercentFormatRender + +if TYPE_CHECKING: + from structlog.typing import ( + BindableLogger, + EventDict, + Processor, + WrappedLogger, + ) + + from .types import Logger + +log = logging.getLogger(__name__) + +__all__ = [ + "configure_logging", + "structlog_processors", +] + + +LEVEL_TO_FILTERING_LOGGER: dict[int, type[Logger]] = {} + + +def _make_airflow_structlogger(min_level): + # This uses https://github.com/hynek/structlog/blob/2f0cc42d/src/structlog/_native.py#L126 + # as inspiration + + LEVEL_TO_NAME = {v: k for k, v in NAME_TO_LEVEL.items()} + + # A few things, namely paramiko _really_ wants this to be a stdlib logger. These fns pretends it is enough + # like it to function. 
+ @cached_property + def handlers(self): + return [logging.NullHandler()] + + def isEnabledFor(self: Any, level): + return self.is_enabled_for(level) + + def getEffectiveLevel(self: Any): + return self.get_effective_level() + + @property + def name(self): + return self._logger.name + + def _nop(self: Any, event: str, *args: Any, **kw: Any) -> Any: + return None + + # Work around an issue in structlog https://github.com/hynek/structlog/issues/745 + def make_method( + level: int, + ) -> Callable[..., Any]: + name = LEVEL_TO_NAME[level] + if level < min_level: + return _nop + + def meth(self: Any, event: str, *args: Any, **kw: Any) -> Any: + if not args: + return self._proxy_to_logger(name, event, **kw) + + # See for reason https://github.com/python/cpython/blob/3.13/Lib/logging/__init__.py#L307-L326 + if args and len(args) == 1 and isinstance(args[0], Mapping) and args[0]: + args = args[0] + return self._proxy_to_logger(name, event % args, **kw) + + meth.__name__ = name + return meth + + base = structlog.make_filtering_bound_logger(min_level) + + cls = type( + f"AirflowBoundLoggerFilteringAt{LEVEL_TO_NAME.get(min_level, 'Notset').capitalize()}", + (base,), + { + "isEnabledFor": isEnabledFor, + "getEffectiveLevel": getEffectiveLevel, + "name": name, + "handlers": handlers, + } + | {name: make_method(lvl) for lvl, name in LEVEL_TO_NAME.items()}, + ) + LEVEL_TO_FILTERING_LOGGER[min_level] = cls + return cls + + +AirflowBoundLoggerFilteringAtNotset = _make_airflow_structlogger(NAME_TO_LEVEL["notset"]) +AirflowBoundLoggerFilteringAtDebug = _make_airflow_structlogger(NAME_TO_LEVEL["debug"]) +AirflowBoundLoggerFilteringAtInfo = _make_airflow_structlogger(NAME_TO_LEVEL["info"]) +AirflowBoundLoggerFilteringAtWarning = _make_airflow_structlogger(NAME_TO_LEVEL["warning"]) +AirflowBoundLoggerFilteringAtError = _make_airflow_structlogger(NAME_TO_LEVEL["error"]) +AirflowBoundLoggerFilteringAtCritical = _make_airflow_structlogger(NAME_TO_LEVEL["critical"]) + +# We use a trie 
structure (sometimes also called a "prefix tree") so that we can easily and quickly find the +# most suitable log level to apply. This mirrors the logging level cascade behavior from stdlib logging, +# without the complexity of multiple handlers etc +PER_LOGGER_LEVELS = pygtrie.StringTrie(separator=".") +PER_LOGGER_LEVELS.update( + { + # Top level logging default - changed to respect config in `configure_logging` + "": NAME_TO_LEVEL["info"], + } +) + + +def make_filtering_logger() -> Callable[..., BindableLogger]: + def maker(logger: WrappedLogger, *args, **kwargs): + # If the logger is a NamedBytesLogger/NamedWriteLogger (an Airflow specific subclass) then + # look up the global per-logger config and redirect to a new class. + + logger_name = kwargs.get("context", {}).get("logger_name") + if not logger_name and isinstance(logger, (NamedWriteLogger, NamedBytesLogger)): + logger_name = logger.name + + if logger_name: + level = PER_LOGGER_LEVELS.longest_prefix(logger_name).get(PER_LOGGER_LEVELS[""]) + else: + level = PER_LOGGER_LEVELS[""] + return LEVEL_TO_FILTERING_LOGGER[level](logger, *args, **kwargs) # type: ignore[call-arg] + + return maker + + +class NamedBytesLogger(structlog.BytesLogger): + __slots__ = ("name",) + + def __init__(self, name: str | None = None, file: BinaryIO | None = None): + self.name = name + if file is not None: + file = make_file_io_non_caching(file) + super().__init__(file) + + +class NamedWriteLogger(structlog.WriteLogger): + __slots__ = ("name",) + + def __init__(self, name: str | None = None, file: TextIO | None = None): + self.name = name + if file is not None: + file = make_file_io_non_caching(file) + super().__init__(file) + + +LogOutputType = TypeVar("LogOutputType", bound=TextIO | BinaryIO) + + +class LoggerFactory(Generic[LogOutputType]): + def __init__( + self, + cls: Callable[[str | None, LogOutputType | None], WrappedLogger], + io: LogOutputType | None = None, + ): + self.cls = cls + self.io = io + + def __call__(self, 
logger_name: str | None = None, *args: Any) -> WrappedLogger: + return self.cls(logger_name, self.io) + + +def logger_name(logger: Any, method_name: Any, event_dict: EventDict) -> EventDict: + if logger_name := (event_dict.pop("logger_name", None) or getattr(logger, "name", None)): + event_dict.setdefault("logger", logger_name) + return event_dict + + +# `eyJ` is `{"` in base64 encoding -- and any value that starts like that is in high likelihood a JWT +# token. Better safe than sorry +def redact_jwt(logger: Any, method_name: str, event_dict: EventDict) -> EventDict: + for k, v in event_dict.items(): + if isinstance(v, str) and v.startswith("eyJ"): + event_dict[k] = "eyJ***" + return event_dict + + +def drop_positional_args(logger: Any, method_name: Any, event_dict: EventDict) -> EventDict: + event_dict.pop("positional_args", None) + return event_dict + + +# This is a placeholder fn, that is "edited" in place via the `suppress_logs_and_warning` decorator +# The reason we need to do it this way is that structlog caches loggers on first use, and those include the +# configured processors, so we can't get away with changing the config as it won't have any effect once the +# logger obj is created and has been used once +def respect_stdlib_disable(logger: Any, method_name: Any, event_dict: EventDict) -> EventDict: + return event_dict + + +@cache +def structlog_processors( + json_output: bool, + log_format: str = "", + colors: bool = True, +): + """ + Create the correct list of structlog processors for the given config. + + Return value is a tuple of three elements: + + 1. A list of processors shared for structlog and stdlib + 2.
The final processor/renderer (one that outputs a string) for use with structlog.stdlib.ProcessorFormatter + + :meta private: + """ + timestamper = structlog.processors.MaybeTimeStamper(fmt="iso") + + # Processors shared between stdlib handlers and structlog processors + shared_processors: list[structlog.typing.Processor] = [ + respect_stdlib_disable, + timestamper, + structlog.contextvars.merge_contextvars, + structlog.processors.add_log_level, + structlog.stdlib.PositionalArgumentsFormatter(), + logger_name, + redact_jwt, + structlog.processors.StackInfoRenderer(), + ] + + # Imports to suppress showing code from these modules. We need the import to get the filepath for + # structlog to ignore. + + import contextlib + + import click + + suppress = (click, contextlib) + try: + import httpcore + + suppress += (httpcore,) + except ImportError: + pass + try: + import httpx + + suppress += (httpx,) + except ImportError: + pass + + if json_output: + dict_exc_formatter = structlog.tracebacks.ExceptionDictTransformer( + use_rich=False, show_locals=False, suppress=suppress + ) + + dict_tracebacks = structlog.processors.ExceptionRenderer(dict_exc_formatter) + + import msgspec + + def json_dumps(msg, default): + # Note: this is likely an "expensive" step, but lets massage the dict order for nice + # viewing of the raw JSON logs. 
+ # Maybe we don't need this once the UI renders the JSON instead of displaying the raw text + msg = { + "timestamp": msg.pop("timestamp"), + "level": msg.pop("level"), + "event": msg.pop("event"), + **msg, + } + return msgspec.json.encode(msg, enc_hook=default) + + json = structlog.processors.JSONRenderer(serializer=json_dumps) + + def json_processor(logger: Any, method_name: Any, event_dict: EventDict) -> str: + return json(logger, method_name, event_dict).decode("utf-8") + + shared_processors.extend( + ( + dict_tracebacks, + structlog.processors.UnicodeDecoder(), + ), + ) + + return shared_processors, json_processor, json + + if os.getenv("DEV", "") != "": + # Only use Rich in dev -- otherwise for "production" deployments it makes the logs harder to read as + # it uses lots of ANSI escapes and non ASCII characters. Simpler is better for non-dev non-JSON + exc_formatter = structlog.dev.RichTracebackFormatter( + # These values are picked somewhat arbitrarily to produce useful-but-compact tracebacks. If + # we ever need to change these then they should be configurable.
+ extra_lines=0, + max_frames=30, + indent_guides=False, + suppress=suppress, + ) + else: + exc_formatter = structlog.dev.plain_traceback + + my_styles = structlog.dev.ConsoleRenderer.get_default_level_styles(colors=colors) + if colors: + my_styles["debug"] = structlog.dev.CYAN + + if log_format: + console = PercentFormatRender( + fmt=log_format, + exception_formatter=exc_formatter, + level_styles=my_styles, + colors=colors, + ) + else: + console = structlog.dev.ConsoleRenderer( + exception_formatter=exc_formatter, + level_styles=my_styles, + colors=colors, + ) + + return shared_processors, console, console + + +def configure_logging( + *, + json_output: bool = False, + log_level: str = "DEBUG", + log_format: str | None = None, + stdlib_config: dict | None = None, + extra_processors: Sequence[Processor] | None = None, + colors: bool | None = None, + output: LogOutputType | None = None, + log_levels: str | dict[str, str] | None = None, + cache_logger_on_first_use: bool = True, +): + """ + Configure structlog (and stdlib's logging to send via structlog processors too). + + If percent_log_format is passed then it will be handled in a similar mode to stdlib, including + interpolations such as ``%(asctime)s`` etc. + + :param json_output: Set to true to write all logs as JSON (one per line) + :param log_level: The default log level to use for most logs + :param log_format: A percent-style log format to write non JSON logs with. + :param output: Where to write the logs to. If ``json_output`` is true this must be a binary stream + :param colors: Whether to use colors for non-JSON logs. If `None` is passed, then colors are used + if standard out is a TTY (that is, an interactive session). + + It's possible to override this behavior by setting two standard environment variables to any value + except an empty string: + + * ``FORCE_COLOR`` activates colors, regardless of where output is going.
+ * ``NO_COLOR`` disables colors, regardless of where the output is going and regardless the value of + ``FORCE_COLOR``. Please note that ``NO_COLOR`` disables all styling, including bold and italics. + + :param log_levels: Levels of extra loggers to configure. + + To make this easier to use, this can be a string consisting of pairs of ``=`` (either + string, or space delimited) which will set the level for that specific logger. + + For example:: + + ``sqlalchemy=INFO sqlalchemy.engine=DEBUG`` + """ + if "fatal" not in NAME_TO_LEVEL: + NAME_TO_LEVEL["fatal"] = NAME_TO_LEVEL["critical"] + + if colors is None: + colors = os.environ.get("NO_COLOR", "") == "" and ( + os.environ.get("FORCE_COLOR", "") != "" + or (sys.stdout is not None and hasattr(sys.stdout, "isatty") and sys.stdout.isatty()) + ) + + stdlib_config = stdlib_config or {} + extra_processors = extra_processors or () + + PER_LOGGER_LEVELS[""] = NAME_TO_LEVEL[log_level.lower()] + + # Extract per-logger-tree levels and set them + if isinstance(log_levels, str): + log_from_level = partial(re.compile(r"\s*=\s*").split, maxsplit=2) + log_levels = {log: level for log, level in map(log_from_level, re.split(r"[\s,]+", log_levels))} + if log_levels: + for log, level in log_levels.items(): + try: + loglevel = NAME_TO_LEVEL[level.lower()] + except KeyError: + raise ValueError(f"Invalid log level for logger {log!r}: {level!r}") from None + else: + PER_LOGGER_LEVELS[log] = loglevel + + shared_pre_chain, for_stdlib, for_structlog = structlog_processors( + json_output, + log_format=log_format, + colors=colors, + ) + shared_pre_chain += list(extra_processors) + pre_chain: list[structlog.typing.Processor] = [structlog.stdlib.add_logger_name] + shared_pre_chain + + # Don't cache the loggers during tests, it make it hard to capture them + if "PYTEST_VERSION" in os.environ: + cache_logger_on_first_use = False + + std_lib_formatter: list[Processor] = [ + # TODO: Don't include this if we are using PercentFormatter -- it'll 
delete something we + # just have to recerated! + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + drop_positional_args, + for_stdlib, + ] + + wrapper_class = cast("type[BindableLogger]", make_filtering_logger()) + if json_output: + logger_factory = LoggerFactory(NamedBytesLogger, io=output) + else: + # There is no universal way of telling if a file-like-object is binary (and needs bytes) or text that + # works for files, sockets and io.StringIO/BytesIO. + + # If given a binary object, wrap it in a text mode wrapper + if output is not None and not hasattr(output, "encoding"): + output = io.TextIOWrapper(output, line_buffering=True) + logger_factory = LoggerFactory(NamedWriteLogger, io=output) + + structlog.configure( + processors=shared_pre_chain + [for_structlog], + cache_logger_on_first_use=cache_logger_on_first_use, + wrapper_class=wrapper_class, + logger_factory=logger_factory, + ) + + import logging.config + + config = {**stdlib_config} + config.setdefault("version", 1) + config.setdefault("disable_existing_loggers", False) + config["formatters"] = {**config.get("formatters", {})} + config["handlers"] = {**config.get("handlers", {})} + config["loggers"] = {**config.get("loggers", {})} + config["formatters"].update( + { + "structlog": { + "()": structlog.stdlib.ProcessorFormatter, + "use_get_message": False, + "processors": std_lib_formatter, + "foreign_pre_chain": pre_chain, + "pass_foreign_args": True, + }, + } + ) + for section in (config["loggers"], config["handlers"]): + for log_config in section.values(): + # We want everything to go via structlog, remove whatever the user might have configured + log_config.pop("stream", None) + log_config.pop("formatter", None) + # log_config.pop("handlers", None) + + if output and not hasattr(output, "encoding"): + # This is a BinaryIO, we need to give logging.StreamHandler a TextIO + output = codecs.lookup("utf-8").streamwriter(output) # type: ignore + + config["handlers"].update( + { + "default": { + 
"level": log_level.upper(), + "class": "logging.StreamHandler", + "formatter": "structlog", + "stream": output, + }, + } + ) + config["loggers"].update( + { + # Set Airflow logging to the level requested, but most everything else at "INFO" + "airflow": {"level": log_level.upper()}, + # These ones are too chatty even at info + "httpx": {"level": "WARN"}, + "sqlalchemy.engine": {"level": "WARN"}, + } + ) + config["root"] = { + "handlers": ["default"], + "level": "INFO", + "propagate": True, + } + + logging.config.dictConfig(config) + + +if __name__ == "__main__": + configure_logging( + # json_output=True, + log_format="[%(blue)s%(asctime)s%(reset)s] {%(blue)s%(filename)s:%(reset)s%(lineno)d} %(log_color)s%(levelname)s%(reset)s - %(log_color)s%(message)s%(reset)s", + ) + log = logging.getLogger("testing.stlib") + log2 = structlog.get_logger(logger_name="testing.structlog") + + def raises(): + try: + 1 / 0 + except ZeroDivisionError: + log.exception("str") + try: + 1 / 0 + except ZeroDivisionError: + log2.exception("std") + + def main(): + log.info("in main") + log2.info("in main", key="value") + raises() + + main() diff --git a/shared/logging/src/airflow_shared/logging/types.py b/shared/logging/src/airflow_shared/logging/types.py new file mode 100644 index 0000000000000..224fea5848cb8 --- /dev/null +++ b/shared/logging/src/airflow_shared/logging/types.py @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Any, Protocol + +from structlog.typing import FilteringBoundLogger + +__all__ = [ + "Logger", +] + + +class Logger(FilteringBoundLogger, Protocol): # noqa: D101 + name: str + + def isEnabledFor(self, level: int): ... + def getEffectiveLevel(self) -> int: ... + + # FilteringBoundLogger defines these methods with `event: str` -- in a few places in Airflow we do + # `self.log.exception(e)` or `self.log.info(rule_results_df)` so we correct the types to allow for this + # (as the code already did) + def debug(self, event: Any, *args: Any, **kw: Any) -> Any: ... + def info(self, event: Any, *args: Any, **kw: Any) -> Any: ... + def warning(self, event: Any, *args: Any, **kw: Any) -> Any: ... + def error(self, event: Any, *args: Any, **kw: Any) -> Any: ... + def exception(self, event: Any, *args: Any, **kw: Any) -> Any: ... + def log(self, level: int, event: Any, *args: Any, **kw: Any) -> Any: ... diff --git a/shared/logging/tests/logging/test_structlog.py b/shared/logging/tests/logging/test_structlog.py new file mode 100644 index 0000000000000..a1fc031b8f4e9 --- /dev/null +++ b/shared/logging/tests/logging/test_structlog.py @@ -0,0 +1,282 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import contextlib +import io +import json +import logging +import sys +import textwrap +from datetime import datetime, timezone + +import pytest +import structlog +from structlog.dev import BLUE, BRIGHT, CYAN, DIM, GREEN, MAGENTA, RESET_ALL as RESET + +from airflow_shared.logging.structlog import configure_logging + +# We don't want to use the caplog fixture in this test, as the main purpose of this file is to capture the +# _rendered_ output of the tests to make sure it is correct. 
+ +PY_3_11 = sys.version_info >= (3, 11) + + +@pytest.fixture(autouse=True) +def set_time(time_machine): + time_machine.move_to(datetime(1985, 10, 26, microsecond=1, tzinfo=timezone.utc), tick=False) + + +@pytest.fixture +def structlog_config(): + @contextlib.contextmanager + def configurer(**kwargs): + prev_config = structlog.get_config() + + try: + if kwargs.get("json_output"): + buff = io.BytesIO() + else: + buff = io.StringIO() + + configure_logging(**kwargs, output=buff) + yield buff + buff.seek(0) + finally: + structlog.configure(**prev_config) + + return configurer + + +@pytest.mark.parametrize( + ("get_logger", "extra_kwargs", "extra_output"), + [ + pytest.param( + structlog.get_logger, + {"key1": "value1"}, + f" {CYAN}key1{RESET}={MAGENTA}value1{RESET}", + id="structlog", + ), + pytest.param( + logging.getLogger, + {}, + "", + id="stdlib", + ), + ], +) +def test_colorful(structlog_config, get_logger, extra_kwargs, extra_output): + with structlog_config(colors=True) as sio: + logger = get_logger("my.logger") + # Test that interoplations work too + x = "world" + logger.info("Hello %s", x, **extra_kwargs) + + written = sio.getvalue() + # This _might_ be a little bit too specific to structlog's ConsoleRender format + assert ( + written == f"{DIM}1985-10-26T00:00:00.000001Z{RESET} [{GREEN}{BRIGHT}info {RESET}]" + f" {BRIGHT}Hello world {RESET}" + f" [{RESET}{BRIGHT}{BLUE}my.logger{RESET}]{RESET}" + extra_output + "\n" + ) + + +@pytest.mark.parametrize( + ("get_logger", "extra_kwargs", "extra_output"), + [ + pytest.param( + structlog.get_logger, + {"key1": "value1"}, + f" {CYAN}key1{RESET}={MAGENTA}value1{RESET}", + id="structlog", + ), + pytest.param( + logging.getLogger, + {}, + "", + id="stdlib", + ), + ], +) +def test_precent_fmt(structlog_config, get_logger, extra_kwargs, extra_output): + with structlog_config(colors=True, log_format="%(blue)s[%(asctime)s]%(reset)s %(message)s") as sio: + logger = get_logger("my.logger") + logger.info("Hello", 
**extra_kwargs) + + written = sio.getvalue() + print(written) + assert written == f"{BLUE}[1985-10-26T00:00:00.000001Z]{RESET} Hello" + extra_output + "\n" + + +def test_precent_fmt_force_no_colors( + structlog_config, +): + with structlog_config( + colors=False, + log_format="%(blue)s[%(asctime)s]%(reset)s %(log_color)s%(levelname)s - %(message)s", + ) as sio: + logger = structlog.get_logger("my.logger") + logger.info("Hello", key1="value1") + + written = sio.getvalue() + assert written == "[1985-10-26T00:00:00.000001Z] INFO - Hello key1=value1\n" + + +@pytest.mark.parametrize( + ("get_logger", "extra_kwargs"), + [ + pytest.param( + structlog.get_logger, + {"key1": "value1"}, + id="structlog", + ), + pytest.param( + logging.getLogger, + {}, + id="stdlib", + ), + ], +) +def test_json(structlog_config, get_logger, extra_kwargs): + with structlog_config(json_output=True) as bio: + logger = get_logger("my.logger") + logger.info("Hello", **extra_kwargs) + + written = json.load(bio) + assert written == { + "event": "Hello", + "level": "info", + **extra_kwargs, + "logger": "my.logger", + "timestamp": "1985-10-26T00:00:00.000001Z", + } + + +@pytest.mark.parametrize( + ("get_logger"), + [ + pytest.param( + structlog.get_logger, + id="structlog", + ), + pytest.param( + logging.getLogger, + id="stdlib", + ), + ], +) +def test_precent_fmt_exc(structlog_config, get_logger, monkeypatch): + monkeypatch.setenv("DEV", "") + with structlog_config( + log_format="%(message)s", + colors=False, + ) as sio: + lineno = sys._getframe().f_lineno + 2 + try: + 1 / 0 + except ZeroDivisionError: + get_logger("logger").exception("Error") + written = sio.getvalue() + + expected = textwrap.dedent(f"""\ + Error + Traceback (most recent call last): + File "{__file__}", line {lineno}, in test_precent_fmt_exc + 1 / 0 + """) + if PY_3_11: + expected += " ~~^~~\n" + expected += "ZeroDivisionError: division by zero\n" + assert written == expected + + +@pytest.mark.parametrize( + ("get_logger"), + [ + 
pytest.param( + structlog.get_logger, + id="structlog", + ), + pytest.param( + logging.getLogger, + id="stdlib", + ), + ], +) +def test_json_exc(structlog_config, get_logger, monkeypatch): + with structlog_config(json_output=True) as bio: + lineno = sys._getframe().f_lineno + 2 + try: + 1 / 0 + except ZeroDivisionError: + get_logger("logger").exception("Error") + written = bio.getvalue() + + written = json.load(bio) + assert written == { + "event": "Error", + "exception": [ + { + "exc_notes": [], + "exc_type": "ZeroDivisionError", + "exc_value": "division by zero", + "exceptions": [], + "frames": [ + { + "filename": __file__, + "lineno": lineno, + "name": "test_json_exc", + }, + ], + "is_cause": False, + "is_group": False, + "syntax_error": None, + }, + ], + "level": "error", + "logger": "logger", + "timestamp": "1985-10-26T00:00:00.000001Z", + } + + +@pytest.mark.parametrize( + ("levels",), + ( + pytest.param("my.logger=warn", id="str"), + pytest.param({"my.logger": "warn"}, id="dict"), + ), +) +def test_logger_filtering(structlog_config, levels): + with structlog_config( + colors=False, + log_format="[%(name)s] %(message)s", + log_level="DEBUG", + log_levels=levels, + ) as sio: + structlog.get_logger("my").info("Hello", key1="value1") + structlog.get_logger("my.logger").info("Hello", key1="value2") + structlog.get_logger("my.logger.sub").info("Hello", key1="value3") + structlog.get_logger("other.logger").info("Hello", key1="value4") + structlog.get_logger("my.logger.sub").warning("Hello", key1="value5") + + written = sio.getvalue() + assert written == textwrap.dedent("""\ + [my] Hello key1=value1 + [other.logger] Hello key1=value4 + [my.logger.sub] Hello key1=value5 + """) diff --git a/task-sdk/pyproject.toml b/task-sdk/pyproject.toml index 8140b78743407..9818b2d514fd3 100644 --- a/task-sdk/pyproject.toml +++ b/task-sdk/pyproject.toml @@ -71,6 +71,9 @@ dependencies = [ "colorlog>=6.8.2", "pydantic>2.11.0", # End of shared secrets_masker dependencies + # Start of 
shared logging dependencies + "pygtrie>=2.5.0", + # End of shared logging dependencies ] [project.urls] @@ -92,8 +95,9 @@ build-backend = "hatchling.build" path = "src/airflow/sdk/__init__.py" [tool.hatch.build.targets.sdist.force-include] -"../shared/timezones/src/airflow_shared/timezones" = "src/airflow/sdk/_shared/timezones" +"../shared/logging/src/airflow_shared/logging" = "src/airflow/sdk/_shared/logging" "../shared/secrets_masker/src/airflow_shared/secrets_masker" = "src/airflow/sdk/_shared/secrets_masker" +"../shared/timezones/src/airflow_shared/timezones" = "src/airflow/sdk/_shared/timezones" [tool.hatch.build.targets.wheel] packages = ["src/airflow"] @@ -235,6 +239,7 @@ tmp_path_retention_policy = "failed" [tool.airflow] shared_distributions = [ - "apache-airflow-shared-timezones", + "apache-airflow-shared-logging", "apache-airflow-shared-secrets-masker", + "apache-airflow-shared-timezones", ] diff --git a/task-sdk/src/airflow/sdk/_shared/logging b/task-sdk/src/airflow/sdk/_shared/logging new file mode 120000 index 0000000000000..fda343f9a7ea6 --- /dev/null +++ b/task-sdk/src/airflow/sdk/_shared/logging @@ -0,0 +1 @@ +../../../../../shared/logging/src/airflow_shared/logging/ \ No newline at end of file diff --git a/task-sdk/src/airflow/sdk/definitions/_internal/node.py b/task-sdk/src/airflow/sdk/definitions/_internal/node.py index 75958d047b421..86979e442cd5e 100644 --- a/task-sdk/src/airflow/sdk/definitions/_internal/node.py +++ b/task-sdk/src/airflow/sdk/definitions/_internal/node.py @@ -17,20 +17,21 @@ from __future__ import annotations -import logging import re from abc import ABCMeta, abstractmethod from collections.abc import Collection, Iterable, Sequence from datetime import datetime from typing import TYPE_CHECKING, Any +import structlog + from airflow.sdk.definitions._internal.mixins import DependencyMixin if TYPE_CHECKING: from airflow.sdk.definitions.dag import DAG from airflow.sdk.definitions.edges import EdgeModifier from 
airflow.sdk.definitions.taskgroup import TaskGroup - from airflow.sdk.types import Operator + from airflow.sdk.types import Logger, Operator from airflow.serialization.enums import DagAttributeTypes @@ -81,7 +82,7 @@ class DAGNode(DependencyMixin, metaclass=ABCMeta): _log_config_logger_name: str | None = None _logger_name: str | None = None - _cached_logger: logging.Logger | None = None + _cached_logger: Logger | None = None def __init__(self): self.upstream_task_ids = set() @@ -115,7 +116,7 @@ def dag_id(self) -> str: return "_in_memory_dag_" @property - def log(self) -> logging.Logger: + def log(self) -> Logger: """ Get a logger for this node. @@ -140,7 +141,7 @@ def log(self) -> logging.Logger: else self._log_config_logger_name ) - self._cached_logger = logging.getLogger(logger_name) + self._cached_logger = structlog.get_logger(logger_name) return self._cached_logger @property diff --git a/task-sdk/src/airflow/sdk/definitions/dag.py b/task-sdk/src/airflow/sdk/definitions/dag.py index 36215420cd36c..bab5a6db01bf6 100644 --- a/task-sdk/src/airflow/sdk/definitions/dag.py +++ b/task-sdk/src/airflow/sdk/definitions/dag.py @@ -1138,29 +1138,6 @@ def test( from airflow.utils.state import State from airflow.utils.types import DagRunTriggeredByType, DagRunType - if TYPE_CHECKING: - from airflow.models.taskinstance import TaskInstance - - def add_logger_if_needed(ti: TaskInstance): - """ - Add a formatted logger to the task instance. - - This allows all logs to surface to the command line, instead of into - a task file. Since this is a local test run, it is much better for - the user to see logs in the command line, rather than needing to - search for a log file. - - :param ti: The task instance that will receive a logger. 
- """ - format = logging.Formatter("[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s") - handler = logging.StreamHandler(sys.stdout) - handler.level = logging.INFO - handler.setFormatter(format) - # only add log handler once - if not any(isinstance(h, logging.StreamHandler) for h in ti.log.handlers): - log.debug("Adding Streamhandler to taskinstance %s", ti.task_id) - ti.log.addHandler(handler) - exit_stack = ExitStack() if conn_file_path or variable_file_path: @@ -1291,7 +1268,6 @@ def add_logger_if_needed(ti: TaskInstance): else: # Run the task locally try: - add_logger_if_needed(ti) if mark_success: ti.set_state(State.SUCCESS) log.info("[DAG TEST] Marking success for %s on %s", task, ti.logical_date) diff --git a/task-sdk/src/airflow/sdk/execution_time/execute_workload.py b/task-sdk/src/airflow/sdk/execution_time/execute_workload.py index b06a0d22cb4e4..605dc806ba59f 100644 --- a/task-sdk/src/airflow/sdk/execution_time/execute_workload.py +++ b/task-sdk/src/airflow/sdk/execution_time/execute_workload.py @@ -48,7 +48,7 @@ def execute_workload(workload: ExecuteTask) -> None: dispose_orm(do_log=False) - configure_logging(output=sys.stdout.buffer, enable_pretty_log=False) + configure_logging(output=sys.stdout.buffer, json_output=True) if not isinstance(workload, workloads.ExecuteTask): raise ValueError(f"Executor does not know how to handle {type(workload)}") diff --git a/task-sdk/src/airflow/sdk/execution_time/supervisor.py b/task-sdk/src/airflow/sdk/execution_time/supervisor.py index 3d87cc6618110..af5ce034e5fef 100644 --- a/task-sdk/src/airflow/sdk/execution_time/supervisor.py +++ b/task-sdk/src/airflow/sdk/execution_time/supervisor.py @@ -217,10 +217,11 @@ def _configure_logs_over_json_channel(log_fd: int): # A channel that the task can send JSON-formatted logs over. 
# # JSON logs sent this way will be handled nicely - from airflow.sdk.log import configure_logging + from airflow.sdk.log import configure_logging, reset_logging log_io = os.fdopen(log_fd, "wb", buffering=0) - configure_logging(enable_pretty_log=False, output=log_io, sending_to_supervisor=True) + reset_logging() + configure_logging(json_output=True, output=log_io, sending_to_supervisor=True) def _reopen_std_io_handles(child_stdin, child_stdout, child_stderr): @@ -1766,16 +1767,16 @@ def _configure_logging(log_path: str, client: Client) -> tuple[FilteringBoundLog log_file = init_log_file(log_path) - pretty_logs = False - if pretty_logs: - log_file_descriptor = log_file.open("a", buffering=1) - underlying_logger: WrappedLogger = structlog.WriteLogger(cast("TextIO", log_file_descriptor)) - else: + json_logs = True + if json_logs: log_file_descriptor = log_file.open("ab") - underlying_logger = structlog.BytesLogger(cast("BinaryIO", log_file_descriptor)) + underlying_logger: WrappedLogger = structlog.BytesLogger(cast("BinaryIO", log_file_descriptor)) + else: + log_file_descriptor = log_file.open("a", buffering=1) + underlying_logger = structlog.WriteLogger(cast("TextIO", log_file_descriptor)) with _remote_logging_conn(client): - processors = logging_processors(enable_pretty_log=pretty_logs)[0] + processors = logging_processors(json_output=json_logs) logger = structlog.wrap_logger(underlying_logger, processors=processors, logger_name="task").bind() return logger, log_file_descriptor diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py b/task-sdk/src/airflow/sdk/execution_time/task_runner.py index f8f1427fa5507..02d5eb2399677 100644 --- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py @@ -717,7 +717,7 @@ def startup() -> tuple[RuntimeTaskInstance, Context, Logger]: from airflow.sdk.log import configure_logging log_io = os.fdopen(logs.fds[0], "wb", buffering=0) - 
configure_logging(enable_pretty_log=False, output=log_io, sending_to_supervisor=True) + configure_logging(json_output=True, output=log_io, sending_to_supervisor=True) else: print("Unable to re-configure logging after sudo, we didn't get an FD", file=sys.stderr) diff --git a/task-sdk/src/airflow/sdk/log.py b/task-sdk/src/airflow/sdk/log.py index cae38961d918a..d57867a6ef9df 100644 --- a/task-sdk/src/airflow/sdk/log.py +++ b/task-sdk/src/airflow/sdk/log.py @@ -17,18 +17,15 @@ # under the License. from __future__ import annotations -import io import itertools import logging.config -import os import re import sys import warnings from functools import cache from pathlib import Path -from typing import TYPE_CHECKING, Any, BinaryIO, Generic, TextIO, TypeVar, cast +from typing import TYPE_CHECKING, Any, BinaryIO, Generic, TextIO, TypeVar -import msgspec import structlog # We have to import this here, as it is used in the type annotations at runtime even if it seems it is @@ -41,7 +38,7 @@ from structlog.typing import EventDict, ExcInfo, FilteringBoundLogger, Processor from airflow.logging_config import RemoteLogIO - from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI + from airflow.sdk.types import Logger, RuntimeTaskInstanceProtocol as RuntimeTI __all__ = ["configure_logging", "reset_logging", "mask_secret"] @@ -143,128 +140,32 @@ def emit(self, record: logging.LogRecord): @cache -def logging_processors(enable_pretty_log: bool, mask_secrets: bool = True, colored_console_log: bool = True): - if enable_pretty_log: - timestamper = structlog.processors.MaybeTimeStamper(fmt="%Y-%m-%d %H:%M:%S.%f") - else: - timestamper = structlog.processors.MaybeTimeStamper(fmt="iso") - - processors: list[structlog.typing.Processor] = [ - timestamper, - structlog.contextvars.merge_contextvars, - structlog.processors.add_log_level, - structlog.stdlib.PositionalArgumentsFormatter(), - logger_name, - redact_jwt, - structlog.processors.StackInfoRenderer(), - ] - - if 
mask_secrets: - processors.append(mask_logs) - - # Imports to suppress showing code from these modules. We need the import to get the filepath for - # structlog to ignore. - import contextlib - - import click - import httpcore - import httpx - - suppress = ( - click, - contextlib, - httpx, - httpcore, - httpx, - ) +def logging_processors( + json_output: bool, + log_format: str = "", + colors: bool = True, + sending_to_supervisor: bool = False, +) -> tuple[Processor, ...]: + from airflow.sdk._shared.logging.structlog import structlog_processors - if enable_pretty_log: - if colored_console_log: - rich_exc_formatter = structlog.dev.RichTracebackFormatter( - # These values are picked somewhat arbitrarily to produce useful-but-compact tracebacks. If - # we ever need to change these then they should be configurable. - extra_lines=0, - max_frames=30, - indent_guides=False, - suppress=suppress, - ) - my_styles = structlog.dev.ConsoleRenderer.get_default_level_styles() - my_styles["debug"] = structlog.dev.CYAN - - console = structlog.dev.ConsoleRenderer( - exception_formatter=rich_exc_formatter, level_styles=my_styles - ) - else: - # Create a console renderer without colors - use the same RichTracebackFormatter - # but rely on ConsoleRenderer(colors=False) to disable colors - rich_exc_formatter = structlog.dev.RichTracebackFormatter( - extra_lines=0, - max_frames=30, - indent_guides=False, - suppress=suppress, - ) - console = structlog.dev.ConsoleRenderer( - colors=False, - exception_formatter=rich_exc_formatter, - ) - processors.append(console) - return processors, { - "timestamper": timestamper, - "console": console, - } - dict_exc_formatter = structlog.tracebacks.ExceptionDictTransformer( - use_rich=False, show_locals=False, suppress=suppress - ) + extra_processors: tuple[Processor, ...] 
= () - dict_tracebacks = structlog.processors.ExceptionRenderer(dict_exc_formatter) - if hasattr(__builtins__, "BaseExceptionGroup"): - exc_group_processor = exception_group_tracebacks(dict_exc_formatter) - processors.append(exc_group_processor) - else: - exc_group_processor = None - - def json_dumps(msg, default): - # Note: this is likely an "expensive" step, but lets massage the dict order for nice - # viewing of the raw JSON logs. - # Maybe we don't need this once the UI renders the JSON instead of displaying the raw text - msg = { - "timestamp": msg.pop("timestamp"), - "level": msg.pop("level"), - "event": msg.pop("event"), - **msg, - } - return msgspec.json.encode(msg, enc_hook=default) - - def json_processor(logger: Any, method_name: Any, event_dict: EventDict) -> str: - # Stdlib logging doesn't need the re-ordering, it's fine as it is - return msgspec.json.encode(event_dict).decode("utf-8") - - json = structlog.processors.JSONRenderer(serializer=json_dumps) - - processors.extend( - ( - dict_tracebacks, - structlog.processors.UnicodeDecoder(), - ), - ) + mask_secrets = not sending_to_supervisor + if mask_secrets: + extra_processors += (mask_logs,) - # Include the remote logging provider for tasks if there are any we need (such as upload to Cloudwatch) if (remote := load_remote_log_handler()) and (remote_processors := getattr(remote, "processors")): - processors.extend(remote_processors) + extra_processors += remote_processors - processors.append(json) - - return processors, { - "timestamper": timestamper, - "exc_group_processor": exc_group_processor, - "dict_tracebacks": dict_tracebacks, - "json": json_processor, - } + procs, _, final_writer = structlog_processors( + json_output=json_output, log_format=log_format, colors=colors + ) + return tuple(procs) + extra_processors + (final_writer,) @cache def configure_logging( - enable_pretty_log: bool = True, + json_output: bool = False, log_level: str = "DEFAULT", output: BinaryIO | TextIO | None = None, 
cache_logger_on_first_use: bool = True, @@ -284,176 +185,41 @@ def configure_logging( colored_console_log = conf.getboolean("logging", "colored_console_log", fallback=True) - lvl = structlog.stdlib.NAME_TO_LEVEL[log_level.lower()] + from airflow.sdk._shared.logging.structlog import configure_logging - if enable_pretty_log: - formatter = "colored" - else: - formatter = "plain" - processors, named = logging_processors( - enable_pretty_log, mask_secrets=not sending_to_supervisor, colored_console_log=colored_console_log - ) - timestamper = named["timestamper"] - - pre_chain: list[structlog.typing.Processor] = [ - # Add the log level and a timestamp to the event_dict if the log entry - # is not from structlog. - structlog.stdlib.add_log_level, - structlog.stdlib.add_logger_name, - timestamper, - structlog.contextvars.merge_contextvars, - redact_jwt, - ] - - # Don't cache the loggers during tests, it make it hard to capture them - if "PYTEST_CURRENT_TEST" in os.environ: - cache_logger_on_first_use = False - - color_formatter: list[structlog.typing.Processor] = [ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - drop_positional_args, - ] - std_lib_formatter: list[structlog.typing.Processor] = [ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - drop_positional_args, - ] + mask_secrets = not sending_to_supervisor + extra_processors: tuple[Processor, ...] 
= () + + if mask_secrets: + extra_processors += (mask_logs,) if (remote := load_remote_log_handler()) and (remote_processors := getattr(remote, "processors")): - # Ensure we add in any remote log processor before we add `console` or `json` formatter so these get - # called with the event_dict as a dict still - color_formatter.extend(remote_processors) - std_lib_formatter.extend(remote_processors) - - wrapper_class = structlog.make_filtering_bound_logger(lvl) - if enable_pretty_log: - if output is not None and not isinstance(output, TextIO): - wrapper = io.TextIOWrapper(output, line_buffering=True) - logger_factory = structlog.WriteLoggerFactory(wrapper) - else: - logger_factory = structlog.WriteLoggerFactory(output) - structlog.configure( - processors=processors, - cache_logger_on_first_use=cache_logger_on_first_use, - wrapper_class=wrapper_class, - logger_factory=logger_factory, - ) - color_formatter.append(named["console"]) - else: - if output is not None and "b" not in output.mode: - if not hasattr(output, "buffer"): - raise ValueError( - f"output needed to be a binary stream, but it didn't have a buffer attribute ({output=})" - ) - output = cast("TextIO", output).buffer - if TYPE_CHECKING: - # Not all binary streams are isinstance of BinaryIO, so we check via looking at `mode` at - # runtime. 
mypy doesn't grok that though - assert isinstance(output, BinaryIO) - structlog.configure( - processors=processors, - cache_logger_on_first_use=cache_logger_on_first_use, - wrapper_class=wrapper_class, - logger_factory=structlog.BytesLoggerFactory(output), - ) - - if processor := named["exc_group_processor"]: - pre_chain.append(processor) - pre_chain.append(named["dict_tracebacks"]) - color_formatter.append(named["json"]) - std_lib_formatter.append(named["json"]) + extra_processors += remote_processors + + configure_logging( + json_output=json_output, + log_level=log_level, + output=output, + cache_logger_on_first_use=cache_logger_on_first_use, + colors=colored_console_log, + extra_processors=extra_processors, + ) global _warnings_showwarning if _warnings_showwarning is None: _warnings_showwarning = warnings.showwarning - - if sys.platform == "darwin": - # This warning is not "end-user actionable" so we silence it. - warnings.filterwarnings( - "ignore", r"This process \(pid=\d+\) is multi-threaded, use of fork\(\).*" - ) - # Capture warnings and show them via structlog + # Capture warnings and show them via structlog -- i.e. 
in task logs warnings.showwarning = _showwarning - logging.config.dictConfig( - { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "plain": { - "()": structlog.stdlib.ProcessorFormatter, - "processors": std_lib_formatter, - "foreign_pre_chain": pre_chain, - "pass_foreign_args": True, - }, - "colored": { - "()": structlog.stdlib.ProcessorFormatter, - "processors": color_formatter, - "foreign_pre_chain": pre_chain, - "pass_foreign_args": True, - }, - }, - "handlers": { - "default": { - "level": log_level.upper(), - "class": "logging.StreamHandler", - "formatter": formatter, - }, - "to_supervisor": { - "level": log_level.upper(), - "()": StdBinaryStreamHandler, - "formatter": formatter, - "stream": output, - }, - }, - "loggers": { - # Set Airflow logging to the level requested, but most everything else at "INFO" - "": { - "handlers": ["to_supervisor" if sending_to_supervisor else "default"], - "level": "INFO", - "propagate": True, - }, - "airflow": {"level": log_level.upper()}, - # These ones are too chatty even at info - "httpx": {"level": "WARN"}, - "sqlalchemy.engine": {"level": "WARN"}, - }, - } - ) - - -def reset_logging(): - global _warnings_showwarning - warnings.showwarning = _warnings_showwarning - configure_logging.cache_clear() - - -_warnings_showwarning: Any = None +def logger_at_level(name: str, level: int) -> Logger: + """Create a new logger at the given level.""" + from airflow.sdk._shared.logging.structlog import LEVEL_TO_FILTERING_LOGGER -def _showwarning( - message: Warning | str, - category: type[Warning], - filename: str, - lineno: int, - file: TextIO | None = None, - line: str | None = None, -) -> Any: - """ - Redirects warnings to structlog so they appear in task logs etc. - - Implementation of showwarnings which redirects to logging, which will first - check to see if the file parameter is None. If a file is specified, it will - delegate to the original warnings implementation of showwarning. 
Otherwise, - it will call warnings.formatwarning and will log the resulting string to a - warnings logger named "py.warnings" with level logging.WARNING. - """ - if file is not None: - if _warnings_showwarning is not None: - _warnings_showwarning(message, category, filename, lineno, file, line) - else: - log = structlog.get_logger(logger_name="py.warnings") - log.warning(str(message), category=category.__name__, filename=filename, lineno=lineno) + return structlog.wrap_logger( + None, wrapper_class=LEVEL_TO_FILTERING_LOGGER[level], logger_factory_args=(name,) + ) def _prepare_log_folder(directory: Path, mode: int): @@ -594,3 +360,46 @@ def mask_secret(secret: JsonValue, name: str | None = None) -> None: if comms := getattr(task_runner, "SUPERVISOR_COMMS", None): comms.send(MaskSecret(value=secret, name=name)) + + +def reset_logging(): + """ + Convenience for testing. Not for production use. + + :meta private: + """ + from airflow.sdk._shared.logging.structlog import structlog_processors + + global _warnings_showwarning + warnings.showwarning = _warnings_showwarning + _warnings_showwarning = None + structlog_processors.cache_clear() + logging_processors.cache_clear() + + +_warnings_showwarning: Any = None + + +def _showwarning( + message: Warning | str, + category: type[Warning], + filename: str, + lineno: int, + file: TextIO | None = None, + line: str | None = None, +) -> Any: + """ + Redirects warnings to structlog so they appear in task logs etc. + + Implementation of showwarnings which redirects to logging, which will first + check to see if the file parameter is None. If a file is specified, it will + delegate to the original warnings implementation of showwarning. Otherwise, + it will call warnings.formatwarning and will log the resulting string to a + warnings logger named "py.warnings" with level logging.WARNING.
+ """ + if file is not None: + if _warnings_showwarning is not None: + _warnings_showwarning(message, category, filename, lineno, file, line) + else: + log = structlog.get_logger("py.warnings") + log.warning(str(message), category=category.__name__, filename=filename, lineno=lineno) diff --git a/task-sdk/src/airflow/sdk/types.py b/task-sdk/src/airflow/sdk/types.py index 247e1b8f3f088..cfbcafe42011d 100644 --- a/task-sdk/src/airflow/sdk/types.py +++ b/task-sdk/src/airflow/sdk/types.py @@ -29,6 +29,7 @@ from pydantic import AwareDatetime + from airflow.sdk._shared.logging.types import Logger as Logger from airflow.sdk.bases.operator import BaseOperator from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetAliasEvent, AssetRef, BaseAssetUniqueKey from airflow.sdk.definitions.context import Context diff --git a/task-sdk/tests/conftest.py b/task-sdk/tests/conftest.py index b930bc5a9f0bc..0fa1cf6d50592 100644 --- a/task-sdk/tests/conftest.py +++ b/task-sdk/tests/conftest.py @@ -54,11 +54,20 @@ def pytest_configure(config: pytest.Config) -> None: # Always skip looking for tests in these folders! 
config.addinivalue_line("norecursedirs", "tests/test_dags") + config.addinivalue_line("markers", "log_level: ") + import airflow.settings airflow.settings.configure_policy_plugin_manager() +@pytest.fixture(scope="session", autouse=True) +def _init_log(): + from airflow.sdk.log import configure_logging + + configure_logging() + + @pytest.hookimpl(tryfirst=True) def pytest_runtest_setup(item): if next(item.iter_markers(name="db_test"), None): @@ -90,31 +99,42 @@ def test_dags_dir(): @pytest.fixture -def captured_logs(request): +def captured_logs(request, monkeypatch): import structlog + import structlog.processors + from airflow.sdk._shared.logging.structlog import PER_LOGGER_LEVELS from airflow.sdk.log import configure_logging, reset_logging # Use our real log config reset_logging() - configure_logging(enable_pretty_log=False) + configure_logging(json_output=False, colored_console_log=False) # Get log level from test parameter, which can either be a single log level or a # tuple of log level and desired output type, defaulting to INFO if not provided log_level = logging.INFO output = "dict" - param = getattr(request, "param", logging.INFO) + + param = getattr(request, "param", None) + if not param: + mark = next(request.node.iter_markers(name="log_level"), None) + param = mark.args[0] if mark is not None else None + if isinstance(param, int): log_level = param elif isinstance(param, tuple): log_level = param[0] output = param[1] - # We want to capture all logs, but we don't want to see them in the test output - structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(log_level)) + monkeypatch.setitem(PER_LOGGER_LEVELS, "", log_level) cur_processors = structlog.get_config()["processors"] processors = cur_processors.copy() + + if not any(isinstance(proc, structlog.processors.MaybeTimeStamper) for proc in processors): + timestamper = structlog.processors.MaybeTimeStamper(fmt="iso") + processors.append(timestamper) + if output == "dict": # We need to 
replace remove the last processor (the one that turns JSON into text, as we want the # event dict for tests) diff --git a/task-sdk/tests/task_sdk/bases/test_operator.py b/task-sdk/tests/task_sdk/bases/test_operator.py index 28dd11a91abdb..737ce097544ea 100644 --- a/task-sdk/tests/task_sdk/bases/test_operator.py +++ b/task-sdk/tests/task_sdk/bases/test_operator.py @@ -949,10 +949,11 @@ def test_execute_subclassed_op_warns_once(self, captured_logs): "event": "ExtendedHelloWorldOperator.execute cannot be called outside of the Task Runner!", "level": "warning", "timestamp": mock.ANY, + "logger": "tests.task_sdk.bases.test_operator", }, ] - def test_decorated_operators(self, captured_logs): + def test_decorated_operators(self, caplog): with DAG("d1") as dag: @dag.task(task_id="task_id", dag=dag) @@ -963,15 +964,13 @@ def say_hello(**context): op = say_hello() op.operator.execute(context={}) - assert captured_logs == [ - { - "event": "HelloWorldOperator.execute cannot be called outside of the Task Runner!", - "level": "warning", - "timestamp": mock.ANY, - }, - ] + assert { + "event": "HelloWorldOperator.execute cannot be called outside of the Task Runner!", + "log_level": "warning", + } in caplog - def test_python_op(self, captured_logs): + @pytest.mark.log_level(logging.WARNING) + def test_python_op(self, caplog): from airflow.providers.standard.operators.python import PythonOperator with DAG("d1"): @@ -985,13 +984,10 @@ def say_hello(**context): python_callable=say_hello, ) op.execute(context={}, PythonOperator__sentinel=ExecutorSafeguard.sentinel_value) - assert captured_logs == [ - { - "event": "HelloWorldOperator.execute cannot be called outside of the Task Runner!", - "level": "warning", - "timestamp": mock.ANY, - }, - ] + assert { + "event": "HelloWorldOperator.execute cannot be called outside of the Task Runner!", + "log_level": "warning", + } in caplog def test_partial_default_args(): diff --git a/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py 
b/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py index 2a554669004be..0bef4c59ad43f 100644 --- a/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py +++ b/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py @@ -22,7 +22,6 @@ import pytest import structlog -from pytest_unordered import unordered from airflow.exceptions import AirflowSkipException from airflow.sdk import TaskInstanceState @@ -91,7 +90,7 @@ def c_to_none(v): assert results == {"a", "b", None} -def test_xcom_convert_to_kwargs_fails_task(run_ti: RunTI, mock_supervisor_comms, captured_logs): +def test_xcom_convert_to_kwargs_fails_task(run_ti: RunTI, mock_supervisor_comms, caplog): results = set() with DAG("test") as dag: @@ -119,35 +118,30 @@ def c_to_none(v): assert run_ti(dag, "pull", map_index) == TaskInstanceState.SUCCESS # Clear captured logs from the above - captured_logs[:] = [] + caplog.clear() # But the third one fails because the map() result cannot be used as kwargs. assert run_ti(dag, "pull", 2) == TaskInstanceState.FAILED - assert captured_logs == unordered( - [ + assert { + "event": "Task failed with exception", + "log_level": "error", + "exception": [ { - "event": "Task failed with exception", - "level": "error", - "timestamp": mock.ANY, - "exception": [ - { - "exc_notes": [], - "exc_type": "ValueError", - "exc_value": "expand_kwargs() expects a list[dict], not list[None]", - "frames": mock.ANY, - "is_cause": False, - "is_group": False, - "exceptions": [], - "syntax_error": None, - } - ], - }, - ] - ) - - -def test_xcom_map_error_fails_task(mock_supervisor_comms, run_ti, captured_logs): + "exc_notes": [], + "exc_type": "ValueError", + "exc_value": "expand_kwargs() expects a list[dict], not list[None]", + "frames": mock.ANY, + "is_cause": False, + "is_group": False, + "exceptions": [], + "syntax_error": None, + } + ], + } in caplog + + +def test_xcom_map_error_fails_task(mock_supervisor_comms, run_ti, caplog): with DAG("test") as dag: @dag.task() @@ -170,27 +164,23 @@ def 
does_not_work_with_c(v): # The third one (for "c") will fail. assert run_ti(dag, "pull", 2) == TaskInstanceState.FAILED - assert captured_logs == unordered( - [ + assert { + "event": "Task failed with exception", + "log_level": "error", + "timestamp": mock.ANY, + "exception": [ { - "event": "Task failed with exception", - "level": "error", - "timestamp": mock.ANY, - "exception": [ - { - "exc_notes": [], - "exc_type": "RuntimeError", - "exc_value": "nope", - "frames": mock.ANY, - "is_cause": False, - "is_group": False, - "exceptions": [], - "syntax_error": None, - } - ], - }, - ] - ) + "exc_notes": [], + "exc_type": "RuntimeError", + "exc_value": "nope", + "frames": mock.ANY, + "is_cause": False, + "is_group": False, + "exceptions": [], + "syntax_error": None, + } + ], + } in caplog def test_xcom_map_nest(mock_supervisor_comms, run_ti): diff --git a/task-sdk/tests/task_sdk/log/test_log.py b/task-sdk/tests/task_sdk/log/test_log.py deleted file mode 100644 index 6de87dcfc6f93..0000000000000 --- a/task-sdk/tests/task_sdk/log/test_log.py +++ /dev/null @@ -1,225 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import annotations - -import json -import logging -import unittest.mock -from unittest import mock - -import pytest -import structlog -from uuid6 import UUID - -from airflow.sdk._shared.secrets_masker import SecretsMasker -from airflow.sdk.api.datamodels._generated import TaskInstance -from airflow.sdk.log import configure_logging, reset_logging - - -@pytest.mark.parametrize( - "captured_logs", [(logging.INFO, "json")], indirect=True, ids=["log_level=info,formatter=json"] -) -def test_json_rendering(captured_logs): - """ - Test that the JSON formatter renders correctly. - """ - logger = structlog.get_logger() - - secrets_masker = SecretsMasker() - - with mock.patch("airflow.sdk._shared.secrets_masker._secrets_masker", return_value=secrets_masker): - logger.info( - "A test message with a Pydantic class", - pydantic_class=TaskInstance( - id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - dag_id="test_dag", - task_id="test_task", - run_id="test_run", - try_number=1, - dag_version_id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - ), - ) - assert captured_logs - assert isinstance(captured_logs[0], bytes) - assert json.loads(captured_logs[0]) == { - "event": "A test message with a Pydantic class", - "pydantic_class": "TaskInstance(id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), task_id='test_task', dag_id='test_dag', run_id='test_run', " - "try_number=1, dag_version_id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), map_index=-1, hostname=None, context_carrier=None)", - "timestamp": unittest.mock.ANY, - "level": "info", - } - - -@pytest.mark.parametrize( - "captured_logs", [(logging.INFO, "json")], indirect=True, ids=["log_level=info,formatter=json"] -) -def test_jwt_token_is_redacted(captured_logs): - """ - Tests that jwt token is redacted. 
- """ - logger = structlog.get_logger() - logger.info( - "Executing workload", - token="eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ1cm46YWlyZmxvdy5hcGFjaGUub3JnOnRhc2siLCJuYmYiOjE3NDM0OTQ1NjgsImV4cCI6MTc0MzQ5NTE2OCwiaWF0IjoxNzQzNDk0NTY4LCJzdWIiOiIwMTk1ZjA1Zi1kNjRhLTc2NjMtOWQ2Yy1lYzYwYTM0MmQ5NTYifQ.df0ZNUbXwnoed2O1bjXQkPV8Df1mmMUu1b_PJrQuHoft9fhPRQELVDp-s3PtL6QYSSrF_81FzsQ7YHAu7bk-1g", - pydantic_class=TaskInstance( - id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - dag_id="test_dag", - task_id="test_task", - run_id="test_run", - try_number=1, - dag_version_id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - ), - ) - assert captured_logs - assert isinstance(captured_logs[0], bytes) - assert json.loads(captured_logs[0]) == { - "event": "Executing workload", - "level": "info", - "pydantic_class": "TaskInstance(id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), " - "task_id='test_task', dag_id='test_dag', run_id='test_run', " - "try_number=1, dag_version_id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), map_index=-1, hostname=None, context_carrier=None)", - "timestamp": unittest.mock.ANY, - "token": "eyJ***", - } - - -@pytest.mark.parametrize( - "captured_logs", [(logging.INFO, "json")], indirect=True, ids=["log_level=info,formatter=json"] -) -def test_logs_are_masked(captured_logs): - """ - Test that JSON logs are masked. 
- """ - logger = structlog.get_logger() - secrets_masker = SecretsMasker() - secrets_masker.add_mask("password") - with mock.patch( - "airflow.sdk._shared.secrets_masker.redact", - side_effect=lambda event: { - "event": "Connection *** is ***", - "level": "info", - "pydantic_class": TaskInstance( - id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - task_id="test_task", - dag_id="test_dag", - run_id="test_run", - try_number=1, - map_index=-1, - hostname=None, - dag_version_id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - ), - "timestamp": "2025-03-25T05:13:27.073918Z", - }, - ): - logger.info( - "Connection password is password123", - pydantic_class=TaskInstance( - id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - dag_id="test_dag", - task_id="test_task", - run_id="test_run", - try_number=1, - dag_version_id=UUID("ffec3c8e-2898-46f8-b7d5-3cc571577368"), - ), - ) - assert captured_logs - assert isinstance(captured_logs[0], bytes) - - log_entry = json.loads(captured_logs[0]) - assert log_entry == { - "event": "Connection *** is ***", - "level": "info", - "pydantic_class": "TaskInstance(id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), " - "task_id='test_task', dag_id='test_dag', run_id='test_run', " - "try_number=1, dag_version_id=UUID('ffec3c8e-2898-46f8-b7d5-3cc571577368'), map_index=-1, hostname=None, context_carrier=None)", - "timestamp": "2025-03-25T05:13:27.073918Z", - } - - -def test_logging_processors_with_colors(): - """Test that logging_processors creates colored console renderer when colored_console_log=True.""" - from airflow.sdk.log import logging_processors - - _, named = logging_processors(enable_pretty_log=True, colored_console_log=True) - assert "console" in named - console_renderer = named["console"] - assert hasattr(console_renderer, "_styles") - - -def test_logging_processors_without_colors(): - """Test that logging_processors creates non-colored console renderer when colored_console_log=False.""" - from airflow.sdk.log import logging_processors 
- - _, named = logging_processors(enable_pretty_log=True, colored_console_log=False) - assert "console" in named - console_renderer = named["console"] - assert hasattr(console_renderer, "_styles") - assert console_renderer._styles.__name__ == "_PlainStyles" - - -def test_logging_processors_json_format(): - """Test that logging_processors creates JSON renderer when enable_pretty_log=False.""" - from airflow.sdk.log import logging_processors - - _, named = logging_processors(enable_pretty_log=False, colored_console_log=True) - assert "console" not in named - assert "json" in named - - -def test_configure_logging_respects_colored_console_log_config(): - """Test that configure_logging respects the colored_console_log configuration.""" - - mock_conf = mock.MagicMock() - mock_conf.get.return_value = "INFO" - mock_conf.getboolean.return_value = False # colored_console_log = False - - with mock.patch("airflow.configuration.conf", mock_conf): - reset_logging() - configure_logging(enable_pretty_log=True) - # Check that getboolean was called with colored_console_log - calls = [call for call in mock_conf.getboolean.call_args_list if call[0][1] == "colored_console_log"] - assert len(calls) == 1 - assert calls[0] == mock.call("logging", "colored_console_log", fallback=True) - - -def test_configure_logging_explicit_colored_console_log(): - """Test that configure_logging respects explicit colored_console_log parameter.""" - - mock_conf = mock.MagicMock() - mock_conf.get.return_value = "INFO" - mock_conf.getboolean.return_value = True # colored_console_log = True - - with mock.patch("airflow.configuration.conf", mock_conf): - reset_logging() - # Explicitly disable colors despite config saying True - configure_logging(enable_pretty_log=True, colored_console_log=False) - mock_conf.getboolean.assert_not_called() - - -def test_configure_logging_no_airflow_config(): - """Test that configure_logging defaults work correctly.""" - - # This test can be removed or repurposed since we now 
always import airflow.configuration - mock_conf = mock.MagicMock() - mock_conf.get.return_value = "INFO" - mock_conf.getboolean.return_value = True # colored_console_log = True by default - - with mock.patch("airflow.configuration.conf", mock_conf): - reset_logging() - configure_logging(enable_pretty_log=True) - mock_conf.getboolean.assert_called_with("logging", "colored_console_log", fallback=True)