diff --git a/.github/workflows/auto_label.yml b/.github/workflows/auto_label.yml index 51a5c7913..7b65f5d9c 100644 --- a/.github/workflows/auto_label.yml +++ b/.github/workflows/auto_label.yml @@ -1,25 +1,127 @@ name: Auto Label PR on: + workflow_run: + workflows: ["Code Formatting & Lint Validation"] + types: + - completed pull_request_target: - types: [opened] + types: [labeled] permissions: pull-requests: write issues: write jobs: - label: + evaluate-and-writeback: runs-on: ubuntu-latest if: github.actor != 'copybara-service[bot]' steps: - - name: Add pull ready label + - name: Process Verification Outcome & Manage Dialogue uses: actions/github-script@v8 with: script: | - github.rest.issues.addLabels({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - labels: ['pull ready'] - }) + let pr; + let conclusion = 'failure'; + + if (context.eventName === 'pull_request_target') { + const { data } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + pr = data; + // Check latest lint workflow run status + const runs = await github.rest.actions.listWorkflowRunsForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'lint.yml', + branch: pr.head.ref, + }); + const run = runs.data.workflow_runs.find(r => r.head_sha === pr.head.sha); + conclusion = run ? run.conclusion : 'failure'; + } else { + const pulls = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + }); + pr = pulls.data.find(p => p.head.sha === context.payload.workflow_run.head_sha); + if (!pr) { + console.log('No matching open PR found for head sha.'); + return; + } + conclusion = context.payload.workflow_run.conclusion; + } + + const issue_number = pr.number; + + if (conclusion === 'failure') { + console.log('Lint workflow failed. Posting guidance advice comment.'); + if (context.eventName === 'workflow_run') { + await github.rest.issues.createComment({ + issue_number: issue_number, + owner: context.repo.owner, + repo: context.repo.repo, + body: '👋 Hello! Thank you for your contribution. Unfortunately, our pre-commit lint and style validation checks failed. In order for our internal review bots to import and process your Pull Request, please verify your code adheres to our formatting guidelines by running `pre-commit run --all-files` locally and pushing the adjustments. See [`CONTRIBUTING.md`](https://github.com/google/orbax/blob/main/CONTRIBUTING.md) for full instructions.' + }); + } + return; + } + + if (conclusion === 'success') { + console.log('Lint workflow succeeded. Cleaning up old failure comments.'); + const comments = await github.rest.issues.listComments({ + issue_number: issue_number, + owner: context.repo.owner, + repo: context.repo.repo, + }); + + // Clean up past lint failure comments + for (const comment of comments.data) { + if (comment.user.type === 'Bot' && comment.body.includes('pre-commit lint and style validation checks failed')) { + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + }); + } + } + + // Verify Google CLA signature requirement + const hasCLA = pr.labels.some(label => label.name === 'cla: yes'); + if (hasCLA) { + console.log('CLA fully signed. Onboarding pull ready label.'); + // Delete previous CLA warning comment if present + for (const comment of comments.data) { + if (comment.user.type === 'Bot' && comment.body.includes('Google Contributor License Agreement (CLA) is not yet signed')) { + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + }); + } + } + + if (!pr.labels.some(label => label.name === 'pull ready')) { + await github.rest.issues.addLabels({ + issue_number: issue_number, + owner: context.repo.owner, + repo: context.repo.repo, + labels: ['pull ready'] + }); + } + } else { + console.log('CLA unsigned. Posting CLA endorsement request comment.'); + // Post CLA comment only if not already present + const existingCLAComment = comments.data.some(comment => comment.user.type === 'Bot' && comment.body.includes('Google Contributor License Agreement (CLA) is not yet signed')); + if (!existingCLAComment) { + await github.rest.issues.createComment({ + issue_number: issue_number, + owner: context.repo.owner, + repo: context.repo.repo, + body: '👋 Hello! Your code formatting and style checks passed successfully! However, we notice your Google Contributor License Agreement (CLA) is not yet signed. Please verify your agreement status at https://cla.developers.google.com/ so our automated review bots can import your Pull Request.' + }); + } + } + } diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000..fc3a57d9d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,29 @@ +name: Code Formatting & Lint Validation + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + +permissions: + contents: read + +jobs: + style-check: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version-file: '.python-version' + cache: 'pip' + + - name: Install Pre-commit + run: pip install pre-commit + + - name: Execute Code Formatting & Lint Validation + run: pre-commit run --all-files --show-diff-on-failure diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..96d917d3b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,44 @@ +# pre-commit configuration for Orbax. +# Run locally with: pre-commit run --all-files +# +# Tool config lives in the repo-root pyproject.toml ([tool.pyink], +# [tool.isort]) and .pylintrc. The --config / --settings-path arguments +# below force the hooks to read from the root instead of the closest +# subpackage pyproject.toml (checkpoint/, model/, export/). +repos: + - repo: https://github.com/google/pyink + rev: '25.12.0' + hooks: + - id: pyink + types_or: [python, jupyter] + files: '\.(py|ipynb)$' + args: [--config=pyproject.toml] + additional_dependencies: ["pyink[jupyter]"] + + - repo: https://github.com/pylint-dev/pylint + rev: v4.0.5 + hooks: + - id: pylint + name: pylint + types: [python] + verbose: true + args: + - --rcfile=.pylintrc + exclude: ^(tests/|.*_test\.py$) + + - repo: https://github.com/PyCQA/isort + rev: 6.0.1 + hooks: + - id: isort + name: isort + types: [python] + args: [--settings-path=pyproject.toml] + exclude: ^(tests/|.*_test\.py$) + + - repo: https://github.com/google/pytype + rev: '2024.4.11' + hooks: + - id: pytype + name: pytype + args: [-d, import-error, --keep-going, -j, auto] + exclude: ^(tests/|.*_test\.py$) diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..1de0f9ab8 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,470 @@ +# This pylintrc file contains a best-effort configuration to uphold the +# best-practices and style described in the Google Python style guide: +# https://google.github.io/styleguide/pyguide.html +# +# Its canonical open-source location is: +# https://google.github.io/styleguide/pylintrc + +[MASTER] + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=third_party + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins=pylint.extensions.docparams,pylint.extensions.code_style,pylint.extensions.typing,pylint.extensions.mccabe + +# Use multiple processes to speed up Pylint. +jobs=4 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + + +[PARAMETER_DOCUMENTATION] + +# Whether to accept unqualified docparam tags. +accept-no-param-doc=no + +# Enforce Returns docstrings (mandatory in internal presubmits). +accept-no-return-doc=no + +# Enforce Yields docstrings for generator functions (mandatory in internal presubmits). +accept-no-yields-doc=no + +# Allow missing Raises docstrings (optional in internal presubmits). +accept-no-raise-doc=yes + +# Default docstring format. +default-docstring-type=google + + + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=apply-builtin, + arguments-differ, + attribute-defined-outside-init, + backtick, + bad-option-value, + buffer-builtin, + c-extension-no-member, + chained-comparison, + cmp-builtin, + cmp-method, + coerce-builtin, + coerce-method, + consider-iterating-dictionary, + consider-merging-isinstance, + consider-refactoring-into-while-condition, + consider-ternary-expression, + consider-using-any-or-all, + consider-using-augmented-assign, + consider-using-dict-comprehension, + consider-using-dict-items, + consider-using-dict-literal, + consider-using-enumerate, + consider-using-f-string, + consider-using-from-import, + consider-using-generator, + consider-using-in, + consider-using-join, + consider-using-list-comprehension, + consider-using-max-with-key, + consider-using-min-with-key, + consider-using-namedtuple-or-dataclass, + consider-using-set-comprehension, + consider-using-sys-exit, + consider-using-tuple, + consider-using-walrus, + consider-using-with, + delslice-method, + div-method, + duplicate-code, + eq-without-hash, + execfile-builtin, + file-builtin, + filter-builtin-not-iterating, + fixme, + getslice-method, + global-statement, + hex-method, + idiv-method, + implicit-str-concat-in-sequence, + import-error, + import-outside-toplevel, + import-self, + import-star-module-level, + input-builtin, + intern-builtin, + invalid-str-codec, + locally-disabled, + long-builtin, + long-suffix, + map-builtin-not-iterating, + metaclass-assignment, + next-method-called, + next-method-defined, + no-absolute-import, + no-else-break, + no-else-continue, + no-else-raise, + no-else-return, + no-member, + no-self-use, + nonzero-method, + oct-method, + old-division, + old-ne-operator, + old-octal-literal, + old-raise-syntax, + parameter-unpacking, + print-statement, + raising-string, + range-builtin-not-iterating, + raw_input-builtin, + rdiv-method, + redefined-argument-from-local, + reduce-builtin, + relative-import, + reload-builtin, + round-builtin, + setslice-method, + signature-differs, + standarderror-builtin, + suppressed-message, + sys-max-int, + too-few-public-methods, + too-many-ancestors, + too-many-arguments, + too-many-boolean-expressions, + too-many-branches, + too-many-instance-attributes, + too-many-locals, + too-many-public-methods, + too-many-return-statements, + too-many-statements, + too-many-positional-arguments, + trailing-newlines, + unichr-builtin, + unicode-builtin, + unnecessary-comprehension, + unnecessary-lambda-assignment, + unnecessary-pass, + unpacking-in-except, + use-dict-literal, + useless-else-on-loop, + useless-suppression, + using-cmp-argument, + wrong-import-order, + xrange-builtin, + zip-builtin-not-iterating, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=main,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl + +# Regular expression matching correct function names +function-rgx=^(?:(?PsetUp|tearDown|setUpModule|tearDownModule)|(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ + +# Regular expression matching correct variable names +variable-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct constant names +const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct attribute names +attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ + +# Regular expression matching correct argument names +argument-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=^_?[A-Z][a-zA-Z0-9]*$ + +# Regular expression matching correct module names +module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ + +# Regular expression matching correct method names +method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt +# lines made too long by directives to pytype. + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=(?x)( + ^\s*(\#\ )??$| + ^\s*(from\s+\S+\s+)?import\s+.+$| + .*pytype:.*$ + ) + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=yes + +# Maximum number of lines in a module +max-module-lines=99999 + +# String used as indentation unit. The internal Google style guide mandates 2 +# spaces. Google's externaly-published style guide says 4, consistent with +# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google +# projects (like TensorFlow). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=TODO + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging,absl.logging + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub, + TERMIOS, + Bastion, + rexec, + sets + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant, absl + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls, + class_ + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=builtins.Exception, + builtins.BaseException diff --git a/.python-version b/.python-version new file mode 100644 index 000000000..e61e31922 --- /dev/null +++ b/.python-version @@ -0,0 +1,2 @@ +# This is the minimum Python version that Orbax supports. +3.10 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6bf22c7d5..79afd527a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1 +1,65 @@ -External contributions not currently accepted. +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement (CLA). You (or your employer) retain the copyright to your +contribution; this simply gives us permission to use and redistribute your +contributions as part of the project. Head over to + to see your current agreements on file or +to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted +one (even if it was for a different project), you probably don't need to do it +again. + +## Code Reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. + +Here is how the process works for external contributors: + +1. **Create a PR**: Submit your changes as a Pull Request on GitHub. +2. **Automation**: Our automation will automatically add the `"pull ready"` + label to your PR (for new PRs) and import it into Google's internal code + review system. +3. **Review**: A member of the Orbax team will review your change. +4. **Merge**: Once the review is approved and submitted internally, your PR + will be automatically merged on GitHub. + +## Code Style & Linting + +Orbax follows the +[Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) +(80-character lines, 2-space indents, Google-style docstrings). Three tools +enforce it, wired together through the +[`pre-commit`](https://pre-commit.com/) framework: + +| Tool | Role | Config | +|---|---|---| +| [`pyink`](https://github.com/google/pyink) | Formatter (Black fork with Google defaults) | `[tool.pyink]` in `pyproject.toml` | +| [`pylint`](https://pylint.readthedocs.io/) | Linter | `.pylintrc` | +| [`isort`](https://pycqa.github.io/isort/) | Import order | `[tool.isort]` in `pyproject.toml` | +| [`pytype`](https://github.com/google/pytype) | Static Type Inferece & Verification | `.pre-commit-config.yaml` | + +Install the hooks once, then they run on every `git commit`: + +```bash +pip install pre-commit +pre-commit install +``` + +To check the whole tree before submitting a PR: + +```bash +pre-commit run --all-files +``` + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..2e0dd580c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +# Repo-root tooling configuration. Distribution metadata (project name, +# dependencies, build backend) lives in the per-package pyproject.toml files +# under checkpoint/, model/, and export/. Only formatter/linter settings live +# here so they apply uniformly across the monorepo. + +[tool.pyink] +line-length = 80 +unstable = true +target-version = [] +pyink-indentation = 2 +pyink-use-majority-quotes = true +pyink-annotation-pragmas = [ + "noqa", + "pylint:", + "type: ignore", + "pytype:", + "mypy:", + "pyright:", + "pyre-", + "@title", + "@param", + "@markdown", + "@tidy", + "@test", +] +pyink-ipynb-indentation = 2 + +[tool.isort] +profile = "google" +known_third_party = ["orbax"]