diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 410ffc520..000000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -source = git - -; to make nosetests happy -[report] -omit = - */yaml* - */tests/* - */python?.?/* - */site-packages/nose/* \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..3f3d2f050 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +test/fixtures/* eol=lf +*.sh eol=lf +/Makefile eol=lf diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..80819f5d8 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: byron diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..2fe73ca77 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + +- package-ecosystem: "gitsubmodule" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml new file mode 100644 index 000000000..bd09a939b --- /dev/null +++ b/.github/workflows/alpine-test.yml @@ -0,0 +1,74 @@ +name: test-alpine + +on: [push, pull_request, workflow_dispatch] + +jobs: + test: + runs-on: ubuntu-latest + + container: + image: alpine:latest + + defaults: + run: + shell: sudo -u runner sh -exo pipefail {0} + + steps: + - name: Prepare Alpine Linux + run: | + apk add sudo git git-daemon python3 py3-pip py3-virtualenv + echo 'Defaults env_keep += "CI GITHUB_* RUNNER_*"' >/etc/sudoers.d/ci_env + addgroup -g 127 docker + adduser -D -u 1001 runner # TODO: Check if this still works on GHA as intended. + adduser runner docker + shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. 
+ + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set workspace ownership + run: | + chown -R runner:docker -- "$GITHUB_WORKSPACE" + shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. + + - name: Prepare this repo for tests + run: | + ./init-tests-after-clone.sh + + - name: Set git user identity and command aliases for the tests + run: | + git config --global user.email "travis@ci.com" + git config --global user.name "Travis Runner" + # If we rewrite the user's config by accident, we will mess it up + # and cause subsequent tests to fail + cat test/fixtures/.gitconfig >> ~/.gitconfig + + - name: Set up virtualenv + run: | + python -m venv .venv + + - name: Update PyPA packages + run: | + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. + . .venv/bin/activate + python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + + - name: Install project and test dependencies + run: | + . .venv/bin/activate + pip install ".[test]" + + - name: Show version and platform information + run: | + . .venv/bin/activate + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' + + - name: Test with pytest + run: | + . .venv/bin/activate + pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..ae5241898 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,80 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. 
Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + pull_request: + schedule: + - cron: '27 10 * * 3' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + setup-python-dependencies: false + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. 
+ + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/cygwin-test.yml b/.github/workflows/cygwin-test.yml new file mode 100644 index 000000000..278777907 --- /dev/null +++ b/.github/workflows/cygwin-test.yml @@ -0,0 +1,87 @@ +name: test-cygwin + +on: [push, pull_request, workflow_dispatch] + +jobs: + test: + runs-on: windows-latest + + strategy: + fail-fast: false + + env: + CHERE_INVOKING: "1" + CYGWIN_NOWINPATH: "1" + + defaults: + run: + shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" + + steps: + - name: Force LF line endings + run: | + git config --global core.autocrlf false # Affects the non-Cygwin git. + shell: bash # Use Git Bash instead of Cygwin Bash for this step. 
+ + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install Cygwin + uses: cygwin/cygwin-install-action@v5 + with: + packages: python39 python39-pip python39-virtualenv git wget + add-to-path: false # No need to change $PATH outside the Cygwin environment. + + - name: Arrange for verbose output + run: | + # Arrange for verbose output but without shell environment setup details. + echo 'set -x' >~/.bash_profile + + - name: Special configuration for Cygwin git + run: | + git config --global --add safe.directory "$(pwd)" + git config --global --add safe.directory "$(pwd)/.git" + git config --global core.autocrlf false + + - name: Prepare this repo for tests + run: | + ./init-tests-after-clone.sh + + - name: Set git user identity and command aliases for the tests + run: | + git config --global user.email "travis@ci.com" + git config --global user.name "Travis Runner" + # If we rewrite the user's config by accident, we will mess it up + # and cause subsequent tests to fail + cat test/fixtures/.gitconfig >> ~/.gitconfig + + - name: Set up virtualenv + run: | + python3.9 -m venv --without-pip .venv + echo 'BASH_ENV=.venv/bin/activate' >>"$GITHUB_ENV" + + - name: Bootstrap pip in virtualenv + run: | + wget -qO- https://bootstrap.pypa.io/get-pip.py | python + + - name: Update PyPA packages + run: | + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. 
+ python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + + - name: Install project and test dependencies + run: | + pip install ".[test]" + + - name: Show version and platform information + run: | + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' + + - name: Test with pytest + run: | + pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000..a0e81a993 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,16 @@ +name: Lint + +on: [push, pull_request, workflow_dispatch] + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml new file mode 100644 index 000000000..9fd660c6b --- /dev/null +++ b/.github/workflows/pythonpackage.yml @@ -0,0 +1,117 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python package + +on: [push, pull_request, workflow_dispatch] + +permissions: + contents: read + +jobs: + test: + strategy: + matrix: + os-type: [ubuntu, macos, windows] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"] + exclude: + - os-type: macos + python-version: "3.7" # Not available for the ARM-based macOS runners. + - os-type: macos + python-version: "3.13t" + - os-type: windows + python-version: "3.13" # FIXME: Fix and enable Python 3.13 on Windows (#1955). 
+ - os-type: windows + python-version: "3.13t" + include: + - os-ver: latest + - os-type: ubuntu + python-version: "3.7" + os-ver: "22.04" + - experimental: false + + fail-fast: false + + runs-on: ${{ matrix.os-type }}-${{ matrix.os-ver }} + + defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: ${{ matrix.experimental }} + + - name: Set up WSL (Windows) + if: matrix.os-type == 'windows' + uses: Vampire/setup-wsl@v5.0.1 + with: + wsl-version: 1 + distribution: Alpine + additional-packages: bash + + - name: Prepare this repo for tests + run: | + ./init-tests-after-clone.sh + + - name: Set git user identity and command aliases for the tests + run: | + git config --global user.email "travis@ci.com" + git config --global user.name "Travis Runner" + # If we rewrite the user's config by accident, we will mess it up + # and cause subsequent tests to fail + cat test/fixtures/.gitconfig >> ~/.gitconfig + + - name: Update PyPA packages + run: | + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. + python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + + - name: Install project and test dependencies + run: | + pip install ".[test]" + + - name: Show version and platform information + run: | + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' + + # For debugging hook tests on native Windows systems that may have WSL. 
+ - name: Show bash.exe candidates (Windows) + if: matrix.os-type == 'windows' + run: | + set +e + bash.exe -c 'printenv WSL_DISTRO_NAME; uname -a' + python -c 'import subprocess; subprocess.run(["bash.exe", "-c", "printenv WSL_DISTRO_NAME; uname -a"])' + continue-on-error: true + + - name: Check types with mypy + run: | + mypy --python-version=${{ matrix.python-version }} + env: + MYPY_FORCE_COLOR: "1" + TERM: "xterm-256color" # For color: https://github.com/python/mypy/issues/13817 + # With new versions of mypy new issues might arise. This is a problem if there is + # nobody able to fix them, so we have to ignore errors until that changes. + continue-on-error: true + + - name: Test with pytest + run: | + pytest --color=yes -p no:sugar --instafail -vv + continue-on-error: false + + - name: Documentation + if: matrix.python-version != '3.7' + run: | + pip install ".[doc]" + make -C doc html diff --git a/.gitignore b/.gitignore index 787b3d442..d85569405 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,52 @@ +# Cached Python bytecode +__pycache__/ *.py[co] + +# Other caches +.cache/ +.mypy_cache/ +.pytest_cache/ + +# Transient editor files *.swp *~ -/*.egg-info + +# Editor configuration +nbproject +*.sublime-workspace +/.vscode/ +.idea/ + +# Virtual environments +.env/ +env/ +.venv/ +venv/ + +# Build output +/*egg-info /lib/GitPython.egg-info -cover/ -.coverage /build /dist /doc/_build -nbproject -*.sublime-workspace -/*egg-info +# Tox builds/environments /.tox + +# Code coverage output +cover/ +.coverage +.coverage.* + +# Monkeytype output +monkeytype.sqlite3 +monkeytype.sqlite3.* + +# Manual command output +output.txt + +# Finder metadata +.DS_Store + +# Files created by OSS-Fuzz when running locally +fuzz_*.pkg.spec diff --git a/.gitmodules b/.gitmodules index 612c39d95..251eeeec4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "gitdb"] - path = git/ext/gitdb - url = https://github.com/gitpython-developers/gitdb.git +[submodule "gitdb"] 
+ url = https://github.com/gitpython-developers/gitdb.git + path = git/ext/gitdb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..424cc5f37 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +repos: +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: [tomli] + exclude: ^test/fixtures/ + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.0 + hooks: + - id: ruff + args: ["--fix"] + exclude: ^git/ext/ + - id: ruff-format + exclude: ^git/ext/ + +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 + hooks: + - id: shellcheck + args: [--color] + exclude: ^test/fixtures/polyglot$|^git/ext/ + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: end-of-file-fixer + exclude: ^test/fixtures/|COPYING|LICENSE + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: check-merge-conflict + +- repo: https://github.com/abravalheri/validate-pyproject + rev: v0.19 + hooks: + - id: validate-pyproject diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..9bce80fd2 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,36 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need. +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "doc/" directory with Sphinx. 
+sphinx: + configuration: doc/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub. +formats: all + +# Optional but recommended, declare the Python requirements required +# to build your documentation. +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - method: pip + path: . + extra_requirements: + - doc diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0a2906dc2..000000000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: python -python: - - "2.6" - - "2.7" - # - "pypy" - won't work as smmap doesn't work (see gitdb/.travis.yml for details) - -install: - - git submodule update --init --recursive - - git fetch --tags - - pip install coveralls -script: - - nosetests --with-coverage -# after_success: as long as we are not running smoothly ... give it the cover treatment every time - - coveralls diff --git a/AUTHORS b/AUTHORS index 40fa69883..45b14c961 100644 --- a/AUTHORS +++ b/AUTHORS @@ -12,5 +12,48 @@ Contributors are: -Kai Lautaportti -Paul Sowden -Sebastian Thiel +-Jonathan Chu +-Vincent Driessen +-Phil Elson +-Bernard `Guyzmo` Pratz +-Timothy B. Hartman +-Konstantin Popov +-Peter Jones +-Anson Mansfield +-Ken Odegard +-Alexis Horgix Chotard +-Piotr Babij +-Mikuláš Poul +-Charles Bouchard-Légaré +-Yaroslav Halchenko +-Tim Swast +-William Luc Ritchie +-David Host +-A. 
Jesse Jiryu Davis +-Steven Whitman +-Stefan Stancu +-César Izurieta +-Arthur Milchior +-Anil Khatri +-JJ Graham +-Ben Thayer +-Dries Kennes +-Pratik Anurag +-Harmon +-Liam Beguin +-Ram Rachum +-Alba Mendez +-Robert Westman +-Hugo van Kemenade +-Hiroki Tokunaga +-Julien Mauroy +-Patrick Gerard +-Luke Twist +-Joseph Hale +-Santos Gallegos +-Wenhan Zhu +-Eliah Kagan +-Ethan Lin +-Jonas Scharpf Portions derived from other open source works and are clearly marked. diff --git a/CHANGES b/CHANGES index 9242253ff..9796566ae 100644 --- a/CHANGES +++ b/CHANGES @@ -1,2 +1,2 @@ Please see the online documentation for the latest changelog: -https://github.com/gitpython-developers/GitPython/blob/0.3/doc/source/changes.rst +https://github.com/gitpython-developers/GitPython/blob/main/doc/source/changes.rst diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..8536d7f73 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,15 @@ +# How to contribute + +The following is a short step-by-step rundown of what one typically would do to contribute. + +- [Fork this project](https://github.com/gitpython-developers/GitPython/fork) on GitHub. +- For setting up the environment to run the self tests, please run `init-tests-after-clone.sh`. +- Please try to **write a test that fails unless the contribution is present.** +- Try to avoid massive commits and prefer to take small steps, with one commit for each. +- Feel free to add yourself to AUTHORS file. +- Create a pull request. + +## Fuzzing Test Specific Documentation + +For details related to contributing to the fuzzing test suite and OSS-Fuzz integration, please +refer to the dedicated [fuzzing README](./fuzzing/README.md). 
diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 000000000..bf3faa662 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0xD0d4dCFc194ec24bCc777e635289e0b10E1a7b87" + } + } +} diff --git a/LICENSE b/LICENSE index 5a9a6f8d3..ba8a219fe 100644 --- a/LICENSE +++ b/LICENSE @@ -1,30 +1,29 @@ Copyright (C) 2008, 2009 Michael Trier and contributors All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -* Neither the name of the GitPython project nor the names of -its contributors may be used to endorse or promote products derived +* Neither the name of the GitPython project nor the names of +its contributors may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- diff --git a/MANIFEST.in b/MANIFEST.in index 95b2e883f..eac2a1514 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,10 +1,14 @@ -include VERSION -include LICENSE -include CHANGES include AUTHORS -include README +include CHANGES +include CONTRIBUTING.md +include LICENSE +include README.md +include VERSION include requirements.txt +include test-requirements.txt +include git/py.typed -graft git/test/fixtures -graft git/test/performance +recursive-include doc * +recursive-exclude test * +global-exclude __pycache__ *.pyc diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..d4f9acf87 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +.PHONY: all clean release force_release + +all: + @awk -F: '/^[[:alpha:]].*:/ && !/^all:/ {print $$1}' Makefile + +clean: + rm -rf build/ dist/ .eggs/ .tox/ + +release: clean + ./check-version.sh + make force_release + +force_release: clean + ./build-release.sh + twine upload dist/* + git push --tags origin main diff --git a/README.md b/README.md index b99c4c2f5..59c6f995b 100644 --- a/README.md +++ b/README.md @@ -1,103 +1,247 @@ +![Python package](https://github.com/gitpython-developers/GitPython/workflows/Python%20package/badge.svg) +[![Documentation Status](https://readthedocs.org/projects/gitpython/badge/?version=stable)](https://readthedocs.org/projects/gitpython/?badge=stable) +[![Packaging status](https://repology.org/badge/tiny-repos/python:gitpython.svg)](https://repology.org/metapackage/python:gitpython/versions) + +## [Gitoxide](https://github.com/Byron/gitoxide): A peek into the future… + +I started working on GitPython in 2009, back in the days when Python was 'my thing' and I had great plans with it. +Of course, back in the days, I didn't really know what I was doing and this shows in many places. Somewhat similar to +Python this happens to be 'good enough', but at the same time is deeply flawed and broken beyond repair. 
+ +By now, GitPython is widely used and I am sure there is a good reason for that, it's something to be proud of and happy about. +The community is maintaining the software and is keeping it relevant for which I am absolutely grateful. For the time to come I am happy to continue maintaining GitPython, remaining hopeful that one day it won't be needed anymore. + +More than 15 years after my first meeting with 'git' I am still excited about it, and am happy to finally have the tools and +probably the skills to scratch that itch of mine: implement `git` in a way that makes tool creation a piece of cake for most. + +If you like the idea and want to learn more, please head over to [gitoxide](https://github.com/Byron/gitoxide), an +implementation of 'git' in [Rust](https://www.rust-lang.org). + +*(Please note that `gitoxide` is not currently available for use in Python, and that Rust is required.)* + ## GitPython -GitPython is a python library used to interact with git repositories, high-level like git-porcelain, or low-level like git-plumbing. +GitPython is a python library used to interact with git repositories, high-level like git-porcelain, +or low-level like git-plumbing. + +It provides abstractions of git objects for easy access of repository data often backed by calling the `git` +command-line program. + +### DEVELOPMENT STATUS + +This project is in **maintenance mode**, which means that + -It provides abstractions of git objects for easy access of repository data, and additionally allows you to access the git repository more directly using either a pure python implementation, or the faster, but more resource intensive git command implementation. 
+- …there will be no feature development, unless these are contributed +- …there will be no bug fixes, unless they are relevant to the safety of users, or contributed +- …issues will be responded to with waiting times of up to a month -The object database implementation is optimized for handling large quantities of objects and large datasets, which is achieved by using low-level structures and data streaming. +The project is open to contributions of all kinds, as well as new maintainers. ### REQUIREMENTS -* Git ( tested with 1.8.3.4 ) -* Python Nose - used for running the tests - - Tested with nose 1.3.0 -* Mock by Michael Foord used for tests - - Tested with 1.0.1 -* Coverage - used for tests coverage +GitPython needs the `git` executable to be installed on the system and available in your +`PATH` for most operations. If it is not in your `PATH`, you can help GitPython find it +by setting the `GIT_PYTHON_GIT_EXECUTABLE=` environment variable. -The list of dependencies are listed in /requirements.txt and /test-requirements.txt. The installer takes care of installing them for you though. +- Git (1.7.x or newer) +- Python >= 3.7 + +The list of dependencies are listed in `./requirements.txt` and `./test-requirements.txt`. +The installer takes care of installing them for you. ### INSTALL -[![Latest Version](https://pypip.in/version/GitPython/badge.svg)](https://pypi.python.org/pypi/GitPython/) -[![Supported Python Versions](https://pypip.in/py_versions/GitPython/badge.svg)](https://pypi.python.org/pypi/GitPython/) +GitPython and its required package dependencies can be installed in any of the following ways, all of which should typically be done in a [virtual environment](https://docs.python.org/3/tutorial/venv.html). 
+ +#### From PyPI + +To obtain and install a copy [from PyPI](https://pypi.org/project/GitPython/), run: + +```sh +pip install GitPython +``` + +(A distribution package can also be downloaded for manual installation at [the PyPI page](https://pypi.org/project/GitPython/).) + +#### From downloaded source code + +If you have downloaded the source code, run this from inside the unpacked `GitPython` directory: + +```sh +pip install . +``` + +#### By cloning the source code repository + +To clone [the GitHub repository](https://github.com/gitpython-developers/GitPython) from source to work on the code, you can do it like so: + +```sh +git clone https://github.com/gitpython-developers/GitPython +cd GitPython +./init-tests-after-clone.sh +``` -If you have downloaded the source code: +On Windows, `./init-tests-after-clone.sh` can be run in a Git Bash shell. - python setup.py install +If you are cloning [your own fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks), then replace the above `git clone` command with one that gives the URL of your fork. Or use this [`gh`](https://cli.github.com/) command (assuming you have `gh` and your fork is called `GitPython`): -or if you want to obtain a copy from the Pypi repository: +```sh +gh repo clone GitPython +``` - pip install gitpython +Having cloned the repo, create and activate your [virtual environment](https://docs.python.org/3/tutorial/venv.html). -Both commands will install the required package dependencies. +Then make an [editable install](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs): -A distribution package can be obtained for manual installation at: +```sh +pip install -e ".[test]" +``` - http://pypi.python.org/pypi/GitPython +In the less common case that you do not want to install test dependencies, `pip install -e .` can be used instead. 
+ +#### With editable *dependencies* (not preferred, and rarely needed) + +In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediately reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. + +If you want to do that *and* you want the versions in GitPython's git submodules to be used, then pass `-e git/ext/gitdb` and/or `-e git/ext/gitdb/gitdb/ext/smmap` to `pip install`. This can be done in any order, and in separate `pip install` commands or the same one, so long as `-e` appears before *each* path. For example, you can install GitPython, gitdb, and smmap editably in the currently active virtual environment this way: + +```sh +pip install -e ".[test]" -e git/ext/gitdb -e git/ext/gitdb/gitdb/ext/smmap +``` + +The submodules must have been cloned for that to work, but that will already be the case if you have run `./init-tests-after-clone.sh`. You can use `pip list` to check which packages are installed editably and which are installed normally. + +To reiterate, this approach should only rarely be used. For most development it is preferable to allow the gitdb and smmap dependencies to be retrieved automatically from PyPI in their latest stable packaged versions. + +### Limitations + +#### Leakage of System Resources + +GitPython is not suited for long-running processes (like daemons) as it tends to +leak system resources. It was written in a time where destructors (as implemented +in the `__del__` method) still ran deterministically. + +In case you still want to use it in such a context, you will want to search the +codebase for `__del__` implementations and call these yourself when you see fit. 
+ +Another way to assure proper cleanup of resources is to factor out GitPython into a +separate process which can be dropped periodically. + +#### Windows support + +See [Issue #525](https://github.com/gitpython-developers/GitPython/issues/525). ### RUNNING TESTS -The easiest way to run test is by using [tox](https://pypi.python.org/pypi/tox) a wrapper around virtualenv. It will take care of setting up environnements with the proper dependencies installed and execute test commands. To install it simply: +_Important_: Right after cloning this repository, please be sure to have executed +the `./init-tests-after-clone.sh` script in the repository root. Otherwise +you will encounter test failures. - pip install tox +#### Install test dependencies -Then run: +Ensure testing libraries are installed. This is taken care of already if you installed with: - tox +```sh +pip install -e ".[test]" +``` -### SOURCE +If you had installed with a command like `pip install -e .` instead, you can still run +the above command to add the testing dependencies. -GitPython's git repo is available on GitHub, which can be browsed at [github](https://github.com/gitpython-developers/GitPython) and cloned like that: +#### Test commands - git clone git://github.com/gitpython-developers/GitPython.git git-python +To test, run: +```sh +pytest +``` -### INFRASTRUCTURE +To lint, and apply some linting fixes as well as automatic code formatting, run: -* [User Documentation](http://gitpython.readthedocs.org) -* [Mailing List](http://groups.google.com/group/git-python) -* [Issue Tracker](https://github.com/gitpython-developers/GitPython/issues) +```sh +pre-commit run --all-files +``` -### LICENSE +This includes the linting and autoformatting done by Ruff, as well as some other checks. -New BSD License. See the LICENSE file. 
+To typecheck, run: -### DEVELOPMENT STATUS +```sh +mypy +``` -[![Build Status](https://travis-ci.org/gitpython-developers/GitPython.svg?branch=0.3)](https://travis-ci.org/gitpython-developers/GitPython) -[![Coverage Status](https://coveralls.io/repos/gitpython-developers/GitPython/badge.png)](https://coveralls.io/r/gitpython-developers/GitPython) -[![Documentation Status](https://readthedocs.org/projects/gitpython/badge/?version=stable)](https://readthedocs.org/projects/gitpython/?badge=stable) +#### CI (and tox) +Style and formatting checks, and running tests on all the different supported Python versions, will be performed: -The project was idle for 2 years, the last release (v0.3.2 RC1) was made on July 2011. Reason for this might have been the project's dependency on me as sole active maintainer, which is an issue in itself. +- Upon submitting a pull request. +- On each push, *if* you have a fork with GitHub Actions enabled. +- Locally, if you run [`tox`](https://tox.wiki/) (this skips any Python versions you don't have installed). -Now that there seems to be a massive user base, this should be motivation enough to let git-python return to a proper state, which means +#### Configuration files -* no open pull requests -* no open issues describing bugs +Specific tools are all configured in the `./pyproject.toml` file: -In short, I want to make a new release of 0.3 with all contributions and fixes included, foster community building to facilitate contributions. +- `pytest` (test runner) +- `coverage.py` (code coverage) +- `ruff` (linter and formatter) +- `mypy` (type checker) -#### PRESENT GOALS +Orchestration tools: -The goals I have set for myself, in order, are as follows, all on branch 0.3. +- Configuration for `pre-commit` is in the `./.pre-commit-config.yaml` file. +- Configuration for `tox` is in `./tox.ini`. +- Configuration for GitHub Actions (CI) is in files inside `./.github/workflows/`. 
-* bring the test suite back online to work with the most commonly used git version -* merge all open pull requests, may there be a test-case or not, back. If something breaks, fix it if possible or let the contributor know -* conform git-python's structure and toolchain to the one used in my [other OSS projects](https://github.com/Byron/bcore) -* evaluate all open issues and close them if possible -* evaluate python 3.3 compatibility and establish it if possible +### Contributions -While that is happening, I will try hard to foster community around the project. This means being more responsive on the mailing list and in issues, as well as setting up clear guide lines about the [contribution](http://rfc.zeromq.org/spec:22) and maintenance workflow. +Please have a look at the [contributions file][contributing]. + +### INFRASTRUCTURE + +- [User Documentation](http://gitpython.readthedocs.org) +- [Questions and Answers](http://stackexchange.com/filters/167317/gitpython) +- Please post on Stack Overflow and use the `gitpython` tag +- [Issue Tracker](https://github.com/gitpython-developers/GitPython/issues) + - Post reproducible bugs and feature requests as a new issue. + Please be sure to provide the following information if posting bugs: + - GitPython version (e.g. `import git; git.__version__`) + - Python version (e.g. `python --version`) + - The encountered stack-trace, if applicable + - Enough information to allow reproducing the issue + +### How to make a new release + +1. Update/verify the **version** in the `VERSION` file. +2. Update/verify that the `doc/source/changes.rst` changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/` +3. Commit everything. +4. Run `git tag -s ` to tag the version in Git. +5. 
_Optionally_ create and activate a [virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment). (Then the next step can install `build` and `twine`.) +6. Run `make release`. +7. Go to [GitHub Releases](https://github.com/gitpython-developers/GitPython/releases) and publish a new one with the recently pushed tag. Generate the changelog. + +### Projects using GitPython + +- [PyDriller](https://github.com/ishepard/pydriller) +- [Kivy Designer](https://github.com/kivy/kivy-designer) +- [Prowl](https://github.com/nettitude/Prowl) +- [Python Taint](https://github.com/python-security/pyt) +- [Buster](https://github.com/axitkhurana/buster) +- [git-ftp](https://github.com/ezyang/git-ftp) +- [Git-Pandas](https://github.com/wdm0006/git-pandas) +- [PyGitUp](https://github.com/msiemens/PyGitUp) +- [PyJFuzz](https://github.com/mseclab/PyJFuzz) +- [Loki](https://github.com/Neo23x0/Loki) +- [Omniwallet](https://github.com/OmniLayer/omniwallet) +- [GitViper](https://github.com/BeayemX/GitViper) +- [Git Gud](https://github.com/bthayer2365/git-gud) + +### LICENSE -#### FUTURE GOALS +[3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license]. -There has been a lot of work in the master branch, which is the direction I want git-python to go. Namely, it should be able to freely mix and match the back-end used, depending on your requirements and environment. +One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license). +This file is not included in the wheel or sdist packages published by the maintainers of GitPython. 
-* restructure master to match my [OSS standard](https://github.com/Byron/bcore) -* review code base and bring test-suite back online -* establish python 3.3 compatibility -* make it work similarly to 0.3, but with the option to swap for at least one additional backend -* make a 1.0 release -* add backends as required +[contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md +[license]: https://github.com/gitpython-developers/GitPython/blob/main/LICENSE diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..0aea34845 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,16 @@ +# Security Policy + +## Supported Versions + +Only the latest version of GitPython can receive security updates. If a vulnerability is discovered, a fix can be issued in a new release. + +| Version | Supported | +| ------- | ------------------ | +| 3.x.x | :white_check_mark: | +| < 3.0 | :x: | + +## Reporting a Vulnerability + +Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicly right away. + +Vulnerabilities in GitPython's dependencies [gitdb](https://github.com/gitpython-developers/gitdb/blob/master/SECURITY.md) or [smmap](https://github.com/gitpython-developers/smmap/blob/master/SECURITY.md), which primarily exist to support GitPython, can be reported here as well, at that same link. The affected package (`GitPython`, `gitdb`, or `smmap`) can be included in the report, if known. 
diff --git a/TODO b/TODO deleted file mode 100644 index 2643676ce..000000000 --- a/TODO +++ /dev/null @@ -1,7 +0,0 @@ -For a list of tickets, please visit -http://byronimo.lighthouseapp.com/projects/51787-gitpython/overview - - - - - diff --git a/VERSION b/VERSION index e8a6b9305..e6af1c454 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.2.1 +3.1.44 diff --git a/build-release.sh b/build-release.sh new file mode 100755 index 000000000..1a8dce2c2 --- /dev/null +++ b/build-release.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +# +# This script builds a release. If run in a venv, it auto-installs its tools. +# You may want to run "make release" instead of running this script directly. + +set -eEu + +function release_with() { + "$1" -m build --sdist --wheel +} + +function suggest_venv() { + local venv_cmd='python -m venv env && source env/bin/activate' + printf "HELP: To avoid this error, use a virtual-env with '%s' instead.\n" "$venv_cmd" +} + +if test -n "${VIRTUAL_ENV-}"; then + deps=(build twine) # Install twine along with build, as we need it later. + echo "Virtual environment detected. Adding packages: ${deps[*]}" + pip install --quiet --upgrade "${deps[@]}" + echo 'Starting the build.' + release_with python +else + trap suggest_venv ERR # This keeps the original exit (error) code. + echo 'Starting the build.' + release_with python3 # Outside a venv, use python3. +fi diff --git a/check-version.sh b/check-version.sh new file mode 100755 index 000000000..579cf789f --- /dev/null +++ b/check-version.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +# +# This script checks if we are in a consistent state to build a new release. +# See the release instructions in README.md for the steps to make this pass. 
+# You may want to run "make release" instead of running this script directly. + +set -eEfuo pipefail +trap 'echo "$0: Check failed. Stopping." >&2' ERR + +readonly version_path='VERSION' +readonly changes_path='doc/source/changes.rst' + +function check_status() { + git status -s "$@" + test -z "$(git status -s "$@")" +} + +function get_latest_tag() { + local config_opts + printf -v config_opts ' -c versionsort.suffix=-%s' alpha beta pre rc RC + # shellcheck disable=SC2086 # Deliberately word-splitting the arguments. + git $config_opts tag -l '[0-9]*' --sort=-v:refname | head -n1 +} + +echo 'Checking current directory.' +test "$(cd -- "$(dirname -- "$0")" && pwd)" = "$(pwd)" # Ugly, but portable. + +echo "Checking that $version_path and $changes_path exist and have no uncommitted changes." +test -f "$version_path" +test -f "$changes_path" +check_status -- "$version_path" "$changes_path" + +# This section can be commented out, if absolutely necessary. +echo 'Checking that ALL changes are committed.' +check_status --ignore-submodules + +version_version="$(<"$version_path")" +changes_version="$(awk '/^[0-9]/ {print $0; exit}' "$changes_path")" +latest_tag="$(get_latest_tag)" +head_sha="$(git rev-parse HEAD)" +latest_tag_sha="$(git rev-parse "${latest_tag}^{commit}")" + +# Display a table of all the current version, tag, and HEAD commit information. +echo +echo 'The VERSION must be the same in all locations, and so must the HEAD and tag SHA' +printf '%-14s = %s\n' 'VERSION file' "$version_version" \ + 'changes.rst' "$changes_version" \ + 'Latest tag' "$latest_tag" \ + 'HEAD SHA' "$head_sha" \ + 'Latest tag SHA' "$latest_tag_sha" + +# Check that the latest tag and current version match the HEAD we're releasing. +test "$version_version" = "$changes_version" +test "$latest_tag" = "$version_version" +test "$head_sha" = "$latest_tag_sha" +echo 'OK, everything looks good.' 
diff --git a/doc/Makefile b/doc/Makefile index 39fe377f9..ddeadbd7e 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,14 +2,15 @@ # # You can set these variables from the command line. -SPHINXOPTS = +BUILDDIR = build +SPHINXOPTS = -W SPHINXBUILD = sphinx-build PAPER = # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html web pickle htmlhelp latex changes linkcheck @@ -24,52 +25,52 @@ help: @echo " linkcheck to check all external links for integrity" clean: - -rm -rf build/* + -rm -rf $(BUILDDIR)/* html: - mkdir -p build/html build/doctrees - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html + mkdir -p $(BUILDDIR)/html $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo - @echo "Build finished. The HTML pages are in build/html." + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." pickle: - mkdir -p build/pickle build/doctrees - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle + mkdir -p $(BUILDDIR)/pickle $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." web: pickle json: - mkdir -p build/json build/doctrees - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json + mkdir -p $(BUILDDIR)/json $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: - mkdir -p build/htmlhelp build/doctrees - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp + mkdir -p $(BUILDDIR)/htmlhelp $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in build/htmlhelp." 
+ ".hhp project file in $(BUILDDIR)/htmlhelp." latex: - mkdir -p build/latex build/doctrees - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex + mkdir -p $(BUILDDIR)/latex $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo - @echo "Build finished; the LaTeX files are in build/latex." + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: - mkdir -p build/changes build/doctrees - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes + mkdir -p $(BUILDDIR)/changes $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo - @echo "The overview file is in build/changes." + @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: - mkdir -p build/linkcheck build/doctrees - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck + mkdir -p $(BUILDDIR)/linkcheck $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ - "or in build/linkcheck/output.txt." + "or in $(BUILDDIR)/linkcheck/output.txt." diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 000000000..81140d898 --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1,3 @@ +sphinx >= 7.1.2, < 7.2 +sphinx_rtd_theme +sphinx-autodoc-typehints diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 4238e5f5a..00a3c660e 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,11 +2,781 @@ Changelog ========= +3.1.44 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.44 + +3.1.43 +====== + +A major visible change will be the added deprecation- or user-warnings, +and greatly improved typing. + +See the following for all changes. 
+https://github.com/gitpython-developers/GitPython/releases/tag/3.1.43 + +3.1.42 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.42 + +3.1.41 +====== + +This release is relevant for security as it fixes a possible arbitrary +code execution on Windows. + +See this PR for details: https://github.com/gitpython-developers/GitPython/pull/1792 +An advisory is available soon at: https://github.com/gitpython-developers/GitPython/security/advisories/GHSA-2mqj-m65w-jghx + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.41 + +3.1.40 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.40 + +3.1.38 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.38 + +3.1.37 +====== + +This release contains another security fix that further improves validation of symbolic references +and thus properly fixes this CVE: https://github.com/advisories/GHSA-cwvm-v4w8-q58c . + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/67?closed=1 + +3.1.36 +====== + +Note that this release should be a no-op, it's mainly for testing the changed release-process. + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/66?closed=1 + +3.1.35 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/65?closed=1 + +3.1.34 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/64?closed=1 + +3.1.33 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/63?closed=1 + +3.1.32 +====== + +See the following for all changes. 
+https://github.com/gitpython-developers/gitpython/milestone/62?closed=1 + +3.1.31 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/61?closed=1 + +3.1.30 +====== + +- Make injections of command-invocations harder or impossible for clone and others. + See https://github.com/gitpython-developers/GitPython/pull/1518 for details. + Note that this might constitute a breaking change for some users, and if so please + let us know and we add an opt-out to this. +- Prohibit insecure options and protocols by default, which is potentially a breaking change, + but a necessary fix for https://github.com/gitpython-developers/GitPython/issues/1515. + Please take a look at the PR for more information and how to bypass these protections + in case they cause breakage: https://github.com/gitpython-developers/GitPython/pull/1521. + + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/60?closed=1 + + +3.1.29 +====== + +- Make the git.__version__ re-appear. + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/59?closed=1 + +3.1.28 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/58?closed=1 + +3.1.27 +====== + +- Reduced startup time due to optimized imports. +- Fix a vulnerability that could cause great slowdowns when encountering long remote path names + when pulling/fetching. + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/57?closed=1 + +3.1.26 +====== + +- Fixes a leaked file descriptor when reading the index, which would make writing a previously + read index on windows impossible. + See https://github.com/gitpython-developers/GitPython/issues/1395 for details. + +See the following for all changes.
+https://github.com/gitpython-developers/gitpython/milestone/56?closed=1 + + +3.1.25 +====== + +See the following for all changes. +https://github.com/gitpython-developers/gitpython/milestone/55?closed=1 + + +3.1.24 +====== + +* Newly added timeout flag is not be enabled by default, and was renamed to kill_after_timeout + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/54?closed=1 +https://github.com/gitpython-developers/gitpython/milestone/53?closed=1 + +3.1.23 (YANKED) +=============== + +* This is the second typed release with a lot of improvements under the hood. + +* General: + + - Remove python 3.6 support + + - Remove distutils ahead of deprecation in standard library. + + - Update sphinx to 4.1.12 and use autodoc-typehints. + + - Include README as long_description on PyPI + + - Test against earliest and latest minor version available on Github Actions (e.g. 3.9.0 and 3.9.7) + + +* Typing: + + - Add types to ALL functions. + + - Ensure py.typed is collected. + + - Increase mypy strictness with disallow_untyped_defs, warn_redundant_casts, warn_unreachable. + + - Use typing.NamedTuple and typing.OrderedDict now 3.6 dropped. + + - Make Protocol classes ABCs at runtime due to new behaviour/bug in 3.9.7 & 3.10.0-rc1 + + - Remove use of typing.TypeGuard until later release, to allow dependent libs time to update. + + - Tracking issue: https://github.com/gitpython-developers/GitPython/issues/1095 + +* Runtime improvements: + + - Add clone_multi_options support to submodule.add() + + - Delay calling get_user_id() unless essential, to support sand-boxed environments. + + - Add timeout to handle_process_output(), in case thread.join() hangs. + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/53?closed=1 + + +3.1.20 (YANKED) +=============== + +* This is the second typed release with a lot of improvements under the hood. 
+ * Tracking issue: https://github.com/gitpython-developers/GitPython/issues/1095 + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/52?closed=1 + + +3.1.19 (YANKED) +=============== + +* This is the second typed release with a lot of improvements under the hood. + * Tracking issue: https://github.com/gitpython-developers/GitPython/issues/1095 + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/51?closed=1 + +3.1.18 +====== + +* drop support for python 3.5 to reduce maintenance burden on typing. Lower patch levels of python 3.5 would break, too. + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/50?closed=1 + +3.1.17 +====== + +* Fix issues from 3.1.16 (see https://github.com/gitpython-developers/GitPython/issues/1238) +* Fix issues from 3.1.15 (see https://github.com/gitpython-developers/GitPython/issues/1223) +* Add more static typing information + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/49?closed=1 + +3.1.16 (YANKED) +=============== + +* Fix issues from 3.1.15 (see https://github.com/gitpython-developers/GitPython/issues/1223) +* Add more static typing information + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/48?closed=1 + +3.1.15 (YANKED) +=============== + +* add deprecation warning for python 3.5 + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/47?closed=1 + +3.1.14 +====== + +* git.Commit objects now have a ``replace`` method that will return a + copy of the commit with modified attributes. 
+* Add python 3.9 support +* Drop python 3.4 support + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/46?closed=1 + +3.1.13 +====== + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/45?closed=1 + +3.1.12 +====== + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/44?closed=1 + +3.1.11 +====== + +Fixes regression of 3.1.10. + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/43?closed=1 + +3.1.10 +====== + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/42?closed=1 + + +3.1.9 +===== + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/41?closed=1 + + +3.1.8 +===== + +* support for 'includeIf' in git configuration files +* tests are now excluded from the package, making it considerably smaller + + +See the following for more details: +https://github.com/gitpython-developers/gitpython/milestone/40?closed=1 + + +3.1.7 +===== + +* Fix tutorial examples, which disappeared in 3.1.6 due to a missed path change. + +3.1.6 +===== + +* Greatly reduced package size, see https://github.com/gitpython-developers/GitPython/pull/1031 + +3.1.5 +===== + +* rollback: package size was reduced significantly not placing tests into the package anymore. + See https://github.com/gitpython-developers/GitPython/issues/1030 + +3.1.4 +===== + +* all exceptions now keep track of their cause +* package size was reduced significantly not placing tests into the package anymore.
+ +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/39?closed=1 + +3.1.3 +===== + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/38?closed=1 + +3.1.2 +===== + +* Re-release of 3.1.1, with known signature + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/37?closed=1 + + +3.1.1 +===== + +* support for PyOxidizer, which previously failed due to usage of `__file__`. + +See the following for details: +https://github.com/gitpython-developers/gitpython/milestone/36?closed=1 + + +3.1.0 +===== + +* Switched back to using gitdb package as requirement + (`gitdb#59 `_) + +3.0.9 +===== + +* Restricted GitDB (gitdb2) version requirement to < 4 +* Removed old nose library from test requirements + +Bugfixes +-------- + +* Changed to use UTF-8 instead of default encoding when getting information about a symbolic reference + (`#774 `_) +* Fixed decoding of tag object message so as to replace invalid bytes + (`#943 `_) + +3.0.8 +===== + +* Added support for Python 3.8 +* Bumped GitDB (gitdb2) version requirement to > 3 + +Bugfixes +-------- + +* Fixed Repo.__repr__ when subclassed + (`#968 `_) +* Removed compatibility shims for Python < 3.4 and old mock library +* Replaced usage of deprecated unittest aliases and Logger.warn +* Removed old, no longer used assert methods +* Replaced usage of nose assert methods with unittest + +3.0.7 +===== + +Properly signed re-release of v3.0.6 with new signature +(See `#980 `_) + +3.0.6 +===== + +| Note: There was an issue that caused this version to be released to PyPI without a signature +| See the changelog for v3.0.7 and `#980 `_ + +Bugfixes +-------- + +* Fixed warning for usage of environment variables for paths containing ``$`` or ``%`` + (`#832 `_, + `#961 `_) +* Added support for parsing Git internal date format (@ ) + (`#965 `_) +* Removed Python 2 and < 3.3 compatibility shims + (`#979 `_) +* Fixed GitDB 
(gitdb2) requirement version specifier formatting in requirements.txt + (`#979 `_) + +3.0.5 - Bugfixes +============================================= + +see the following for details: +https://github.com/gitpython-developers/gitpython/milestone/32?closed=1 + +3.0.4 - Bugfixes +============================================= + +see the following for details: +https://github.com/gitpython-developers/gitpython/milestone/31?closed=1 + +3.0.3 - Bugfixes +============================================= + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/30?closed=1 + +3.0.2 - Bugfixes +============================================= + +* fixes an issue with installation + +3.0.1 - Bugfixes and performance improvements +============================================= + +* Fix a `performance regression `__ which could make certain workloads 50% slower +* Add `currently_rebasing_on` method on `Repo`, see `the PR `__ +* Fix incorrect `requirements.txt` which could lead to broken installations, see this `issue `__ for details. + +3.0.0 - Remove Python 2 support +=============================== + +Motivation for this is a patch which improves unicode handling when dealing with filesystem paths. +Python 2 compatibility was introduced to deal with differences, and I thought it would be a good idea +to 'just' drop support right now, mere 5 months away from the official maintenance stop of python 2.7. + +The underlying motivation clearly is my anger when thinking python and unicode, which was a hassle from the +start, at least in a codebase as old as GitPython, which totally doesn't handle encodings correctly in many cases. + +Having migrated to using `Rust` exclusively for tooling, I still see that correct handling of encodings isn't entirely +trivial, but at least `Rust` makes clear what has to be done at compile time, allowing to write software that is pretty +much guaranteed to work once it compiles. 
+ +Again, my apologies if removing Python 2 support caused inconveniences, please see release 2.1.13 which returns it. + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/27?closed=1 + +or run have a look at the difference between tags v2.1.12 and v3.0.0: +https://github.com/gitpython-developers/GitPython/compare/2.1.12...3.0.0. + +2.1.15 +====== + +* Fixed GitDB (gitdb2) requirement version specifier formatting in requirements.txt + (Backported from `#979 `_) +* Restricted GitDB (gitdb2) version requirement to < 3 + (`#897 `_) + +2.1.14 +====== + +* Fixed handling of 0 when transforming kwargs into Git command arguments + (Backported from `#899 `_) + +2.1.13 - Bring back Python 2.7 support +====================================== + +My apologies for any inconvenience this may have caused. Following semver, backward incompatible changes +will be introduced in a minor version. + +2.1.12 - Bugfixes and Features +============================== + +* Multi-value support and interface improvements for Git configuration. Thanks to A. Jesse Jiryu Davis. 
+ +or run have a look at the difference between tags v2.1.11 and v2.1.12: +https://github.com/gitpython-developers/GitPython/compare/2.1.11...2.1.12 + +2.1.11 - Bugfixes +================= + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/26?closed=1 + +or run have a look at the difference between tags v2.1.10 and v2.1.11: +https://github.com/gitpython-developers/GitPython/compare/2.1.10...2.1.11 + +2.1.10 - Bugfixes +================= + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/25?closed=1 + +or run have a look at the difference between tags v2.1.9 and v2.1.10: +https://github.com/gitpython-developers/GitPython/compare/2.1.9...2.1.10 + +2.1.9 - Dropping support for Python 2.6 +======================================= + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/24?closed=1 + +or run have a look at the difference between tags v2.1.8 and v2.1.9: +https://github.com/gitpython-developers/GitPython/compare/2.1.8...2.1.9 + + +2.1.8 - bugfixes +==================================== + +see the following for (most) details: +https://github.com/gitpython-developers/gitpython/milestone/23?closed=1 + +or run have a look at the difference between tags v2.1.7 and v2.1.8: +https://github.com/gitpython-developers/GitPython/compare/2.1.7...2.1.8 + +2.1.6 - bugfixes +==================================== + +* support for worktrees + +2.1.3 - Bugfixes +==================================== + +All issues and PRs can be viewed in all detail when following this URL: +https://github.com/gitpython-developers/GitPython/milestone/21?closed=1 + + +2.1.1 - Bugfixes +==================================== + +All issues and PRs can be viewed in all detail when following this URL: +https://github.com/gitpython-developers/GitPython/issues?q=is%3Aclosed+milestone%3A%22v2.1.1+-+Bugfixes%22 + + +2.1.0 - Much better windows support! 
+==================================== + +Special thanks to @ankostis, who made this release possible (nearly) single-handedly. +GitPython is run by its users, and their PRs make all the difference, they keep +GitPython relevant. Thank you all so much for contributing ! + +Notable fixes +------------- + +* The `GIT_DIR` environment variable does not override the `path` argument when + initializing a `Repo` object anymore. However, if said `path` unset, `GIT_DIR` + will be used to fill the void. + +All issues and PRs can be viewed in all detail when following this URL: +https://github.com/gitpython-developers/GitPython/issues?q=is%3Aclosed+milestone%3A%22v2.1.0+-+proper+windows+support%22 + + +2.0.9 - Bugfixes +============================= + +* `tag.commit` will now resolve commits deeply. +* `Repo` objects can now be pickled, which helps with multi-processing. +* `Head.checkout()` now deals with detached heads, which is when it will return + the `HEAD` reference instead. + +* `DiffIndex.iter_change_type(...)` produces better results when diffing + +2.0.8 - Features and Bugfixes +============================= + +* `DiffIndex.iter_change_type(...)` produces better results when diffing + an index against the working tree. +* `Repo().is_dirty(...)` now supports the `path` parameter, to specify a single + path by which to filter the output. Similar to `git status ` +* Symbolic refs created by this library will now be written with a newline + character, which was previously missing. +* `blame()` now properly preserves multi-line commit messages. +* No longer corrupt ref-logs by writing multi-line comments into them. + +2.0.7 - New Features +==================== + +* `IndexFile.commit(...,skip_hooks=False)` added. This parameter emulates the + behaviour of `--no-verify` on the command-line. 
+ +2.0.6 - Fixes and Features +========================== + +* Fix: remote output parser now correctly matches refs with non-ASCII + chars in them +* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`, + `raw_rename_to` properties, which are the raw-bytes equivalents of their + unicode path counterparts. +* Fix: TypeError about passing keyword argument to string decode() on + Python 2.6. +* Feature: `setUrl API on Remotes `__ + +2.0.5 - Fixes +============= + +* Fix: parser of fetch info lines choked on some legitimate lines + +2.0.4 - Fixes +============= + +* Fix: parser of commit object data is now robust against cases where + commit object contains invalid bytes. The invalid characters are now + replaced rather than choked on. +* Fix: non-ASCII paths are now properly decoded and returned in + ``.diff()`` output +* Fix: `RemoteProgress` will now strip the ', ' prefix or suffix from messages. +* API: Remote.[fetch|push|pull](...) methods now allow the ``progress`` argument to + be a callable. This saves you from creating a custom type with usually just one + implemented method. + +2.0.3 - Fixes +============= + +* Fix: bug in ``git-blame --incremental`` output parser that broke when + commit messages contained ``\r`` characters +* Fix: progress handler exceptions are not caught anymore, which would usually just hide bugs + previously. +* Fix: The `Git.execute` method will now redirect `stdout` to `devnull` if `with_stdout` is false, + which is the intended behaviour based on the parameter's documentation. + +2.0.2 - Fixes +============= + +* Fix: source package does not include \*.pyc files +* Fix: source package does include doc sources + +2.0.1 - Fixes +============= + +* Fix: remote output parser now correctly matches refs with "@" in them + +2.0.0 - Features +================ + +Please note that due to breaking changes, we have to increase the major version. 
+ +* **IMPORTANT**: This release drops support for python 2.6, which is + officially deprecated by the python maintainers. +* **CRITICAL**: `Diff` objects created with patch output will now not carry + the --- and +++ header lines anymore. All diffs now start with the + @@ header line directly. Users that rely on the old behaviour can now + (reliably) read this information from the a_path and b_path properties + without having to parse these lines manually. +* `Commit` now has extra properties `authored_datetime` and + `committer_datetime` (to get Python datetime instances rather than + timestamps) +* `Commit.diff()` now supports diffing the root commit via + `Commit.diff(NULL_TREE)`. +* `Repo.blame()` now respects `incremental=True`, supporting incremental + blames. Incremental blames are slightly faster since they don't include + the file's contents in them. +* Fix: `Diff` objects created with patch output will now have their + `a_path` and `b_path` properties parsed out correctly. Previously, some + values may have been populated incorrectly when a file was added or + deleted. +* Fix: diff parsing issues with paths that contain "unsafe" chars, like + spaces, tabs, backslashes, etc. + +1.0.2 - Fixes +============= + +* IMPORTANT: Changed default object database of `Repo` objects to `GitCmdObjectDB`. The pure-python implementation + used previously usually fails to release its resources (i.e. file handles), which can lead to problems when working + with large repositories. +* CRITICAL: fixed incorrect `Commit` object serialization when authored or commit date had timezones which were not + divisible by 3600 seconds. This would happen if the timezone was something like `+0530` for instance. +* A list of all additional fixes can be found `on GitHub `__ +* CRITICAL: `Tree.cache` was removed without replacement. It is technically impossible to change individual trees and expect their serialization results to be consistent with what *git* expects. 
Instead, use the `IndexFile` facilities to adjust the content of the staging area, and write it out to the respective tree objects using `IndexFile.write_tree()` instead. + +1.0.1 - Fixes +============= + +* A list of all issues can be found `on GitHub `__ + +1.0.0 - Notes +============= + +This version is equivalent to v0.3.7, but finally acknowledges that GitPython is stable and production ready. + +It follows the `semantic version scheme `_, and thus will not break its existing API unless it goes 2.0. + +0.3.7 - Fixes +============= +* `IndexFile.add()` will now write the index without any extension data by default. However, you may override this behaviour with the new `write_extension_data` keyword argument. + + - Renamed `ignore_tree_extension_data` keyword argument in `IndexFile.write(...)` to `ignore_extension_data` +* If the git command executed during `Remote.push(...)|fetch(...)` returns with an non-zero exit code and GitPython didn't + obtain any head-information, the corresponding `GitCommandError` will be raised. This may break previous code which expected + these operations to never raise. However, that behaviour is undesirable as it would effectively hide the fact that there + was an error. See `this issue `__ for more information. + +* If the git executable can't be found in the PATH or at the path provided by `GIT_PYTHON_GIT_EXECUTABLE`, this is made + obvious by throwing `GitCommandNotFound`, both on unix and on windows. + + - Those who support **GUI on windows** will now have to set `git.Git.USE_SHELL = True` to get the previous behaviour. + +* A list of all issues can be found `on GitHub `__ + + +0.3.6 - Features +================ +* **DOCS** + + * special members like `__init__` are now listed in the API documentation + * tutorial section was revised entirely, more advanced examples were added. 
+ +* **POSSIBLY BREAKING CHANGES** + + * As `rev_parse` will now throw `BadName` as well as `BadObject`, client code will have to catch both exception types. + * Repo.working_tree_dir now returns None if it is bare. Previously it raised AssertionError. + * IndexFile.add() previously raised AssertionError when paths where used with bare repository, now it raises InvalidGitRepositoryError + +* Added `Repo.merge_base()` implementation. See the `respective issue on GitHub `__ +* `[include]` sections in git configuration files are now respected +* Added `GitConfigParser.rename_section()` +* Added `Submodule.rename()` +* A list of all issues can be found `on GitHub `__ + +0.3.5 - Bugfixes +================ +* push/pull/fetch operations will not block anymore +* diff() can now properly detect renames, both in patch and raw format. Previously it only worked when create_patch was True. +* repo.odb.update_cache() is now called automatically after fetch and pull operations. In case you did that in your own code, you might want to remove your line to prevent a double-update that causes unnecessary IO. +* `Repo(path)` will not automatically search upstream anymore and find any git directory on its way up. If you need that behaviour, you can turn it back on using the new `search_parent_directories=True` flag when constructing a `Repo` object. +* IndexFile.commit() now runs the `pre-commit` and `post-commit` hooks. Verified to be working on posix systems only. +* A list of all fixed issues can be found here: https://github.com/gitpython-developers/GitPython/issues?q=milestone%3A%22v0.3.5+-+bugfixes%22+ + +0.3.4 - Python 3 Support +======================== +* Internally, hexadecimal SHA1 are treated as ascii encoded strings. Binary SHA1 are treated as bytes. +* Id attribute of Commit objects is now `hexsha`, instead of `binsha`. The latter makes no sense in python 3 and I see no application of it anyway besides its artificial usage in test cases. 
+* **IMPORTANT**: If you were using the config_writer(), you implicitly relied on __del__ to work as expected to flush changes. To be sure changes are flushed under PY3, you will have to call the new `release()` method to trigger a flush. For some reason, __del__ is not called necessarily anymore when a symbol goes out of scope. +* The `Tree` now has a `.join('name')` method which is equivalent to `tree / 'name'` + +0.3.3 +===== +* When fetching, pulling or pushing, and an error occurs, it will not be reported on stdout anymore. However, if there is a fatal error, it will still result in a GitCommandError to be thrown. This goes hand in hand with improved fetch result parsing. +* Code Cleanup (in preparation for python 3 support) + + * Applied autopep8 and cleaned up code + * Using python logging module instead of print statements to signal certain kinds of errors + 0.3.2.1 ======= * `Fix for #207 `_ -0.3.2 +0.3.2 ===== * Release of most recent version as non-RC build, just to allow pip to install the latest version right away. @@ -17,13 +787,13 @@ Changelog * **git** command wrapper * Added ``version_info`` property which returns a tuple of integers representing the installed git version. - + * Added GIT_PYTHON_GIT_EXECUTABLE environment variable, which can be used to set the desired git executable to be used. despite of what would be found in the path. - + * **Blob** Type * Added mode constants to ease the manual creation of blobs - + * **IterableList** * Added __contains__ and __delitem__ methods @@ -35,8 +805,8 @@ Changelog * Parsing of tags was improved. Previously some parts of the name could not be parsed properly. * The rev-parse pure python implementation now handles branches correctly if they look like hexadecimal sha's. * GIT_PYTHON_TRACE is now set on class level of the Git type, previously it was a module level global variable. - * GIT_PYTHON_GIT_EXECUTABLE is a class level variable as well. 
- + * GIT_PYTHON_GIT_EXECUTABLE is a class level variable as well. + 0.3.1 Beta 2 ============ @@ -45,7 +815,7 @@ Changelog * New types: ``RefLog`` and ``RefLogEntry`` * Reflog is maintained automatically when creating references and deleting them * Non-intrusive changes to ``SymbolicReference``, these don't require your code to change. They allow to append messages to the reflog. - + * ``abspath`` property added, similar to ``abspath`` of Object instances * ``log()`` method added * ``log_append(...)`` method added @@ -54,19 +824,19 @@ Changelog * ``set_object(...)`` method added (reflog support) * **Intrusive Changes** to ``Head`` type - + * ``create(...)`` method now supports the reflog, but will not raise ``GitCommandError`` anymore as it is a pure python implementation now. Instead, it raises ``OSError``. - + * **Intrusive Changes** to ``Repo`` type - + * ``create_head(...)`` method does not support kwargs anymore, instead it supports a logmsg parameter - + * Repo.rev_parse now supports the [ref]@{n} syntax, where *n* is the number of steps to look into the reference's past * **BugFixes** * Removed incorrect ORIG_HEAD handling - + * **Flattened directory** structure to make development more convenient. * .. note:: This alters the way projects using git-python as a submodule have to adjust their sys.path to be able to import git-python successfully. @@ -79,7 +849,7 @@ Changelog * Head Type changes * config_reader() & config_writer() methods added for access to head specific options. - * tracking_branch() & set_tracking_branch() methods addded for easy configuration of tracking branches. + * tracking_branch() & set_tracking_branch() methods added for easy configuration of tracking branches. 0.3.0 Beta 2 @@ -95,7 +865,7 @@ Renamed Modules * git.utils -> git.util * git.errors -> git.exc * git.objects.utils -> git.objects.util - + General ------- * Object instances, and everything derived from it, now use binary sha's internally. 
The 'sha' member was removed, in favor of the 'binsha' member. An 'hexsha' property is available for convenient conversions. They may only be initialized using their binary shas, reference names or revision specs are not allowed anymore. @@ -106,67 +876,67 @@ General * IndexFile.get_entries_key was renamed to entry_key * IndexFile.write_tree: removed missing_ok keyword, its always True now. Instead of raising GitCommandError it raises UnmergedEntriesError. This is required as the pure-python implementation doesn't support the missing_ok keyword yet. * diff.Diff.null_hex_sha renamed to NULL_HEX_SHA, to be conforming with the naming in the Object base class - + 0.2 Beta 2 =========== * Commit objects now carry the 'encoding' information of their message. It wasn't parsed previously, and defaults to UTF-8 - * Commit.create_from_tree now uses a pure-python implementation, mimicing git-commit-tree + * Commit.create_from_tree now uses a pure-python implementation, mimicking git-commit-tree 0.2 ===== General ------- -* file mode in Tree, Blob and Diff objects now is an int compatible to definintiions - in the stat module, allowing you to query whether individual user, group and other +* file mode in Tree, Blob and Diff objects now is an int compatible to definitions + in the stat module, allowing you to query whether individual user, group and other read, write and execute bits are set. 
* Adjusted class hierarchy to generally allow comparison and hash for Objects and Refs -* Improved Tag object which now is a Ref that may contain a tag object with additional +* Improved Tag object which now is a Ref that may contain a tag object with additional Information -* id_abbrev method has been removed as it could not assure the returned short SHA's +* id_abbrev method has been removed as it could not assure the returned short SHA's where unique * removed basename method from Objects with path's as it replicated features of os.path -* from_string and list_from_string methods are now private and were renamed to - _from_string and _list_from_string respectively. As part of the private API, they +* from_string and list_from_string methods are now private and were renamed to + _from_string and _list_from_string respectively. As part of the private API, they may change without prior notice. * Renamed all find_all methods to list_items - this method is part of the Iterable interface that also provides a more efficients and more responsive iter_items method -* All dates, like authored_date and committer_date, are stored as seconds since epoc - to consume less memory - they can be converted using time.gmtime in a more suitable +* All dates, like authored_date and committer_date, are stored as seconds since epoch + to consume less memory - they can be converted using time.gmtime in a more suitable presentation format if needed. -* Named method parameters changed on a wide scale to unify their use. Now git specific +* Named method parameters changed on a wide scale to unify their use. Now git specific terms are used everywhere, such as "Reference" ( ref ) and "Revision" ( rev ). - Prevously multiple terms where used making it harder to know which type was allowed + Previously multiple terms where used making it harder to know which type was allowed or not. 
* Unified diff interface to allow easy diffing between trees, trees and index, trees and working tree, index and working tree, trees and index. This closely follows the git-diff capabilities. -* Git.execute does not take the with_raw_output option anymore. It was not used +* Git.execute does not take the with_raw_output option anymore. It was not used by anyone within the project and False by default. - + Item Iteration -------------- -* Previously one would return and process multiple items as list only which can - hurt performance and memory consumption and reduce response times. - iter_items method provide an iterator that will return items on demand as parsed +* Previously one would return and process multiple items as list only which can + hurt performance and memory consumption and reduce response times. + iter_items method provide an iterator that will return items on demand as parsed from a stream. This way any amount of objects can be handled. * list_items method returns IterableList allowing to access list members by name - + objects Package ---------------- -* blob, tree, tag and commit module have been moved to new objects package. This should - not affect you though unless you explicitly imported individual objects. If you just +* blob, tree, tag and commit module have been moved to new objects package. This should + not affect you though unless you explicitly imported individual objects. If you just used the git package, names did not change. - + Blob ---- * former 'name' member renamed to path as it suits the actual data better GitCommand ----------- -* git.subcommand call scheme now prunes out None from the argument list, allowing - to be called more confortably as None can never be a valid to the git command +* git.subcommand call scheme now prunes out None from the argument list, allowing + to be called more comfortably as None can never be a valid to the git command if converted to a string. 
* Renamed 'git_dir' attribute to 'working_dir' which is exactly how it is used @@ -180,43 +950,43 @@ Config * The git configuration can now be read and manipulated directly from within python using the GitConfigParser * Repo.config_reader() returns a read-only parser -* Repo.config_writer() returns a read-write parser - +* Repo.config_writer() returns a read-write parser + Diff ---- * Members a a_commit and b_commit renamed to a_blob and b_blob - they are populated with Blob objects if possible * Members a_path and b_path removed as this information is kept in the blobs -* Diffs are now returned as DiffIndex allowing to more quickly find the kind of +* Diffs are now returned as DiffIndex allowing to more quickly find the kind of diffs you are interested in - + Diffing ------- -* Commit and Tree objects now support diffing natively with a common interface to - compare agains other Commits or Trees, against the working tree or against the index. +* Commit and Tree objects now support diffing natively with a common interface to + compare against other Commits or Trees, against the working tree or against the index. Index ----- * A new Index class allows to read and write index files directly, and to perform simple two and three way merges based on an arbitrary index. - -Referernces + +References ------------ * References are object that point to a Commit * SymbolicReference are a pointer to a Reference Object, which itself points to a specific Commit -* They will dynmically retrieve their object at the time of query to assure the information - is actual. Recently objects would be cached, hence ref object not be safely kept +* They will dynamically retrieve their object at the time of query to assure the information + is actual. Recently objects would be cached, hence ref object not be safely kept persistent. - + Repo ---- * Moved blame method from Blob to repo as it appeared to belong there much more. 
-* active_branch method now returns a Head object instead of a string with the name +* active_branch method now returns a Head object instead of a string with the name of the active branch. -* tree method now requires a Ref instance as input and defaults to the active_branche +* tree method now requires a Ref instance as input and defaults to the active_branch instead of master -* is_dirty now takes additional arguments allowing fine-grained control about what is +* is_dirty now takes additional arguments allowing fine-grained control about what is considered dirty * Removed the following methods: @@ -228,7 +998,7 @@ Repo - 'create' method which equals the 'init' method's functionality - 'diff' - it returned a mere string which still had to be parsed - 'commit_diff' - moved to Commit, Tree and Diff types respectively - + * Renamed the following methods: - commits to iter_commits to improve the performance, adjusted signature @@ -236,7 +1006,7 @@ Repo - fork_bare to clone, as it was to represent general clone functionality, but implied a bare clone to be more versatile - archive_tar_gz and archive_tar and replaced by archive method with different signature - + * 'commits' method has no max-count of returned commits anymore, it now behaves like git-rev-list * The following methods and properties were added @@ -247,16 +1017,16 @@ Repo - 'config_reader' method - 'config_writer' method - 'bare' property, previously it was a simple attribute that could be written - + * Renamed the following attributes - 'path' is now 'git_dir' - 'wd' is now 'working_dir' - + * Added attribute - 'working_tree_dir' which may be None in case of bare repositories - + Remote ------ * Added Remote object allowing easy access to remotes @@ -266,7 +1036,7 @@ Remote Test Framework -------------- * Added support for common TestCase base class that provides additional functionality - to receive repositories tests can also write to. 
This way, more aspects can be + to receive repositories tests can also write to. This way, more aspects can be tested under real-world ( un-mocked ) conditions. Tree @@ -274,7 +1044,7 @@ Tree * former 'name' member renamed to path as it suits the actual data better * added traverse method allowing to recursively traverse tree items * deleted blob method -* added blobs and trees properties allowing to query the respective items in the +* added blobs and trees properties allowing to query the respective items in the tree * now mimics behaviour of a read-only list instead of a dict to maintain order. * content_from_string method is now private and not part of the public API anymore @@ -290,9 +1060,9 @@ General * Removed ambiguity between paths and treeishs. When calling commands that accept treeish and path arguments and there is a path with the same name as a treeish git cowardly refuses to pick one and asks for the command to use - the unambiguous syntax where '--' seperates the treeish from the paths. + the unambiguous syntax where '--' separates the treeish from the paths. -* ``Repo.commits``, ``Repo.commits_between``, ``Reop.commits_since``, +* ``Repo.commits``, ``Repo.commits_between``, ``Repo.commits_since``, ``Repo.commit_count``, ``Repo.commit``, ``Commit.count`` and ``Commit.find_all`` all now optionally take a path argument which constrains the lookup by path. This changes the order of the positional @@ -413,14 +1183,14 @@ Git * Added support for ``stderr``, ``stdin``, and ``with_status``. -* ``git_dir`` is now optional in the constructor for ``git.Git``. Git now +* ``git_dir`` is now optional in the constructor for ``git.Git``. Git now falls back to ``os.getcwd()`` when git_dir is not specified. -* add a ``with_exceptions`` keyword argument to git commands. +* add a ``with_exceptions`` keyword argument to git commands. ``GitCommandError`` is raised when the exit status is non-zero. -* add support for a ``GIT_PYTHON_TRACE`` environment variable. 
- ``GIT_PYTHON_TRACE`` allows us to debug GitPython's usage of git through +* add support for a ``GIT_PYTHON_TRACE`` environment variable. + ``GIT_PYTHON_TRACE`` allows us to debug GitPython's usage of git through the use of an environment variable. Tree @@ -436,9 +1206,9 @@ Repo Tree ---- -* Corrected problem with ``Tree.__div__`` not working with zero length files. - Removed ``__len__`` override and replaced with size instead. Also made size - cach properly. This is a breaking change. +* Corrected problem with ``Tree.__div__`` not working with zero length files. + Removed ``__len__`` override and replaced with size instead. Also made size + cache properly. This is a breaking change. 0.1.1 ===== diff --git a/doc/source/conf.py b/doc/source/conf.py index 2da09c669..809762483 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,197 +1,188 @@ -# -*- coding: utf-8 -*- -# -# GitPython documentation build configuration file, created by +# GitPython documentation build configuration file, originally created by # sphinx-quickstart on Sat Jan 24 11:51:01 2009. # # This file is execfile()d with the current directory set to its containing dir. # -# The contents of this file are pickled, so don't put values in the namespace -# that aren't pickleable (module imports are okay, they're removed automatically). +# The contents of this file are pickled, so don't put values in the namespace that +# aren't pickleable (module imports are okay, they're removed automatically). # -# Note that not all possible configuration values are present in this -# autogenerated file. +# Note that not all possible configuration values are present in this autogenerated +# file. # -# All configuration values have a default; values that are commented out -# serve to show the default. +# All configuration values have a default; values that are commented out serve to show +# the default. -import sys, os +import os +import sys -# If your extensions are in another directory, add it here. 
If the directory -# is relative to the documentation root, use os.path.abspath to make it -# absolute, like shown here. -#sys.path.append(os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('../..')) -print sys.path +# If your extensions are in another directory, add it here. If the directory is relative +# to the documentation root, use os.path.abspath to make it absolute, like shown here. +# sys.path.append(os.path.abspath('.')) +sys.path.insert(0, os.path.abspath("../..")) # General configuration # --------------------- -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] +# Add any Sphinx extension module names here, as strings. They can be extensions coming +# with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['.templates'] +templates_path = [] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'GitPython' -copyright = u'Copyright (C) 2008, 2009 Michael Trier and contributors, 2010 Sebastian Thiel' +project = "GitPython" +copyright = "Copyright (C) 2008, 2009 Michael Trier and contributors, 2010-2015 Sebastian Thiel" -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. +# The version info for the project you're documenting, acts as replacement for |version| +# and |release|, also used in various other places throughout the built documents. # # The short X.Y version. 
-VERSION = open(os.path.join(os.path.dirname(__file__),"..", "..", 'VERSION')).readline().strip() +with open(os.path.join(os.path.dirname(__file__), "..", "..", "VERSION")) as fd: + VERSION = fd.readline().strip() version = VERSION # The full version, including alpha/beta/rc tags. release = VERSION -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None +# The language for content autogenerated by Sphinx. Refer to documentation for a list of +# supported languages. +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = ['build'] +# List of directories, relative to source directory, that shouldn't be searched for +# source files. +exclude_trees = ["build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True +# If true, the current module name will be prepended to all description unit titles +# (such as .. function::). +# add_module_names = True -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False +# If true, sectionauthor and moduleauthor directives will be shown in the output. +# They are ignored by default. 
+# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" + +manpages_url = "https://git-scm.com/docs/{page}" # Options for HTML output # ----------------------- -html_theme_options = { - "stickysidebar": "true" -} - -# The style sheet to use for HTML and HTML Help pages. A file of that name -# must exist either in Sphinx' static/ path, or in one of the custom paths -# given in html_static_path. -html_style = 'default.css' +html_theme = "sphinx_rtd_theme" +html_theme_options = {} -# The name for this set of Sphinx documents. If None, it defaults to -# "<project> v<release> documentation". -#html_title = None +# The name for this set of Sphinx documents. +# If None, it defaults to "<project> v<release> documentation". +# html_title = None -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None +# The name of an image file (relative to this directory) to place at the top of the +# sidebar. +# html_logo = None -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None +# The name of an image file (within the static path) to use as favicon of the docs. +# This file should be a Windows icon file (.ico) being 16x16 or 32x32 pixels large. +# html_favicon = None -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['.static'] +# Add any paths that contain custom static files (such as style sheets) here, relative +# to this directory. They are copied after the builtin static files, so a file named +# "default.css" will overwrite the builtin "default.css". +html_static_path = [] -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, using the +# given strftime format. +# html_last_updated_fmt = '%b %d, %Y' -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True +# If true, SmartyPants will be used to convert quotes and dashes to typographically +# correct entities. +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} +# Additional templates that should be rendered to pages, maps page names to template +# names. +# html_additional_pages = {} # If false, no module index is generated. -#html_use_modindex = True +# html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, the reST sources are included in the HTML build as _sources/. -#html_copy_source = True +# html_copy_source = True -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' +# If true, an OpenSearch description file will be output, and all pages will contain a +# tag referring to it. 
The value of this option must be the base URL from which +# the finished HTML is served. +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. -htmlhelp_basename = 'gitpythondoc' +htmlhelp_basename = "gitpythondoc" # Options for LaTeX output # ------------------------ # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). latex_documents = [ - ('index', 'GitPython.tex', ur'GitPython Documentation', - ur'Michael Trier', 'manual'), + ("index", "GitPython.tex", "GitPython Documentation", "Michael Trier", "manual"), ] -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None +# The name of an image file (relative to this directory) to place at the top of the +# title page. +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. 
-#latex_use_modindex = True +# latex_use_modindex = True diff --git a/doc/source/index.rst b/doc/source/index.rst index 1079c5c76..ca5229ac3 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -9,7 +9,7 @@ GitPython Documentation :maxdepth: 2 intro - whatsnew + quickstart tutorial reference roadmap @@ -21,4 +21,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/source/intro.rst b/doc/source/intro.rst index 8dac28047..d053bd117 100644 --- a/doc/source/intro.rst +++ b/doc/source/intro.rst @@ -13,43 +13,38 @@ The object database implementation is optimized for handling large quantities of Requirements ============ +* `Python`_ >= 3.7 * `Git`_ 1.7.0 or newer It should also work with older versions, but it may be that some operations involving remotes will not work as expected. * `GitDB`_ - a pure python git database implementation +* `typing_extensions`_ >= 3.7.3.4 (if python < 3.10) - * `async`_ - asynchronous task scheduling - -* `Python Nose`_ - used for running the tests -* `Mock by Michael Foord`_ used for tests. Requires version 0.5 - -.. _Git: http://git-scm.com/ -.. _Python Nose: http://code.google.com/p/python-nose/ -.. _Mock by Michael Foord: http://www.voidspace.org.uk/python/mock.html -.. _GitDB: http://pypi.python.org/pypi/gitdb -.. _async: http://pypi.python.org/pypi/async +.. _Python: https://www.python.org +.. _Git: https://git-scm.com/ +.. _GitDB: https://pypi.python.org/pypi/gitdb +.. _typing_extensions: https://pypi.org/project/typing-extensions/ Installing GitPython ==================== Installing GitPython is easily done using -`setuptools`_. Assuming it is +`pip`_. Assuming it is installed, just run the following from the command-line: .. sourcecode:: none - # easy_install GitPython + # pip install GitPython This command will download the latest version of GitPython from the `Python Package Index `_ and install it -to your system. 
More information about ``easy_install`` and pypi can be found +to your system. More information about ``pip`` and pypi can be found here: -* `setuptools`_ -* `install setuptools `_ +* `install pip `_ * `pypi `_ -.. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools +.. _pip: https://pip.pypa.io/en/latest/installing.html Alternatively, you can install from the distribution using the ``setup.py`` script: @@ -57,8 +52,24 @@ script: .. sourcecode:: none # python setup.py install - -.. note:: In this case, you have to manually install `GitDB`_ and `async`_ as well. It would be recommended to use the :ref:`git source repository ` in that case. + +.. note:: In this case, you have to manually install `GitDB`_ as well. It would be recommended to use the :ref:`git source repository ` in that case. + +Limitations +=========== + +Leakage of System Resources +--------------------------- + +GitPython is not suited for long-running processes (like daemons) as it tends to +leak system resources. It was written at a time when destructors (as implemented +in the `__del__` method) still ran deterministically. + +In case you still want to use it in such a context, you will want to search the +codebase for `__del__` implementations and call these yourself when you see fit. + +Another way to assure proper cleanup of resources is to factor out GitPython into a +separate process which can be dropped periodically. Getting Started =============== @@ -71,7 +82,7 @@ Getting Started API Reference ============= -An organized section of the GitPthon API is at :ref:`api_reference_toplevel`. +An organized section of the GitPython API is at :ref:`api_reference_toplevel`. .. 
_source-code-label: @@ -84,29 +95,30 @@ GitPython's git repo is available on GitHub, which can be browsed at: and cloned using:: - $ git clone git://github.com/gitpython-developers/GitPython.git git-python - + $ git clone https://github.com/gitpython-developers/GitPython git-python + Initialize all submodules to obtain the required dependencies with:: - + $ cd git-python $ git submodule update --init --recursive - -Finally verify the installation by running the `nose powered `_ unit tests:: - - $ nosetests - -Mailing List -============ -http://groups.google.com/group/git-python + +Finally verify the installation by running unit tests:: + + $ python -m unittest + +Questions and Answers +===================== +Please use stackoverflow for questions, and don't forget to tag it with `gitpython` to assure the right people see the question in a timely manner. + +http://stackoverflow.com/questions/tagged/gitpython Issue Tracker ============= -The issue tracker is hosted by github: +The issue tracker is hosted by GitHub: https://github.com/gitpython-developers/GitPython/issues - + License Information =================== GitPython is licensed under the New BSD License. See the LICENSE file for more information. - diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst new file mode 100644 index 000000000..c5930eb8a --- /dev/null +++ b/doc/source/quickstart.rst @@ -0,0 +1,244 @@ +.. _quickdoc_toplevel: + +.. highlight:: python + +.. _quickdoc-label: + +============================== +GitPython Quick Start Tutorial +============================== +Welcome to the GitPython Quickstart Guide! Designed for developers seeking a practical and interactive learning experience, this concise resource offers step-by-step code snippets to swiftly initialize/clone repositories, perform essential Git operations, and explore GitPython's capabilities. Get ready to dive in, experiment, and unleash the power of GitPython in your projects! 
+ + +git.Repo +******** + +There are a few ways to create a :class:`git.Repo ` object + +Initialize a new git Repo +######################### + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [1-test_init_repo_object] + :end-before: # ![1-test_init_repo_object] + +Existing local git Repo +####################### + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [2-test_init_repo_object] + :end-before: # ![2-test_init_repo_object] + +Clone from URL +############## + +For the rest of this tutorial we will use a clone from https://github.com/gitpython-developers/QuickStartTutorialFiles.git + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [1-test_cloned_repo_object] + :end-before: # ![1-test_cloned_repo_object] + + +Trees & Blobs +************** + +Latest Commit Tree +################## + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [12-test_cloned_repo_object] + :end-before: # ![12-test_cloned_repo_object] + +Any Commit Tree +############### + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [13-test_cloned_repo_object] + :end-before: # ![13-test_cloned_repo_object] + +Display level 1 Contents +######################## + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [14-test_cloned_repo_object] + :end-before: # ![14-test_cloned_repo_object] + +Recurse through the Tree +######################## + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [15-test_cloned_repo_object] + :end-before: # ![15-test_cloned_repo_object] + + .. 
literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [16-test_cloned_repo_object] + :end-before: # ![16-test_cloned_repo_object] + + + + +Usage +**************** + +Add file to staging area +######################## + + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [2-test_cloned_repo_object] + :end-before: # ![2-test_cloned_repo_object] + + Now let's add the updated file to git + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [3-test_cloned_repo_object] + :end-before: # ![3-test_cloned_repo_object] + + Notice the add method requires a list as a parameter + + Warning: If you experience any trouble with this, try to invoke :class:`git ` instead via ``repo.git.add(path)`` + +Commit +###### + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [4-test_cloned_repo_object] + :end-before: # ![4-test_cloned_repo_object] + +List of commits associated with a file +####################################### + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [5-test_cloned_repo_object] + :end-before: # ![5-test_cloned_repo_object] + + Notice this returns a generator object + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [6-test_cloned_repo_object] + :end-before: # ![6-test_cloned_repo_object] + + returns list of :class:`Commit ` objects + +Printing text files +#################### +Let's print the latest version of `/dir1/file2.txt` + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [17-test_cloned_repo_object] + :end-before: # ![17-test_cloned_repo_object] + + .. 
literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [18-test_cloned_repo_object] + :end-before: # ![18-test_cloned_repo_object] + + Previous version of `/dir1/file2.txt` + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [18.1-test_cloned_repo_object] + :end-before: # ![18.1-test_cloned_repo_object] + +Status +###### + * Untracked files + + Let's create a new file + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [7-test_cloned_repo_object] + :end-before: # ![7-test_cloned_repo_object] + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [8-test_cloned_repo_object] + :end-before: # ![8-test_cloned_repo_object] + + * Modified files + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [9-test_cloned_repo_object] + :end-before: # ![9-test_cloned_repo_object] + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [10-test_cloned_repo_object] + :end-before: # ![10-test_cloned_repo_object] + + returns a list of :class:`Diff ` objects + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [11-test_cloned_repo_object] + :end-before: # ![11-test_cloned_repo_object] + +Diffs +###### + +Compare staging area to head commit + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [11.1-test_cloned_repo_object] + :end-before: # ![11.1-test_cloned_repo_object] + + .. literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [11.2-test_cloned_repo_object] + :end-before: # ![11.2-test_cloned_repo_object] + +Compare commit to commit + + .. 
literalinclude:: ../../test/test_quick_doc.py + :language: python + :dedent: 8 + :start-after: # [11.3-test_cloned_repo_object] + :end-before: # ![11.3-test_cloned_repo_object] + + +More Resources +**************** + +Remember, this is just the beginning! There's a lot more you can achieve with GitPython in your development workflow. +To explore further possibilities and discover advanced features, check out the full :ref:`GitPython tutorial ` +and the :ref:`API Reference `. Happy coding! diff --git a/doc/source/reference.rst b/doc/source/reference.rst index 7adc53287..13dd38d02 100644 --- a/doc/source/reference.rst +++ b/doc/source/reference.rst @@ -3,33 +3,47 @@ API Reference ============= +Top-Level +--------- + +.. py:data:: git.__version__ + + Current GitPython version. + +.. automodule:: git + :members: refresh + Objects.Base ------------ .. automodule:: git.objects.base :members: - :undoc-members: - + :undoc-members: + :special-members: + Objects.Blob ------------ .. automodule:: git.objects.blob :members: :undoc-members: - + :special-members: + Objects.Commit -------------- .. automodule:: git.objects.commit :members: :undoc-members: - + :special-members: + Objects.Tag ----------- .. automodule:: git.objects.tag :members: :undoc-members: + :special-members: Objects.Tree ------------ @@ -37,6 +51,7 @@ Objects.Tree .. automodule:: git.objects.tree :members: :undoc-members: + :special-members: Objects.Functions ----------------- @@ -44,6 +59,7 @@ Objects.Functions .. automodule:: git.objects.fun :members: :undoc-members: + :special-members: Objects.Submodule.base ---------------------- @@ -51,6 +67,7 @@ Objects.Submodule.base .. automodule:: git.objects.submodule.base :members: :undoc-members: + :special-members: Objects.Submodule.root ---------------------- @@ -58,20 +75,23 @@ Objects.Submodule.root .. automodule:: git.objects.submodule.root :members: :undoc-members: - + :special-members: + Objects.Submodule.util ---------------------- .. 
automodule:: git.objects.submodule.util :members: :undoc-members: - + :special-members: + Objects.Util ------------- .. automodule:: git.objects.util :members: :undoc-members: + :special-members: Index.Base ---------- @@ -79,6 +99,7 @@ Index.Base .. automodule:: git.index.base :members: :undoc-members: + :special-members: Index.Functions --------------- @@ -86,28 +107,31 @@ Index.Functions .. automodule:: git.index.fun :members: :undoc-members: - + :special-members: + Index.Types ----------- .. automodule:: git.index.typ :members: :undoc-members: - + :special-members: + Index.Util ------------- .. automodule:: git.index.util :members: :undoc-members: - + :special-members: + GitCmd ------ .. automodule:: git.cmd :members: :undoc-members: - + :special-members: Config ------ @@ -115,13 +139,15 @@ Config .. automodule:: git.config :members: :undoc-members: - + :special-members: + Diff ---- .. automodule:: git.diff :members: :undoc-members: + :special-members: Exceptions ---------- @@ -129,21 +155,24 @@ Exceptions .. automodule:: git.exc :members: :undoc-members: + :special-members: + - Refs.symbolic ------------- .. automodule:: git.refs.symbolic :members: :undoc-members: - + :special-members: + Refs.reference -------------- .. automodule:: git.refs.reference :members: :undoc-members: + :special-members: Refs.head --------- @@ -151,34 +180,39 @@ Refs.head .. automodule:: git.refs.head :members: :undoc-members: - + :special-members: + Refs.tag ------------ .. automodule:: git.refs.tag :members: :undoc-members: - + :special-members: + Refs.remote ------------ .. automodule:: git.refs.remote :members: :undoc-members: - + :special-members: + Refs.log ------------ .. automodule:: git.refs.log :members: :undoc-members: - + :special-members: + Remote ------ .. automodule:: git.remote :members: :undoc-members: + :special-members: Repo.Base --------- @@ -186,13 +220,39 @@ Repo.Base .. 
automodule:: git.repo.base :members: :undoc-members: - + :special-members: + Repo.Functions -------------- .. automodule:: git.repo.fun :members: :undoc-members: + :special-members: + +Compat +------ + +.. automodule:: git.compat + :members: + :undoc-members: + :special-members: + +DB +-- + +.. automodule:: git.db + :members: + :undoc-members: + :special-members: + +Types +----- + +.. automodule:: git.types + :members: + :undoc-members: + :special-members: Util ---- @@ -200,3 +260,4 @@ Util .. automodule:: git.util :members: :undoc-members: + :special-members: diff --git a/doc/source/roadmap.rst b/doc/source/roadmap.rst index f93d5e65b..34c953626 100644 --- a/doc/source/roadmap.rst +++ b/doc/source/roadmap.rst @@ -2,8 +2,7 @@ ####### Roadmap ####### -The full list of milestones including associated tasks can be found on github: +The full list of milestones including associated tasks can be found on GitHub: https://github.com/gitpython-developers/GitPython/issues Select the respective milestone to filter the list of issues accordingly. - diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index d9b35fda9..fd3b14c57 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -8,424 +8,517 @@ GitPython Tutorial ================== -GitPython provides object model access to your git repository. This tutorial is composed of multiple sections, each of which explains a real-life usecase. - -Initialize a Repo object -************************ - -The first step is to create a ``Repo`` object to represent your repository:: - - from git import * - repo = Repo("/Users/mtrier/Development/git-python") - assert repo.bare == False - -In the above example, the directory ``/Users/mtrier/Development/git-python`` is my working repository and contains the ``.git`` directory. 
You can also initialize GitPython with a *bare* repository:: - - repo = Repo.init("/var/git/git-python.git", bare=True) - assert repo.bare == True - -A repo object provides high-level access to your data, it allows you to create and delete heads, tags and remotes and access the configuration of the repository:: - - repo.config_reader() # get a config reader for read-only access - repo.config_writer() # get a config writer to change configuration - -Query the active branch, query untracked files or whether the repository data has been modified:: - - repo.is_dirty() - False - repo.untracked_files - ['my_untracked_file'] - -Clone from existing repositories or initialize new empty ones:: - - cloned_repo = repo.clone("to/this/path") - new_repo = Repo.init("path/for/new/repo") - -Archive the repository contents to a tar file:: - - repo.archive(open("repo.tar",'w')) - - -Object Databases -**************** -``Repo`` instances are powered by its object database instance which will be used when extracting any data, or when writing new objects. +GitPython provides object model access to your git repository. This tutorial is composed of multiple sections, most of which explain a real-life use case. -The type of the database determines certain performance characteristics, such as the quantity of objects that can be read per second, the resource usage when reading large data files, as well as the average memory footprint of your application. +All code presented here originated from `test_docs.py `_ to assure correctness. Knowing this should also allow you to more easily run the code for your own testing purposes. All you need is a developer installation of git-python. + +Meet the Repo type +****************** + +The first step is to create a :class:`git.Repo ` object to represent your repository. + +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [1-test_init_repo_object] + :end-before: # ![1-test_init_repo_object] + +In the above example, the directory ``self.rorepo.working_tree_dir`` equals ``/Users/mtrier/Development/git-python`` and is my working repository which contains the ``.git`` directory. You can also initialize GitPython with a *bare* repository. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [2-test_init_repo_object] + :end-before: # ![2-test_init_repo_object] + +A repo object provides high-level access to your data, it allows you to create and delete heads, tags and remotes and access the configuration of the repository. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [3-test_init_repo_object] + :end-before: # ![3-test_init_repo_object] + +Query the active branch, query untracked files or whether the repository data has been modified. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [4-test_init_repo_object] + :end-before: # ![4-test_init_repo_object] + +Clone from existing repositories or initialize new empty ones. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [5-test_init_repo_object] + :end-before: # ![5-test_init_repo_object] + +Archive the repository contents to a tar file. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [6-test_init_repo_object] + :end-before: # ![6-test_init_repo_object] + +Advanced Repo Usage +=================== + +And of course, there is much more you can do with this type, most of the following will be explained in greater detail in specific tutorials. Don't worry if you don't understand some of these examples right away, as they may require a thorough understanding of git's inner workings. + +Query relevant repository paths ... 
+ +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [7-test_init_repo_object] + :end-before: # ![7-test_init_repo_object] + +:class:`Heads ` are branches in git-speak. :class:`References ` are pointers to a specific commit or to other references. Heads and :class:`Tags ` are kinds of references. GitPython allows you to query them rather intuitively. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [8-test_init_repo_object] + :end-before: # ![8-test_init_repo_object] + +You can also create new heads ... + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [9-test_init_repo_object] + :end-before: # ![9-test_init_repo_object] + +... and tags ... + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [10-test_init_repo_object] + :end-before: # ![10-test_init_repo_object] + +You can traverse down to :class:`git objects ` through references and other objects. Some objects like :class:`commits ` have additional meta-data to query. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [11-test_init_repo_object] + :end-before: # ![11-test_init_repo_object] + +:class:`Remotes ` allow you to handle fetch, pull and push operations, while providing optional real-time progress information to :class:`progress delegates `. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [12-test_init_repo_object] + :end-before: # ![12-test_init_repo_object] + +The :class:`index ` is also called stage in git-speak. It is used to prepare new commits, and can be used to keep results of merge operations. Our index implementation allows you to stream data into the index, which is useful for bare repositories that do not have a working tree. + +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [13-test_init_repo_object] + :end-before: # ![13-test_init_repo_object] + +:class:`Submodules ` represent all aspects of git submodules, which allows you to query all of their related information, and manipulate them in various ways. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [14-test_init_repo_object] + :end-before: # ![14-test_init_repo_object] -GitDB -===== -The GitDB is a pure-python implementation of the git object database. It is the default database to use in GitPython 0.3. Its uses less memory when handling huge files, but will be 2 to 5 times slower when extracting large quantities small of objects from densely packed repositories:: - - repo = Repo("path/to/repo", odbt=GitDB) -GitCmdObjectDB -============== -The git command database uses persistent git-cat-file instances to read repository information. These operate very fast under all conditions, but will consume additional memory for the process itself. 
When extracting large files, memory usage will be much higher than the one of the ``GitDB``:: - - repo = Repo("path/to/repo", odbt=GitCmdObjectDB) - Examining References ******************** -References are the tips of your commit graph from which you can easily examine the history of your project:: - - heads = repo.heads - master = heads.master # lists can be accessed by name for convenience - master.commit # the commit pointed to by head called master - master.rename("new_name") # rename heads - -Tags are (usually immutable) references to a commit and/or a tag object:: - - tags = repo.tags - tagref = tags[0] - tagref.tag # tags may have tag objects carrying additional information - tagref.commit # but they always point to commits - repo.delete_tag(tagref) # delete or - repo.create_tag("my_tag") # create tags using the repo for convenience - -A symbolic reference is a special case of a reference as it points to another reference instead of a commit:: - - head = repo.head # the head points to the active branch/ref - master = head.reference # retrieve the reference the head points to - master.commit # from here you use it as any other reference - -Access the reflog easily:: - - log = master.log() - log[0] # first (i.e. oldest) reflog entry - log[-1] # last (i.e. most recent) reflog entry - -For more information on the reflog, see the ``RefLog`` type's documentation. +:class:`References ` are the tips of your commit graph from which you can easily examine the history of your project. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [1-test_references_and_objects] + :end-before: # ![1-test_references_and_objects] + +:class:`Tags ` are (usually immutable) references to a commit and/or a tag object. + +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [2-test_references_and_objects] + :end-before: # ![2-test_references_and_objects] + +A :class:`symbolic reference ` is a special case of a reference as it points to another reference instead of a commit. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [3-test_references_and_objects] + :end-before: # ![3-test_references_and_objects] + +Access the :class:`reflog ` easily. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [4-test_references_and_objects] + :end-before: # ![4-test_references_and_objects] Modifying References ******************** -You can easily create and delete reference types or modify where they point to:: +You can easily create and delete :class:`reference types ` or modify where they point to. - repo.delete_head('master') # delete an existing head - master = repo.create_head('master') # create a new one - master.commit = 'HEAD~10' # set branch to another commit without changing index or working tree +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [5-test_references_and_objects] + :end-before: # ![5-test_references_and_objects] -Create or delete tags the same way except you may not change them afterwards:: +Create or delete :class:`tags ` the same way except you may not change them afterwards. - new_tag = repo.create_tag('my_tag', 'my message') - repo.delete_tag(new_tag) - -Change the symbolic reference to switch branches cheaply ( without adjusting the index or the working copy ):: +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [6-test_references_and_objects] + :end-before: # ![6-test_references_and_objects] - new_branch = repo.create_head('new_branch') - repo.head.reference = new_branch +Change the :class:`symbolic reference ` to switch branches cheaply (without adjusting the index or the working tree). + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [7-test_references_and_objects] + :end-before: # ![7-test_references_and_objects] Understanding Objects ********************* -An Object is anything storable in git's object database. Objects contain information about their type, their uncompressed size as well as the actual data. Each object is uniquely identified by a binary SHA1 hash, being 20 bytes in size. - -Git only knows 4 distinct object types being Blobs, Trees, Commits and Tags. - -In Git-Python, all objects can be accessed through their common base, compared and hashed. They are usually not instantiated directly, but through references or specialized repository functions:: - - hc = repo.head.commit - hct = hc.tree - hc != hct - hc != repo.tags[0] - hc == repo.head.reference.commit - -Common fields are:: - - hct.type - 'tree' - hct.size - 166 - hct.hexsha - 'a95eeb2a7082212c197cabbf2539185ec74ed0e8' - hct.binsha - 'binary 20 byte sha1' - -Index Objects are objects that can be put into git's index. 
These objects are trees, blobs and submodules which additionally know about their path in the filesystem as well as their mode:: - - hct.path # root tree has no path - '' - hct.trees[0].path # the first subdirectory has one though - 'dir' - htc.mode # trees have the mode of a linux directory - 040000 - '%o' % htc.blobs[0].mode # blobs have a specific mode though comparable to a standard linux fs - 100644 - -Access blob data (or any object data) directly or using streams:: - - htc.blobs[0].data_stream.read() # stream object to read data from - htc.blobs[0].stream_data(open("blob_data", "w")) # write data to given stream - - -The Commit object -***************** +An Object is anything storable in git's object database. Objects contain information about their type, their uncompressed size as well as the actual data. Each object is uniquely identified by a binary SHA1 hash, being 20 bytes in size, or 40 bytes in hexadecimal notation. -Commit objects contain information about a specific commit. Obtain commits using references as done in `Examining References`_ or as follows. +Git only knows 4 distinct object types being :class:`Blobs `, :class:`Trees `, :class:`Commits ` and :class:`Tags `. -Obtain commits at the specified revision:: +In GitPython, all objects can be accessed through their common base, can be compared and hashed. They are usually not instantiated directly, but through references or specialized repository functions. - repo.commit('master') - repo.commit('v0.1') - repo.commit('HEAD~10') +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [8-test_references_and_objects] + :end-before: # ![8-test_references_and_objects] -Iterate 100 commits:: +Common fields are ... - repo.iter_commits('master', max_count=100) +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [9-test_references_and_objects] + :end-before: # ![9-test_references_and_objects] -If you need paging, you can specify a number of commits to skip:: +:class:`Index objects ` are objects that can be put into git's index. These objects are trees, blobs and submodules which additionally know about their path in the file system as well as their mode. - repo.iter_commits('master', max_count=10, skip=20) +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [10-test_references_and_objects] + :end-before: # ![10-test_references_and_objects] -The above will return commits 21-30 from the commit list.:: +Access :class:`blob ` data (or any object data) using streams. - headcommit = repo.head.commit +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [11-test_references_and_objects] + :end-before: # ![11-test_references_and_objects] - headcommit.hexsha - '207c0c4418115df0d30820ab1a9acd2ea4bf4431' - headcommit.parents - (,) +The Commit object +***************** - headcommit.tree - +:class:`Commit ` objects contain information about a specific commit. Obtain commits using references as done in `Examining References`_ or as follows. - headcommit.author - "> +Obtain commits at the specified revision - headcommit.authored_date # seconds since epoch - 1256291446 +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [12-test_references_and_objects] + :end-before: # ![12-test_references_and_objects] - headcommit.committer - "> +Iterate 50 commits, and if you need paging, you can specify a number of commits to skip. - headcommit.committed_date - 1256291446 +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [13-test_references_and_objects] + :end-before: # ![13-test_references_and_objects] - headcommit.message - 'cleaned up a lot of test information. Fixed escaping so it works with - subprocess.' +A commit object carries all sorts of meta-data -Note: date time is represented in a ``seconds since epoch`` format. Conversion to human readable form can be accomplished with the various `time module `_ methods:: +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [14-test_references_and_objects] + :end-before: # ![14-test_references_and_objects] - import time - time.asctime(time.gmtime(headcommit.committed_date)) - 'Wed May 7 05:56:02 2008' +Note: date time is represented in a ``seconds since epoch`` format. Conversion to human readable form can be accomplished with the various `time module `_ methods. - time.strftime("%a, %d %b %Y %H:%M", time.gmtime(headcommit.committed_date)) - 'Wed, 7 May 2008 05:56' +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [15-test_references_and_objects] + :end-before: # ![15-test_references_and_objects] -You can traverse a commit's ancestry by chaining calls to ``parents``:: +You can traverse a commit's ancestry by chaining calls to ``parents`` - headcommit.parents[0].parents[0].parents[0] +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [16-test_references_and_objects] + :end-before: # ![16-test_references_and_objects] The above corresponds to ``master^^^`` or ``master~3`` in git parlance. The Tree object *************** -A tree records pointers to the contents of a directory. Let's say you want the root tree of the latest commit on the master branch:: - - tree = repo.heads.master.commit.tree - +A :class:`tree ` records pointers to the contents of a directory. 
Let's say you want the root tree of the latest commit on the master branch - tree.hexsha - 'a006b5b1a8115185a228b7514cdcd46fed90dc92' +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [17-test_references_and_objects] + :end-before: # ![17-test_references_and_objects] -Once you have a tree, you can get the contents:: +Once you have a tree, you can get its contents - tree.trees # trees are subdirectories - [] - - tree.blobs # blobs are files - [, - , - , - ] +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [18-test_references_and_objects] + :end-before: # ![18-test_references_and_objects] -Its useful to know that a tree behaves like a list with the ability to query entries by name:: +It is useful to know that a tree behaves like a list with the ability to query entries by name - tree[0] == tree['dir'] # access by index and by sub-path - - for entry in tree: do_something_with(entry) +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [19-test_references_and_objects] + :end-before: # ![19-test_references_and_objects] - blob = tree[0][0] - blob.name - 'file' - blob.path - 'dir/file' - blob.abspath - '/Users/mtrier/Development/git-python/dir/file' - >>>tree['dir/file'].binsha == blob.binsha +There is a convenience method that allows you to get a named sub-object from a tree with a syntax similar to how paths are written in a posix system -There is a convenience method that allows you to get a named sub-object from a tree with a syntax similar to how paths are written in an unix system:: +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [20-test_references_and_objects] + :end-before: # ![20-test_references_and_objects] - tree/"lib" - - tree/"dir/file" == blob +You can also get a commit's root tree directly from the repository -You can also get a tree directly from the repository if you know its name:: +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [21-test_references_and_objects] + :end-before: # ![21-test_references_and_objects] - repo.tree() - +As trees allow direct access to their intermediate child entries only, use the traverse method to obtain an iterator to retrieve entries recursively - repo.tree("c1c7214dde86f76bc3e18806ac1f47c38b2b7a30") - - repo.tree('0.1.6') - - -As trees only allow direct access to their direct entries, use the traverse method to obtain an iterator to traverse entries recursively:: +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [22-test_references_and_objects] + :end-before: # ![22-test_references_and_objects] - tree.traverse() - - for entry in tree.traverse(): do_something_with(entry) - - -.. note:: If tree's return Submodule objects, they will assume that they exist at the current head's commit. The tree it originated from may be rooted at another commit though, which has to be told to the Submodule object using its ``set_parent_commit(my_commit)`` method. +.. note:: If trees return Submodule objects, they will assume that they exist at the current head's commit. The tree it originated from may be rooted at another commit though, that it doesn't know. That is why the caller would have to set the submodule's owning or parent commit using the ``set_parent_commit(my_commit)`` method. - The Index Object **************** -The git index is the stage containing changes to be written with the next commit or where merges finally have to take place. 
You may freely access and manipulate this information using the IndexFile Object:: - - index = repo.index - -Access objects and add/remove entries. Commit the changes:: - - for stage, blob in index.iter_blobs(): do_something(...) - # Access blob objects - for (path, stage), entry in index.entries.iteritems: pass - # Access the entries directly - index.add(['my_new_file']) # add a new file to the index - index.remove(['dir/existing_file']) - new_commit = index.commit("my commit message") - -Create new indices from other trees or as result of a merge. Write that result to a new index file:: - - tmp_index = Index.from_tree(repo, 'HEAD~1') # load a tree into a temporary index - merge_index = Index.from_tree(repo, 'base', 'HEAD', 'some_branch') # merge two trees three-way - merge_index.write("merged_index") - +The git index is the stage containing changes to be written with the next commit or where merges finally have to take place. You may freely access and manipulate this information using the :class:`IndexFile ` object. +Modify the index with ease + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [23-test_references_and_objects] + :end-before: # ![23-test_references_and_objects] + +Create new indices from other trees or as result of a merge. Write that result to a new index file for later inspection. + +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [24-test_references_and_objects] + :end-before: # ![24-test_references_and_objects] + Handling Remotes **************** -Remotes are used as alias for a foreign repository to ease pushing to and fetching from them:: - - test_remote = repo.create_remote('test', 'git@server:repo.git') - repo.delete_remote(test_remote) # create and delete remotes - origin = repo.remotes.origin # get default remote by name - origin.refs # local remote references - o = origin.rename('new_origin') # rename remotes - o.fetch() # fetch, pull and push from and to the remote - o.pull() - o.push() - -You can easily access configuration information for a remote by accessing options as if they where attributes:: - - o.url - 'git@server:dummy_repo.git' - -Change configuration for a specific remote only:: - - o.config_writer.set("pushurl", "other_url") - - +:class:`Remotes ` are used as alias for a foreign repository to ease pushing to and fetching from them + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [25-test_references_and_objects] + :end-before: # ![25-test_references_and_objects] + +You can easily access configuration information for a remote by accessing options as if they were attributes. The modification of remote configuration is more explicit though. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [26-test_references_and_objects] + :end-before: # ![26-test_references_and_objects] + +You can also specify per-call custom environments using a new context manager on the Git command, e.g. for using a specific SSH key. 
The following example works with `git` starting at *v2.3*:: + + ssh_cmd = 'ssh -i id_deployment_key' + with repo.git.custom_environment(GIT_SSH_COMMAND=ssh_cmd): + repo.remotes.origin.fetch() + +This one sets a custom script to be executed in place of `ssh`, and can be used in `git` prior to *v2.3*:: + + ssh_executable = os.path.join(rw_dir, 'my_ssh_executable.sh') + with repo.git.custom_environment(GIT_SSH=ssh_executable): + repo.remotes.origin.fetch() + +Here's an example executable that can be used in place of the `ssh_executable` above: + +.. code-block:: shell + + #!/bin/sh + ID_RSA=/var/lib/openshift/5562b947ecdd5ce939000038/app-deployments/id_rsa + exec /usr/bin/ssh -o StrictHostKeyChecking=no -i $ID_RSA "$@" + +Please note that the script must be executable (i.e. `chmod +x script.sh`). `StrictHostKeyChecking=no` is used to avoid prompts asking to save the host's key to `~/.ssh/known_hosts`, which happens in case you run this as a daemon. + +You might also have a look at `Git.update_environment(...)` in case you want to set up a changed environment more permanently. + Submodule Handling ****************** -Submodules can be conveniently handled using the methods provided by Git-Python, and as an added benefit, Git-Python provides functionality which behave smarter and less error prone than its original c-git implementation, that is Git-Python tries hard to keep your repository consistent when updating submodules recursively or adjusting the existing configuration.
- -In the following brief example, you will learn about the very basics, assuming you operate on the Git-Python repository itself:: - - >>> repo = Repo('path/to/git-python/repository') - >>> sms = repo.submodules - [git.Submodule(name=gitdb, path=lib/git/ext/gitdb, url=git://github.com/gitpython-developers/GitPython.git, branch=master)] - >>> sm = sms[0] - >>> sm.name - 'gitdb' - >>> sm.module() # The module is the actual repository referenced by the submodule - /git-python/lib/git/ext/gitdb/.git"> - >>> sm.module_exists() - True - >>> sm.abspath == sm.module().working_tree_dir # the submodule's absolute path is the module's path - True - >>> sm.hexsha # Its sha defines the commit to checkout - '2ddc5bad224d8f545ef3bb2ab3df98dfe063c5b6' - >>> sm.exists() # yes, this submodule is valid and exists - True - >>> sm.config_reader().get_value('path') == sm.path # read its configuration conveniently - True - >>> sm.children() # query the submodule hierarchy - [git.Submodule(name=async, path=ext/async, url=git://github.com/gitpython-developers/async.git, branch=master)] - -In addition to the query functionality, you can move the submodule's repository to a different path <``move(...)``>, write its configuration <``config_writer().set_value(...)``>, update its working tree <``update(...)``>, and remove and add them <``remove(...)``, ``add(...)``>. - -If you obtained your submodule object by traversing a tree object which is not rooted at the head's commit, you have to inform the submodule about its actual commit to retrieve the data from by using the ``set_parent_commit(...)`` method. - -The special ``RootModule`` type allows you to treat your master repository as root of a hierarchy of submodules, which allows very convenient submodule handling. Its ``update(...)`` method is reimplemented to provide an advanced way of updating submodules as they change their values. 
The update method will track changes and make sure your working tree and submodule checkouts stay consistent, which is very useful in case submodules get deleted or added to name just two of the handled cases. - -Additionally, Git-Python adds functionality to track a specific branch, instead of just a commit. Supported by customized update methods, you are able to automatically update submodules to the latest revision available in the remote repository, as well as to keep track of changes and movements of these submodules. To use it, set the name of the branch you want to track to the ``submodule.$name.branch`` option of the *.gitmodules* file, and use Git-Python update methods on the resulting repository with the ``to_latest_revision`` parameter turned on. In the latter case, the sha of your submodule will be ignored, instead a local tracking branch will be updated to the respective remote branch automatically. The resulting behaviour is much like the one of svn::externals, which can be useful in times. +:class:`Submodules ` can be conveniently handled using the methods provided by GitPython, and as an added benefit, GitPython provides functionality which behave smarter and less error prone than its original c-git implementation, that is GitPython tries hard to keep your repository consistent when updating submodules recursively or adjusting the existing configuration. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [1-test_submodules] + :end-before: # ![1-test_submodules] + +In addition to the query functionality, you can move the submodule's repository to a different path <``move(...)``>, +write its configuration <``config_writer().set_value(...).release()``>, update its working tree <``update(...)``>, +and remove or add them <``remove(...)``, ``add(...)``>. 
+ +If you obtained your submodule object by traversing a tree object which is not rooted at the head's commit, +you have to inform the submodule about its actual commit to retrieve the data from +by using the ``set_parent_commit(...)`` method. + +The special :class:`RootModule ` type allows you to treat your superproject (master repository) as the root of a hierarchy of submodules, which allows very convenient submodule handling. Its ``update(...)`` method is reimplemented to provide an advanced way of updating submodules as they change their values over time. The update method will track changes and make sure your working tree and submodule checkouts stay consistent, which is very useful in case submodules get deleted or added, to name just two of the handled cases. + +Additionally, GitPython adds functionality to track a specific branch, instead of just a commit. Supported by customized update methods, you are able to automatically update submodules to the latest revision available in the remote repository, as well as to keep track of changes and movements of these submodules. To use it, set the name of the branch you want to track to the ``submodule.$name.branch`` option of the *.gitmodules* file, and use GitPython update methods on the resulting repository with the ``to_latest_revision`` parameter turned on. In the latter case, the sha of your submodule will be ignored; instead, a local tracking branch will be updated to the respective remote branch automatically, provided there are no local changes. The resulting behaviour is much like the one of svn::externals, which can be useful at times. Obtaining Diff Information ************************** -Diffs can generally be obtained by subclasses of ``Diffable`` as they provide the ``diff`` method. This operation yields a DiffIndex allowing you to easily access diff information about paths. +Diffs can generally be obtained by subclasses of :class:`Diffable ` as they provide the ``diff`` method.
This operation yields a :class:`DiffIndex ` allowing you to easily access diff information about paths. -Diffs can be made between the Index and Trees, Index and the working tree, trees and trees as well as trees and the working copy. If commits are involved, their tree will be used implicitly:: +Diffs can be made between the Index and Trees, Index and the working tree, trees and trees as well as trees and the working copy. If commits are involved, their tree will be used implicitly. - hcommit = repo.head.commit - idiff = hcommit.diff() # diff tree against index - tdiff = hcommit.diff('HEAD~1') # diff tree against previous tree - wdiff = hcommit.diff(None) # diff tree against working tree - - index = repo.index - index.diff() # diff index against itself yielding empty diff - index.diff(None) # diff index against working copy - index.diff('HEAD') # diff index against current HEAD tree +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [27-test_references_and_objects] + :end-before: # ![27-test_references_and_objects] -The item returned is a DiffIndex which is essentially a list of Diff objects. It provides additional filtering to ease finding what you might be looking for:: +The item returned is a DiffIndex which is essentially a list of Diff objects. It provides additional filtering to ease finding what you might be looking for. - for diff_added in wdiff.iter_change_type('A'): do_something_with(diff_added) +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [28-test_references_and_objects] + :end-before: # ![28-test_references_and_objects] -Use the diff framework if you want to implement git-status like functionality. +Use the diff framework if you want to implement git-status like functionality. 
* A diff between the index and the commit's tree your HEAD points to - - * use repo.index.diff(repo.head) - + + * use ``repo.index.diff(repo.head.commit)`` + * A diff between the index and the working tree - - * use repo.index.diff(None) - + + * use ``repo.index.diff(None)`` + * A list of untracked files - - * use repo.untracked_files - + * use ``repo.untracked_files`` + Switching Branches ****************** -To switch between branches, you effectively need to point your HEAD to the new branch head and reset your index and working copy to match. A simple manual way to do it is the following one:: +To switch between branches similar to ``git checkout``, you effectively need to point your HEAD symbolic reference to the new branch and reset your index and working copy to match. A simple manual way to do it is the following one - repo.head.reference = repo.heads.other_branch - repo.head.reset(index=True, working_tree=True) - -The previous approach would brutally overwrite the user's changes in the working copy and index though and is less sophisticated than a git-checkout for instance which generally prevents you from destroying your work. Use the safer approach as follows:: +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [29-test_references_and_objects] + :end-before: # ![29-test_references_and_objects] - repo.heads.master.checkout() # checkout the branch using git-checkout - repo.heads.other_branch.checkout() +The previous approach would brutally overwrite the user's changes in the working copy and index though and is less sophisticated than a ``git-checkout``. The latter will generally prevent you from destroying your work. Use the safer approach as follows. + +.. 
literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [30-test_references_and_objects] + :end-before: # ![30-test_references_and_objects] + +Initializing a repository +************************* + +In this example, we will initialize an empty repository, add an empty file to the index, and commit the change. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: def test_add_file_and_commit + :end-before: # ![test_add_file_and_commit] + +Please have a look at the individual methods as they usually support a vast amount of arguments to customize their behavior. Using git directly ****************** -In case you are missing functionality as it has not been wrapped, you may conveniently use the git command directly. It is owned by each repository instance:: +In case you are missing functionality as it has not been wrapped, you may conveniently use the :class:`git ` command directly. It is owned by each repository instance. + +.. literalinclude:: ../../test/test_docs.py + :language: python + :dedent: 8 + :start-after: # [31-test_references_and_objects] + :end-before: # ![31-test_references_and_objects] - git = repo.git - git.checkout('head', b="my_new_branch") # default command - git.for_each_ref() # '-' becomes '_' when calling it - The return value will by default be a string of the standard output channel produced by the command. -Keyword arguments translate to short and long keyword arguments on the commandline. +Keyword arguments translate to short and long keyword arguments on the command-line. The special notion ``git.command(flag=True)`` will create a flag without value like ``command --flag``. -If ``None`` is found in the arguments, it will be dropped silently. Lists and tuples passed as arguments will be unpacked recursively to individual arguments. Objects are converted to strings using the str(...) function. +If ``None`` is found in the arguments, it will be dropped silently. 
Lists and tuples passed as arguments will be unpacked recursively to individual arguments. Objects are converted to strings using the ``str(...)`` function. + + +Object Databases +**************** +:class:`git.Repo ` instances are powered by its object database instance which will be used when extracting any data, or when writing new objects. + +The type of the database determines certain performance characteristics, such as the quantity of objects that can be read per second, the resource usage when reading large data files, as well as the average memory footprint of your application. + +GitDB +===== +The GitDB is a pure-python implementation of the git object database. It is the default database to use in GitPython 0.3. It uses less memory when handling huge files, but will be 2 to 5 times slower when extracting large quantities of small objects from densely packed repositories:: + + repo = Repo("path/to/repo", odbt=GitDB) + + +GitCmdObjectDB +============== +The git command database uses persistent git-cat-file instances to read repository information. These operate very fast under all conditions, but will consume additional memory for the process itself. When extracting large files, memory usage will be much higher than ``GitDB``:: + + repo = Repo("path/to/repo", odbt=GitCmdObjectDB) Git Command Debugging and Customization *************************************** @@ -434,9 +527,14 @@ Using environment variables, you can further adjust the behaviour of the git com * **GIT_PYTHON_TRACE** - * If set to non-0, all executed git commands will be printed to stdout. - * if set to *full*, the executed git command will be printed along with its output. 
- + * If set to non-0, all executed git commands will be shown as they happen + * If set to *full*, the executed git command *and* its entire output on stdout and stderr will be shown as they happen + + **NOTE**: All logging is output using a Python logger, so make sure your program is configured to show INFO-level messages. If this is not the case, try adding the following to your program:: + + import logging + logging.basicConfig(level=logging.INFO) + * **GIT_PYTHON_GIT_EXECUTABLE** * If set, it should contain the full path to the git executable, e.g. *c:\\Program Files (x86)\\Git\\bin\\git.exe* on windows or */usr/bin/git* on linux. @@ -444,7 +542,6 @@ Using environment variables, you can further adjust the behaviour of the git com And even more ... ***************** -There is more functionality in there, like the ability to archive repositories, get stats and logs, blame, and probably a few other things that were not mentioned here. +There is more functionality in there, like the ability to archive repositories, get stats and logs, blame, and probably a few other things that were not mentioned here. Check the unit tests for an in-depth introduction on how each function is supposed to be used. - diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst deleted file mode 100644 index f71d88e60..000000000 --- a/doc/source/whatsnew.rst +++ /dev/null @@ -1,59 +0,0 @@ - -################ -Whats New in 0.3 -################ -GitPython 0.3 is the first step in creating a hybrid which uses a pure python implementations for all simple git features which can be implemented without significant performance penalties. Everything else is still performed using the git command, which is nicely integrated and easy to use. - -Its biggest strength, being the support for all git features through the git command itself, is a weakness as well considering the possibly vast amount of times the git command is being started up.
Depending on the actual command being performed, the git repository will be initialized on many of these invocations, causing additional overhead for possibly tiny operations. - -Keeping as many major operations in the python world will result in improved caching benefits as certain data structures just have to be initialized once and can be reused multiple times. This mode of operation may improve performance when altering the git database on a low level, and is clearly beneficial on operating systems where command invocations are very slow. - -**************** -Object Databases -**************** -An object database provides a simple interface to query object information or to write new object data. Objects are generally identified by their 20 byte binary sha1 value during query. - -GitPython uses the ``gitdb`` project to provide a pure-python implementation of the git database, which includes reading and writing loose objects, reading pack files and handling alternate repositories. - -The great thing about this is that ``Repo`` objects can use any object database, hence it easily supports different implementations with different performance characteristics. If you are thinking in extremes, you can implement your own database representation, which may be more efficient for what you want to do specifically, like handling big files more efficiently. - -************************ -Reduced Memory Footprint -************************ -Objects, such as commits, tags, trees and blobs now use 20 byte sha1 signatures internally, reducing their memory demands by 20 bytes per object, allowing you to keep more objects in memory at the same time. - -The internal caches of tree objects were improved to use less memory as well. - -################## -Upgrading from 0.2 -################## -GitPython 0.2 essentially behaves like GitPython 0.3 with a Repository using the ``GitCmdObjectDB`` instead of the ``GitDB`` as object database backend. 
Additionally it can be used more conveniently through implicit conversions and provides a feature set strikingly similar to 0.3. - -************************** -Why you should not upgrade -************************** -GitPython 0.3 in most cases will not run faster than GitPython 0.2, the opposite might be the case at it uses the pure python implementation by default. -There have been a few renames which will need additional adjustments in your code. - -Generally, if you only read git repositories, version 0.2 is sufficient and very well performing. - -********************** -Why you should upgrade -********************** -GitPython 0.2 has reached its end of line, and it is unlikely to receive more than contributed patches. 0.3 is the main development branch which will lead into the future. - -GitPython 0.3 provides memory usage optimization and is very flexible in the way it uses to access the object database. With minimal effort, 0.3 will be running as fast as 0.2. It marks the first step of more versions to come, and will improve over time. - -GitPython 0.3 is especially suitable for everyone who needs not only read, but also write access to a git repository. It is optimized to keep the memory consumption as low as possible, especially when handling large data sets. GitPython 0.3 operates on streams, not on possibly huge chunks of data. - - -************** -Guided Upgrade -************** -This guide should help to make the upgrade as painless as possible, hence it points out where to start, and what to look out for. - -* Have a look at https://github.com/gitpython-developers/GitPython/blob/0.3/doc/source/changes.rst -* Start applying the renames, generally the ``utils`` modules are now called ``util``, ``errors`` is called ``exc``. -* Search for occurrences of the ``sha`` property of object instances. A similar value can be obtained through the new ``hexsha`` property. The native sha1 value is the ``binsha`` though. 
-* Search for code which instantiates objects directly. Their initializer now requires a 20 byte binary Sha1, rev-specs cannot be used anymore. For a similar effect, either convert your hexadecimal shas to binary shas beforehand ( ``binascii.unhexlify`` for instance ), or use higher level functions such as ``Object.new``, ``Repo.commit`` or ``Repo.tree``. The latter ones takes rev-specs and hexadecimal sha1 hashes. - diff --git a/etc/sublime-text/git-python.sublime-project b/etc/sublime-text/git-python.sublime-project deleted file mode 100644 index d3b692892..000000000 --- a/etc/sublime-text/git-python.sublime-project +++ /dev/null @@ -1,71 +0,0 @@ -{ - "folders": - [ - // GIT-PYTHON - ///////////// - { - "follow_symlinks": true, - "path": "../..", - "file_exclude_patterns" : [ - "*.sublime-workspace", - ".git", - ".noseids", - ".coverage" - ], - "folder_exclude_patterns" : [ - ".git", - "cover", - "git/ext" - ] - }, - // GITDB - //////// - { - "follow_symlinks": true, - "path": "../../git/ext/gitdb", - "file_exclude_patterns" : [ - "*.sublime-workspace", - ".git", - ".noseids", - ".coverage" - ], - "folder_exclude_patterns" : [ - ".git", - "cover", - "gitdb/ext" - ] - }, - // // SMMAP - // //////// - // { - // "follow_symlinks": true, - // "path": "../../git/ext/gitdb/gitdb/ext/smmap", - // "file_exclude_patterns" : [ - // "*.sublime-workspace", - // ".git", - // ".noseids", - // ".coverage" - // ], - // "folder_exclude_patterns" : [ - // ".git", - // "cover", - // ] - // }, - // // ASYNC - // //////// - // { - // "follow_symlinks": true, - // "path": "../../git/ext/gitdb/gitdb/ext/async", - // "file_exclude_patterns" : [ - // "*.sublime-workspace", - // ".git", - // ".noseids", - // ".coverage" - // ], - // "folder_exclude_patterns" : [ - // ".git", - // "cover", - // ] - // }, - ] -} diff --git a/fuzzing/LICENSE-APACHE b/fuzzing/LICENSE-APACHE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/fuzzing/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache 
License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/fuzzing/LICENSE-BSD b/fuzzing/LICENSE-BSD new file mode 120000 index 000000000..ea5b60640 --- /dev/null +++ b/fuzzing/LICENSE-BSD @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/fuzzing/README.md b/fuzzing/README.md new file mode 100644 index 000000000..286f529eb --- /dev/null +++ b/fuzzing/README.md @@ -0,0 +1,226 @@ +# Fuzzing GitPython + +[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/gitpython.svg)][oss-fuzz-issue-tracker] + +This directory contains files related to GitPython's suite of fuzz tests that are executed daily on automated +infrastructure provided by [OSS-Fuzz][oss-fuzz-repo]. This document aims to provide necessary information for working +with fuzzing in GitPython. 
+ +The latest details regarding OSS-Fuzz test status, including build logs and coverage reports, are available +on [the Open Source Fuzzing Introspection website](https://introspector.oss-fuzz.com/project-profile?project=gitpython). + +## How to Contribute + +There are many ways to contribute to GitPython's fuzzing efforts! Contributions are welcomed through issues, +discussions, or pull requests on this repository. + +Areas that are particularly appreciated include: + +- **Tackling the existing backlog of open issues**. While fuzzing is an effective way to identify bugs, that information + isn't useful unless those bugs are fixed. If you are not sure where to start, the issues tab is a great place to get ideas! +- **Improvements to this (or other) documentation** make it easier for new contributors to get involved, so even small + improvements can have a large impact over time. If you see something that could be made easier by a documentation + update of any size, please consider suggesting it! + +For everything else, such as expanding test coverage, optimizing test performance, or enhancing error detection +capabilities, jump into the "Getting Started" section below. + +## Getting Started with Fuzzing GitPython + +> [!TIP] +> **New to fuzzing or unfamiliar with OSS-Fuzz?** +> +> These resources are an excellent place to start: +> +> - [OSS-Fuzz documentation][oss-fuzz-docs] - Continuous fuzzing service for open source software. +> - [Google/fuzzing][google-fuzzing-repo] - Tutorials, examples, discussions, research proposals, and other resources + related to fuzzing. +> - [CNCF Fuzzing Handbook](https://github.com/cncf/tag-security/blob/main/security-fuzzing-handbook/handbook-fuzzing.pdf) - + A comprehensive guide for fuzzing open source software. 
+> - [Efficient Fuzzing Guide by The Chromium Project](https://chromium.googlesource.com/chromium/src/+/main/testing/libfuzzer/efficient_fuzzing.md) - + Explores strategies to enhance the effectiveness of your fuzz tests, recommended for those looking to optimize their + testing efforts. + +### Setting Up Your Local Environment + +Before contributing to fuzzing efforts, ensure Python and Docker are installed on your machine. Docker is required for +running fuzzers in containers provided by OSS-Fuzz and for safely executing test files directly. [Install Docker](https://docs.docker.com/get-docker/) following the official guide if you do not already have it. + +### Understanding Existing Fuzz Targets + +Review the `fuzz-targets/` directory to familiarize yourself with how existing tests are implemented. See +the [Files & Directories Overview](#files--directories-overview) for more details on the directory structure. + +### Contributing to Fuzz Tests + +Start by reviewing the [Atheris documentation][atheris-repo] and the section +on [Running Fuzzers Locally](#running-fuzzers-locally) to begin writing or improving fuzz tests. + +## Files & Directories Overview + +The `fuzzing/` directory is organized into three key areas: + +### Fuzz Targets (`fuzz-targets/`) + +Contains Python files for each fuzz test. + +**Things to Know**: + +- Each fuzz test targets a specific part of GitPython's functionality. +- Test files adhere to the naming convention: `fuzz_<name>.py`, where `<name>` indicates the + functionality targeted by the test. +- Any functionality that involves performing operations on input data is a possible candidate for fuzz testing, but + features that involve processing untrusted user input or parsing operations are typically going to be the most + interesting. +- The goal of these tests is to identify previously unknown or unexpected error cases caused by a given input. 
For that + reason, fuzz tests should gracefully handle anticipated exception cases with a `try`/`except` block to avoid false + positives that halt the fuzzing engine. + +### OSS-Fuzz Scripts (`oss-fuzz-scripts/`) + +Includes scripts for building and integrating fuzz targets with OSS-Fuzz: + +- **`container-environment-bootstrap.sh`** - Sets up the execution environment. It is responsible for fetching default + dictionary entries and ensuring all required build dependencies are installed and up-to-date. +- **`build.sh`** - Executed within the Docker container, this script builds fuzz targets with necessary instrumentation + and prepares seed corpora and dictionaries for use. + +**Where to learn more:** + +- [OSS-Fuzz documentation on the build.sh](https://google.github.io/oss-fuzz/getting-started/new-project-guide/#buildsh) +- [See GitPython's build.sh and Dockerfile in the OSS-Fuzz repository](https://github.com/google/oss-fuzz/tree/master/projects/gitpython) + +### Local Development Helpers (`local-dev-helpers/`) + +Contains tools to make local development tasks easier. +See [the "Running Fuzzers Locally" section below](#running-fuzzers-locally) for further documentation and use cases related to files found here. + +## Running Fuzzers Locally + +> [!WARNING] +> **Some fuzz targets in this repository write to the filesystem** during execution. +> For that reason, it is strongly recommended to **always use Docker when executing fuzz targets**, even when it may be +> possible to do so without it. +> +> Although [I/O operations such as writing to disk are not considered best practice](https://github.com/google/fuzzing/blob/master/docs/good-fuzz-target.md#io), the current implementation of at least one test requires it. +> See [the "Setting Up Your Local Environment" section above](#setting-up-your-local-environment) if you do not already have Docker installed on your machine. +> +> PRs that replace disk I/O with in-memory alternatives are very much welcomed! 
+ +### Direct Execution of Fuzz Targets + +Directly executing fuzz targets allows for quick iteration and testing of changes which can be helpful during early +development of new fuzz targets or for validating changes made to an existing test. +The [Dockerfile](./local-dev-helpers/Dockerfile) located in the `local-dev-helpers/` subdirectory provides a lightweight +container environment preconfigured with [Atheris][atheris-repo] that makes it easy to execute a fuzz target directly. + +**From the root directory of your GitPython repository clone**: + +1. Build the local development helper image: + +```shell +docker build -f fuzzing/local-dev-helpers/Dockerfile -t gitpython-fuzzdev . +``` + +2. Then execute a fuzz target inside the image, for example: + +```shell + docker run -it -v "$PWD":/src gitpython-fuzzdev python fuzzing/fuzz-targets/fuzz_config.py -atheris_runs=10000 +``` + +The above command executes [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) and exits after `10000` runs, or earlier if +the fuzzer finds an error. + +Docker CLI's `-v` flag specifies a volume mount in Docker that maps the directory in which the command is run (which +should be the root directory of your local GitPython clone) to a directory inside the container, so any modifications +made between invocations will be reflected immediately without the need to rebuild the image each time. + +### Running OSS-Fuzz Locally + +This approach uses Docker images provided by OSS-Fuzz for building and running fuzz tests locally. It offers +comprehensive features but requires a local clone of the OSS-Fuzz repository and sufficient disk space for Docker +containers. 
+ +#### Build the Execution Environment + +Clone the OSS-Fuzz repository and prepare the Docker environment: + +```shell +git clone --depth 1 https://github.com/google/oss-fuzz.git oss-fuzz +cd oss-fuzz +python infra/helper.py build_image gitpython +python infra/helper.py build_fuzzers --sanitizer address gitpython +``` + +> [!TIP] +> The `build_fuzzers` command above accepts a local file path pointing to your GitPython repository clone as the last +> argument. +> This makes it easy to build fuzz targets you are developing locally in this repository without changing anything in +> the OSS-Fuzz repo! +> For example, if you have cloned this repository (or a fork of it) into: `~/code/GitPython` +> Then running this command would build new or modified fuzz targets using the `~/code/GitPython/fuzzing/fuzz-targets` +> directory: +> ```shell +> python infra/helper.py build_fuzzers --sanitizer address gitpython ~/code/GitPython +> ``` + +Verify the build of your fuzzers with the optional `check_build` command: + +```shell +python infra/helper.py check_build gitpython +``` + +#### Run a Fuzz Target + +Setting an environment variable for the fuzz target argument of the execution command makes it easier to quickly select +a different target between runs: + +```shell +# specify the fuzz target without the .py extension: +export FUZZ_TARGET=fuzz_config +``` + +Execute the desired fuzz target: + +```shell +python infra/helper.py run_fuzzer gitpython $FUZZ_TARGET -- -max_total_time=60 -print_final_stats=1 +``` + +> [!TIP] +> In the example above, the "`-- -max_total_time=60 -print_final_stats=1`" portion of the command is optional but quite +> useful. +> +> Every argument provided after "`--`" in the above command is passed to the fuzzing engine directly. In this case: +> - `-max_total_time=60` tells the LibFuzzer to stop execution after 60 seconds have elapsed. 
+> - `-print_final_stats=1` tells the LibFuzzer to print a summary of useful metrics about the target run upon + completion. +> +> But almost any [LibFuzzer option listed in the documentation](https://llvm.org/docs/LibFuzzer.html#options) should +> work as well. + +#### Next Steps + +For detailed instructions on advanced features like reproducing OSS-Fuzz issues or using the Fuzz Introspector, refer +to [the official OSS-Fuzz documentation][oss-fuzz-docs]. + +## LICENSE + +All files located within the `fuzzing/` directory are subject to [the same license](../LICENSE) +as [the other files in this repository](../README.md#license) with one exception: + +[`fuzz_config.py`](./fuzz-targets/fuzz_config.py) was migrated to this repository from the OSS-Fuzz project's repository +where it was originally created. As such, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) retains its original license +and copyright notice (Apache License, Version 2.0 and Copyright 2023 Google LLC respectively) in a header +comment, followed by a notice stating that it has been modified by contributors to GitPython. +[LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by the OSS-Fuzz project repository at the time the +file was migrated. 
+ +[oss-fuzz-repo]: https://github.com/google/oss-fuzz + +[oss-fuzz-docs]: https://google.github.io/oss-fuzz + +[oss-fuzz-issue-tracker]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:gitpython + +[google-fuzzing-repo]: https://github.com/google/fuzzing + +[atheris-repo]: https://github.com/google/atheris diff --git a/fuzzing/fuzz-targets/fuzz_blob.py b/fuzzing/fuzz-targets/fuzz_blob.py new file mode 100644 index 000000000..ce888e85f --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_blob.py @@ -0,0 +1,40 @@ +import atheris +import sys +import os +import tempfile + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = git.Repo.init(path=temp_dir) + binsha = fdp.ConsumeBytes(20) + mode = fdp.ConsumeInt(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())) + path = fdp.ConsumeUnicodeNoSurrogates(fdp.remaining_bytes()) + + try: + blob = git.Blob(repo, binsha, mode, path) + except AssertionError as e: + if "Require 20 byte binary sha, got" in str(e): + return -1 + else: + raise e + + _ = blob.mime_type + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_config.py b/fuzzing/fuzz-targets/fuzz_config.py new file mode 100644 index 000000000..4eddc32ff --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_config.py @@ -0,0 +1,57 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +############################################################################### +# Note: This file has been modified by contributors to GitPython. +# The original state of this file may be referenced here: +# https://github.com/google/oss-fuzz/commit/f26f254558fc48f3c9bc130b10507386b94522da +############################################################################### +import atheris +import sys +import io +import os +from configparser import MissingSectionHeaderError, ParsingError + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + sio = io.BytesIO(data) + sio.name = "/tmp/fuzzconfig.config" + git_config = git.GitConfigParser(sio) + try: + git_config.read() + except (MissingSectionHeaderError, ParsingError, UnicodeDecodeError): + return -1 # Reject inputs raising expected exceptions + except ValueError as e: + if "embedded null byte" in str(e): + # The `os.path.expanduser` function, which does not accept strings + # containing null bytes might raise this. 
+ return -1 + else: + raise e # Raise unanticipated exceptions as they might be bugs + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_diff.py b/fuzzing/fuzz-targets/fuzz_diff.py new file mode 100644 index 000000000..d4bd68b57 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_diff.py @@ -0,0 +1,86 @@ +import sys +import os +import io +import tempfile +from binascii import Error as BinasciiError + +import atheris + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + from git import Repo, Diff + + +class BytesProcessAdapter: + """Allows bytes to be used as process objects returned by subprocess.Popen.""" + + @atheris.instrument_func + def __init__(self, input_string): + self.stdout = io.BytesIO(input_string) + self.stderr = io.BytesIO() + + @atheris.instrument_func + def wait(self): + return 0 + + poll = wait + + +@atheris.instrument_func +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(path=temp_dir) + try: + diff = Diff( + repo, + a_rawpath=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + b_rawpath=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + a_blob_id=fdp.ConsumeBytes(20), + b_blob_id=fdp.ConsumeBytes(20), + a_mode=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + b_mode=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + new_file=fdp.ConsumeBool(), + deleted_file=fdp.ConsumeBool(), + copied_file=fdp.ConsumeBool(), + raw_rename_from=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + raw_rename_to=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + 
diff=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + change_type=fdp.PickValueInList(["A", "D", "C", "M", "R", "T", "U"]), + score=fdp.ConsumeIntInRange(0, fdp.remaining_bytes()), + ) + except BinasciiError: + return -1 + except AssertionError as e: + if "Require 20 byte binary sha, got" in str(e): + return -1 + else: + raise e + + _ = diff.__str__() + _ = diff.a_path + _ = diff.b_path + _ = diff.rename_from + _ = diff.rename_to + _ = diff.renamed_file + + diff_index = diff._index_from_patch_format( + repo, proc=BytesProcessAdapter(fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes()))) + ) + + diff._handle_diff_line( + lines_bytes=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), repo=repo, index=diff_index + ) + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py new file mode 100644 index 000000000..7bd82c120 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_repo.py @@ -0,0 +1,47 @@ +import atheris +import io +import sys +import os +import tempfile + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = git.Repo.init(path=temp_dir) + + # Generate a minimal set of files based on fuzz data to minimize I/O operations. 
+ file_paths = [os.path.join(temp_dir, f"File{i}") for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))] + for file_path in file_paths: + with open(file_path, "wb") as f: + # The chosen upperbound for count of bytes we consume by writing to these + # files is somewhat arbitrary and may be worth experimenting with if the + # fuzzer coverage plateaus. + f.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + + repo.index.add(file_paths) + repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 80))) + + fuzz_tree = git.Tree(repo, git.Tree.NULL_BIN_SHA, 0, "") + + try: + fuzz_tree._deserialize(io.BytesIO(data)) + except IndexError: + return -1 + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py new file mode 100644 index 000000000..afa653d0d --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -0,0 +1,125 @@ +import atheris +import sys +import os +import tempfile +from configparser import ParsingError +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) + +# Setup the Git environment +setup_git_environment() +from git import Repo, GitCommandError, InvalidGitRepositoryError + + +def sanitize_input(input_str, max_length=255): + """Sanitize and truncate inputs to avoid invalid Git operations.""" + sanitized = "".join(ch for ch in input_str if ch.isalnum() or ch in ("-", "_", ".")) + return sanitized[:max_length] + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as repo_temp_dir: + repo = Repo.init(path=repo_temp_dir) + repo.index.commit("Initial commit") + + try: + with tempfile.TemporaryDirectory() as submodule_temp_dir: + sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) + commit_message = sanitize_input(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + 
sub_repo.index.commit(commit_message) + + submodule_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(repo.working_tree_dir)) + ) + ) + + submodule_path = os.path.relpath( + os.path.join(repo.working_tree_dir, submodule_name), + start=repo.working_tree_dir, + ) + + # Ensure submodule_path is valid + if not submodule_name or submodule_name.startswith("/") or ".." in submodule_name: + return -1 # Reject invalid input so they are not added to the corpus + + submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") + + with submodule.config_writer() as writer: + key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) + value_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) + + writer.set_value( + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(key_length)), + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(value_length)), + ) + writer.release() + + submodule.update( + init=fdp.ConsumeBool(), + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + + submodule_repo = submodule.module() + + new_file_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(submodule_repo.working_tree_dir)) + ) + ) + new_file_path = os.path.join(submodule_repo.working_tree_dir, new_file_name) + with open(new_file_path, "wb") as new_file: + new_file.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + + submodule_repo.index.add([new_file_path]) + submodule_repo.index.commit("Added new file to submodule") + + repo.submodule_update(recursive=fdp.ConsumeBool()) + submodule_repo.head.reset( + commit="HEAD~1", + working_tree=fdp.ConsumeBool(), + head=fdp.ConsumeBool(), + ) + + module_option_value, configuration_option_value = fdp.PickValueInList( + [(True, False), (False, True), (True, True)] + ) + submodule.remove( + module=module_option_value, + 
configuration=configuration_option_value, + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + repo.index.commit(f"Removed submodule {submodule_name}") + + except ( + ParsingError, + GitCommandError, + InvalidGitRepositoryError, + FileNotFoundError, + FileExistsError, + IsADirectoryError, + NotADirectoryError, + BrokenPipeError, + PermissionError, + ): + return -1 + except Exception as e: + return handle_exception(e) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py new file mode 100644 index 000000000..97e6eab98 --- /dev/null +++ b/fuzzing/fuzz-targets/utils.py @@ -0,0 +1,122 @@ +import atheris # pragma: no cover +import os # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover + + +@atheris.instrument_func +def is_expected_exception_message(exception: Exception, error_message_list: List[str]) -> bool: # pragma: no cover + """ + Checks if the message of a given exception matches any of the expected error messages, case-insensitively. + + Args: + exception (Exception): The exception object raised during execution. + error_message_list (List[str]): A list of error message substrings to check against the exception's message. + + Returns: + bool: True if the exception's message contains any of the substrings from the error_message_list, + case-insensitively, otherwise False. + """ + exception_message = str(exception).lower() + for error in error_message_list: + if error.lower() in exception_message: + return True + return False + + +@atheris.instrument_func +def get_max_filename_length(path: str) -> int: # pragma: no cover + """ + Get the maximum filename length for the filesystem containing the given path. + + Args: + path (str): The path to check the filesystem for. 
+ + Returns: + int: The maximum filename length. + """ + return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = 
load_exception_list(exception_file) + return match_exception_with_traceback(exception_list, exc_traceback) + + +@atheris.instrument_func +def handle_exception(e: Exception) -> int: + """Encapsulate exception handling logic for reusability.""" + exc_traceback = e.__traceback__ + if check_exception_against_list(exc_traceback): + return -1 + else: + raise e + + +@atheris.instrument_func +def setup_git_environment() -> None: + """Set up the environment variables for Git.""" + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path + + if not sys.warnoptions: # pragma: no cover + # The warnings filter below can be overridden by passing the -W option + # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. + import warnings + import logging + + # Fuzzing data causes some modules to generate a large number of warnings + # which are not usually interesting and make the test output hard to read, so we ignore them. + warnings.simplefilter("ignore") + logging.getLogger().setLevel(logging.ERROR) diff --git a/fuzzing/local-dev-helpers/Dockerfile b/fuzzing/local-dev-helpers/Dockerfile new file mode 100644 index 000000000..426de05dd --- /dev/null +++ b/fuzzing/local-dev-helpers/Dockerfile @@ -0,0 +1,22 @@ +# syntax=docker/dockerfile:1 + +# Use the same Python version as OSS-Fuzz to avoid accidental incompatibilities in test code +FROM python:3.8-bookworm + +LABEL project="GitPython Fuzzing Local Dev Helper" + +WORKDIR /src + +COPY . . + +# Update package managers, install necessary packages, and cleanup unnecessary files in a single RUN to keep the image smaller. +RUN apt-get update && \ + apt-get install -y git clang && \ + python -m pip install --upgrade pip && \ + python -m pip install atheris && \ + python -m pip install -e . 
&& \ + apt-get clean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +CMD ["bash"] diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh new file mode 100644 index 000000000..c156e872d --- /dev/null +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -0,0 +1,19 @@ +# shellcheck shell=bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +set -euo pipefail + +python3 -m pip install . + +find "$SRC" -maxdepth 1 \ + \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ + -exec printf '[%s] Copying: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" {} \; \ + -exec chmod a-x {} \; \ + -exec cp {} "$OUT" \; + +# Build fuzzers in $OUT. +find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." +done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh new file mode 100755 index 000000000..924a3cbf3 --- /dev/null +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +set -euo pipefail + +################# +# Prerequisites # +################# + +for cmd in python3 git wget zip; do + command -v "$cmd" >/dev/null 2>&1 || { + printf '[%s] Required command %s not found, exiting.\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$cmd" >&2 + exit 1 + } +done + +############# +# Functions # +############# + +download_and_concatenate_common_dictionaries() { + # Assign the first argument as the target file where all contents will be concatenated + local target_file="$1" + + # Shift the arguments so the first argument (target_file path) is 
removed + # and only URLs are left for the loop below. + shift + + for url in "$@"; do + wget -qO- "$url" >>"$target_file" + # Ensure there's a newline between each file's content + echo >>"$target_file" + done +} + +create_seed_corpora_zips() { + local seed_corpora_dir="$1" + local output_zip + for dir in "$seed_corpora_dir"/*; do + if [ -d "$dir" ] && [ -n "$dir" ]; then + output_zip="$SRC/$(basename "$dir")_seed_corpus.zip" + printf '[%s] Zipping the contents of %s into %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dir" "$output_zip" + zip -jur "$output_zip" "$dir"/* + fi + done +} + +prepare_dictionaries_for_fuzz_targets() { + local dictionaries_dir="$1" + local fuzz_targets_dir="$2" + local common_base_dictionary_filename="$WORK/__base.dict" + + printf '[%s] Copying .dict files from %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dictionaries_dir" "$SRC/" + cp -v "$dictionaries_dir"/*.dict "$SRC/" + + download_and_concatenate_common_dictionaries "$common_base_dictionary_filename" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict" + + find "$fuzz_targets_dir" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + if [[ -r "$common_base_dictionary_filename" ]]; then + # Strip the `.py` extension from the filename and replace it with `.dict`. + fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" + local output_file="$SRC/$fuzz_harness_dictionary_filename" + + printf '[%s] Appending %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$common_base_dictionary_filename" "$output_file" + if [[ -s "$output_file" ]]; then + # If a dictionary file for this fuzzer already exists and is not empty, + # we append a new line to the end of it before appending any new entries. 
+ # + # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error + # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) + # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 + echo >>"$output_file" + fi + cat "$common_base_dictionary_filename" >>"$output_file" + fi + done +} + +######################## +# Main execution logic # +######################## +# Seed corpora and dictionaries are hosted in a separate repository to avoid additional bloat in this repo. +# We clone into the $WORK directory because OSS-Fuzz cleans it up after building the image, keeping the image small. +git clone --depth 1 https://github.com/gitpython-developers/qa-assets.git "$WORK/qa-assets" + +create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" + +prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" + +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" + +popd + + +# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. 
+python3 -m pip install --upgrade pip +# Upgrade to the latest versions known to work at the time the below changes were introduced: +python3 -m pip install 'setuptools~=69.0' 'pyinstaller~=6.0' diff --git a/git/__init__.py b/git/__init__.py index 5580c9a6b..1b2360e3a 100644 --- a/git/__init__.py +++ b/git/__init__.py @@ -1,53 +1,300 @@ -# __init__.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os -import sys -import inspect - -__version__ = 'git' - - -#{ Initialization -def _init_externals(): - """Initialize external projects by putting them into the path""" - sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb')) - - try: - import gitdb - except ImportError: - raise ImportError("'gitdb' could not be found in your PYTHONPATH") - # END verify import - -#} END initialization - -################# -_init_externals() -################# - -#{ Imports - -from git.config import GitConfigParser -from git.objects import * -from git.refs import * -from git.diff import * -from git.exc import * -from git.db import * -from git.cmd import Git -from git.repo import Repo -from git.remote import * -from git.index import * -from git.util import ( - LockFile, - BlockingLockFile, - Stats, - Actor +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +# @PydevCodeAnalysisIgnore + +__all__ = [ + "Actor", + "AmbiguousObjectName", + "BadName", + "BadObject", + "BadObjectType", + "BaseIndexEntry", + "Blob", + "BlobFilter", + "BlockingLockFile", + "CacheError", + "CheckoutError", + "CommandError", + "Commit", + "Diff", + "DiffConstants", + "DiffIndex", + "Diffable", + "FetchInfo", + "Git", + "GitCmdObjectDB", + "GitCommandError", + "GitCommandNotFound", + "GitConfigParser", + "GitDB", + "GitError", + "HEAD", + "Head", + 
"HookExecutionError", + "INDEX", + "IndexEntry", + "IndexFile", + "IndexObject", + "InvalidDBRoot", + "InvalidGitRepositoryError", + "List", # Deprecated - import this from `typing` instead. + "LockFile", + "NULL_TREE", + "NoSuchPathError", + "ODBError", + "Object", + "Optional", # Deprecated - import this from `typing` instead. + "ParseError", + "PathLike", + "PushInfo", + "RefLog", + "RefLogEntry", + "Reference", + "Remote", + "RemoteProgress", + "RemoteReference", + "Repo", + "RepositoryDirtyError", + "RootModule", + "RootUpdateProgress", + "Sequence", # Deprecated - import from `typing`, or `collections.abc` in 3.9+. + "StageType", + "Stats", + "Submodule", + "SymbolicReference", + "TYPE_CHECKING", # Deprecated - import this from `typing` instead. + "Tag", + "TagObject", + "TagReference", + "Tree", + "TreeModifier", + "Tuple", # Deprecated - import this from `typing` instead. + "Union", # Deprecated - import this from `typing` instead. + "UnmergedEntriesError", + "UnsafeOptionError", + "UnsafeProtocolError", + "UnsupportedOperation", + "UpdateProgress", + "WorkTreeRepositoryUnsupported", + "refresh", + "remove_password_if_present", + "rmtree", + "safe_decode", + "to_hex_sha", +] + +__version__ = "git" + +from typing import Any, List, Optional, Sequence, TYPE_CHECKING, Tuple, Union + +if TYPE_CHECKING: + from types import ModuleType + +import warnings + +from gitdb.util import to_hex_sha + +from git.exc import ( + AmbiguousObjectName, + BadName, + BadObject, + BadObjectType, + CacheError, + CheckoutError, + CommandError, + GitCommandError, + GitCommandNotFound, + GitError, + HookExecutionError, + InvalidDBRoot, + InvalidGitRepositoryError, + NoSuchPathError, + ODBError, + ParseError, + RepositoryDirtyError, + UnmergedEntriesError, + UnsafeOptionError, + UnsafeProtocolError, + UnsupportedOperation, + WorkTreeRepositoryUnsupported, ) +from git.types import PathLike + +try: + from git.compat import safe_decode # @NoMove + from git.config import GitConfigParser # 
@NoMove + from git.objects import ( # @NoMove + Blob, + Commit, + IndexObject, + Object, + RootModule, + RootUpdateProgress, + Submodule, + TagObject, + Tree, + TreeModifier, + UpdateProgress, + ) + from git.refs import ( # @NoMove + HEAD, + Head, + RefLog, + RefLogEntry, + Reference, + RemoteReference, + SymbolicReference, + Tag, + TagReference, + ) + from git.diff import ( # @NoMove + INDEX, + NULL_TREE, + Diff, + DiffConstants, + DiffIndex, + Diffable, + ) + from git.db import GitCmdObjectDB, GitDB # @NoMove + from git.cmd import Git # @NoMove + from git.repo import Repo # @NoMove + from git.remote import FetchInfo, PushInfo, Remote, RemoteProgress # @NoMove + from git.index import ( # @NoMove + BaseIndexEntry, + BlobFilter, + CheckoutError, + IndexEntry, + IndexFile, + StageType, + # NOTE: This tells type checkers what util resolves to. We delete it, and it is + # really resolved by __getattr__, which warns. See below on what to use instead. + util, + ) + from git.util import ( # @NoMove + Actor, + BlockingLockFile, + LockFile, + Stats, + remove_password_if_present, + rmtree, + ) +except GitError as _exc: + raise ImportError("%s: %s" % (_exc.__class__.__name__, _exc)) from _exc + + +def _warned_import(message: str, fullname: str) -> "ModuleType": + import importlib + + warnings.warn(message, DeprecationWarning, stacklevel=3) + return importlib.import_module(fullname) + + +def _getattr(name: str) -> Any: + # TODO: If __version__ is made dynamic and lazily fetched, put that case right here. + + if name == "util": + return _warned_import( + "The expression `git.util` and the import `from git import util` actually " + "reference git.index.util, and not the git.util module accessed in " + '`from git.util import XYZ` or `sys.modules["git.util"]`. 
This potentially ' + "confusing behavior is currently preserved for compatibility, but may be " + "changed in the future and should not be relied on.", + fullname="git.index.util", + ) + + for names, prefix in ( + ({"head", "log", "reference", "symbolic", "tag"}, "git.refs"), + ({"base", "fun", "typ"}, "git.index"), + ): + if name not in names: + continue + + fullname = f"{prefix}.{name}" + + return _warned_import( + f"{__name__}.{name} is a private alias of {fullname} and subject to " + f"immediate removal. Use {fullname} instead.", + fullname=fullname, + ) + + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +if not TYPE_CHECKING: + # NOTE: The expression `git.util` gives git.index.util and `from git import util` + # imports git.index.util, NOT git.util. It may not be feasible to change this until + # the next major version, to avoid breaking code inadvertently relying on it. + # + # - If git.index.util *is* what you want, use (or import from) that, to avoid + # confusion. + # + # - To use the "real" git.util module, write `from git.util import ...`, or if + # necessary access it as `sys.modules["git.util"]`. + # + # Note also that `import git.util` technically imports the "real" git.util... but + # the *expression* `git.util` after doing so is still git.index.util! + # + # (This situation differs from that of other indirect-submodule imports that are + # unambiguously non-public and subject to immediate removal. Here, the public + # git.util module, though different, makes less discoverable that the expression + # `git.util` refers to a non-public attribute of the git module.) + # + # This had originally come about by a wildcard import. Now that all intended imports + # are explicit, the intuitive but potentially incompatible binding occurs due to the + # usual rules for Python submodule bindings. So for now we replace that binding with + # git.index.util, delete that, and let __getattr__ handle it and issue a warning. 
+ # + # For the same runtime behavior, it would be enough to forgo importing util, and + # delete util as created naturally; __getattr__ would behave the same. But type + # checkers would not know what util refers to when accessed as an attribute of git. + del util + + # This is "hidden" to preserve static checking for undefined/misspelled attributes. + __getattr__ = _getattr + +# { Initialize git executable path + +GIT_OK = None + + +def refresh(path: Optional[PathLike] = None) -> None: + """Convenience method for setting the git executable path. + + :param path: + Optional path to the Git executable. If not absolute, it is resolved + immediately, relative to the current directory. + + :note: + The `path` parameter is usually omitted and cannot be used to specify a custom + command whose location is looked up in a path search on each call. See + :meth:`Git.refresh <git.cmd.Git.refresh>` for details on how to achieve this. + + :note: + This calls :meth:`Git.refresh <git.cmd.Git.refresh>` and sets other global + configuration according to the effect of doing so. As such, this function should + usually be used instead of using :meth:`Git.refresh <git.cmd.Git.refresh>` or + :meth:`FetchInfo.refresh <git.remote.FetchInfo.refresh>` directly. + + :note: + This function is called automatically, with no arguments, at import time. 
+ """ + global GIT_OK + GIT_OK = False + + if not Git.refresh(path=path): + return + if not FetchInfo.refresh(): # noqa: F405 + return # type: ignore[unreachable] + + GIT_OK = True + -#} END imports +try: + refresh() +except Exception as _exc: + raise ImportError("Failed to initialize: {0}".format(_exc)) from _exc -__all__ = [name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj))] +# } END initialize git executable path diff --git a/git/cmd.py b/git/cmd.py index f4d23002d..2048a43fa 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -1,38 +1,387 @@ -# cmd.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +from __future__ import annotations + +__all__ = ["GitMeta", "Git"] + +import contextlib +import io +import itertools +import logging import os +import re +import signal +import subprocess +from subprocess import DEVNULL, PIPE, Popen import sys -from util import ( - LazyMixin, - stream_copy +from textwrap import dedent +import threading +import warnings + +from git.compat import defenc, force_bytes, safe_decode +from git.exc import ( + CommandError, + GitCommandError, + GitCommandNotFound, + UnsafeOptionError, + UnsafeProtocolError, +) +from git.util import ( + cygpath, + expand_path, + is_cygwin_git, + patch_env, + remove_password_if_present, + stream_copy, ) -from exc import GitCommandError -from subprocess import ( - call, - Popen, - PIPE +# typing --------------------------------------------------------------------------- + +from typing import ( + Any, + AnyStr, + BinaryIO, + Callable, + Dict, + IO, + Iterator, + List, + Mapping, + Optional, + Sequence, + TYPE_CHECKING, + TextIO, + Tuple, + Union, + cast, + overload, ) -execute_kwargs = 
('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream') +from git.types import Literal, PathLike, TBD + +if TYPE_CHECKING: + from git.diff import DiffIndex + from git.repo.base import Repo + +# --------------------------------------------------------------------------------- + +execute_kwargs = { + "istream", + "with_extended_output", + "with_exceptions", + "as_process", + "output_stream", + "stdout_as_string", + "kill_after_timeout", + "with_stdout", + "universal_newlines", + "shell", + "env", + "max_chunk_size", + "strip_newline_in_stdout", +} + +_logger = logging.getLogger(__name__) + + +# ============================================================================== +## @name Utilities +# ------------------------------------------------------------------------------ +# Documentation +## @{ + + +def handle_process_output( + process: "Git.AutoInterrupt" | Popen, + stdout_handler: Union[ + None, + Callable[[AnyStr], None], + Callable[[List[AnyStr]], None], + Callable[[bytes, "Repo", "DiffIndex"], None], + ], + stderr_handler: Union[None, Callable[[AnyStr], None], Callable[[List[AnyStr]], None]], + finalizer: Union[None, Callable[[Union[Popen, "Git.AutoInterrupt"]], None]] = None, + decode_streams: bool = True, + kill_after_timeout: Union[None, float] = None, +) -> None: + R"""Register for notifications to learn that process output is ready to read, and + dispatch lines to the respective line handlers. + + This function returns once the finalizer returns. + + :param process: + :class:`subprocess.Popen` instance. -__all__ = ('Git', ) + :param stdout_handler: + f(stdout_line_string), or ``None``. + :param stderr_handler: + f(stderr_line_string), or ``None``. -def dashify(string): - return string.replace('_', '-') + :param finalizer: + f(proc) - wait for proc to finish. + :param decode_streams: + Assume stdout/stderr streams are binary and decode them before pushing their + contents to handlers. 
-class Git(LazyMixin): + This defaults to ``True``. Set it to ``False`` if: + - ``universal_newlines == True``, as then streams are in text mode, or + - decoding must happen later, such as for :class:`~git.diff.Diff`\s. + + :param kill_after_timeout: + :class:`float` or ``None``, Default = ``None`` + + To specify a timeout in seconds for the git command, after which the process + should be killed. """ - The Git class manages communication with the Git binary. + + # Use 2 "pump" threads and wait for both to finish. + def pump_stream( + cmdline: List[str], + name: str, + stream: Union[BinaryIO, TextIO], + is_decode: bool, + handler: Union[None, Callable[[Union[bytes, str]], None]], + ) -> None: + try: + for line in stream: + if handler: + if is_decode: + assert isinstance(line, bytes) + line_str = line.decode(defenc) + handler(line_str) + else: + handler(line) + + except Exception as ex: + _logger.error(f"Pumping {name!r} of cmd({remove_password_if_present(cmdline)}) failed due to: {ex!r}") + if "I/O operation on closed file" not in str(ex): + # Only reraise if the error was not due to the stream closing. + raise CommandError([f"<{name}-pump>"] + remove_password_if_present(cmdline), ex) from ex + finally: + stream.close() + + if hasattr(process, "proc"): + process = cast("Git.AutoInterrupt", process) + cmdline: str | Tuple[str, ...] 
| List[str] = getattr(process.proc, "args", "") + p_stdout = process.proc.stdout if process.proc else None + p_stderr = process.proc.stderr if process.proc else None + else: + process = cast(Popen, process) # type: ignore[redundant-cast] + cmdline = getattr(process, "args", "") + p_stdout = process.stdout + p_stderr = process.stderr + + if not isinstance(cmdline, (tuple, list)): + cmdline = cmdline.split() + + pumps: List[Tuple[str, IO, Callable[..., None] | None]] = [] + if p_stdout: + pumps.append(("stdout", p_stdout, stdout_handler)) + if p_stderr: + pumps.append(("stderr", p_stderr, stderr_handler)) + + threads: List[threading.Thread] = [] + + for name, stream, handler in pumps: + t = threading.Thread(target=pump_stream, args=(cmdline, name, stream, decode_streams, handler)) + t.daemon = True + t.start() + threads.append(t) + + # FIXME: Why join? Will block if stdin needs feeding... + for t in threads: + t.join(timeout=kill_after_timeout) + if t.is_alive(): + if isinstance(process, Git.AutoInterrupt): + process._terminate() + else: # Don't want to deal with the other case. + raise RuntimeError( + "Thread join() timed out in cmd.handle_process_output()." + f" kill_after_timeout={kill_after_timeout} seconds" + ) + if stderr_handler: + error_str: Union[str, bytes] = ( + "error: process killed because it timed out." f" kill_after_timeout={kill_after_timeout} seconds" + ) + if not decode_streams and isinstance(p_stderr, BinaryIO): + # Assume stderr_handler needs binary input. + error_str = cast(str, error_str) + error_str = error_str.encode() + # We ignore typing on the next line because mypy does not like the way + # we inferred that stderr takes str or bytes. 
+ stderr_handler(error_str) # type: ignore[arg-type] + + if finalizer: + finalizer(process) + + +safer_popen: Callable[..., Popen] + +if sys.platform == "win32": + + def _safer_popen_windows( + command: Union[str, Sequence[Any]], + *, + shell: bool = False, + env: Optional[Mapping[str, str]] = None, + **kwargs: Any, + ) -> Popen: + """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the + search. + + This avoids an untrusted search path condition where a file like ``git.exe`` in + a malicious repository would be run when GitPython operates on the repository. + The process using GitPython may have an untrusted repository's working tree as + its current working directory. Some operations may temporarily change to that + directory before running a subprocess. In addition, while by default GitPython + does not run external commands with a shell, it can be made to do so, in which + case the CWD of the subprocess, which GitPython usually sets to a repository + working tree, can itself be searched automatically by the shell. This wrapper + covers all those cases. + + :note: + This currently works by setting the + :envvar:`NoDefaultCurrentDirectoryInExePath` environment variable during + subprocess creation. It also takes care of passing Windows-specific process + creation flags, but that is unrelated to path search. + + :note: + The current implementation contains a race condition on :attr:`os.environ`. + GitPython isn't thread-safe, but a program using it on one thread should + ideally be able to mutate :attr:`os.environ` on another, without + unpredictable results. See comments in: + https://github.com/gitpython-developers/GitPython/pull/1650 + """ + # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards. 
+ # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal + # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP + creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP + + # When using a shell, the shell is the direct subprocess, so the variable must + # be set in its environment, to affect its search behavior. + if shell: + # The original may be immutable, or the caller may reuse it. Mutate a copy. + env = {} if env is None else dict(env) + env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be any value. + + # When not using a shell, the current process does the search in a + # CreateProcessW API call, so the variable must be set in our environment. With + # a shell, that's unnecessary if https://github.com/python/cpython/issues/101283 + # is patched. In Python versions where it is unpatched, and in the rare case the + # ComSpec environment variable is unset, the search for the shell itself is + # unsafe. Setting NoDefaultCurrentDirectoryInExePath in all cases, as done here, + # is simpler and protects against that. (As above, the "1" can be any value.) + with patch_env("NoDefaultCurrentDirectoryInExePath", "1"): + return Popen( + command, + shell=shell, + env=env, + creationflags=creationflags, + **kwargs, + ) + + safer_popen = _safer_popen_windows +else: + safer_popen = Popen + + +def dashify(string: str) -> str: + return string.replace("_", "-") + + +def slots_to_dict(self: "Git", exclude: Sequence[str] = ()) -> Dict[str, Any]: + return {s: getattr(self, s) for s in self.__slots__ if s not in exclude} + + +def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], excluded: Sequence[str] = ()) -> None: + for k, v in d.items(): + setattr(self, k, v) + for k in excluded: + setattr(self, k, None) + + +## -- End Utilities -- @} + +_USE_SHELL_DEFAULT_MESSAGE = ( + "Git.USE_SHELL is deprecated, because only its default value of False is safe. 
" + "It will be removed in a future release." +) + +_USE_SHELL_DANGER_MESSAGE = ( + "Setting Git.USE_SHELL to True is unsafe and insecure, as the effect of special " + "shell syntax cannot usually be accounted for. This can result in a command " + "injection vulnerability and arbitrary code execution. Git.USE_SHELL is deprecated " + "and will be removed in a future release." +) + + +def _warn_use_shell(extra_danger: bool) -> None: + warnings.warn( + _USE_SHELL_DANGER_MESSAGE if extra_danger else _USE_SHELL_DEFAULT_MESSAGE, + DeprecationWarning, + stacklevel=3, + ) + + +class _GitMeta(type): + """Metaclass for :class:`Git`. + + This helps issue :class:`DeprecationWarning` if :attr:`Git.USE_SHELL` is used. + """ + + def __getattribute(cls, name: str) -> Any: + if name == "USE_SHELL": + _warn_use_shell(False) + return super().__getattribute__(name) + + def __setattr(cls, name: str, value: Any) -> Any: + if name == "USE_SHELL": + _warn_use_shell(value) + super().__setattr__(name, value) + + if not TYPE_CHECKING: + # To preserve static checking for undefined/misspelled attributes while letting + # the methods' bodies be type-checked, these are defined as non-special methods, + # then bound to special names out of view of static type checkers. (The original + # names invoke name mangling (leading "__") to avoid confusion in other scopes.) + __getattribute__ = __getattribute + __setattr__ = __setattr + + +GitMeta = _GitMeta +"""Alias of :class:`Git`'s metaclass, whether it is :class:`type` or a custom metaclass. + +Whether the :class:`Git` class has the default :class:`type` as its metaclass or uses a +custom metaclass is not documented and may change at any time. This statically checkable +metaclass alias is equivalent at runtime to ``type(Git)``. This should almost never be +used. Code that benefits from it is likely to remain brittle even if it is used. 
+ +In view of the :class:`Git` class's intended use and :class:`Git` objects' dynamic +callable attributes representing git subcommands, it rarely makes sense to inherit from +:class:`Git` at all. Using :class:`Git` in multiple inheritance can be especially tricky +to do correctly. Attempting uses of :class:`Git` where its metaclass is relevant, such +as when a sibling class has an unrelated metaclass and a shared lower bound metaclass +might have to be introduced to solve a metaclass conflict, is not recommended. + +:note: + The correct static type of the :class:`Git` class itself, and any subclasses, is + ``Type[Git]``. (This can be written as ``type[Git]`` in Python 3.9 and later.) + + :class:`GitMeta` should never be used in any annotation where ``Type[Git]`` is + intended or otherwise possible to use. This alias is truly only for very rare and + inherently precarious situations where it is necessary to deal with the metaclass + explicitly. +""" + + +class Git(metaclass=_GitMeta): + """The Git class manages communication with the Git binary. It provides a convenient interface to calling the Git binary, such as in:: @@ -40,129 +389,503 @@ class Git(LazyMixin): g.init() # calls 'git init' program rval = g.ls_files() # calls 'git ls-files' program - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. + Debugging: + + * Set the :envvar:`GIT_PYTHON_TRACE` environment variable to print each invocation + of the command to stdout. + * Set its value to ``full`` to see details about the returned values. 
""" - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info", - "_git_options") + + __slots__ = ( + "_working_dir", + "cat_file_all", + "cat_file_header", + "_version_info", + "_version_info_token", + "_git_options", + "_persistent_git_options", + "_environment", + ) + + _excluded_ = ( + "cat_file_all", + "cat_file_header", + "_version_info", + "_version_info_token", + ) + + re_unsafe_protocol = re.compile(r"(.+)::.+") + + def __getstate__(self) -> Dict[str, Any]: + return slots_to_dict(self, exclude=self._excluded_) + + def __setstate__(self, d: Dict[str, Any]) -> None: + dict_to_slots_and__excluded_are_none(self, d, excluded=self._excluded_) # CONFIGURATION - # The size in bytes read from stdout when copying git's output to another stream - max_chunk_size = 1024 * 64 - git_exec_name = "git" # default that should work on linux and windows - git_exec_name_win = "git.cmd" # alternate command name, windows only + git_exec_name = "git" + """Default git command that should work on Linux, Windows, and other systems.""" - # Enables debugging of GitPython's git commands GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + """Enables debugging of GitPython's git commands.""" + + USE_SHELL: bool = False + """Deprecated. If set to ``True``, a shell will be used when executing git commands. + + Code that uses ``USE_SHELL = True`` or that passes ``shell=True`` to any GitPython + functions should be updated to use the default value of ``False`` instead. ``True`` + is unsafe unless the effect of syntax treated specially by the shell is fully + considered and accounted for, which is not possible under most circumstances. As + detailed below, it is also no longer needed, even where it had been in the past. + + It is in many if not most cases a command injection vulnerability for an application + to set :attr:`USE_SHELL` to ``True``. 
Any attacker who can cause a specially crafted + fragment of text to make its way into any part of any argument to any git command + (including paths, branch names, etc.) can cause the shell to read and write + arbitrary files and execute arbitrary commands. Innocent input may also accidentally + contain special shell syntax, leading to inadvertent malfunctions. + + In addition, how a value of ``True`` interacts with some aspects of GitPython's + operation is not precisely specified and may change without warning, even before + GitPython 4.0.0 when :attr:`USE_SHELL` may be removed. This includes: + + * Whether or how GitPython automatically customizes the shell environment. + + * Whether, outside of Windows (where :class:`subprocess.Popen` supports lists of + separate arguments even when ``shell=True``), this can be used with any GitPython + functionality other than direct calls to the :meth:`execute` method. + + * Whether any GitPython feature that runs git commands ever attempts to partially + sanitize data a shell may treat specially. Currently this is not done. + + Prior to GitPython 2.0.8, this had a narrow purpose in suppressing console windows + in graphical Windows applications. In 2.0.8 and higher, it provides no benefit, as + GitPython solves that problem more robustly and safely by using the + ``CREATE_NO_WINDOW`` process creation flag on Windows. + + Because Windows path search differs subtly based on whether a shell is used, in rare + cases changing this from ``True`` to ``False`` may keep an unusual git "executable", + such as a batch file, from being found. To fix this, set the command name or full + path in the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable or pass the + full path to :func:`git.refresh` (or invoke the script using a ``.exe`` shim). + + Further reading: + + * :meth:`Git.execute` (on the ``shell`` parameter). 
+ * https://github.com/gitpython-developers/GitPython/commit/0d9390866f9ce42870d3116094cd49e0019a970a + * https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags + * https://github.com/python/cpython/issues/91558#issuecomment-1100942950 + * https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessw + """ - # Provide the full path to the git executable. Otherwise it assumes git is in the path _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" - GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name) + _refresh_env_var = "GIT_PYTHON_REFRESH" + + GIT_PYTHON_GIT_EXECUTABLE = None + """Provide the full path to the git executable. Otherwise it assumes git is in the + executable search path. + + :note: + The git executable is actually found during the refresh step in the top level + ``__init__``. It can also be changed by explicitly calling :func:`git.refresh`. + """ + + _refresh_token = object() # Since None would match an initial _version_info_token. + + @classmethod + def refresh(cls, path: Union[None, PathLike] = None) -> bool: + """Update information about the git executable :class:`Git` objects will use. + + Called by the :func:`git.refresh` function in the top level ``__init__``. + + :param path: + Optional path to the git executable. If not absolute, it is resolved + immediately, relative to the current directory. (See note below.) + + :note: + The top-level :func:`git.refresh` should be preferred because it calls this + method and may also update other state accordingly. + + :note: + There are three different ways to specify the command that refreshing causes + to be used for git: + + 1. Pass no `path` argument and do not set the + :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable. The command + name ``git`` is used. It is looked up in a path search by the system, in + each command run (roughly similar to how git is found when running + ``git`` commands manually). 
This is usually the desired behavior. + + 2. Pass no `path` argument but set the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` + environment variable. The command given as the value of that variable is + used. This may be a simple command or an arbitrary path. It is looked up + in each command run. Setting :envvar:`GIT_PYTHON_GIT_EXECUTABLE` to + ``git`` has the same effect as not setting it. + + 3. Pass a `path` argument. This path, if not absolute, is immediately + resolved, relative to the current directory. This resolution occurs at + the time of the refresh. When git commands are run, they are run using + that previously resolved path. If a `path` argument is passed, the + :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable is not + consulted. + + :note: + Refreshing always sets the :attr:`Git.GIT_PYTHON_GIT_EXECUTABLE` class + attribute, which can be read on the :class:`Git` class or any of its + instances to check what command is used to run git. This attribute should + not be confused with the related :envvar:`GIT_PYTHON_GIT_EXECUTABLE` + environment variable. The class attribute is set no matter how refreshing is + performed. + """ + # Discern which path to refresh with. + if path is not None: + new_git = os.path.expanduser(path) + new_git = os.path.abspath(new_git) + else: + new_git = os.environ.get(cls._git_exec_env_var, cls.git_exec_name) + + # Keep track of the old and new git executable path. + old_git = cls.GIT_PYTHON_GIT_EXECUTABLE + old_refresh_token = cls._refresh_token + cls.GIT_PYTHON_GIT_EXECUTABLE = new_git + cls._refresh_token = object() + + # Test if the new git executable path is valid. A GitCommandNotFound error is + # raised by us. A PermissionError is raised if the git executable cannot be + # executed for whatever reason. + has_git = False + try: + cls().version() + has_git = True + except (GitCommandNotFound, PermissionError): + pass + + # Warn or raise exception if test failed. 
+ if not has_git: + err = ( + dedent( + """\ + Bad git executable. + The git executable must be specified in one of the following ways: + - be included in your $PATH + - be set via $%s + - explicitly set via git.refresh() + """ + ) + % cls._git_exec_env_var + ) + + # Revert to whatever the old_git was. + cls.GIT_PYTHON_GIT_EXECUTABLE = old_git + cls._refresh_token = old_refresh_token + + if old_git is None: + # On the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is None) we only + # are quiet, warn, or error depending on the GIT_PYTHON_REFRESH value. + + # Determine what the user wants to happen during the initial refresh. We + # expect GIT_PYTHON_REFRESH to either be unset or be one of the + # following values: + # + # 0|q|quiet|s|silence|silent|n|none + # 1|w|warn|warning|l|log + # 2|r|raise|e|error|exception + + mode = os.environ.get(cls._refresh_env_var, "raise").lower() + + quiet = ["quiet", "q", "silence", "s", "silent", "none", "n", "0"] + warn = ["warn", "w", "warning", "log", "l", "1"] + error = ["error", "e", "exception", "raise", "r", "2"] + + if mode in quiet: + pass + elif mode in warn or mode in error: + err = dedent( + """\ + %s + All git commands will error until this is rectified. + + This initial message can be silenced or aggravated in the future by setting the + $%s environment variable. Use one of the following values: + - %s: for no message or exception + - %s: for a warning message (logging level CRITICAL, displayed by default) + - %s: for a raised exception + + Example: + export %s=%s + """ + ) % ( + err, + cls._refresh_env_var, + "|".join(quiet), + "|".join(warn), + "|".join(error), + cls._refresh_env_var, + quiet[0], + ) + + if mode in warn: + _logger.critical(err) + else: + raise ImportError(err) + else: + err = dedent( + """\ + %s environment variable has been set but it has been set with an invalid value. 
+ + Use only the following values: + - %s: for no message or exception + - %s: for a warning message (logging level CRITICAL, displayed by default) + - %s: for a raised exception + """ + ) % ( + cls._refresh_env_var, + "|".join(quiet), + "|".join(warn), + "|".join(error), + ) + raise ImportError(err) + + # We get here if this was the initial refresh and the refresh mode was + # not error. Go ahead and set the GIT_PYTHON_GIT_EXECUTABLE such that we + # discern the difference between the first refresh at import time + # and subsequent calls to git.refresh or this refresh method. + cls.GIT_PYTHON_GIT_EXECUTABLE = cls.git_exec_name + else: + # After the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is no longer + # None) we raise an exception. + raise GitCommandNotFound(new_git, err) + + return has_git + + @classmethod + def is_cygwin(cls) -> bool: + return is_cygwin_git(cls.GIT_PYTHON_GIT_EXECUTABLE) + + @overload + @classmethod + def polish_url(cls, url: str, is_cygwin: Literal[False] = ...) -> str: ... - class AutoInterrupt(object): + @overload + @classmethod + def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> str: ... + + @classmethod + def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> PathLike: + """Remove any backslashes from URLs to be written in config files. - """Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. + Windows might create config files containing paths with backslashes, but git + stops liking them as it will escape the backslashes. Hence we undo the escaping + just to be sure.
+ """ + if is_cygwin is None: + is_cygwin = cls.is_cygwin() - The wait method was overridden to perform automatic status code checking - and possibly raise.""" - __slots__ = ("proc", "args") + if is_cygwin: + url = cygpath(url) + else: + url = os.path.expandvars(url) + if url.startswith("~"): + url = os.path.expanduser(url) + url = url.replace("\\\\", "\\").replace("\\", "/") + return url + + @classmethod + def check_unsafe_protocols(cls, url: str) -> None: + """Check for unsafe protocols. + + Apart from the usual protocols (http, git, ssh), Git allows "remote helpers" + that have the form ``<transport>::<address>``. One of these helpers (``ext::``) + can be used to invoke any arbitrary command. + + See: + + - https://git-scm.com/docs/gitremote-helpers + - https://git-scm.com/docs/git-remote-ext + """ + match = cls.re_unsafe_protocol.match(url) + if match: + protocol = match.group(1) + raise UnsafeProtocolError( + f"The `{protocol}::` protocol looks suspicious, use `allow_unsafe_protocols=True` to allow it." + ) + + @classmethod + def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> None: + """Check for unsafe options. - def __init__(self, proc, args): + Some options that are passed to ``git <command>`` can be used to execute + arbitrary commands. These are blocked by default. + """ + # Options can be of the form `foo`, `--foo bar`, or `--foo=bar`, so we need to + # check if they start with "--foo" or if they are equal to "foo". + bare_unsafe_options = [option.lstrip("-") for option in unsafe_options] + for option in options: + for unsafe_option, bare_option in zip(unsafe_options, bare_unsafe_options): + if option.startswith(unsafe_option) or option == bare_option: + raise UnsafeOptionError( + f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it." + ) + + class AutoInterrupt: + """Process wrapper that terminates the wrapped process on finalization. + + This kills/interrupts the stored process instance once this instance goes out of + scope. It is used to prevent processes piling up in case iterators stop reading. + + All attributes are wired through to the contained process object. + + The wait method is overridden to perform automatic status code checking and + possibly raise. + """ + + __slots__ = ("proc", "args", "status") + + # If this is non-zero it will override any status code during _terminate, used + # to prevent race conditions in testing.
+ _status_code_if_terminate: int = 0 + + def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: self.proc = proc self.args = args + self.status: Union[int, None] = None - def __del__(self): - self.proc.stdout.close() - self.proc.stderr.close() - - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: + def _terminate(self) -> None: + """Terminate the underlying process.""" + if self.proc is None: return - # can be that nothing really exists anymore ... - if os is None: + proc = self.proc + self.proc = None + if proc.stdin: + proc.stdin.close() + if proc.stdout: + proc.stdout.close() + if proc.stderr: + proc.stderr.close() + # Did the process finish already so we have a return code? + try: + if proc.poll() is not None: + self.status = self._status_code_if_terminate or proc.poll() + return + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) + + # It can be that nothing really exists anymore... + if os is None or getattr(os, "kill", None) is None: return - # try to kill it + # Try to kill it. try: - os.kill(self.proc.pid, 2) # interrupt signal - self.proc.wait() # ensure process goes away - except OSError: - pass # ignore error when process already died - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + proc.terminate() + status = proc.wait() # Ensure the process goes away. 
+ + self.status = self._status_code_if_terminate or status + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) # END exception handling - def __getattr__(self, attr): + def __del__(self) -> None: + self._terminate() + + def __getattr__(self, attr: str) -> Any: return getattr(self.proc, attr) - def wait(self): + # TODO: Bad choice to mimic `proc.wait()` but with different args. + def wait(self, stderr: Union[None, str, bytes] = b"") -> int: """Wait for the process and return its status code. - :raise GitCommandError: if the return status is not 0""" - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) + :param stderr: + Previously read value of stderr, in case stderr is already closed. + + :warn: + May deadlock if output or error pipes are used and not handled + separately. + + :raise git.exc.GitCommandError: + If the return status is not 0. + """ + if stderr is None: + stderr_b = b"" + stderr_b = force_bytes(data=stderr, encoding="utf-8") + status: Union[int, None] + if self.proc is not None: + status = self.proc.wait() + p_stderr = self.proc.stderr + else: # Assume the underlying proc was killed earlier or never existed. + status = self.status + p_stderr = None + + def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: + if stream: + try: + return stderr_b + force_bytes(stream.read()) + except (OSError, ValueError): + return stderr_b or b"" + else: + return stderr_b or b"" + # END status handling + + if status != 0: + errstr = read_all_from_possibly_closed_stream(p_stderr) + _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) + raise GitCommandError(remove_password_if_present(self.args), status, errstr) return status - # END auto interrupt - class CatFileContentStream(object): + # END auto interrupt + class CatFileContentStream: """Object representing a sized read-only stream returning the contents of an object. 
- It behaves like a stream, but counts the data read and simulates an empty + + This behaves like a stream, but counts the data read and simulates an empty stream once our sized content region is empty. - If not all data is read to the end of the objects's lifetime, we read the - rest to assure the underlying stream continues to work""" - __slots__ = ('_stream', '_nbr', '_size') + If not all data are read to the end of the object's lifetime, we read the + rest to ensure the underlying stream continues to work. + """ + + __slots__ = ("_stream", "_nbr", "_size") - def __init__(self, size, stream): + def __init__(self, size: int, stream: IO[bytes]) -> None: self._stream = stream self._size = size - self._nbr = 0 # num bytes read + self._nbr = 0 # Number of bytes read. - # special case: if the object is empty, has null bytes, get the - # final newline right away. + # Special case: If the object is empty, has null bytes, get the final + # newline right away. if size == 0: stream.read(1) # END handle empty streams - def read(self, size=-1): + def read(self, size: int = -1) -> bytes: bytes_left = self._size - self._nbr if bytes_left == 0: - return '' + return b"" if size > -1: - # assure we don't try to read past our limit + # Ensure we don't try to read past our limit. size = min(bytes_left, size) else: - # they try to read all, make sure its not more than what remains + # They try to read all, make sure it's not more than what remains. size = bytes_left # END check early depletion data = self._stream.read(size) self._nbr += len(data) - # check for depletion, read our final byte to make the stream usable by others + # Check for depletion, read our final byte to make the stream usable by + # others. 
if self._size - self._nbr == 0: - self._stream.read(1) # final newline + self._stream.read(1) # final newline # END finish reading return data - def readline(self, size=-1): + def readline(self, size: int = -1) -> bytes: if self._nbr == self._size: - return '' + return b"" - # clamp size to lowest allowed value + # Clamp size to lowest allowed value. bytes_left = self._size - self._nbr if size > -1: size = min(bytes_left, size) @@ -173,19 +896,19 @@ def readline(self, size=-1): data = self._stream.readline(size) self._nbr += len(data) - # handle final byte + # Handle final byte. if self._size - self._nbr == 0: self._stream.read(1) # END finish reading return data - def readlines(self, size=-1): + def readlines(self, size: int = -1) -> List[bytes]: if self._nbr == self._size: - return list() + return [] - # leave all additional logic to our readline method, we just check the size - out = list() + # Leave all additional logic to our readline method, we just check the size. + out = [] nbr = 0 while True: line = self.readline() @@ -200,93 +923,187 @@ def readlines(self, size=-1): # END readline loop return out - def __iter__(self): + # skipcq: PYL-E0301 + def __iter__(self) -> "Git.CatFileContentStream": return self - def next(self): + def __next__(self) -> bytes: line = self.readline() if not line: raise StopIteration + return line - def __del__(self): + next = __next__ + + def __del__(self) -> None: bytes_left = self._size - self._nbr if bytes_left: - # read and discard - seeking is impossible within a stream - # includes terminating newline + # Read and discard - seeking is impossible within a stream. + # This includes any terminating newline. self._stream.read(bytes_left + 1) # END handle incomplete read - def __init__(self, working_dir=None): + def __init__(self, working_dir: Union[None, PathLike] = None) -> None: """Initialize this instance with: :param working_dir: - Git directory we should work in. 
If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories.""" - super(Git, self).__init__() - self._working_dir = working_dir - self._git_options = () + Git directory we should work in. If ``None``, we always work in the current + directory as returned by :func:`os.getcwd`. + This is meant to be the working tree directory if available, or the + ``.git`` directory in case of bare repositories. + """ + super().__init__() + self._working_dir = expand_path(working_dir) + self._git_options: Union[List[str], Tuple[str, ...]] = () + self._persistent_git_options: List[str] = [] - # cached command slots - self.cat_file_header = None - self.cat_file_all = None + # Extra environment variables to pass to git commands + self._environment: Dict[str, str] = {} - def __getattr__(self, name): - """A convenience method as it allows to call the command as if it was - an object. - :return: Callable object that will execute call _call_process with your arguments.""" - if name[0] == '_': - return LazyMixin.__getattr__(self, name) + # Cached version slots + self._version_info: Union[Tuple[int, ...], None] = None + self._version_info_token: object = None + + # Cached command slots + self.cat_file_header: Union[None, TBD] = None + self.cat_file_all: Union[None, TBD] = None + + def __getattribute__(self, name: str) -> Any: + if name == "USE_SHELL": + _warn_use_shell(False) + return super().__getattribute__(name) + + def __getattr__(self, name: str) -> Any: + """A convenience method as it allows to call the command as if it was an object. + + :return: + Callable object that will execute call :meth:`_call_process` with your + arguments. 
+ """ + if name.startswith("_"): + return super().__getattribute__(name) return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - def _set_cache_(self, attr): - if attr == '_version_info': - # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) - version_numbers = self._call_process('version').split(' ')[2] - self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4] if n.isdigit()) - else: - super(Git, self)._set_cache_(attr) - # END handle version info + def set_persistent_git_options(self, **kwargs: Any) -> None: + """Specify command line options to the git executable for subsequent + subcommand calls. + + :param kwargs: + A dict of keyword arguments. + These arguments are passed as in :meth:`_call_process`, but will be passed + to the git command rather than the subcommand. + """ + + self._persistent_git_options = self.transform_kwargs(split_single_char_options=True, **kwargs) @property - def working_dir(self): + def working_dir(self) -> Union[None, PathLike]: """:return: Git directory we are working on""" return self._working_dir @property - def version_info(self): + def version_info(self) -> Tuple[int, ...]: """ - :return: tuple(int, int, int, int) tuple with integers representing the major, minor - and additional version numbers as parsed from git version. - This value is generated on demand and is cached""" + :return: Tuple with integers representing the major, minor and additional + version numbers as parsed from :manpage:`git-version(1)`. Up to four fields + are used. + + This value is generated on demand and is cached. + """ + # Refreshing is global, but version_info caching is per-instance. + refresh_token = self._refresh_token # Copy token in case of concurrent refresh. + + # Use the cached version if obtained after the most recent refresh. 
+ if self._version_info_token is refresh_token: + assert self._version_info is not None, "Bug: corrupted token-check state" + return self._version_info + + # Run "git version" and parse it. + process_version = self._call_process("version") + version_string = process_version.split(" ")[2] + version_fields = version_string.split(".")[:4] + leading_numeric_fields = itertools.takewhile(str.isdigit, version_fields) + self._version_info = tuple(map(int, leading_numeric_fields)) + + # This value will be considered valid until the next refresh. + self._version_info_token = refresh_token return self._version_info - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """Handles executing the command on the shell and consumes and returns - the returned information (stdout) + @overload + def execute( + self, + command: Union[str, Sequence[Any]], + *, + as_process: Literal[True], + ) -> "AutoInterrupt": ... + + @overload + def execute( + self, + command: Union[str, Sequence[Any]], + *, + as_process: Literal[False] = False, + stdout_as_string: Literal[True], + ) -> Union[str, Tuple[int, str, str]]: ... + + @overload + def execute( + self, + command: Union[str, Sequence[Any]], + *, + as_process: Literal[False] = False, + stdout_as_string: Literal[False] = False, + ) -> Union[bytes, Tuple[int, bytes, str]]: ... + + @overload + def execute( + self, + command: Union[str, Sequence[Any]], + *, + with_extended_output: Literal[False], + as_process: Literal[False], + stdout_as_string: Literal[True], + ) -> str: ... + + @overload + def execute( + self, + command: Union[str, Sequence[Any]], + *, + with_extended_output: Literal[False], + as_process: Literal[False], + stdout_as_string: Literal[False], + ) -> bytes: ... 
+ + def execute( + self, + command: Union[str, Sequence[Any]], + istream: Union[None, BinaryIO] = None, + with_extended_output: bool = False, + with_exceptions: bool = True, + as_process: bool = False, + output_stream: Union[None, BinaryIO] = None, + stdout_as_string: bool = True, + kill_after_timeout: Union[None, float] = None, + with_stdout: bool = True, + universal_newlines: bool = False, + shell: Union[None, bool] = None, + env: Union[None, Mapping[str, str]] = None, + max_chunk_size: int = io.DEFAULT_BUFFER_SIZE, + strip_newline_in_stdout: bool = True, + **subprocess_kwargs: Any, + ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], AutoInterrupt]: + R"""Handle executing the command, and consume and return the returned + information (stdout). :param command: The command argument list to execute. - It should be a string, or a sequence of program arguments. The + It should be a sequence of program arguments, or a string. The program to execute is the first item in the args sequence or string. :param istream: - Standard input filehandle passed to subprocess.Popen. - - :param with_keep_cwd: - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. + Standard input filehandle passed to :class:`subprocess.Popen`. :param with_extended_output: Whether to return a (status, stdout, stderr) tuple. @@ -296,89 +1113,249 @@ def execute(self, command, :param as_process: Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. 
+ streams can be read on demand. This will render `with_extended_output` + and `with_exceptions` ineffective - the caller will have to deal with + the details. It is important to note that the process will be placed + into an :class:`AutoInterrupt` wrapper that will interrupt the process + once it goes out of scope. If you use the command in iterators, you + should pass the whole process instance instead of a single stream. :param output_stream: If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. + copied to the given stream instead of being returned as a string. + This feature only has any effect if `as_process` is ``False``. + + :param stdout_as_string: + If ``False``, the command's standard output will be bytes. Otherwise, it + will be decoded into a string using the default encoding (usually UTF-8). + The latter can fail, if the output contains binary data. + + :param kill_after_timeout: + Specifies a timeout in seconds for the git command, after which the process + should be killed. This will have no effect if `as_process` is set to + ``True``. It is set to ``None`` by default and will let the process run + until the timeout is explicitly specified. Uses of this feature should be + carefully considered, due to the following limitations: + + 1. This feature is not supported at all on Windows. + 2. Effectiveness may vary by operating system. ``ps --ppid`` is used to + enumerate child processes, which is available on most GNU/Linux systems + but not most others. + 3. Deeper descendants do not receive signals, though they may sometimes + terminate as a consequence of their parent processes being killed. + 4. 
`kill_after_timeout` uses ``SIGKILL``, which can have negative side + effects on a repository. For example, stale locks in case of + :manpage:`git-gc(1)` could render the repository incapable of accepting + changes until the lock is manually removed. + + :param with_stdout: + If ``True``, default ``True``, we open stdout on the created process. + + :param universal_newlines: + If ``True``, pipes will be opened as text, and lines are split at all known + line endings. + + :param shell: + Whether to invoke commands through a shell + (see :class:`Popen(..., shell=True) <subprocess.Popen>`). + If this is not ``None``, it overrides :attr:`USE_SHELL`. + + Passing ``shell=True`` to this or any other GitPython function should be + avoided, as it is unsafe under most circumstances. This is because it is + typically not feasible to fully consider and account for the effect of shell + expansions, especially when passing ``shell=True`` to other methods that + forward it to :meth:`Git.execute`. Passing ``shell=True`` is also no longer + needed (nor useful) to work around any known operating system specific + issues. + + :param env: + A dictionary of environment variables to be passed to + :class:`subprocess.Popen`. + + :param max_chunk_size: + Maximum number of bytes in one chunk of data passed to the `output_stream` + in one invocation of its ``write()`` method. If the given number is not + positive then the default value is used. + + :param strip_newline_in_stdout: + Whether to strip the trailing ``\n`` of the command stdout. :param subprocess_kwargs: - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you + Keyword arguments to be passed to :class:`subprocess.Popen`. Please note + that some of the valid kwargs are already set by this method; the ones you specify may not be the same ones.
:return: - * str(output) if extended_output = False (Default) - * tuple(int(status), str(stdout), str(stderr)) if extended_output = True + * str(output), if `extended_output` is ``False`` (Default) + * tuple(int(status), str(stdout), str(stderr)), + if `extended_output` is ``True`` - if ouput_stream is True, the stdout value will be your output stream: - * output_stream if extended_output = False - * tuple(int(status), output_stream, str(stderr)) if extended_output = True + If `output_stream` is ``True``, the stdout value will be your output stream: - Note git is executed with LC_MESSAGES="C" to ensure consitent + * output_stream, if `extended_output` is ``False`` + * tuple(int(status), output_stream, str(stderr)), + if `extended_output` is ``True`` + + Note that git is executed with ``LC_MESSAGES="C"`` to ensure consistent output regardless of system language. - :raise GitCommandError: + :raise git.exc.GitCommandError: :note: - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module.""" - if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': - print ' '.join(command) + If you add additional keyword arguments to the signature of this method, you + must update the ``execute_kwargs`` variable housed in this module. + """ + # Remove password for the command if present. + redacted_command = remove_password_if_present(command) + if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != "full" or as_process): + _logger.info(" ".join(redacted_command)) # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() + try: + cwd = self._working_dir or os.getcwd() # type: Union[None, str] + if not os.access(str(cwd), os.X_OK): + cwd = None + except FileNotFoundError: + cwd = None + + # Start the process. 
+ inline_env = env + env = os.environ.copy() + # Attempt to force all output to plain ASCII English, which is what some parsing + # code may expect. + # According to https://askubuntu.com/a/311796, we are setting LANGUAGE as well + # just to be sure. + env["LANGUAGE"] = "C" + env["LC_ALL"] = "C" + env.update(self._environment) + if inline_env is not None: + env.update(inline_env) + + if sys.platform == "win32": + if kill_after_timeout is not None: + raise GitCommandError( + redacted_command, + '"kill_after_timeout" feature is not supported on Windows.', + ) + cmd_not_found_exception = OSError else: - cwd = self._working_dir + cmd_not_found_exception = FileNotFoundError + # END handle + + stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb") + if shell is None: + # Get the value of USE_SHELL with no deprecation warning. Do this without + # warnings.catch_warnings, to avoid a race condition with application code + # configuring warnings. The value could be looked up in type(self).__dict__ + # or Git.__dict__, but those can break under some circumstances. This works + # the same as self.USE_SHELL in more situations; see Git.__getattribute__. + shell = super().__getattribute__("USE_SHELL") + _logger.debug( + "Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)", + redacted_command, + cwd, + "" if istream else "None", + shell, + universal_newlines, + ) + try: + proc = safer_popen( + command, + env=env, + cwd=cwd, + bufsize=-1, + stdin=(istream or DEVNULL), + stderr=PIPE, + stdout=stdout_sink, + shell=shell, + universal_newlines=universal_newlines, + encoding=defenc if universal_newlines else None, + **subprocess_kwargs, + ) + except cmd_not_found_exception as err: + raise GitCommandNotFound(redacted_command, err) from err + else: + # Replace with a typeguard for Popen[bytes]? 
+ proc.stdout = cast(BinaryIO, proc.stdout) + proc.stderr = cast(BinaryIO, proc.stderr) - # Start the process - env = os.environ.copy() - env["LC_MESSAGES"] = "C" - proc = Popen(command, - env=env, - cwd=cwd, - stdin=istream, - stderr=PIPE, - stdout=PIPE, - # Prevent cmd prompt popups on windows by using a shell ... . - # See https://github.com/gitpython-developers/GitPython/pull/126 - shell=sys.platform == 'win32', - close_fds=(os.name == 'posix'), # unsupported on linux - **subprocess_kwargs - ) if as_process: return self.AutoInterrupt(proc, command) - # Wait for the process to return + if sys.platform != "win32" and kill_after_timeout is not None: + # Help mypy figure out this is not None even when used inside communicate(). + timeout = kill_after_timeout + + def kill_process(pid: int) -> None: + """Callback to kill a process. + + This callback implementation would be ineffective and unsafe on Windows. + """ + p = Popen(["ps", "--ppid", str(pid)], stdout=PIPE) + child_pids = [] + if p.stdout is not None: + for line in p.stdout: + if len(line.split()) > 0: + local_pid = (line.split())[0] + if local_pid.isdigit(): + child_pids.append(int(local_pid)) + try: + os.kill(pid, signal.SIGKILL) + for child_pid in child_pids: + try: + os.kill(child_pid, signal.SIGKILL) + except OSError: + pass + # Tell the main routine that the process was killed. + kill_check.set() + except OSError: + # It is possible that the process gets completed in the duration + # after timeout happens and before we try to kill the process. + pass + return + + def communicate() -> Tuple[AnyStr, AnyStr]: + watchdog.start() + out, err = proc.communicate() + watchdog.cancel() + if kill_check.is_set(): + err = 'Timeout: the command "%s" did not complete in %d ' "secs." 
% ( + " ".join(redacted_command), + timeout, + ) + if not universal_newlines: + err = err.encode(defenc) + return out, err + + # END helpers + + kill_check = threading.Event() + watchdog = threading.Timer(timeout, kill_process, args=(proc.pid,)) + else: + communicate = proc.communicate + + # Wait for the process to return. status = 0 - stdout_value = '' - stderr_value = '' + stdout_value: Union[str, bytes] = b"" + stderr_value: Union[str, bytes] = b"" + newline = "\n" if universal_newlines else b"\n" try: if output_stream is None: - stdout_value, stderr_value = proc.communicate() - # strip trailing "\n" - if stdout_value.endswith("\n"): + stdout_value, stderr_value = communicate() + # Strip trailing "\n". + if stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] stdout_value = stdout_value[:-1] - if stderr_value.endswith("\n"): + if stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] + status = proc.returncode else: - stream_copy(proc.stdout, output_stream, self.max_chunk_size) - stdout_value = output_stream + max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE + stream_copy(proc.stdout, output_stream, max_chunk_size) + stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith("\n"): + # Strip trailing "\n". 
+ if stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.wait() # END stdout handling @@ -386,173 +1363,263 @@ def execute(self, command, proc.stdout.close() proc.stderr.close() - if self.GIT_PYTHON_TRACE == 'full': - cmdstr = " ".join(command) + if self.GIT_PYTHON_TRACE == "full": + cmdstr = " ".join(redacted_command) + + def as_text(stdout_value: Union[bytes, str]) -> str: + return not output_stream and safe_decode(stdout_value) or "" + + # END as_text + if stderr_value: - print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) + _logger.info( + "%s -> %d; stdout: '%s'; stderr: '%s'", + cmdstr, + status, + as_text(stdout_value), + safe_decode(stderr_value), + ) elif stdout_value: - print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value) + _logger.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value)) else: - print "%s -> %d" % (cmdstr, status) + _logger.info("%s -> %d", cmdstr, status) # END handle debug printing if with_exceptions and status != 0: - if with_extended_output: - raise GitCommandError(command, status, stderr_value, stdout_value) - else: - raise GitCommandError(command, status, stderr_value) + raise GitCommandError(redacted_command, status, stderr_value, stdout_value) - # Allow access to the command's status code + if isinstance(stdout_value, bytes) and stdout_as_string: # Could also be output_stream. + stdout_value = safe_decode(stdout_value) + + # Allow access to the command's status code. 
if with_extended_output: - return (status, stdout_value, stderr_value) + return (status, stdout_value, safe_decode(stderr_value)) else: return stdout_value - def transform_kwargs(self, split_single_char_options=False, **kwargs): - """Transforms Python style kwargs into git command line options.""" - args = list() + def environment(self) -> Dict[str, str]: + return self._environment + + def update_environment(self, **kwargs: Any) -> Dict[str, Union[str, None]]: + """Set environment variables for future git invocations. Return all changed + values in a format that can be passed back into this function to revert the + changes. + + Examples:: + + old_env = self.update_environment(PWD='/tmp') + self.update_environment(**old_env) + + :param kwargs: + Environment variables to use for git processes. + + :return: + Dict that maps environment variables to their old values + """ + old_env = {} + for key, value in kwargs.items(): + # Set value if it is None. + if value is not None: + old_env[key] = self._environment.get(key) + self._environment[key] = value + # Remove key from environment if its value is None. + elif key in self._environment: + old_env[key] = self._environment[key] + del self._environment[key] + return old_env + + @contextlib.contextmanager + def custom_environment(self, **kwargs: Any) -> Iterator[None]: + """A context manager around the above :meth:`update_environment` method to + restore the environment back to its previous state after operation. + + Examples:: + + with self.custom_environment(GIT_SSH='/bin/ssh_wrapper'): + repo.remotes.origin.fetch() + + :param kwargs: + See :meth:`update_environment`. 
+ """ + old_env = self.update_environment(**kwargs) + try: + yield + finally: + self.update_environment(**old_env) + + def transform_kwarg(self, name: str, value: Any, split_single_char_options: bool) -> List[str]: + if len(name) == 1: + if value is True: + return ["-%s" % name] + elif value not in (False, None): + if split_single_char_options: + return ["-%s" % name, "%s" % value] + else: + return ["-%s%s" % (name, value)] + else: + if value is True: + return ["--%s" % dashify(name)] + elif value is not False and value is not None: + return ["--%s=%s" % (dashify(name), value)] + return [] + + def transform_kwargs(self, split_single_char_options: bool = True, **kwargs: Any) -> List[str]: + """Transform Python-style kwargs into git command line options.""" + args = [] for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - if split_single_char_options: - args.extend(["-%s" % k, "%s" % v]) - else: - args.append("-%s%s" % (k, v)) + if isinstance(v, (list, tuple)): + for value in v: + args += self.transform_kwarg(k, value, split_single_char_options) else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) + args += self.transform_kwarg(k, v, split_single_char_options) return args @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list, tuple)): - if isinstance(arg_list, unicode): - return [arg_list.encode('utf-8')] - return [str(arg_list)] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args(arg)) - elif isinstance(arg_list, unicode): - outlist.append(arg_list.encode('utf-8')) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg + def _unpack_args(cls, arg_list: Sequence[str]) -> List[str]: + outlist = [] + if isinstance(arg_list, (list, tuple)): + for arg in arg_list: + outlist.extend(cls._unpack_args(arg)) + else: + 
outlist.append(str(arg_list)) + return outlist - def __call__(self, **kwargs): - """Specify command line options to the git executable - for a subcommand call + def __call__(self, **kwargs: Any) -> "Git": + """Specify command line options to the git executable for a subcommand call. :param kwargs: - is a dict of keyword arguments. - these arguments are passed as in _call_process - but will be passed to the git command rather than - the subcommand. - - ``Examples``:: - git(work_tree='/tmp').difftool()""" - self._git_options = self.transform_kwargs( - split_single_char_options=True, **kwargs) + A dict of keyword arguments. + These arguments are passed as in :meth:`_call_process`, but will be passed + to the git command rather than the subcommand. + + Examples:: + + git(work_tree='/tmp').difftool() + """ + self._git_options = self.transform_kwargs(split_single_char_options=True, **kwargs) return self - def _call_process(self, method, *args, **kwargs): - """Run the given git command with the specified arguments and return - the result as a String + @overload + def _call_process( + self, method: str, *args: None, **kwargs: None + ) -> str: ... # If no args were given, execute the call with all defaults. + + @overload + def _call_process( + self, + method: str, + istream: int, + as_process: Literal[True], + *args: Any, + **kwargs: Any, + ) -> "Git.AutoInterrupt": ... + + @overload + def _call_process( + self, method: str, *args: Any, **kwargs: Any + ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: ... + + def _call_process( + self, method: str, *args: Any, **kwargs: Any + ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: + """Run the given git command with the specified arguments and return the result + as a string. :param method: - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. + The command. 
Contained ``_`` characters will be converted to hyphens, such + as in ``ls_files`` to call ``ls-files``. :param args: - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent + The list of arguments. If ``None`` is included, it will be pruned. + This allows your commands to call git more conveniently, as ``None`` is + realized as non-existent. :param kwargs: - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). + Contains key-values for the following: + + - The :meth:`execute()` kwds, as listed in ``execute_kwargs``. + - "Command options" to be converted by :meth:`transform_kwargs`. + - The ``insert_kwargs_after`` key which its value must match one of + ``*args``. + + It also contains any command options, to be appended after the matched arg. + + Examples:: - ``Examples``:: git.rev_list('master', max_count=10, header=True) - :return: Same as ``execute``""" - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = dict() - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass + turns into:: - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) + git rev-list max-count 10 --header master - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args + :return: + Same as :meth:`execute`. If no args are given, used :meth:`execute`'s + default (especially ``as_process = False``, ``stdout_as_string = True``) and + return :class:`str`. + """ + # Handle optional arguments prior to calling transform_kwargs. + # Otherwise these'll end up in args, which is bad. 
+ exec_kwargs = {k: v for k, v in kwargs.items() if k in execute_kwargs} + opts_kwargs = {k: v for k, v in kwargs.items() if k not in execute_kwargs} - def make_call(): - call = [self.GIT_PYTHON_GIT_EXECUTABLE] + insert_after_this_arg = opts_kwargs.pop("insert_kwargs_after", None) - # add the git options, the reset to empty - # to avoid side_effects - call.extend(self._git_options) - self._git_options = () + # Prepare the argument list. - call.extend([dashify(method)]) - call.extend(args) - return call - # END utility to recreate call after changes + opt_args = self.transform_kwargs(**opts_kwargs) + ext_args = self._unpack_args([a for a in args if a is not None]) - if sys.platform == 'win32': + if insert_after_this_arg is None: + args_list = opt_args + ext_args + else: try: - try: - return self.execute(make_call(), **_kwargs) - except WindowsError: - # did we switch to git.cmd already, or was it changed from default ? permanently fail - if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name: - raise - # END handle overridden variable - type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win - call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args) + index = ext_args.index(insert_after_this_arg) + except ValueError as err: + raise ValueError( + "Couldn't find argument '%s' in args %s to insert cmd options after" + % (insert_after_this_arg, str(ext_args)) + ) from err + # END handle error + args_list = ext_args[: index + 1] + opt_args + ext_args[index + 1 :] + # END handle opts_kwargs + + call = [self.GIT_PYTHON_GIT_EXECUTABLE] + + # Add persistent git options. + call.extend(self._persistent_git_options) + + # Add the git options, then reset to empty to avoid side effects. + call.extend(self._git_options) + self._git_options = () - try: - return self.execute(make_call(), **_kwargs) - finally: - import warnings - msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%." 
- msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var - warnings.warn(msg) - # END print of warning - # END catch first failure - except WindowsError: - raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE) - # END provide better error message - else: - return self.execute(make_call(), **_kwargs) - # END handle windows default installation + call.append(dashify(method)) + call.extend(args_list) - def _parse_object_header(self, header_line): + return self.execute(call, **exec_kwargs) + + def _parse_object_header(self, header_line: str) -> Tuple[str, str, int]: """ :param header_line: - type_string size_as_int + A line of the form:: - :return: (hex_sha, type_string, size_as_int) + type_string size_as_int - :raise ValueError: if the header contains indication for an error due to - incorrect input sha""" + :return: + (hex_sha, type_string, size_as_int) + + :raise ValueError: + If the header contains indication for an error due to incorrect input sha. 
+ """ tokens = header_line.split() if len(tokens) != 3: if not tokens: - raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) + err_msg = ( + f"SHA is empty, possible dubious ownership in the repository " + f"""at {self._working_dir}.\n If this is unintended run:\n\n """ + f""" "git config --global --add safe.directory {self._working_dir}" """ + ) + raise ValueError(err_msg) else: raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) # END handle actual return value @@ -562,14 +1629,21 @@ def _parse_object_header(self, header_line): raise ValueError("Failed to parse header: %r" % header_line) return (tokens[0], tokens[1], int(tokens[2])) - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" + def _prepare_ref(self, ref: AnyStr) -> bytes: + # Required for command to separate refs on stdin, as bytes. + if isinstance(ref, bytes): + # Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text. + refstr: str = ref.decode("ascii") + elif not isinstance(ref, str): + refstr = str(ref) # Could be ref-object. 
+ else: + refstr = ref - def __get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs): + if not refstr.endswith("\n"): + refstr += "\n" + return refstr.encode(defenc) + + def _get_persistent_cmd(self, attr_name: str, cmd_name: str, *args: Any, **kwargs: Any) -> "Git.AutoInterrupt": cur_val = getattr(self, attr_name) if cur_val is not None: return cur_val @@ -579,48 +1653,72 @@ def __get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs): cmd = self._call_process(cmd_name, *args, **options) setattr(self, attr_name, cmd) + cmd = cast("Git.AutoInterrupt", cmd) return cmd - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) + def __get_object_header(self, cmd: "Git.AutoInterrupt", ref: AnyStr) -> Tuple[str, str, int]: + if cmd.stdin and cmd.stdout: + cmd.stdin.write(self._prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + else: + raise ValueError("cmd stdin was empty") - def get_object_header(self, ref): - """ Use this method to quickly examine the type and size of the object behind - the given ref. + def get_object_header(self, ref: str) -> Tuple[str, str, int]: + """Use this method to quickly examine the type and size of the object behind the + given ref. - :note: The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. + :note: + The method will only suffer from the costs of command invocation once and + reuses the command in subsequent calls. 
- :return: (hexsha, type_string, size_as_int)""" - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + :return: + (hexsha, type_string, size_as_int) + """ + cmd = self._get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) return self.__get_object_header(cmd, ref) - def get_object_data(self, ref): - """ As get_object_header, but returns object data as well - :return: (hexsha, type_string, size_as_int,data_string) - :note: not threadsafe""" + def get_object_data(self, ref: str) -> Tuple[str, str, int, bytes]: + """Similar to :meth:`get_object_header`, but returns object data as well. + + :return: + (hexsha, type_string, size_as_int, data_string) + + :note: + Not threadsafe. + """ hexsha, typename, size, stream = self.stream_object_data(ref) data = stream.read(size) - del(stream) + del stream return (hexsha, typename, size, data) - def stream_object_data(self, ref): - """As get_object_header, but returns the data as a stream - :return: (hexsha, type_string, size_as_int, stream) - :note: This method is not threadsafe, you need one independent Command instance - per thread to be safe !""" - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + def stream_object_data(self, ref: str) -> Tuple[str, str, int, "Git.CatFileContentStream"]: + """Similar to :meth:`get_object_data`, but returns the data as a stream. + + :return: + (hexsha, type_string, size_as_int, stream) + + :note: + This method is not threadsafe. You need one independent :class:`Git` + instance per thread to be safe! 
+ """ + cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True) hexsha, typename, size = self.__get_object_header(cmd, ref) - return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + cmd_stdout = cmd.stdout if cmd.stdout is not None else io.BytesIO() + return (hexsha, typename, size, self.CatFileContentStream(size, cmd_stdout)) - def clear_cache(self): + def clear_cache(self) -> "Git": """Clear all kinds of internal caches to release resources. Currently persistent commands will be interrupted. - :return: self""" + :return: + self + """ + for cmd in (self.cat_file_all, self.cat_file_header): + if cmd: + cmd.__del__() + self.cat_file_all = None self.cat_file_header = None return self diff --git a/git/compat.py b/git/compat.py new file mode 100644 index 000000000..d7d9a55a9 --- /dev/null +++ b/git/compat.py @@ -0,0 +1,165 @@ +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Utilities to help provide compatibility with Python 3. + +This module exists for historical reasons. Code outside GitPython may make use of public +members of this module, but is unlikely to benefit from doing so. GitPython continues to +use some of these utilities, in some cases for compatibility across different platforms. 
+""" + +import locale +import os +import sys +import warnings + +from gitdb.utils.encoding import force_bytes, force_text # noqa: F401 + +# typing -------------------------------------------------------------------- + +from typing import ( + Any, # noqa: F401 + AnyStr, + Dict, # noqa: F401 + IO, # noqa: F401 + List, + Optional, + TYPE_CHECKING, + Tuple, # noqa: F401 + Type, # noqa: F401 + Union, + overload, +) + +# --------------------------------------------------------------------------- + + +_deprecated_platform_aliases = { + "is_win": os.name == "nt", + "is_posix": os.name == "posix", + "is_darwin": sys.platform == "darwin", +} + + +def _getattr(name: str) -> Any: + try: + value = _deprecated_platform_aliases[name] + except KeyError: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None + + warnings.warn( + f"{__name__}.{name} and other is_ aliases are deprecated. " + "Write the desired os.name or sys.platform check explicitly instead.", + DeprecationWarning, + stacklevel=2, + ) + return value + + +if not TYPE_CHECKING: # Preserve static checking for undefined/misspelled attributes. + __getattr__ = _getattr + + +def __dir__() -> List[str]: + return [*globals(), *_deprecated_platform_aliases] + + +is_win: bool +"""Deprecated alias for ``os.name == "nt"`` to check for native Windows. + +This is deprecated because it is clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly, especially in cases where it matters which is +used. + +:note: + ``is_win`` is ``False`` on Cygwin, but is often wrongly assumed ``True``. To detect + Cygwin, use ``sys.platform == "cygwin"``. +""" + +is_posix: bool +"""Deprecated alias for ``os.name == "posix"`` to check for Unix-like ("POSIX") systems. + +This is deprecated because it clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly, especially in cases where it matters which is +used. 
+ +:note: + For POSIX systems, more detailed information is available in :attr:`sys.platform`, + while :attr:`os.name` is always ``"posix"`` on such systems, including macOS + (Darwin). +""" + +is_darwin: bool +"""Deprecated alias for ``sys.platform == "darwin"`` to check for macOS (Darwin). + +This is deprecated because it clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly. + +:note: + For macOS (Darwin), ``os.name == "posix"`` as in other Unix-like systems, while + ``sys.platform == "darwin"``. +""" + +defenc = sys.getfilesystemencoding() +"""The encoding used to convert between Unicode and bytes filenames.""" + + +@overload +def safe_decode(s: None) -> None: ... + + +@overload +def safe_decode(s: AnyStr) -> str: ... + + +def safe_decode(s: Union[AnyStr, None]) -> Optional[str]: + """Safely decode a binary string to Unicode.""" + if isinstance(s, str): + return s + elif isinstance(s, bytes): + return s.decode(defenc, "surrogateescape") + elif s is None: + return None + else: + raise TypeError("Expected bytes or text, but got %r" % (s,)) + + +@overload +def safe_encode(s: None) -> None: ... + + +@overload +def safe_encode(s: AnyStr) -> bytes: ... + + +def safe_encode(s: Optional[AnyStr]) -> Optional[bytes]: + """Safely encode a binary string to Unicode.""" + if isinstance(s, str): + return s.encode(defenc) + elif isinstance(s, bytes): + return s + elif s is None: + return None + else: + raise TypeError("Expected bytes or text, but got %r" % (s,)) + + +@overload +def win_encode(s: None) -> None: ... + + +@overload +def win_encode(s: AnyStr) -> bytes: ... 
+ + +def win_encode(s: Optional[AnyStr]) -> Optional[bytes]: + """Encode Unicode strings for process arguments on Windows.""" + if isinstance(s, str): + return s.encode(locale.getpreferredencoding(False)) + elif isinstance(s, bytes): + return s + elif s is not None: + raise TypeError("Expected bytes or text, but got %r" % (s,)) + return None diff --git a/git/config.py b/git/config.py index 8a15466f4..de3508360 100644 --- a/git/config.py +++ b/git/config.py @@ -1,35 +1,92 @@ -# config.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -"""Module containing module parser implementation able to properly read and write -configuration files""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -import re -import os -import ConfigParser as cp +"""Parser for reading and writing configuration files.""" + +__all__ = ["GitConfigParser", "SectionConstraint"] + +import abc +import configparser as cp +import fnmatch +from functools import wraps import inspect -import cStringIO +from io import BufferedReader, IOBase +import logging +import os +import os.path as osp +import re +import sys -from git.odict import OrderedDict +from git.compat import defenc, force_text from git.util import LockFile -__all__ = ('GitConfigParser', 'SectionConstraint') +# typing------------------------------------------------------- + +from typing import ( + Any, + Callable, + Generic, + IO, + List, + Dict, + Sequence, + TYPE_CHECKING, + Tuple, + TypeVar, + Union, + cast, +) + +from git.types import Lit_config_levels, ConfigLevels_Tup, PathLike, assert_never, _T + +if TYPE_CHECKING: + from io import BytesIO + + from git.repo.base import Repo +T_ConfigParser = TypeVar("T_ConfigParser", bound="GitConfigParser") +T_OMD_value = TypeVar("T_OMD_value", str, bytes, int, 
float, bool) -class MetaParserBuilder(type): +if sys.version_info[:3] < (3, 7, 2): + # typing.Ordereddict not added until Python 3.7.2. + from collections import OrderedDict - """Utlity class wrapping base-class methods into decorators that assure read-only properties""" - def __new__(metacls, name, bases, clsdict): + OrderedDict_OMD = OrderedDict +else: + from typing import OrderedDict + + OrderedDict_OMD = OrderedDict[str, List[T_OMD_value]] # type: ignore[assignment, misc] + +# ------------------------------------------------------------- + +_logger = logging.getLogger(__name__) + +CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") +"""The configuration level of a configuration file.""" + +CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +"""Section pattern to detect conditional includes. + +See: https://git-scm.com/docs/git-config#_conditional_includes +""" + + +class MetaParserBuilder(abc.ABCMeta): # noqa: B024 + """Utility class wrapping base-class methods into decorators that assure read-only + properties.""" + + def __new__(cls, name: str, bases: Tuple, clsdict: Dict[str, Any]) -> "MetaParserBuilder": + """Equip all base-class methods with a needs_values decorator, and all non-const + methods with a :func:`set_dirty_and_flush_changes` decorator in addition to + that. 
""" - Equip all base-class methods with a needs_values decorator, and all non-const methods - with a set_dirty_and_flush_changes decorator in addition to that.""" - kmm = '_mutating_methods_' + kmm = "_mutating_methods_" if kmm in clsdict: mutating_methods = clsdict[kmm] for base in bases: - methods = (t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_")) + methods = (t for t in inspect.getmembers(base, inspect.isroutine) if not t[0].startswith("_")) for name, method in methods: if name in clsdict: continue @@ -43,341 +100,695 @@ def __new__(metacls, name, bases, clsdict): # END for each base # END if mutating methods configuration is set - new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) + new_type = super().__new__(cls, name, bases, clsdict) return new_type -def needs_values(func): - """Returns method assuring we read values (on demand) before we try to access them""" +def needs_values(func: Callable[..., _T]) -> Callable[..., _T]: + """Return a method for ensuring we read values (on demand) before we try to access + them.""" - def assure_data_present(self, *args, **kwargs): + @wraps(func) + def assure_data_present(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T: self.read() return func(self, *args, **kwargs) + # END wrapper method - assure_data_present.__name__ = func.__name__ return assure_data_present -def set_dirty_and_flush_changes(non_const_func): - """Return method that checks whether given non constant function may be called. - If so, the instance will be set dirty. - Additionally, we flush the changes right to disk""" +def set_dirty_and_flush_changes(non_const_func: Callable[..., _T]) -> Callable[..., _T]: + """Return a method that checks whether given non constant function may be called. - def flush_changes(self, *args, **kwargs): + If so, the instance will be set dirty. Additionally, we flush the changes right to + disk. 
+ """ + + def flush_changes(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T: rval = non_const_func(self, *args, **kwargs) + self._dirty = True self.write() return rval + # END wrapper method flush_changes.__name__ = non_const_func.__name__ return flush_changes -class SectionConstraint(object): - +class SectionConstraint(Generic[T_ConfigParser]): """Constrains a ConfigParser to only option commands which are constrained to always use the section we have been initialized with. - It supports all ConfigParser methods that operate on an option""" + It supports all ConfigParser methods that operate on an option. + + :note: + If used as a context manager, will release the wrapped ConfigParser. + """ + __slots__ = ("_config", "_section_name") - _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option", - "remove_section", "remove_option", "options") - def __init__(self, config, section): + _valid_attrs_ = ( + "get_value", + "set_value", + "get", + "set", + "getint", + "getfloat", + "getboolean", + "has_option", + "remove_section", + "remove_option", + "options", + ) + + def __init__(self, config: T_ConfigParser, section: str) -> None: self._config = config self._section_name = section - def __getattr__(self, attr): + def __del__(self) -> None: + # Yes, for some reason, we have to call it explicitly for it to work in PY3 ! + # Apparently __del__ doesn't get call anymore if refcount becomes 0 + # Ridiculous ... . 
+ self._config.release() + + def __getattr__(self, attr: str) -> Any: if attr in self._valid_attrs_: return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) - return super(SectionConstraint, self).__getattribute__(attr) + return super().__getattribute__(attr) - def _call_config(self, method, *args, **kwargs): - """Call the configuration at the given method which must take a section name - as first argument""" + def _call_config(self, method: str, *args: Any, **kwargs: Any) -> Any: + """Call the configuration at the given method which must take a section name as + first argument.""" return getattr(self._config, method)(self._section_name, *args, **kwargs) @property - def config(self): - """return: Configparser instance we constrain""" + def config(self) -> T_ConfigParser: + """return: ConfigParser instance we constrain""" return self._config + def release(self) -> None: + """Equivalent to :meth:`GitConfigParser.release`, which is called on our + underlying parser instance.""" + return self._config.release() + + def __enter__(self) -> "SectionConstraint[T_ConfigParser]": + self._config.__enter__() + return self + + def __exit__(self, exception_type: str, exception_value: str, traceback: str) -> None: + self._config.__exit__(exception_type, exception_value, traceback) + + +class _OMD(OrderedDict_OMD): + """Ordered multi-dict.""" + + def __setitem__(self, key: str, value: _T) -> None: + super().__setitem__(key, [value]) + + def add(self, key: str, value: Any) -> None: + if key not in self: + super().__setitem__(key, [value]) + return + + super().__getitem__(key).append(value) + + def setall(self, key: str, values: List[_T]) -> None: + super().__setitem__(key, values) + + def __getitem__(self, key: str) -> Any: + return super().__getitem__(key)[-1] + + def getlast(self, key: str) -> Any: + return super().__getitem__(key)[-1] -class GitConfigParser(cp.RawConfigParser, object): + def setlast(self, key: str, value: Any) -> None: + if key not in self: + 
super().__setitem__(key, [value]) + return + prior = super().__getitem__(key) + prior[-1] = value + + def get(self, key: str, default: Union[_T, None] = None) -> Union[_T, None]: + return super().get(key, [default])[-1] + + def getall(self, key: str) -> List[_T]: + return super().__getitem__(key) + + def items(self) -> List[Tuple[str, _T]]: # type: ignore[override] + """List of (key, last value for key).""" + return [(k, self[k]) for k in self] + + def items_all(self) -> List[Tuple[str, List[_T]]]: + """List of (key, list of values for key).""" + return [(k, self.getall(k)) for k in self] + + +def get_config_path(config_level: Lit_config_levels) -> str: + # We do not support an absolute path of the gitconfig on Windows. + # Use the global config instead. + if sys.platform == "win32" and config_level == "system": + config_level = "global" + + if config_level == "system": + return "/etc/gitconfig" + elif config_level == "user": + config_home = os.environ.get("XDG_CONFIG_HOME") or osp.join(os.environ.get("HOME", "~"), ".config") + return osp.normpath(osp.expanduser(osp.join(config_home, "git", "config"))) + elif config_level == "global": + return osp.normpath(osp.expanduser("~/.gitconfig")) + elif config_level == "repository": + raise ValueError("No repo to get repository configuration from. Use Repo._get_config_path") + else: + # Should not reach here. Will raise ValueError if does. Static typing will warn + # about missing elifs. + assert_never( # type: ignore[unreachable] + config_level, + ValueError(f"Invalid configuration level: {config_level!r}"), + ) + + +class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): """Implements specifics required to read git style configuration files. - This variation behaves much like the git.config command such that the configuration - will be read on demand based on the filepath given during initialization. 
+ This variation behaves much like the :manpage:`git-config(1)` command, such that the + configuration will be read on demand based on the filepath given during + initialization. The changes will automatically be written once the instance goes out of scope, but can be triggered manually as well. - The configuration file will be locked if you intend to change values preventing other - instances to write concurrently. + The configuration file will be locked if you intend to change values preventing + other instances to write concurrently. :note: The config is case-sensitive even when queried, hence section and option names - must match perfectly.""" - __metaclass__ = MetaParserBuilder + must match perfectly. + + :note: + If used as a context manager, this will release the locked file. + """ - #{ Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile + # { Configuration t_lock = LockFile - re_comment = re.compile('^\s*[#;]') + """The lock type determines the type of lock to use in new configuration readers. - #} END configuration + They must be compatible to the :class:`~git.util.LockFile` interface. + A suitable alternative would be the :class:`~git.util.BlockingLockFile`. + """ - OPTCRE = re.compile( - r'\s*(?P