diff --git a/.devcontainer.json b/.devcontainer.json deleted file mode 100644 index 54ddfa1a130f8..0000000000000 --- a/.devcontainer.json +++ /dev/null @@ -1,28 +0,0 @@ -// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at -// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda -{ - "name": "pandas", - "context": ".", - "dockerFile": "Dockerfile", - - // Use 'settings' to set *default* container specific settings.json values on container create. - // You can edit these settings after create using File > Preferences > Settings > Remote. - "settings": { - "python.pythonPath": "/usr/local/bin/python", - "python.formatting.provider": "black", - "python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.pylintEnabled": false, - "python.linting.mypyEnabled": true, - "python.testing.pytestEnabled": true, - "python.testing.pytestArgs": [ - "pandas" - ] - }, - - // Add the IDs of extensions you want installed when the container is created in the array below. - "extensions": [ - "ms-python.python", - "ms-vscode.cpptools" - ] -} diff --git a/.gitattributes b/.gitattributes index bc7dec642df0f..44b689cb53915 100644 --- a/.gitattributes +++ b/.gitattributes @@ -65,14 +65,12 @@ pandas/_version.py export-subst asv_bench export-ignore ci export-ignore doc export-ignore -gitpod export-ignore MANIFEST.in export-ignore scripts/** export-ignore typings export-ignore web export-ignore CITATION.cff export-ignore codecov.yml export-ignore -Dockerfile export-ignore environment.yml export-ignore setup.py export-ignore diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8eca91c692710..c6e93aee38a8b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,3 +3,4 @@ - [ ] All [code checks passed](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit). - [ ] Added [type annotations](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#type-hints) to new arguments/methods/functions. - [ ] Added an entry in the latest `doc/source/whatsnew/vX.X.X.rst` file if fixing a bug or adding a new feature. +- [ ] If I used AI to develop this pull request, I prompted it to follow `AGENTS.md`. diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 82b1ef586e5dc..656d0ed21ba05 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -131,33 +131,6 @@ jobs: asv machine --yes asv run --quick --dry-run --durations=30 --python=same --show-stderr - build_docker_dev_environment: - name: Build Docker Dev Environment - runs-on: ubuntu-24.04 - defaults: - run: - shell: bash -el {0} - - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment - cancel-in-progress: true - - steps: - - name: Clean up dangling images - run: docker image prune -f - - - name: Checkout - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Build image - run: docker build --pull --no-cache --tag pandas-dev-env . 
- - - name: Show environment - run: docker run --rm pandas-dev-env python -c "import pandas as pd; print(pd.show_versions())" - requirements-dev-text-installable: name: Test install requirements-dev.txt runs-on: ubuntu-24.04 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index d3802bc32a540..836bcec855407 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -39,6 +39,7 @@ jobs: if: >- (github.event_name == 'schedule') || github.event_name == 'workflow_dispatch' || + github.event_name == 'release' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Build')) || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! endsWith(github.ref, 'dev0'))) @@ -84,6 +85,7 @@ jobs: if: >- (github.event_name == 'schedule') || github.event_name == 'workflow_dispatch' || + github.event_name == 'release' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Build')) || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! endsWith(github.ref, 'dev0'))) diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 5bf028750f30f..0000000000000 --- a/.gitpod.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Building pandas on init -# Might delegate this later to prebuild with Q2 improvements on gitpod -# https://www.gitpod.io/docs/config-start-tasks/#configuring-the-terminal -# ------------------------------------------------------------------------- - -# images for gitpod pandas are in https://hub.docker.com/r/pandas/pandas-gitpod/tags -# we're using the Dockerfile in the base of the repo -image: - file: Dockerfile -tasks: - - name: Prepare development environment - init: | - mkdir -p .vscode - cp gitpod/settings.json .vscode/settings.json - git fetch --tags - python -m pip install -ve . --no-build-isolation -Ceditable-verbose=true - pre-commit install --install-hooks - command: | - python -m pip install -ve . --no-build-isolation -Ceditable-verbose=true - echo "✨ Pre-build complete! You can close this terminal ✨ " - -# -------------------------------------------------------- -# exposing ports for liveserve -ports: - - port: 5500 - onOpen: notify - -# -------------------------------------------------------- -# some useful extensions to have -vscode: - extensions: - - ms-python.python - - yzhang.markdown-all-in-one - - eamodio.gitlens - - lextudio.restructuredtext - - ritwickdey.liveserver - # add or remove what you think is generally useful to most contributors - # avoid adding too many. 
they each open a pop-up window
-
-# --------------------------------------------------------
-# Using prebuilds for the container
-# With this configuration the prebuild will happen on push to main
-github:
-  prebuilds:
-    # enable for main/default branch
-    main: true
-    # enable for other branches (defaults to false)
-    branches: false
-    # enable for pull requests coming from this repo (defaults to true)
-    pullRequests: false
-    # enable for pull requests coming from forks (defaults to false)
-    pullRequestsFromForks: false
-    # add a check to pull requests (defaults to true)
-    addCheck: false
-    # add a "Review in Gitpod" button as a comment to pull requests (defaults to false)
-    addComment: false
-    # add a "Review in Gitpod" button to the pull request's description (defaults to false)
-    addBadge: false
-    # add a label once the prebuild is ready to pull requests (defaults to false)
-    addLabel: false
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000000000..10b9f4f6e78fd
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,48 @@
+# pandas Agent Instructions
+
+## Project Overview
+`pandas` is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
+
+## Purpose
+- Assist contributors by suggesting code changes, tests, and documentation edits for the pandas repository while preserving stability and compatibility.
+
+## Persona & Tone
+- Concise, neutral, code-focused. Prioritize correctness, readability, and tests.
+
+## Project Guidelines
+- Be sure to follow all guidelines for contributing to the codebase specified at https://pandas.pydata.org/docs/development/contributing_codebase.html
+- These guidelines are also available in the following local files, which should be loaded into context and adhered to:
+  - doc/source/development/contributing_codebase.rst
+  - doc/source/development/contributing_docstring.rst
+  - doc/source/development/contributing_documentation.rst
+  - doc/source/development/contributing.rst
+
+## Decision heuristics
+- Favor small, backward-compatible changes with tests.
+- If a change would be breaking, propose it behind a deprecation path and document the rationale.
+- Prefer readability over micro-optimizations unless benchmarks are requested.
+- Add tests for behavioral changes; update docs only after the code change is final.
+
+## Type hints guidance (summary)
+- Prefer PEP 484-style annotations and types from pandas._typing when appropriate.
+- Avoid unnecessary use of typing.cast; prefer refactors that convey types to type-checkers.
+- Use builtin generics (list, dict) when possible.
+
+## Docstring guidance (summary)
+- Follow NumPy / numpydoc conventions used across the repo: short summary, extended summary, Parameters, Returns/Yields, See Also, Notes, Examples.
+- Ensure examples are deterministic, import numpy/pandas as documented, and pass the doctest rules used by docs validation.
+- Preserve formatting rules: triple double-quotes, no blank line before/after docstring, parameter formatting ("name : type, default ..."), types and examples conventions.
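+
+A minimal sketch of a docstring following these conventions (the function and
+its parameters are hypothetical, chosen only to illustrate the formatting and
+type-hint rules above):
+
+```python
+def clip_values(values: list[int], lower: int, upper: int) -> list[int]:
+    """
+    Clip each value to the closed interval [lower, upper].
+
+    Parameters
+    ----------
+    values : list of int
+        Values to clip.
+    lower : int
+        Minimum allowed value.
+    upper : int
+        Maximum allowed value.
+
+    Returns
+    -------
+    list of int
+        The clipped values.
+
+    Examples
+    --------
+    >>> clip_values([1, 5, 9], lower=2, upper=8)
+    [2, 5, 8]
+    """
+    return [min(max(v, lower), upper) for v in values]
+```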
+
+## Pull Requests (summary)
+- Pull request titles should be descriptive and include one of the following prefixes:
+  - ENH: Enhancement, new functionality
+  - BUG: Bug fix
+  - DOC: Additions/updates to documentation
+  - TST: Additions/updates to tests
+  - BLD: Updates to the build process/scripts
+  - PERF: Performance improvement
+  - TYP: Type annotations
+  - CLN: Code cleanup
+- Pull request descriptions should follow the template, and **succinctly** describe the change being made. Usually a few sentences are sufficient.
+- Pull requests that resolve an existing GitHub issue should include a link to the issue in the PR description.
+- Do not add summaries or additional comments to individual commit messages. The single PR description is sufficient.
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 6d919e80191bc..0000000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,31 +0,0 @@
-FROM python:3.11.13
-WORKDIR /home/pandas
-
-# https://docs.docker.com/reference/dockerfile/#automatic-platform-args-in-the-global-scope
-ARG TARGETPLATFORM
-
-RUN apt-get update && \
-    apt-get --no-install-recommends -y upgrade && \
-    apt-get --no-install-recommends -y install \
-    build-essential \
-    bash-completion \
-    # hdf5 needed for pytables installation
-    libhdf5-dev \
-    # libgles2-mesa needed for pytest-qt
-    libgles2-mesa-dev && \
-    rm -rf /var/lib/apt/lists/*
-
-COPY requirements-dev.txt /tmp
-
-RUN case "$TARGETPLATFORM" in \
-    linux/arm*) \
-    # Drop PyQt5 for ARM GH#61037
-    sed -i "/^pyqt5/Id" /tmp/requirements-dev.txt \
-    ;; \
-    esac && \
-    python -m pip install --no-cache-dir --upgrade pip && \
-    python -m pip install --no-cache-dir -r /tmp/requirements-dev.txt
-RUN git config --global --add safe.directory /home/pandas
-
-ENV SHELL="/bin/bash"
-CMD ["/bin/bash"]
diff --git a/doc/source/conf.py b/doc/source/conf.py
index f222a228531ff..63c723eb67672 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -828,7 +828,6 @@ def setup(app) -> None:
    "https://github.com/pandas-dev/pandas/blob/v0.20.2/pandas/core/generic.py#L568",
    "https://github.com/pandas-dev/pandas/blob/v0.20.2/pandas/core/frame.py#L1495",
    "https://github.com/pandas-dev/pandas/issues/174151",
-    "https://gitpod.io/#https://github.com/USERNAME/pandas",
    "https://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/",
    "https://matplotlib.org/api/axes_api.html#matplotlib.axes.Axes.table",
    "https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html",
diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst
index c09e8595f0241..67fe71dd5e648 100644
--- a/doc/source/development/contributing_environment.rst
+++ b/doc/source/development/contributing_environment.rst
@@ -16,13 +16,11 @@ locally before pushing your changes. It's recommended to also install the :ref:`
    :maxdepth: 2
    :hidden:
 
-   contributing_gitpod.rst
 
 Step 1: install a C compiler
 ----------------------------
 
-How to do this will depend on your platform. If you choose to use ``Docker`` or ``GitPod``
-in the next step, then you can skip this step.
+How to do this will depend on your platform.
 
 **Windows**
@@ -163,51 +161,6 @@ should already exist.
 
     # Install the build dependencies
     python -m pip install -r requirements-dev.txt
 
-Option 3: using Docker
-~~~~~~~~~~~~~~~~~~~~~~
-
-pandas provides a ``DockerFile`` in the root directory to build a Docker image
-with a full pandas development environment.
- -**Docker Commands** - -Build the Docker image:: - - # Build the image - docker build -t pandas-dev . - -Run Container:: - - # Run a container and bind your local repo to the container - # This command assumes you are running from your local repo - # but if not alter ${PWD} to match your local repo path - docker run -it --rm -v ${PWD}:/home/pandas pandas-dev - -*Even easier, you can integrate Docker with the following IDEs:* - -**Visual Studio Code** - -You can use the DockerFile to launch a remote session with Visual Studio Code, -a popular free IDE, using the ``.devcontainer.json`` file. -See https://code.visualstudio.com/docs/remote/containers for details. - -**PyCharm (Professional)** - -Enable Docker support and use the Services tool window to build and manage images as well as -run and interact with containers. -See https://www.jetbrains.com/help/pycharm/docker.html for details. - -Option 4: using Gitpod -~~~~~~~~~~~~~~~~~~~~~~ - -Gitpod is an open-source platform that automatically creates the correct development -environment right in your browser, reducing the need to install local development -environments and deal with incompatible dependencies. - -If you are a Windows user, unfamiliar with using the command line or building pandas -for the first time, it is often faster to build with Gitpod. Here are the in-depth instructions -for :ref:`building pandas with GitPod `. - Step 3: build and install pandas -------------------------------- diff --git a/doc/source/development/contributing_gitpod.rst b/doc/source/development/contributing_gitpod.rst deleted file mode 100644 index 447b7b20a8ae5..0000000000000 --- a/doc/source/development/contributing_gitpod.rst +++ /dev/null @@ -1,274 +0,0 @@ -.. _contributing-gitpod: - -Using Gitpod for pandas development -=================================== - -This section of the documentation will guide you through: - -* using Gitpod for your pandas development environment -* creating a personal fork of the pandas repository on GitHub -* a quick tour of pandas and VSCode -* working on the pandas documentation in Gitpod - -Gitpod ------- - -`Gitpod`_ is an open-source platform for automated and ready-to-code -development environments. It enables developers to describe their dev -environment as code and start instant and fresh development environments for -each new task directly from your browser. This reduces the need to install local -development environments and deal with incompatible dependencies. - - -Gitpod GitHub integration -------------------------- - -To be able to use Gitpod, you will need to have the Gitpod app installed on your -GitHub account, so if -you do not have an account yet, you will need to create one first. - -To get started just login at `Gitpod`_, and grant the appropriate permissions to GitHub. - -We have built a python 3.10 environment and all development dependencies will -install when the environment starts. - - -Forking the pandas repository ------------------------------ - -The best way to work on pandas as a contributor is by making a fork of the -repository first. - -#. Browse to the `pandas repository on GitHub`_ and `create your own fork`_. - -#. Browse to your fork. Your fork will have a URL like - https://github.com/noatamir/pandas-dev, except with your GitHub username in place of - ``noatamir``. 
- -Starting Gitpod ---------------- -Once you have authenticated to Gitpod through GitHub, you can install the -`Gitpod Chromium or Firefox browser extension `_ -which will add a **Gitpod** button next to the **Code** button in the -repository: - -.. image:: ./gitpod-imgs/pandas-github.png - :alt: pandas repository with Gitpod button screenshot - -#. If you install the extension - you can click the **Gitpod** button to start - a new workspace. - -#. Alternatively, if you do not want to install the browser extension, you can - visit https://gitpod.io/#https://github.com/USERNAME/pandas replacing - ``USERNAME`` with your GitHub username. - -#. In both cases, this will open a new tab on your web browser and start - building your development environment. Please note this can take a few - minutes. - -#. Once the build is complete, you will be directed to your workspace, - including the VSCode editor and all the dependencies you need to work on - pandas. The first time you start your workspace, you will notice that there - might be some actions running. This will ensure that you have a development - version of pandas installed. - -#. When your workspace is ready, you can :ref:`test the build` by - entering:: - - $ python -m pytest pandas - - Note that this command takes a while to run, so once you've confirmed it's running you may want to cancel it using ctrl-c. - -Quick workspace tour --------------------- -Gitpod uses VSCode as the editor. If you have not used this editor before, you -can check the Getting started `VSCode docs`_ to familiarize yourself with it. - -Your workspace will look similar to the image below: - -.. image:: ./gitpod-imgs/gitpod-workspace.png - :alt: Gitpod workspace screenshot - -We have marked some important sections in the editor: - -#. Your current Python interpreter - by default, this is ``pandas-dev`` and - should be displayed in the status bar and on your terminal. You do not need - to activate the conda environment as this will always be activated for you. -#. Your current branch is always displayed in the status bar. You can also use - this button to change or create branches. -#. GitHub Pull Requests extension - you can use this to work with Pull Requests - from your workspace. -#. Marketplace extensions - we have added some essential extensions to the pandas - Gitpod. Still, you can also install other extensions or syntax highlighting - themes for your user, and these will be preserved for you. -#. Your workspace directory - by default, it is ``/workspace/pandas-dev``. **Do not - change this** as this is the only directory preserved in Gitpod. - -We have also pre-installed a few tools and VSCode extensions to help with the -development experience: - -* `VSCode rst extension `_ -* `Markdown All in One `_ -* `VSCode GitLens extension `_ -* `VSCode Git Graph extension `_ - -Development workflow with Gitpod --------------------------------- -The :ref:`contributing` section of this documentation contains -information regarding the pandas development workflow. Make sure to check this -before working on your contributions. - -When using Gitpod, git is pre configured for you: - -#. You do not need to configure your git username, and email as this should be - done for you as you authenticated through GitHub. Unless you are using GitHub - feature to keep email address private. You can check the git - configuration with the command ``git config --list`` in your terminal. 
Use - ``git config --global user.email “your-secret-email@users.noreply.github.com”`` - to set your email address to the one you use to make commits with your github - profile. -#. As you started your workspace from your own pandas fork, you will by default - have both ``upstream`` and ``origin`` added as remotes. You can verify this by - typing ``git remote`` on your terminal or by clicking on the **branch name** - on the status bar (see image below). - - .. image:: ./gitpod-imgs/pandas-gitpod-branches.png - :alt: Gitpod workspace branches plugin screenshot - -Rendering the pandas documentation ----------------------------------- -You can find the detailed documentation on how rendering the documentation with -Sphinx works in the :ref:`contributing.howto-build-docs` section. To build the full -docs you need to run the following command in the ``/doc`` directory:: - - $ cd doc - $ python make.py html - -Alternatively you can build a single page with:: - - python make.py --single development/contributing_gitpod.rst - -You have two main options to render the documentation in Gitpod. - -Option 1: using Liveserve -~~~~~~~~~~~~~~~~~~~~~~~~~ - -#. View the documentation in ``pandas/doc/build/html``. -#. To see the rendered version of a page, you can right-click on the ``.html`` - file and click on **Open with Live Serve**. Alternatively, you can open the - file in the editor and click on the **Go live** button on the status bar. - - .. image:: ./gitpod-imgs/vscode-statusbar.png - :alt: Gitpod workspace VSCode start live serve screenshot - -#. A simple browser will open to the right-hand side of the editor. We recommend - closing it and click on the **Open in browser** button in the pop-up. -#. To stop the server click on the **Port: 5500** button on the status bar. - -Option 2: using the rst extension -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A quick and easy way to see live changes in a ``.rst`` file as you work on it -uses the rst extension with docutils. - -.. note:: This will generate a simple live preview of the document without the - ``html`` theme, and some backlinks might not be added correctly. But it is an - easy and lightweight way to get instant feedback on your work, without - building the html files. - -#. Open any of the source documentation files located in ``doc/source`` in the - editor. -#. Open VSCode Command Palette with :kbd:`Cmd-Shift-P` in Mac or - :kbd:`Ctrl-Shift-P` in Linux and Windows. Start typing "restructured" - and choose either "Open preview" or "Open preview to the Side". - - .. image:: ./gitpod-imgs/vscode-rst.png - :alt: Gitpod workspace VSCode open rst screenshot - -#. As you work on the document, you will see a live rendering of it on the editor. - - .. image:: ./gitpod-imgs/rst-rendering.png - :alt: Gitpod workspace VSCode rst rendering screenshot - -If you want to see the final output with the ``html`` theme you will need to -rebuild the docs with ``make html`` and use Live Serve as described in option 1. - -FAQ's and troubleshooting -------------------------- - -How long is my Gitpod workspace kept for? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Your stopped workspace will be kept for 14 days and deleted afterwards if you do -not use them. - -Can I come back to a previous workspace? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Yes, let's say you stepped away for a while and you want to carry on working on -your pandas contributions. You need to visit https://gitpod.io/workspaces and -click on the workspace you want to spin up again. 
All your changes will be there -as you last left them. - -Can I install additional VSCode extensions? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Absolutely! Any extensions you installed will be installed in your own workspace -and preserved. - -I registered on Gitpod but I still cannot see a ``Gitpod`` button in my repositories. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Head to https://gitpod.io/integrations and make sure you are logged in. -Hover over GitHub and click on the three buttons that appear on the right. -Click on edit permissions and make sure you have ``user:email``, -``read:user``, and ``public_repo`` checked. Click on **Update Permissions** -and confirm the changes in the GitHub application page. - -.. image:: ./gitpod-imgs/gitpod-edit-permissions-gh.png - :alt: Gitpod integrations - edit GH permissions screenshot - -How long does my workspace stay active if I'm not using it? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you keep your workspace open in a browser tab but don't interact with it, -it will shut down after 30 minutes. If you close the browser tab, it will -shut down after 3 minutes. - -My terminal is blank - there is no cursor and it's completely unresponsive -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Unfortunately this is a known-issue on Gitpod's side. You can sort this -issue in two ways: - -#. Create a new Gitpod workspace altogether. -#. Head to your `Gitpod dashboard `_ and locate - the running workspace. Hover on it and click on the **three dots menu** - and then click on **Stop**. When the workspace is completely stopped you - can click on its name to restart it again. - -.. image:: ./gitpod-imgs/gitpod-dashboard-stop.png - :alt: Gitpod dashboard and workspace menu screenshot - -I authenticated through GitHub but I still cannot commit to the repository through Gitpod. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Head to https://gitpod.io/integrations and make sure you are logged in. -Hover over GitHub and click on the three buttons that appear on the right. -Click on edit permissions and make sure you have ``public_repo`` checked. -Click on **Update Permissions** and confirm the changes in the -GitHub application page. - -.. image:: ./gitpod-imgs/gitpod-edit-permissions-repo.png - :alt: Gitpod integrations - edit GH repository permissions screenshot - -Acknowledgments ---------------- - -This page is lightly adapted from the `NumPy`_ project . - -.. _Gitpod: https://www.gitpod.io/ -.. _pandas repository on GitHub: https://github.com/pandas-dev/pandas -.. _create your own fork: https://help.github.com/en/articles/fork-a-repo -.. _VSCode docs: https://code.visualstudio.com/docs/getstarted/tips-and-tricks -.. 
_NumPy: https://www.numpy.org/ diff --git a/doc/source/development/gitpod-imgs/gitpod-dashboard-stop.png b/doc/source/development/gitpod-imgs/gitpod-dashboard-stop.png deleted file mode 100644 index b64790a986646..0000000000000 Binary files a/doc/source/development/gitpod-imgs/gitpod-dashboard-stop.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/gitpod-edit-permissions-gh.png b/doc/source/development/gitpod-imgs/gitpod-edit-permissions-gh.png deleted file mode 100644 index ec21a9064c83d..0000000000000 Binary files a/doc/source/development/gitpod-imgs/gitpod-edit-permissions-gh.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/gitpod-edit-permissions-repo.png b/doc/source/development/gitpod-imgs/gitpod-edit-permissions-repo.png deleted file mode 100644 index 8bfaff81cfb69..0000000000000 Binary files a/doc/source/development/gitpod-imgs/gitpod-edit-permissions-repo.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/gitpod-workspace.png b/doc/source/development/gitpod-imgs/gitpod-workspace.png deleted file mode 100644 index daf763e9adb05..0000000000000 Binary files a/doc/source/development/gitpod-imgs/gitpod-workspace.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/pandas-github.png b/doc/source/development/gitpod-imgs/pandas-github.png deleted file mode 100644 index 010b0fc5ea33d..0000000000000 Binary files a/doc/source/development/gitpod-imgs/pandas-github.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/pandas-gitpod-branches.png b/doc/source/development/gitpod-imgs/pandas-gitpod-branches.png deleted file mode 100644 index f95c66056ca37..0000000000000 Binary files a/doc/source/development/gitpod-imgs/pandas-gitpod-branches.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/rst-rendering.png b/doc/source/development/gitpod-imgs/rst-rendering.png deleted file mode 100644 index b613c621c398b..0000000000000 Binary files a/doc/source/development/gitpod-imgs/rst-rendering.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/vscode-rst.png b/doc/source/development/gitpod-imgs/vscode-rst.png deleted file mode 100644 index 5b574c115a2b7..0000000000000 Binary files a/doc/source/development/gitpod-imgs/vscode-rst.png and /dev/null differ diff --git a/doc/source/development/gitpod-imgs/vscode-statusbar.png b/doc/source/development/gitpod-imgs/vscode-statusbar.png deleted file mode 100644 index dad25369fedfd..0000000000000 Binary files a/doc/source/development/gitpod-imgs/vscode-statusbar.png and /dev/null differ diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst index 7894789846ce8..9aef38d868374 100644 --- a/doc/source/user_guide/duplicates.rst +++ b/doc/source/user_guide/duplicates.rst @@ -109,8 +109,6 @@ with the same label. Disallowing Duplicate Labels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 1.2.0 - As noted above, handling duplicates is an important feature when reading in raw data. That said, you may want to avoid introducing duplicates as part of a data processing pipeline (from methods like :meth:`pandas.concat`, diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 40369bd40cdb5..b8f7e1477d85d 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1264,8 +1264,6 @@ with Numba accelerated routines -------------------------- -.. 
versionadded:: 1.1 - If `Numba `__ is installed as an optional dependency, the ``transform`` and ``aggregate`` methods support ``engine='numba'`` and ``engine_kwargs`` arguments. See :ref:`enhancing performance with Numba ` for general usage of the arguments diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7092a0f8fa8d8..88f6cf76941ef 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -156,13 +156,9 @@ dtype : Type name or dict of column -> type, default ``None`` Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}`` Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve and not interpret dtype. If converters are specified, they will be applied INSTEAD - of dtype conversion. - - .. versionadded:: 1.5.0 - - Support for defaultdict was added. Specify a defaultdict as input where - the default determines the dtype of the columns which are not explicitly - listed. + of dtype conversion. Specify a defaultdict as input where + the default determines the dtype of the columns which are not explicitly + listed. dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames Which dtype_backend to use, e.g. whether a DataFrame should have NumPy @@ -177,12 +173,8 @@ dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFram engine : {``'c'``, ``'python'``, ``'pyarrow'``} Parser engine to use. The C and pyarrow engines are faster, while the python engine is currently more feature-complete. Multithreading is currently only supported by - the pyarrow engine. - - .. versionadded:: 1.4.0 - - The "pyarrow" engine was added as an *experimental* engine, and some features - are unsupported, or may not work correctly, with this engine. + the pyarrow engine. Some features of the "pyarrow" engine + are unsupported or may not work correctly. converters : dict, default ``None`` Dict of functions for converting values in certain columns. Keys can either be integers or column labels. @@ -355,8 +347,6 @@ on_bad_lines : {{'error', 'warn', 'skip'}}, default 'error' - 'warn', print a warning when a bad line is encountered and skip that line. - 'skip', skip bad lines without raising or warning when they are encountered. - .. versionadded:: 1.3.0 - .. _io.dtypes: Specifying column data types @@ -935,8 +925,6 @@ DD/MM/YYYY instead. For convenience, a ``dayfirst`` keyword is provided: Writing CSVs to binary file objects +++++++++++++++++++++++++++++++++++ -.. versionadded:: 1.2.0 - ``df.to_csv(..., mode="wb")`` allows writing a CSV to a file object opened binary mode. In most cases, it is not necessary to specify ``mode`` as pandas will auto-detect whether the file object is @@ -1122,8 +1110,6 @@ You can elect to skip bad lines: data = "a,b,c\n1,2,3\n4,5,6,7\n8,9,10" pd.read_csv(StringIO(data), on_bad_lines="skip") -.. versionadded:: 1.4.0 - Or pass a callable function to handle the bad line if ``engine="python"``. The bad line will be a list of strings that was split by the ``sep``: @@ -1547,8 +1533,6 @@ functions - the following example shows reading a CSV file: df = pd.read_csv("https://download.bls.gov/pub/time.series/cu/cu.item", sep="\t") -.. 
versionadded:: 1.3.0 - A custom header can be sent alongside HTTP(s) requests by passing a dictionary of header key value mappings to the ``storage_options`` keyword argument as shown below: @@ -1600,8 +1584,6 @@ More sample configurations and documentation can be found at `S3Fs documentation If you do *not* have S3 credentials, you can still access public data by specifying an anonymous connection, such as -.. versionadded:: 1.2.0 - .. code-block:: python pd.read_csv( @@ -2535,8 +2517,6 @@ Links can be extracted from cells along with the text using ``extract_links="all df[("GitHub", None)] df[("GitHub", None)].str[1] -.. versionadded:: 1.5.0 - .. _io.html: Writing to HTML files @@ -2726,8 +2706,6 @@ parse HTML tables in the top-level pandas io function ``read_html``. LaTeX ----- -.. versionadded:: 1.3.0 - Currently there are no methods to read from LaTeX, only output methods. Writing to LaTeX files @@ -2766,8 +2744,6 @@ XML Reading XML ''''''''''' -.. versionadded:: 1.3.0 - The top-level :func:`~pandas.io.xml.read_xml` function can accept an XML string/file/URL and will parse nodes and attributes into a pandas ``DataFrame``. @@ -3093,8 +3069,6 @@ supports parsing such sizeable files using `lxml's iterparse`_ and `etree's iter which are memory-efficient methods to iterate through an XML tree and extract specific elements and attributes. without holding entire tree in memory. -.. versionadded:: 1.5.0 - .. _`lxml's iterparse`: https://lxml.de/3.2/parsing.html#iterparse-and-iterwalk .. _`etree's iterparse`: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse @@ -3133,8 +3107,6 @@ of reading in Wikipedia's very large (12 GB+) latest article data dump. Writing XML ''''''''''' -.. versionadded:: 1.3.0 - ``DataFrame`` objects have an instance method ``to_xml`` which renders the contents of the ``DataFrame`` as an XML document. diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index bc5a2d5ed5735..30dac01fa5985 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -478,8 +478,6 @@ The values can be cast to a different type using the ``dtype`` argument. pd.get_dummies(df, dtype=np.float32).dtypes -.. versionadded:: 1.5.0 - :func:`~pandas.from_dummies` converts the output of :func:`~pandas.get_dummies` back into a :class:`Series` of categorical values from indicator values. diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 6a66c30cffbf0..72b40982abb0c 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1964,8 +1964,6 @@ Note the use of ``'start'`` for ``origin`` on the last example. In that case, `` Backward resample ~~~~~~~~~~~~~~~~~ -.. versionadded:: 1.3.0 - Instead of adjusting the beginning of bins, sometimes we need to fix the end of the bins to make a backward resample with a given ``freq``. The backward resample sets ``closed`` to ``'right'`` by default since the last value should be considered as the edge point for the last bin. We can set ``origin`` to ``'end'``. The value for a specific ``Timestamp`` index stands for the resample result from the current ``Timestamp`` minus ``freq`` to the current ``Timestamp`` with a right close. 
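 
 As a minimal sketch of the behavior just described (the series and the
 frequencies below are illustrative, not taken from the surrounding examples):
 
 .. ipython:: python
 
     ts = pd.Series(range(3), index=pd.date_range("2000-01-01", periods=3, freq="7min"))
     ts.resample("17min", origin="end").sum()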
diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index a12a1e1477563..f26d1ebc85776 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -645,8 +645,6 @@ each point: If a categorical column is passed to ``c``, then a discrete colorbar will be produced: -.. versionadded:: 1.3.0 - .. ipython:: python @savefig scatter_plot_categorical.png diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 5b27442c80bb8..44af88d5b05b3 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -77,8 +77,6 @@ which will first group the data by the specified keys and then perform a windowi to compute the rolling sums to preserve accuracy as much as possible. -.. versionadded:: 1.3.0 - Some windowing operations also support the ``method='table'`` option in the constructor which performs the windowing operation over an entire :class:`DataFrame` instead of a single column at a time. This can provide a useful performance benefit for a :class:`DataFrame` with many columns @@ -100,8 +98,6 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of wei df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") # noqa: E501 -.. versionadded:: 1.3 - Some windowing operations also support an ``online`` method after constructing a windowing object which returns a new object that supports passing in new :class:`DataFrame` or :class:`Series` objects to continue the windowing calculation with the new values (i.e. online calculations). @@ -182,8 +178,6 @@ By default the labels are set to the right edge of the window, but a This can also be applied to datetime-like indices. -.. versionadded:: 1.3.0 - .. ipython:: python df = pd.DataFrame( @@ -365,8 +359,6 @@ The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be These keyword arguments will be applied to *both* the passed function (if a standard Python function) and the apply for loop over each window. -.. versionadded:: 1.3.0 - ``mean``, ``median``, ``max``, ``min``, and ``sum`` also support the ``engine`` and ``engine_kwargs`` arguments. .. _window.cov_corr: diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index a17cf9fd4b9a0..19a20e4e1355d 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -24,6 +24,7 @@ Version 2.3 .. toctree:: :maxdepth: 2 + v2.3.4 v2.3.3 v2.3.2 v2.3.1 diff --git a/doc/source/whatsnew/v2.3.4.rst b/doc/source/whatsnew/v2.3.4.rst new file mode 100644 index 0000000000000..836cea026d702 --- /dev/null +++ b/doc/source/whatsnew/v2.3.4.rst @@ -0,0 +1,21 @@ +.. _whatsnew_234: + +What's new in 2.3.4 (November XX, 2025) +---------------------------------------- + +These are the changes in pandas 2.3.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Bug fixes +^^^^^^^^^ +- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_234.contributors: + +Contributors +~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 70a69ce4be9f5..c96bb7f663368 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -798,6 +798,7 @@ Other Deprecations - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) - Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) +- Deprecated the 'verify_integrity' keyword in :meth:`DataFrame.set_index`; directly check the result for ``obj.index.is_unique`` instead (:issue:`62919`) - Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`) - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`) @@ -807,6 +808,7 @@ Other Deprecations - Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`) - Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`) - Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`) +- Deprecated silently casting strings to :class:`Timedelta` in binary operations with :class:`Timedelta` (:issue:`59653`) - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`) - Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`) - Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`) @@ -1030,13 +1032,13 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`) +- Bug in :class:`pandas.Categorical` displaying string categories without quotes when using "string" dtype (:issue:`63045`) - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) - Bug in :func:`bdate_range` raising ``ValueError`` with frequency ``freq="cbh"`` (:issue:`62849`) - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`) - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for 
columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- Datetimelike ^^^^^^^^^^^^ @@ -1130,7 +1132,6 @@ Interval Indexing ^^^^^^^^ -- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) - Bug in :meth:`DataFrame.__setitem__` on an empty :class:`DataFrame` with a tuple corrupting the frame (:issue:`54385`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) @@ -1258,6 +1259,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`) - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`) - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`) +- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) Reshaping ^^^^^^^^^ @@ -1358,6 +1360,10 @@ Other - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) - Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) +- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only + array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`). + This logic is expanded to accessing the underlying pandas ExtensionArray + through ``.array`` (or ``.values`` depending on the dtype) as well (:issue:`61925`). .. ***DO NOT USE THIS SECTION*** diff --git a/gitpod/Dockerfile b/gitpod/Dockerfile deleted file mode 100644 index dd4ddf64d16b4..0000000000000 --- a/gitpod/Dockerfile +++ /dev/null @@ -1,100 +0,0 @@ -# -# Dockerfile for pandas development -# -# Usage: -# ------- -# -# To make a local build of the container, from the 'Docker-dev' directory: -# docker build --rm -f "Dockerfile" -t "." -# -# To use the container use the following command. It assumes that you are in -# the root folder of the pandas git repository, making it available as -# /home/pandas in the container. Whatever changes you make to that directory -# are visible in the host and container. -# The docker image is retrieved from the pandas dockerhub repository -# -# docker run --rm -it -v $(pwd):/home/pandas pandas/pandas-dev: -# -# By default the container will activate the conda environment pandas-dev -# which contains all the dependencies needed for pandas development -# -# To build and install pandas run: -# python setup.py build_ext -j 4 -# python -m pip install -e . 
--no-build-isolation -# -# This image is based on: Ubuntu 20.04 (focal) -# https://hub.docker.com/_/ubuntu/?tab=tags&name=focal -# OS/ARCH: linux/amd64 -FROM gitpod/workspace-base:latest - -ARG MAMBAFORGE_VERSION="23.1.0-3" -ARG CONDA_ENV=pandas-dev -ARG PANDAS_HOME="/home/pandas" - - -# ---- Configure environment ---- -ENV CONDA_DIR=/home/gitpod/mambaforge3 \ - SHELL=/bin/bash -ENV PATH=${CONDA_DIR}/bin:$PATH \ - WORKSPACE=/workspace/pandas - -# ----------------------------------------------------------------------------- -# ---- Creating as root - note: make sure to change to gitpod in the end ---- -USER root - -# Avoid warnings by switching to noninteractive -ENV DEBIAN_FRONTEND=noninteractive - -# Configure apt and install packages -RUN apt-get update \ - && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \ - # - # Install tzdata and configure timezone (fix for tests which try to read from "/etc/localtime") - && apt-get -y install tzdata \ - && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \ - && dpkg-reconfigure -f noninteractive tzdata \ - # - # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed - && apt-get -y install git iproute2 procps iproute2 lsb-release \ - # - # cleanup - && apt-get autoremove -y \ - && apt-get clean -y \ - && rm -rf /var/lib/apt/lists/* - -# Switch back to dialog for any ad-hoc use of apt-get -ENV DEBIAN_FRONTEND=dialog - -# Allows this Dockerfile to activate conda environments -SHELL ["/bin/bash", "--login", "-o", "pipefail", "-c"] - -# ----------------------------------------------------------------------------- -# ---- Installing mamba ---- -RUN wget -q -O mambaforge3.sh \ - "https://github.com/conda-forge/miniforge/releases/download/$MAMBAFORGE_VERSION/Mambaforge-$MAMBAFORGE_VERSION-Linux-x86_64.sh" && \ - bash mambaforge3.sh -p ${CONDA_DIR} -b && \ - rm mambaforge3.sh - -# ----------------------------------------------------------------------------- -# ---- Copy needed files ---- -# basic workspace configurations -COPY ./gitpod/workspace_config /usr/local/bin/workspace_config - -RUN chmod a+rx /usr/local/bin/workspace_config && \ - workspace_config - -# the container to create a conda environment from it -COPY environment.yml /tmp/environment.yml - -# ---- Create conda environment ---- -RUN mamba env create -f /tmp/environment.yml && \ - conda activate $CONDA_ENV && \ - mamba install ccache -y && \ - # needed for docs rendering later on - python -m pip install --no-cache-dir sphinx-autobuild && \ - conda clean --all -f -y && \ - rm -rf /tmp/* - -# ----------------------------------------------------------------------------- -# Always make sure we are not root -USER gitpod diff --git a/gitpod/gitpod.Dockerfile b/gitpod/gitpod.Dockerfile deleted file mode 100644 index ab3f25b231e67..0000000000000 --- a/gitpod/gitpod.Dockerfile +++ /dev/null @@ -1,48 +0,0 @@ -# Doing a local shallow clone - keeps the container secure -# and much slimmer than using COPY directly or making a -# remote clone -ARG BASE_CONTAINER="pandas/pandas-dev:latest" -FROM gitpod/workspace-base:latest as clone - -# the clone should be deep enough for versioneer to work -RUN git clone https://github.com/pandas-dev/pandas --depth 12 /tmp/pandas - -# ----------------------------------------------------------------------------- -# Using the pandas-dev Docker image as a base -# This way, we ensure we have all the needed compilers and dependencies -# while reducing the build time -FROM ${BASE_CONTAINER} as build - -# 
----------------------------------------------------------------------------- -USER root - -# ----------------------------------------------------------------------------- -# ---- ENV variables ---- -# ---- Directories needed ---- -ENV WORKSPACE=/workspace/pandas/ \ - CONDA_ENV=pandas-dev - -# Allows this micromamba.Dockerfile to activate conda environments -SHELL ["/bin/bash", "--login", "-o", "pipefail", "-c"] - -# Copy over the shallow clone -COPY --from=clone --chown=gitpod /tmp/pandas ${WORKSPACE} - -# Everything happens in the /workspace/pandas directory -WORKDIR ${WORKSPACE} - -# Build pandas to populate the cache used by ccache -RUN git config --global --add safe.directory /workspace/pandas -RUN conda activate ${CONDA_ENV} && \ - python -m pip install -e . --no-build-isolation && \ - python setup.py build_ext --inplace && \ - ccache -s - -# Gitpod will load the repository into /workspace/pandas. We remove the -# directory from the image to prevent conflicts -RUN rm -rf ${WORKSPACE} - -# ----------------------------------------------------------------------------- -# Always return to non privileged user -RUN chown -R gitpod:gitpod /home/gitpod/.cache/ -USER gitpod diff --git a/gitpod/settings.json b/gitpod/settings.json deleted file mode 100644 index 2c2c3b551e1d1..0000000000000 --- a/gitpod/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "esbonio.server.pythonPath": "/usr/local/bin/python", - "restructuredtext.linter.disabledLinters": ["doc8","rst-lint", "rstcheck"], - "python.defaultInterpreterPath": "/usr/local/bin/python" -} diff --git a/gitpod/workspace_config b/gitpod/workspace_config deleted file mode 100644 index d49c93ec83db9..0000000000000 --- a/gitpod/workspace_config +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Basic configurations for the workspace - -set -e - -# gitpod/workspace-base needs at least one file here -touch /home/gitpod/.bashrc.d/empty - -# Add git aliases -git config --global alias.co checkout -git config --global alias.ci commit -git config --global alias.st status -git config --global alias.br branch -git config --global alias.hist "log --pretty=format:'%h %ad | %s%d [%an]' --graph --date=short" -git config --global alias.type 'cat-file -t' -git config --global alias.dump 'cat-file -p' - -# Enable basic vim defaults in ~/.vimrc -echo "filetype plugin indent on" >>~/.vimrc -echo "set colorcolumn=80" >>~/.vimrc -echo "set number" >>~/.vimrc -echo "syntax enable" >>~/.vimrc - -# Vanity custom bash prompt - makes it more legible -echo "PS1='\[\e]0;\u \w\a\]\[\033[01;36m\]\u\[\033[m\] > \[\033[38;5;141m\]\w\[\033[m\] \\$ '" >>~/.bashrc - -# Enable prompt color in the skeleton .bashrc -# hadolint ignore=SC2016 -sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashrc - -# .gitpod.yml is configured to install pandas from /workspace/pandas -echo "export PYTHONPATH=${WORKSPACE}" >>~/.bashrc - -# make conda activate command available from /bin/bash (login and interactive) -if [[ ! -f "/etc/profile.d/conda.sh" ]]; then - ln -s ${CONDA_DIR}/etc/profile.d/conda.sh /etc/profile.d/conda.sh -fi -echo ". 
${CONDA_DIR}/etc/profile.d/conda.sh" >>~/.bashrc -echo "conda activate pandas-dev" >>~/.bashrc - -# Enable prompt color in the skeleton .bashrc -# hadolint ignore=SC2016 -sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashrc - -# .gitpod.yml is configured to install pandas from /workspace/pandas -echo "export PYTHONPATH=/workspace/pandas" >>~/.bashrc - -# Set up ccache for compilers for this Dockerfile -# REF: https://github.com/conda-forge/compilers-feedstock/issues/31 -echo "conda activate pandas-dev" >>~/.startuprc -echo "export CC=\"ccache \$CC\"" >>~/.startuprc -echo "export CXX=\"ccache \$CXX\"" >>~/.startuprc -echo "source ~/.startuprc" >>~/.profile -echo "source ~/.startuprc" >>~/.bashrc diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 11d059ec53920..1064995a51797 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -94,3 +94,7 @@ class BlockValuesRefs: def add_reference(self, blk: Block) -> None: ... def add_index_reference(self, index: Index) -> None: ... def has_reference(self) -> bool: ... + +class SetitemMixin: + def __setitem__(self, key, value) -> None: ... + def __delitem__(self, key) -> None: ... diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 4fb24c9ad1538..43b60b2356b5e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -1,6 +1,9 @@ from collections import defaultdict +import sys +import warnings cimport cython +from cpython cimport PY_VERSION_HEX from cpython.object cimport PyObject from cpython.pyport cimport PY_SSIZE_T_MAX from cpython.slice cimport PySlice_GetIndicesEx @@ -20,6 +23,9 @@ from numpy cimport ( cnp.import_array() from pandas._libs.algos import ensure_int64 +from pandas.compat import CHAINED_WARNING_DISABLED +from pandas.errors import ChainedAssignmentError +from pandas.errors.cow import _chained_assignment_msg from pandas._libs.util cimport ( is_array, @@ -996,3 +1002,43 @@ cdef class BlockValuesRefs: return self._has_reference_maybe_locked() ELSE: return self._has_reference_maybe_locked() + + +cdef extern from "Python.h": + """ + #if PY_VERSION_HEX < 0x030E0000 + int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref); + #else + #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \ + PyUnstable_Object_IsUniqueReferencedTemporary + #endif + """ + int PyUnstable_Object_IsUniqueReferencedTemporary\ + "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1 + + +# Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary +cdef inline bint _is_unique_referenced_temporary(object obj) except -1: + if PY_VERSION_HEX >= 0x030E0000: + # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary + return PyUnstable_Object_IsUniqueReferencedTemporary(obj) + else: + # Fallback for older Python versions using sys.getrefcount + return sys.getrefcount(obj) <= 1 + + +cdef class SetitemMixin: + # class used in DataFrame and Series for checking for chained assignment + + def __setitem__(self, key, value) -> None: + cdef bint is_unique = 0 + if not CHAINED_WARNING_DISABLED: + is_unique = _is_unique_referenced_temporary(self) + if is_unique: + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=1 + ) + self._setitem(key, value) + + def __delitem__(self, key) -> None: + self._delitem(key) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f0da99c795ebf..c2f190a16872a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -322,7 +322,7 @@ def 
item_from_zerodim(val: object) -> object:
     >>> item_from_zerodim(np.array([1]))
     array([1])
     """
-    if cnp.PyArray_IsZeroDim(val):
+    if cnp.PyArray_IsZeroDim(val) and cnp.PyArray_CheckExact(val):
         return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val)
     return val
 
@@ -655,16 +655,17 @@ def is_range_indexer(const int6432_t[:] left, Py_ssize_t n) -> bool:
     """
     cdef:
         Py_ssize_t i
+        bint ret = True
 
     if left.size != n:
         return False
 
-    for i in range(n):
-
-        if left[i] != i:
-            return False
-
-    return True
+    with nogil:
+        for i in range(n):
+            if left[i] != i:
+                ret = False
+                break
+    return ret
 
 
 @cython.wraparound(False)
@@ -676,6 +677,7 @@ def is_sequence_range(const int6432_t[:] sequence, int64_t step) -> bool:
     cdef:
         Py_ssize_t i, n = len(sequence)
         int6432_t first_element
+        bint ret = True
 
     if step == 0:
         return False
@@ -683,10 +685,12 @@
         return True
 
     first_element = sequence[0]
-    for i in range(1, n):
-        if sequence[i] != first_element + i * step:
-            return False
-    return True
+    with nogil:
+        for i in range(1, n):
+            if sequence[i] != first_element + i * step:
+                ret = False
+                break
+    return ret
 
 
 ctypedef fused ndarr_object:
@@ -2708,11 +2712,10 @@ def maybe_convert_objects(ndarray[object] objects,
                 break
 
         elif PyDateTime_Check(val) or cnp.is_datetime64_object(val):
-            # if we have an tz's attached then return the objects
+            # if we have a tz attached then return the objects
             if convert_non_numeric:
                 if getattr(val, "tzinfo", None) is not None:
                     seen.datetimetz_ = True
-                    break
                 else:
                     seen.datetime_ = True
                     try:
@@ -2720,10 +2723,9 @@
                 except OutOfBoundsDatetime:
                     # e.g. test_out_of_s_bounds_datetime64
                     seen.object_ = True
-                    break
             else:
                 seen.object_ = True
-                break
+            break
 
         elif is_period_object(val):
             if convert_non_numeric:
                 seen.period_ = True
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
index 567bfc02a2950..8b53e842a7988 100644
--- a/pandas/_libs/ops.pyx
+++ b/pandas/_libs/ops.pyx
@@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_binop(object[:] values, object val, object op) -> ndarray:
+def scalar_binop(ndarray[object] values, object val, object op) -> ndarray:
     """
     Apply the given binary operator `op` between each element of the array
     `values` and the scalar `val`.
@@ -214,7 +214,7 @@
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
+def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
     """
     Apply the given binary operator `op` pointwise to the elements of
     arrays `left` and `right`.
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index c5391b5ef498c..c2767dc47b5e4 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -329,10 +329,6 @@ cdef class TextReader:
 
     #  source: StringIO or file object
 
-    ..versionchange:: 1.2.0
-        removed 'compression', 'memory_map', and 'encoding' argument.
-        These arguments are outsourced to CParserWrapper.
-        'source' has to be a file handle.
""" cdef: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index a0265297fe873..49c1c46ea4935 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1168,7 +1168,7 @@ class NaTType(_NaT): fromordinal = _make_error_func( "fromordinal", """ - Construct a timestamp from a a proleptic Gregorian ordinal. + Construct a timestamp from a proleptic Gregorian ordinal. This method creates a `Timestamp` object corresponding to the given proleptic Gregorian ordinal, which is a count of days from January 1, diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index be86118a2b9e2..6f9a1ba34bde9 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1803,7 +1803,7 @@ class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): See Also -------- dateutil.relativedelta.relativedelta : The relativedelta type is designed - to be applied to an existing datetime an can replace specific components of + to be applied to an existing datetime and can replace specific components of that datetime, or represents an interval of time. Examples @@ -5202,6 +5202,32 @@ deprec_to_valid_alias = { "L": "ms", "U": "us", "N": "ns", + "AS": "YS", + "AS-JAN": "YS-JAN", + "AS-FEB": "YS-FEB", + "AS-MAR": "YS-MAR", + "AS-APR": "YS-APR", + "AS-MAY": "YS-MAY", + "AS-JUN": "YS-JUN", + "AS-JUL": "YS-JUL", + "AS-AUG": "YS-AUG", + "AS-SEP": "YS-SEP", + "AS-OCT": "YS-OCT", + "AS-NOV": "YS-NOV", + "AS-DEC": "YS-DEC", + "A": "Y", + "A-JAN": "Y-JAN", + "A-FEB": "Y-FEB", + "A-MAR": "Y-MAR", + "A-APR": "Y-APR", + "A-MAY": "Y-MAY", + "A-JUN": "Y-JUN", + "A-JUL": "Y-JUL", + "A-AUG": "Y-AUG", + "A-SEP": "Y-SEP", + "A-OCT": "Y-OCT", + "A-NOV": "Y-NOV", + "A-DEC": "Y-DEC", } diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1cd875d4ce41d..2d18a275f26f5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -333,58 +333,39 @@ cdef convert_to_timedelta64(object ts, str unit): Handle these types of objects: - timedelta/Timedelta - - timedelta64 - - an offset - - np.int64 (with unit providing a possible modifier) - - None/NaT - Return an ns based int64 + Return an timedelta64[ns] object """ # Caller is responsible for checking unit not in ["Y", "y", "M"] - if checknull_with_nat_and_na(ts): - return np.timedelta64(NPY_NAT, "ns") - elif isinstance(ts, _Timedelta): + if isinstance(ts, _Timedelta): # already in the proper format if ts._creso != NPY_FR_ns: ts = ts.as_unit("ns").asm8 else: ts = np.timedelta64(ts._value, "ns") - elif cnp.is_timedelta64_object(ts): - ts = ensure_td64ns(ts) - elif is_integer_object(ts): - if ts == NPY_NAT: - return np.timedelta64(NPY_NAT, "ns") - else: - ts = _maybe_cast_from_unit(ts, unit) - elif is_float_object(ts): - ts = _maybe_cast_from_unit(ts, unit) - elif isinstance(ts, str): - if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): - ts = parse_iso_format_string(ts) - else: - ts = parse_timedelta_string(ts) - ts = np.timedelta64(ts, "ns") - elif is_tick_object(ts): - ts = np.timedelta64(ts.nanos, "ns") - if PyDelta_Check(ts): + elif PyDelta_Check(ts): ts = np.timedelta64(delta_to_nanoseconds(ts), "ns") elif not cnp.is_timedelta64_object(ts): raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}") return ts.astype("timedelta64[ns]") -cdef _maybe_cast_from_unit(ts, str unit): +cdef _numeric_to_td64ns(object item, str unit): # caller is responsible for checking # assert unit not in ["Y", "y", "M"] + # 
assert is_integer_object(item) or is_float_object(item) + if is_integer_object(item) and item == NPY_NAT: + return np.timedelta64(NPY_NAT, "ns") + try: - ts = cast_from_unit(ts, unit) + item = cast_from_unit(item, unit) except OutOfBoundsDatetime as err: raise OutOfBoundsTimedelta( - f"Cannot cast {ts} from {unit} to 'ns' without overflow." + f"Cannot cast {item} from {unit} to 'ns' without overflow." ) from err - ts = np.timedelta64(ts, "ns") + ts = np.timedelta64(item, "ns") return ts @@ -408,10 +389,11 @@ def array_to_timedelta64( cdef: Py_ssize_t i, n = values.size ndarray result = np.empty((values).shape, dtype="m8[ns]") - object item + object item, td64ns_obj int64_t ival cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values) cnp.flatiter it + str parsed_unit = parse_timedelta_unit(unit or "ns") if values.descr.type_num != cnp.NPY_OBJECT: # raise here otherwise we segfault below @@ -431,70 +413,63 @@ def array_to_timedelta64( ) cnp.PyArray_ITER_NEXT(it) - # Usually, we have all strings. If so, we hit the fast path. - # If this path fails, we try conversion a different way, and - # this is where all of the error handling will take place. - try: - for i in range(n): - # Analogous to: item = values[i] - item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + for i in range(n): + item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - ival = _item_to_timedelta64_fastpath(item) + try: + if checknull_with_nat_and_na(item): + ival = NPY_NAT - # Analogous to: iresult[i] = ival - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + elif cnp.is_timedelta64_object(item): + td64ns_obj = ensure_td64ns(item) + ival = cnp.get_timedelta64_value(td64ns_obj) - cnp.PyArray_MultiIter_NEXT(mi) + elif isinstance(item, _Timedelta): + if item._creso != NPY_FR_ns: + ival = item.as_unit("ns")._value + else: + ival = item._value + + elif PyDelta_Check(item): + # i.e. isinstance(item, timedelta) + ival = delta_to_nanoseconds(item) + + elif isinstance(item, str): + if ( + (len(item) > 0 and item[0] == "P") + or (len(item) > 1 and item[:2] == "-P") + ): + ival = parse_iso_format_string(item) + else: + ival = parse_timedelta_string(item) - except (TypeError, ValueError): - cnp.PyArray_MultiIter_RESET(mi) + elif is_tick_object(item): + ival = item.nanos - parsed_unit = parse_timedelta_unit(unit or "ns") - for i in range(n): - item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + elif is_integer_object(item) or is_float_object(item): + td64ns_obj = _numeric_to_td64ns(item, parsed_unit) + ival = cnp.get_timedelta64_value(td64ns_obj) - ival = _item_to_timedelta64(item, parsed_unit, errors) + else: + raise TypeError(f"Invalid type for timedelta scalar: {type(item)}") + + except ValueError as err: + if errors == "coerce": + ival = NPY_NAT + elif "unit abbreviation w/o a number" in str(err): + # re-raise with more pertinent message + msg = f"Could not convert '{item}' to NumPy timedelta" + raise ValueError(msg) from err + else: + raise - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return result -cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1: - """ - See array_to_timedelta64. - """ - if item is NaT: - # we allow this check in the fast-path because NaT is a C-object - # so this is an inexpensive check - return NPY_NAT - else: - return parse_timedelta_string(item) - - -cdef int64_t _item_to_timedelta64( - object item, - str parsed_unit, - str errors -) except? -1: - """ - See array_to_timedelta64. 
- """ - try: - return cnp.get_timedelta64_value(convert_to_timedelta64(item, parsed_unit)) - except ValueError as err: - if errors == "coerce": - return NPY_NAT - elif "unit abbreviation w/o a number" in str(err): - # re-raise with more pertinent message - msg = f"Could not convert '{item}' to NumPy timedelta" - raise ValueError(msg) from err - else: - raise - - @cython.cpow(True) cdef int64_t parse_timedelta_string(str ts) except? -1: """ @@ -798,7 +773,7 @@ def _binary_op_method_timedeltalike(op, name): return NotImplemented try: - other = Timedelta(other) + other = _wrapped_to_timedelta(other) except ValueError: # failed to parse as timedelta return NotImplemented @@ -2154,12 +2129,14 @@ class Timedelta(_Timedelta): new_value = delta_to_nanoseconds(value, reso=new_reso) return cls._from_value_and_reso(new_value, reso=new_reso) + elif checknull_with_nat_and_na(value): + return NaT + elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' unit = parse_timedelta_unit(unit) - value = convert_to_timedelta64(value, unit) - elif checknull_with_nat_and_na(value): - return NaT + value = _numeric_to_td64ns(value, unit) + else: raise ValueError( "Value must be Timedelta, string, integer, " @@ -2341,7 +2318,7 @@ class Timedelta(_Timedelta): def __truediv__(self, other): if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") - other = Timedelta(other) + other = _wrapped_to_timedelta(other) if other is NaT: return np.nan if other._creso != self._creso: @@ -2374,7 +2351,7 @@ class Timedelta(_Timedelta): def __rtruediv__(self, other): if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") - other = Timedelta(other) + other = _wrapped_to_timedelta(other) if other is NaT: return np.nan if self._creso != other._creso: @@ -2402,7 +2379,7 @@ class Timedelta(_Timedelta): # just defer if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") - other = Timedelta(other) + other = _wrapped_to_timedelta(other) if other is NaT: return np.nan if self._creso != other._creso: @@ -2457,7 +2434,7 @@ class Timedelta(_Timedelta): # just defer if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") - other = Timedelta(other) + other = _wrapped_to_timedelta(other) if other is NaT: return np.nan if self._creso != other._creso: @@ -2525,6 +2502,7 @@ def truediv_object_array(ndarray left, ndarray right): if cnp.get_timedelta64_value(td64) == NPY_NAT: # td here should be interpreted as a td64 NaT if _should_cast_to_timedelta(obj): + _wrapped_to_timedelta(obj) # deprecate if allowing string res_value = np.nan else: # if its a number then let numpy handle division, otherwise @@ -2554,6 +2532,7 @@ def floordiv_object_array(ndarray left, ndarray right): if cnp.get_timedelta64_value(td64) == NPY_NAT: # td here should be interpreted as a td64 NaT if _should_cast_to_timedelta(obj): + _wrapped_to_timedelta(obj) # deprecate allowing string res_value = np.nan else: # if its a number then let numpy handle division, otherwise @@ -2585,6 +2564,23 @@ cdef bint is_any_td_scalar(object obj): ) +cdef inline _wrapped_to_timedelta(object other): + # Helper for deprecating cases where we cast str to Timedelta + td = Timedelta(other) + if isinstance(other, str): + from pandas.errors import Pandas4Warning + warnings.warn( + # GH#59653 + "Scalar operations between Timedelta and string are " + "deprecated and will raise in a future version. 
" + "Explicitly cast to Timedelta first.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + # When this is enforced, remove str from _should_cast_to_timedelta + return td + + cdef bint _should_cast_to_timedelta(object obj): """ Should we treat this object as a Timedelta for the purpose of a binary op diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5d6c7b53f918c..afbefd9b84461 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1869,7 +1869,7 @@ class Timestamp(_Timestamp): @classmethod def fromordinal(cls, ordinal, tz=None): """ - Construct a timestamp from a a proleptic Gregorian ordinal. + Construct a timestamp from a proleptic Gregorian ordinal. This method creates a `Timestamp` object corresponding to the given proleptic Gregorian ordinal, which is a count of days from January 1, @@ -3492,7 +3492,7 @@ default 'raise' year -= 1 month += 12 return (day + - np.fix((153 * month - 457) / 5) + + np.trunc((153 * month - 457) / 5) + 365 * year + np.floor(year / 4) - np.floor(year / 100) + diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index dccd93e8dafd9..89530c6c9c46c 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -332,19 +332,13 @@ cdef float64_t calc_var( int ddof, float64_t nobs, float64_t ssqdm_x, - int64_t num_consecutive_same_value ) noexcept nogil: cdef: float64_t result # Variance is unchanged if no observation is added or removed if (nobs >= minp) and (nobs > ddof): - - # pathological case & repeatedly same values case - if nobs == 1 or num_consecutive_same_value >= nobs: - result = 0 - else: - result = ssqdm_x / (nobs - ddof) + result = ssqdm_x / (nobs - ddof) else: result = NaN @@ -357,12 +351,12 @@ cdef void add_var( float64_t *mean_x, float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, - float64_t *prev_value, + bint *numerically_unstable, ) noexcept nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t + float64_t prev_m2 = ssqdm_x[0] # GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan` if val != val: @@ -370,14 +364,6 @@ cdef void add_var( nobs[0] = nobs[0] + 1 - # GH#42064, record num of same values to remove floating point artifacts - if val == prev_value[0]: - num_consecutive_same_value[0] += 1 - else: - # reset to 1 (include current value itself) - num_consecutive_same_value[0] = 1 - prev_value[0] = val - # Welford's method for the online variance-calculation # using Kahan summation # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance @@ -392,17 +378,23 @@ cdef void add_var( mean_x[0] = 0 ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) + if prev_m2 * InvCondTol > ssqdm_x[0]: + # possible catastrophic cancellation + numerically_unstable[0] = True + cdef void remove_var( float64_t val, float64_t *nobs, float64_t *mean_x, float64_t *ssqdm_x, - float64_t *compensation + float64_t *compensation, + bint *numerically_unstable, ) noexcept nogil: """ remove a value from the var calc """ cdef: float64_t delta, prev_mean, y, t + float64_t prev_m2 = ssqdm_x[0] if val == val: nobs[0] = nobs[0] - 1 if nobs[0]: @@ -416,9 +408,14 @@ cdef void remove_var( delta = t mean_x[0] = mean_x[0] - delta / nobs[0] ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0]) + + if prev_m2 * InvCondTol > ssqdm_x[0]: + # possible catastrophic cancellation + numerically_unstable[0] = True else: 
mean_x[0] = 0 ssqdm_x[0] = 0 + numerically_unstable[0] = False def roll_var(const float64_t[:] values, ndarray[int64_t] start, @@ -428,11 +425,12 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, """ cdef: float64_t mean_x, ssqdm_x, nobs, compensation_add, - float64_t compensation_remove, prev_value - int64_t s, e, num_consecutive_same_value + float64_t compensation_remove + int64_t s, e Py_ssize_t i, j, N = len(start) ndarray[float64_t] output bint is_monotonic_increasing_bounds + bint requires_recompute, numerically_unstable minp = max(minp, 1) is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( @@ -449,32 +447,35 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: - - prev_value = values[s] - num_consecutive_same_value = 0 - - mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0 - for j in range(s, e): - add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, - &num_consecutive_same_value, &prev_value) - - else: + requires_recompute = ( + i == 0 + or not is_monotonic_increasing_bounds + or s >= end[i - 1] + ) + if not requires_recompute: # After the first window, observations can both be added # and removed # calculate deletes for j in range(start[i - 1], s): remove_var(values[j], &nobs, &mean_x, &ssqdm_x, - &compensation_remove) + &compensation_remove, &numerically_unstable) # calculate adds for j in range(end[i - 1], e): add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, - &num_consecutive_same_value, &prev_value) + &numerically_unstable) + + if requires_recompute or numerically_unstable: + + mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0 + for j in range(s, e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, + &numerically_unstable) + numerically_unstable = False - output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value) + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) if not is_monotonic_increasing_bounds: nobs = 0.0 diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3f95997bb84f4..160c362d0dbc1 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -931,14 +931,10 @@ def assert_series_equal( assertion message. check_index : bool, default True Whether to check index equivalence. If False, then compare only values. - - .. versionadded:: 1.3.0 check_like : bool, default False If True, ignore the order of the index. Must be False if check_index is False. Note: same labels must be with the same data. - .. versionadded:: 1.5.0 - See Also -------- testing.assert_index_equal : Check that two Indexes are equal. 
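Aside: the `roll_var` rewrite a few hunks above drops the `num_consecutive_same_value` bookkeeping in favor of a cheaper invariant. Whenever an add or remove shrinks the running sum of squared deviations (`ssqdm_x`) past the condition tolerance, the window is flagged as possibly suffering catastrophic cancellation and is rebuilt from scratch. A minimal pure-Python sketch of that strategy follows; Kahan compensation and NaN handling are omitted, and the tolerance value is an illustrative stand-in for the `InvCondTol` constant, so treat this as a reading aid rather than the actual Cython implementation.

```python
import numpy as np

INV_COND_TOL = 1e-7  # illustrative stand-in for InvCondTol


def _add(val, nobs, mean, m2):
    """Welford update; also report possible catastrophic cancellation."""
    prev_m2 = m2
    nobs += 1
    delta = val - mean
    mean += delta / nobs
    m2 += delta * (val - mean)
    return nobs, mean, m2, prev_m2 * INV_COND_TOL > m2


def _remove(val, nobs, mean, m2):
    """Reverse Welford update for a value leaving the window."""
    prev_m2, prev_mean = m2, mean
    nobs -= 1
    if not nobs:
        return 0, 0.0, 0.0, False
    mean -= (val - prev_mean) / nobs
    m2 -= (val - prev_mean) * (val - mean)
    return nobs, mean, m2, prev_m2 * INV_COND_TOL > m2


def rolling_var(values, window, ddof=1, minp=1):
    out = np.full(len(values), np.nan)
    nobs, mean, m2 = 0, 0.0, 0.0
    for i in range(len(values)):
        s, e = max(0, i - window + 1), i + 1
        unstable = False
        if i > 0:
            prev_s, prev_e = max(0, i - window), i
            for j in range(prev_s, s):  # values leaving the window
                nobs, mean, m2, bad = _remove(values[j], nobs, mean, m2)
                unstable |= bad
            for j in range(prev_e, e):  # values entering the window
                nobs, mean, m2, bad = _add(values[j], nobs, mean, m2)
                unstable |= bad
        if i == 0 or unstable:
            # possible cancellation: rebuild the window from scratch
            nobs, mean, m2 = 0, 0.0, 0.0
            for j in range(s, e):
                nobs, mean, m2, _ = _add(values[j], nobs, mean, m2)
        if nobs >= minp and nobs > ddof:
            out[i] = m2 / (nobs - ddof)
    return out
```

Note that this also covers the pathological case the old counter existed for: removing a large value from a now-constant window drives `m2` toward zero, which trips the `prev_m2 * INV_COND_TOL > m2` test and forces a fresh recompute yielding an exact zero variance.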
diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index ed30b2022db10..8bbd20c742c9c 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -13,8 +13,8 @@ import uuid from pandas.compat import ( - PYPY, - WARNING_CHECK_DISABLED, + CHAINED_WARNING_DISABLED, + CHAINED_WARNING_DISABLED_INPLACE_METHOD, ) from pandas.errors import ChainedAssignmentError @@ -163,10 +163,18 @@ def with_csv_dialect(name: str, **kwargs) -> Generator[None]: csv.unregister_dialect(name) -def raises_chained_assignment_error(extra_warnings=(), extra_match=()): +def raises_chained_assignment_error( + extra_warnings=(), extra_match=(), inplace_method=False +): from pandas._testing import assert_produces_warning - if PYPY or WARNING_CHECK_DISABLED: + WARNING_DISABLED = ( + CHAINED_WARNING_DISABLED_INPLACE_METHOD + if inplace_method + else CHAINED_WARNING_DISABLED + ) + + if WARNING_DISABLED: if not extra_warnings: from contextlib import nullcontext diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index f38abafd2db78..72d9c2555d16e 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -16,12 +16,13 @@ from typing import TYPE_CHECKING from pandas.compat._constants import ( + CHAINED_WARNING_DISABLED, + CHAINED_WARNING_DISABLED_INPLACE_METHOD, IS64, ISMUSL, PY312, PY314, PYPY, - WARNING_CHECK_DISABLED, WASM, ) from pandas.compat.numpy import is_numpy_dev @@ -152,6 +153,8 @@ def is_ci_environment() -> bool: __all__ = [ + "CHAINED_WARNING_DISABLED", + "CHAINED_WARNING_DISABLED_INPLACE_METHOD", "HAS_PYARROW", "IS64", "ISMUSL", @@ -159,7 +162,6 @@ def is_ci_environment() -> bool: "PY314", "PYARROW_MIN_VERSION", "PYPY", - "WARNING_CHECK_DISABLED", "WASM", "is_numpy_dev", "pa_version_under14p0", diff --git a/pandas/compat/_constants.py b/pandas/compat/_constants.py index 674afc5c62009..102f6fef6e4e1 100644 --- a/pandas/compat/_constants.py +++ b/pandas/compat/_constants.py @@ -19,7 +19,8 @@ WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"]) ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "") REF_COUNT = 2 -WARNING_CHECK_DISABLED = PY314 +CHAINED_WARNING_DISABLED = PYPY or (PY314 and not sys._is_gil_enabled()) # type: ignore[attr-defined] +CHAINED_WARNING_DISABLED_INPLACE_METHOD = PYPY or PY314 __all__ = [ diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index beb4a69232b27..8247356f25f4d 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -22,6 +22,7 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager if TYPE_CHECKING: @@ -90,6 +91,10 @@ def load_reduce(self) -> None: cls = args[0] stack[-1] = NDArrayBacked.__new__(*args) return + elif args and issubclass(args[0], NDFrame): + cls = args[0] + stack[-1] = cls.__new__(cls) + return raise dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment] diff --git a/pandas/conftest.py b/pandas/conftest.py index 7fe4ec7a5ee4f..ad2b0a317a8b2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -722,7 +722,7 @@ def index(request): - ... """ # copy to avoid mutation, e.g. setting .name - return indices_dict[request.param].copy() + return indices_dict[request.param].copy(deep=False) @pytest.fixture( @@ -735,7 +735,7 @@ def index_flat(request): index fixture, but excluding MultiIndex cases. 
""" key = request.param - return indices_dict[key].copy() + return indices_dict[key].copy(deep=False) @pytest.fixture( @@ -758,11 +758,7 @@ def index_with_missing(request): MultiIndex is excluded because isna() is not defined for MultiIndex. """ - - # GH 35538. Use deep copy to avoid illusive bug on np-dev - # GHA pipeline that writes into indices_dict despite copy - ind = indices_dict[request.param].copy(deep=True) - vals = ind.values.copy() + ind = indices_dict[request.param] if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: # For setting missing values in the top level of MultiIndex vals = ind.tolist() @@ -770,6 +766,7 @@ def index_with_missing(request): vals[-1] = (None,) + vals[-1][1:] return MultiIndex.from_tuples(vals) else: + vals = ind.values.copy() vals[0] = None vals[-1] = None return type(ind)(vals) @@ -850,7 +847,7 @@ def index_or_series_obj(request): Fixture for tests on indexes, series and series with a narrow dtype copy to avoid mutation, e.g. setting .name """ - return _index_or_series_objs[request.param].copy(deep=True) + return _index_or_series_objs[request.param].copy(deep=False) _typ_objects_series = { @@ -873,7 +870,7 @@ def index_or_series_memory_obj(request): series with empty objects type copy to avoid mutation, e.g. setting .name """ - return _index_or_series_memory_objs[request.param].copy(deep=True) + return _index_or_series_memory_objs[request.param].copy(deep=False) # ---------------------------------------------------------------- diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index fe2e8f34807ea..01cdc417742bd 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -15,7 +15,6 @@ import warnings from pandas.util._decorators import ( - doc, set_module, ) from pandas.util._exceptions import find_stack_level @@ -236,12 +235,11 @@ def __get__(self, obj, cls): CachedAccessor = Accessor -@doc(klass="", examples="", others="") def _register_accessor( name: str, cls: type[NDFrame | Index] ) -> Callable[[TypeT], TypeT]: """ - Register a custom accessor on {klass} objects. + Register a custom accessor on objects. Parameters ---------- @@ -262,14 +260,15 @@ def _register_accessor( Notes ----- - This function allows you to register a custom-defined accessor class for {klass}. + This function allows you to register a custom-defined accessor class + for pandas objects (DataFrame, Series, or Index). The requirements for the accessor class are as follows: * Must contain an init method that: - * accepts a single {klass} object + * accepts a single object - * raises an AttributeError if the {klass} object does not have correctly + * raises an AttributeError if the object does not have correctly matching inputs for the accessor * Must contain a method for each access pattern. @@ -279,9 +278,6 @@ def _register_accessor( * Accessible using the @property decorator if no additional arguments are needed. - Examples - -------- - {examples} """ def decorator(accessor: TypeT) -> TypeT: @@ -327,8 +323,73 @@ def decorator(accessor: TypeT) -> TypeT: @set_module("pandas.api.extensions") -@doc(_register_accessor, klass="DataFrame", examples=_register_df_examples) def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]: + """ + Register a custom accessor on DataFrame objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Returns + ------- + callable + A class decorator. 
+ + See Also + -------- + register_dataframe_accessor : Register a custom accessor on DataFrame objects. + register_series_accessor : Register a custom accessor on Series objects. + register_index_accessor : Register a custom accessor on Index objects. + + Notes + ----- + This function allows you to register a custom-defined accessor class for DataFrame. + The requirements for the accessor class are as follows: + + * Must contain an init method that: + + * accepts a single DataFrame object + + * raises an AttributeError if the DataFrame object does not have correctly + matching inputs for the accessor + + * Must contain a method for each access pattern. + + * The methods should be able to take any argument signature. + + * Accessible using the @property decorator if no additional arguments are + needed. + + Examples + -------- + An accessor that only accepts integers could + have a class defined like this: + + >>> @pd.api.extensions.register_dataframe_accessor("int_accessor") + ... class IntAccessor: + ... def __init__(self, pandas_obj): + ... if not all( + ... pandas_obj[col].dtype == "int64" for col in pandas_obj.columns + ... ): + ... raise AttributeError("All columns must contain integer values only") + ... self._obj = pandas_obj + ... + ... def sum(self): + ... return self._obj.sum() + >>> df = pd.DataFrame([[1, 2], ["x", "y"]]) + >>> df.int_accessor + Traceback (most recent call last): + ... + AttributeError: All columns must contain integer values only. + >>> df = pd.DataFrame([[1, 2], [3, 4]]) + >>> df.int_accessor.sum() + 0 4 + 1 6 + dtype: int64 + """ from pandas import DataFrame return _register_accessor(name, DataFrame) @@ -359,8 +420,69 @@ def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]: @set_module("pandas.api.extensions") -@doc(_register_accessor, klass="Series", examples=_register_series_examples) def register_series_accessor(name: str) -> Callable[[TypeT], TypeT]: + """ + Register a custom accessor on Series objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Returns + ------- + callable + A class decorator. + + See Also + -------- + register_dataframe_accessor : Register a custom accessor on DataFrame objects. + register_series_accessor : Register a custom accessor on Series objects. + register_index_accessor : Register a custom accessor on Index objects. + + Notes + ----- + This function allows you to register a custom-defined accessor class for Series. + The requirements for the accessor class are as follows: + + * Must contain an init method that: + + * accepts a single Series object + + * raises an AttributeError if the Series object does not have correctly + matching inputs for the accessor + + * Must contain a method for each access pattern. + + * The methods should be able to take any argument signature. + + * Accessible using the @property decorator if no additional arguments are + needed. + + Examples + -------- + An accessor that only accepts integers could + have a class defined like this: + + >>> @pd.api.extensions.register_series_accessor("int_accessor") + ... class IntAccessor: + ... def __init__(self, pandas_obj): + ... if not pandas_obj.dtype == "int64": + ... raise AttributeError("The series must contain integer data only") + ... self._obj = pandas_obj + ... + ... def sum(self): + ... return self._obj.sum() + >>> df = pd.Series([1, 2, "x"]) + >>> df.int_accessor + Traceback (most recent call last): + ... 
+ AttributeError: The series must contain integer data only. + >>> df = pd.Series([1, 2, 3]) + >>> df.int_accessor.sum() + np.int64(6) + """ from pandas import Series return _register_accessor(name, Series) @@ -394,8 +516,73 @@ def register_series_accessor(name: str) -> Callable[[TypeT], TypeT]: @set_module("pandas.api.extensions") -@doc(_register_accessor, klass="Index", examples=_register_index_examples) def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]: + """ + Register a custom accessor on Index objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Returns + ------- + callable + A class decorator. + + See Also + -------- + register_dataframe_accessor : Register a custom accessor on DataFrame objects. + register_series_accessor : Register a custom accessor on Series objects. + register_index_accessor : Register a custom accessor on Index objects. + + Notes + ----- + This function allows you to register a custom-defined accessor class for Index. + The requirements for the accessor class are as follows: + + * Must contain an init method that: + + * accepts a single Index object + + * raises an AttributeError if the Index object does not have correctly + matching inputs for the accessor + + * Must contain a method for each access pattern. + + * The methods should be able to take any argument signature. + + * Accessible using the @property decorator if no additional arguments are + needed. + + Examples + -------- + An accessor that only accepts integers could + have a class defined like this: + + >>> @pd.api.extensions.register_index_accessor("int_accessor") + ... class IntAccessor: + ... def __init__(self, pandas_obj): + ... if not all(isinstance(x, int) for x in pandas_obj): + ... raise AttributeError("The index must only be an integer value") + ... self._obj = pandas_obj + ... + ... def even(self): + ... return [x for x in self._obj if x % 2 == 0] + >>> df = pd.DataFrame.from_dict( + ... {"row1": {"1": 1, "2": "a"}, "row2": {"1": 2, "2": "b"}}, orient="index" + ... ) + >>> df.index.int_accessor + Traceback (most recent call last): + ... + AttributeError: The index must only be an integer value. + >>> df = pd.DataFrame( + ... {"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}, index=[1, 2, 5, 8] + ... ) + >>> df.index.int_accessor.even() + [2, 8] + """ from pandas import Index return _register_accessor(name, Index) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b977e998b82a4..54920b1a8a2a3 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -697,8 +697,6 @@ def factorize( If True, the sentinel -1 will be used for NaN values. If False, NaN values will be encoded as non-negative integers and will not drop the NaN from the uniques of the values. - - .. versionadded:: 1.5.0 {size_hint}\ Returns @@ -1703,7 +1701,7 @@ def map_array( ] else: # Dictionary does not have a default. Thus it's safe to - # convert to an Series for efficiency. + # convert to a Series for efficiency. 
# we specify the keys here to handle the # possibility that they are tuples diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 84aca81420fe1..11928e79ffc62 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -54,7 +54,10 @@ from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray from pandas.core.construction import extract_array -from pandas.core.indexers import check_array_indexer +from pandas.core.indexers import ( + check_array_indexer, + getitem_returns_view, +) from pandas.core.sorting import nargminmax if TYPE_CHECKING: @@ -258,6 +261,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: return self._from_backing_data(new_values) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value @@ -283,7 +289,10 @@ def __getitem__( result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) - return self._from_backing_data(result) + result = self._from_backing_data(result) + if getitem_returns_view(self, key): + result._readonly = self._readonly + return result # error: Incompatible types in assignment (expression has type "ExtensionArray", # variable has type "Union[int, slice, ndarray]") @@ -294,6 +303,8 @@ def __getitem__( return self._box_func(result) result = self._from_backing_data(result) + if getitem_returns_view(self, key): + result._readonly = self._readonly return result def _pad_or_backfill( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 53c938faf9257..aa224ee571d70 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -78,6 +78,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import ( check_array_indexer, + getitem_returns_view, unpack_tuple_and_ellipses, validate_indices, ) @@ -790,7 +791,10 @@ def __getitem__(self, item: PositionalIndexer): value = self._pa_array[item] if isinstance(value, pa.ChunkedArray): - return self._from_pyarrow_array(value) + result = self._from_pyarrow_array(value) + if getitem_returns_view(self, item): + result._readonly = self._readonly + return result else: pa_type = self._pa_array.type scalar = value.as_py() @@ -2196,6 +2200,9 @@ def __setitem__(self, key, value) -> None: ------- None """ + if self._readonly: + raise ValueError("Cannot modify read-only array") + # GH50085: unwrap 1D indexers if isinstance(key, tuple) and len(key) == 1: key = key[0] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e091ecf18668d..63b929a867410 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -37,6 +37,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.common import ( is_list_like, is_scalar, @@ -268,6 +269,8 @@ class ExtensionArray: # strictly less than 2000 to be below Index.__pandas_priority__. __pandas_priority__ = 1000 + _readonly = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -454,6 +457,11 @@ def __setitem__(self, key, value) -> None: Returns ------- None + + Raises + ------ + ValueError + If the array is readonly and modification is attempted. 
""" # Some notes to the ExtensionArray implementer who may have ended up # here. While this method is not required for the interface, if you @@ -473,6 +481,10 @@ def __setitem__(self, key, value) -> None: # __init__ method coerces that value, then so should __setitem__ # Note, also, that Series/DataFrame.where internally use __setitem__ # on a copy of the data. + # Check if the array is readonly + if self._readonly: + raise ValueError("Cannot modify read-only array") + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") def __len__(self) -> int: @@ -567,8 +579,14 @@ def to_numpy( result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() + elif self._readonly and astype_is_view(self.dtype, result.dtype): + # If the ExtensionArray is readonly, make the numpy array readonly too + result = result.view() + result.flags.writeable = False + if na_value is not lib.no_default: result[self.isna()] = na_value # type: ignore[index] + return result # ------------------------------------------------------------------------ @@ -1574,8 +1592,6 @@ def factorize( NaN values will be encoded as non-negative integers and will not drop the NaN from the uniques of the values. - .. versionadded:: 1.5.0 - Returns ------- codes : ndarray diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6e01096ad158..d40c9e8cbcde2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2280,7 +2280,7 @@ def _repr_categories(self) -> list[str]: from pandas.io.formats import format as fmt formatter = None - if self.categories.dtype == "str": + if self.categories.dtype == "str" or self.categories.dtype == "string": # the extension array formatter defaults to boxed=True in format_array # override here to boxed=False to be consistent with QUOTE_NONNUMERIC formatter = cast(ExtensionArray, self.categories._values)._formatter( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1fbcd0665c467..2f7330d1e81fe 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -368,7 +368,12 @@ def __array__( if copy is True: return np.array(self._ndarray, dtype=dtype) - return self._ndarray + + result = self._ndarray + if self._readonly: + result = result.view() + result.flags.writeable = False + return result @overload def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4cf5f4b13890e..9b7ae26bef899 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2287,7 +2287,7 @@ def to_julian_date(self) -> npt.NDArray[np.float64]: month[testarr] += 12 return ( day - + np.fix((153 * month - 457) / 5) + + np.trunc((153 * month - 457) / 5) + 365 * year + np.floor(year / 4) - np.floor(year / 100) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3e724b176b76d..b144e8ae45555 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -87,7 +87,10 @@ ensure_wrapped_if_datetimelike, extract_array, ) -from pandas.core.indexers import check_array_indexer +from pandas.core.indexers import ( + check_array_indexer, + getitem_returns_view, +) from pandas.core.ops import ( invalid_comparison, unpack_zerodim_and_defer, @@ -842,9 +845,15 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray, # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray], # ndarray[Any, Any]]" - return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] + result = self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] + if getitem_returns_view(self, key): + result._readonly = self._readonly + return result def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6085b577f4392..2adab31fff851 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -26,6 +26,7 @@ ) from pandas.errors import AbstractMethodError +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -75,7 +76,10 @@ ensure_wrapped_if_datetimelike, extract_array, ) -from pandas.core.indexers import check_array_indexer +from pandas.core.indexers import ( + check_array_indexer, + getitem_returns_view, +) from pandas.core.ops import invalid_comparison from pandas.core.util.hashing import hash_array @@ -212,7 +216,10 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] - return self._simple_new(self._data[item], newmask) + result = self._simple_new(self._data[item], newmask) + if getitem_returns_view(self, item): + result._readonly = self._readonly + return result def _pad_or_backfill( self, @@ -354,6 +361,9 @@ def _validate_setitem_value(self, value): raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + key = check_array_indexer(self, key) if is_scalar(value): @@ -566,11 +576,11 @@ def to_numpy( hasna = self._hasna dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna) if dtype is None: - dtype = object + dtype = np.dtype(object) if hasna: if ( - dtype != object + dtype != np.dtype(object) and not is_string_dtype(dtype) and na_value is libmissing.NA ): @@ -588,6 +598,9 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = 
self._data.astype(dtype, copy=copy) + if self._readonly and not copy and astype_is_view(self.dtype, dtype): + data = data.view() + data.flags.writeable = False return data def tolist(self) -> list: @@ -686,7 +699,12 @@ def __array__( if copy is False: if not self._hasna: # special case, here we can simply return the underlying data - return np.array(self._data, dtype=dtype, copy=copy) + result = np.array(self._data, dtype=dtype, copy=copy) + # If the ExtensionArray is readonly, make the numpy array readonly too + if self._readonly: + result = result.view() + result.flags.writeable = False + return result raise ValueError( "Unable to avoid copy while creating an array as requested." ) @@ -1259,8 +1277,6 @@ def factorize( NaN values will be encoded as non-negative integers and will not drop the NaN from the uniques of the values. - .. versionadded:: 1.5.0 - Returns ------- codes : ndarray diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index eca47d3c9657f..9d174a49b5db1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -14,7 +14,10 @@ from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.astype import ( + astype_array, + astype_is_view, +) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, maybe_downcast_to_dtype, @@ -179,12 +182,23 @@ def dtype(self) -> NumpyEADtype: # NumPy Array Interface def __array__( - self, dtype: NpDtype | None = None, copy: bool | None = None + self, dtype: np.dtype | None = None, copy: bool | None = None ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support - return np.array(self._ndarray, dtype=dtype, copy=copy) - return np.asarray(self._ndarray, dtype=dtype) + result = np.array(self._ndarray, dtype=dtype, copy=copy) + else: + result = np.asarray(self._ndarray, dtype=dtype) + + if ( + self._readonly + and not copy + and (dtype is None or astype_is_view(self.dtype, dtype)) + ): + result = result.view() + result.flags.writeable = False + + return result def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of @@ -545,6 +559,9 @@ def to_numpy( result[mask] = na_value else: result = self._ndarray + if not copy and self._readonly: + result = result.view() + result.flags.writeable = False result = np.asarray(result, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 18e4ff31164ac..ba7014e661fa6 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -397,7 +397,11 @@ def __array__( # For NumPy 1.x compatibility we cannot use copy=None. And # `copy=False` has the meaning of `copy=None` here: if not copy: - return np.asarray(self.asi8, dtype=dtype) + result = np.asarray(self.asi8, dtype=dtype) + if self._readonly: + result = result.view() + result.flags.writeable = False + return result else: return np.array(self.asi8, dtype=dtype) @@ -1331,7 +1335,7 @@ def dt64arr_to_periodarr( data, freq, tz=None ) -> tuple[npt.NDArray[np.int64], BaseOffset]: """ - Convert an datetime-like array to values Period ordinals. + Convert a datetime-like array to values Period ordinals. 
Parameters ---------- diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 83e16f5d4b8db..34521b1b97bab 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -568,7 +568,11 @@ def __array__( if copy is True: return np.array(self.sp_values) else: - return self.sp_values + result = self.sp_values + if self._readonly: + result = result.view() + result.flags.writeable = False + return result if copy is False: raise ValueError( @@ -597,6 +601,8 @@ def __array__( return out def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") # I suppose we could allow setting of non-fill_value elements. # TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where @@ -983,6 +989,13 @@ def __getitem__( elif isinstance(key, tuple): data_slice = self.to_dense()[key] elif isinstance(key, slice): + if key == slice(None): + # to ensure arr[:] (used by view()) does not make a copy + result = type(self)._simple_new( + self.sp_values, self.sp_index, self.dtype + ) + result._readonly = self._readonly + return result # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index fd68cc513125d..f36b22b10aa34 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -802,6 +802,9 @@ def _maybe_convert_setitem_value(self, value): return value def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + value = self._maybe_convert_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7dd41cc0e9960..3dc0115fc4770 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -107,10 +107,10 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr See Also -------- :func:`array` - The recommended function for creating a ArrowStringArray. + The recommended function for creating an ArrowStringArray. Series.str The string methods are available on Series backed by - a ArrowStringArray. + an ArrowStringArray. Notes ----- diff --git a/pandas/core/common.py b/pandas/core/common.py index 72b15b6e1bf4e..4f1c8d1800c00 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -639,8 +639,6 @@ def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]: """ If a name is missing then replace it by level_n, where n is the count - .. versionadded:: 1.4.0 - Parameters ---------- names : list-like diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..94b92ead92374 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,8 +24,12 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, ExtensionDtype, + IntervalDtype, NumpyEADtype, + PeriodDtype, ) if TYPE_CHECKING: @@ -140,7 +144,9 @@ def _astype_float_to_int_nansafe( """ if not np.isfinite(values).all(): raise IntCastingNaNError( - "Cannot convert non-finite values (NA or inf) to integer" + "Cannot convert non-finite values (NA or inf) to integer." + "Replace or remove non-finite values or cast to an integer type" + "that supports these values (e.g. 
'Int64')" ) if dtype.kind == "u": # GH#45151 @@ -283,6 +289,16 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) + elif new_dtype == object and isinstance( + dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype) + ): + return False + + elif isinstance(dtype, CategoricalDtype) and not isinstance( + new_dtype, CategoricalDtype + ): + return False + numpy_dtype = getattr(dtype, "numpy_dtype", None) new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dd63445266134..bb3e8105d5472 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -882,7 +882,7 @@ def is_unsigned_integer_dtype(arr_or_dtype) -> bool: See Also -------- api.types.is_signed_integer_dtype : Check whether the provided array - or dtype is of an signed integer dtype. + or dtype is of a signed integer dtype. api.types.is_integer_dtype : Check whether the provided array or dtype is of an integer dtype. api.types.is_numeric_dtype : Check whether the provided array or dtype @@ -1264,7 +1264,7 @@ def is_numeric_dtype(arr_or_dtype) -> bool: api.types.is_unsigned_integer_dtype: Check whether the provided array or dtype is of an unsigned integer dtype. api.types.is_signed_integer_dtype: Check whether the provided array - or dtype is of an signed integer dtype. + or dtype is of a signed integer dtype. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0f36ea031d3d4..c8c246434f6d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -48,11 +48,11 @@ properties, ) from pandas._libs.hashtable import duplicated +from pandas._libs.internals import SetitemMixin from pandas._libs.lib import is_range_indexer -from pandas.compat import PYPY from pandas.compat._constants import ( + CHAINED_WARNING_DISABLED_INPLACE_METHOD, REF_COUNT, - WARNING_CHECK_DISABLED, ) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -63,7 +63,6 @@ ) from pandas.errors.cow import ( _chained_assignment_method_msg, - _chained_assignment_msg, ) from pandas.util._decorators import ( Appender, @@ -341,8 +340,12 @@ of the left keys. * left_anti: use only keys from left frame that are not in right frame, similar to SQL left anti join; preserve key order. + + .. versionadded:: 3.0 * right_anti: use only keys from right frame that are not in left frame, similar to SQL right anti join; preserve key order. + + .. versionadded:: 3.0 on : Hashable or a sequence of the previous Column or index level names to join on. These must be found in both DataFrames. If `on` is None and not merging on indexes then this defaults @@ -517,7 +520,7 @@ @set_module("pandas") -class DataFrame(NDFrame, OpsMixin): +class DataFrame(SetitemMixin, NDFrame, OpsMixin): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. @@ -664,6 +667,11 @@ class DataFrame(NDFrame, OpsMixin): # and ExtensionArray. Should NOT be overridden by subclasses. __pandas_priority__ = 4000 + # override those to avoid inheriting from SetitemMixin (cython generates + # them by default) + __reduce__ = object.__reduce__ + __setstate__ = NDFrame.__setstate__ + @property def _constructor(self) -> type[DataFrame]: return DataFrame @@ -1857,9 +1865,6 @@ def from_dict( If 'tight', assume a dict with keys ['index', 'columns', 'data', 'index_names', 'column_names']. - .. 
versionadded:: 1.4.0 - 'tight' as an allowed value for the ``orient`` argument - dtype : dtype, default None Data type to force after DataFrame construction, otherwise infer. columns : list, default None @@ -2098,9 +2103,6 @@ def to_dict( [{column -> value}, ... , {column -> value}] - 'index' : dict like {index -> {column -> value}} - .. versionadded:: 1.4.0 - 'tight' as an allowed value for the ``orient`` argument - into : class, default dict The collections.abc.MutableMapping subclass used for all Mappings in the return value. Can be the actual class or an empty @@ -2927,6 +2929,7 @@ def to_parquet( index: bool | None = ..., partition_cols: list[str] | None = ..., storage_options: StorageOptions = ..., + filesystem: Any = ..., **kwargs, ) -> bytes: ... @@ -2940,6 +2943,7 @@ def to_parquet( index: bool | None = ..., partition_cols: list[str] | None = ..., storage_options: StorageOptions = ..., + filesystem: Any = ..., **kwargs, ) -> None: ... @@ -2953,6 +2957,7 @@ def to_parquet( index: bool | None = None, partition_cols: list[str] | None = None, storage_options: StorageOptions | None = None, + filesystem: Any = None, **kwargs, ) -> bytes | None: """ @@ -2992,6 +2997,12 @@ def to_parquet( Must be None if path is not a string. {storage_options} + filesystem : fsspec or pyarrow filesystem, default None + Filesystem object to use when reading the parquet file. Only implemented + for ``engine="pyarrow"``. + + .. versionadded:: 2.1.0 + **kwargs Additional arguments passed to the parquet library. See :ref:`pandas io ` for more details. @@ -3053,6 +3064,7 @@ def to_parquet( index=index, partition_cols=partition_cols, storage_options=storage_options, + filesystem=filesystem, **kwargs, ) @@ -3097,8 +3109,6 @@ def to_orc( """ Write a DataFrame to the Optimized Row Columnar (ORC) format. - .. versionadded:: 1.5.0 - Parameters ---------- path : str, file-like object or None, default None @@ -3472,8 +3482,6 @@ def to_xml( """ Render a DataFrame to an XML document. - .. versionadded:: 1.3.0 - Parameters ---------- path_or_buffer : str, path object, file-like object, or None, default None @@ -4317,7 +4325,8 @@ def isetitem(self, loc, value) -> None: arraylike, refs = self._sanitize_column(value) self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) - def __setitem__(self, key, value) -> None: + # def __setitem__() is implemented in SetitemMixin and dispatches to this method + def _setitem(self, key, value) -> None: """ Set item(s) in DataFrame by key. @@ -4401,12 +4410,6 @@ def __setitem__(self, key, value) -> None: z 3 50 # Values for 'a' and 'b' are completely ignored! """ - if not PYPY and not WARNING_CHECK_DISABLED: - if sys.getrefcount(self) <= REF_COUNT + 1: - warnings.warn( - _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 - ) - key = com.apply_if_callable(key, self) # see if we can slice the rows @@ -6211,7 +6214,7 @@ def set_index( drop: bool = ..., append: bool = ..., inplace: Literal[False] = ..., - verify_integrity: bool = ..., + verify_integrity: bool | lib.NoDefault = ..., ) -> DataFrame: ... @overload @@ -6222,7 +6225,7 @@ def set_index( drop: bool = ..., append: bool = ..., inplace: Literal[True], - verify_integrity: bool = ..., + verify_integrity: bool | lib.NoDefault = ..., ) -> None: ... 
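Aside: with the hunk above, `DataFrame.__setitem__` no longer does its own refcount check; `SetitemMixin.__setitem__` (added in `internals.pyx` earlier in this diff) performs the chained-assignment detection once and then dispatches to `_setitem`. A rough pure-Python rendering of that control flow follows; the demo class and the refcount threshold are illustrative assumptions, and the Cython version prefers `PyUnstable_Object_IsUniqueReferencedTemporary` on Python 3.14+ over the refcount fallback.

```python
import sys
import warnings


class ChainedAssignmentError(Warning):
    pass


class SetitemMixin:
    def __setitem__(self, key, value) -> None:
        # A temporary like get_df()["a"] has no referent left in the caller,
        # so writing into it can never be observed afterwards. On CPython the
        # only references here are the caller's stack slot, `self`, and
        # getrefcount's own argument (the threshold is illustrative).
        if sys.getrefcount(self) <= 3:
            warnings.warn(
                "chained assignment will never update the original object",
                ChainedAssignmentError,
                stacklevel=2,
            )
        self._setitem(key, value)


class ToyFrame(SetitemMixin):
    def __init__(self) -> None:
        self._columns: dict = {}

    def _setitem(self, key, value) -> None:
        self._columns[key] = value


df = ToyFrame()
df["a"] = 1          # fine: `df` keeps a reference to the frame
ToyFrame()["b"] = 2  # warns: the temporary is dropped immediately
```

Centralizing the check in a Cython mixin both removes the per-call `sys.getrefcount` overhead from the Python-level `__setitem__` and keeps the detection consistent between `DataFrame` and `Series`, which is why the class explicitly restores `__reduce__`/`__setstate__` to avoid the pickle methods Cython would otherwise generate.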
def set_index( @@ -6232,7 +6235,7 @@ def set_index( drop: bool = True, append: bool = False, inplace: bool = False, - verify_integrity: bool = False, + verify_integrity: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | None: """ Set the DataFrame index using existing columns. @@ -6262,6 +6265,8 @@ def set_index( necessary. Setting to False will improve the performance of this method. + .. deprecated:: 3.0.0 + Returns ------- DataFrame or None @@ -6348,6 +6353,18 @@ def set_index( 2013 84 2014 31 """ + if verify_integrity is not lib.no_default: + # GH#62919 + warnings.warn( + "The 'verify_integrity' keyword in DataFrame.set_index is " + "deprecated and will be removed in a future version. " + "Directly check the result.index.is_unique instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + else: + verify_integrity = False + inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) if not isinstance(keys, list): @@ -6529,16 +6546,11 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. - - .. versionadded:: 1.5.0 - names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list with length equal to the number of levels. - .. versionadded:: 1.5.0 - Returns ------- DataFrame or None @@ -7716,8 +7728,6 @@ def value_counts( dropna : bool, default True Do not include counts of rows that contain NA values. - .. versionadded:: 1.3.0 - Returns ------- Series @@ -9352,7 +9362,7 @@ def update( 1 2 500.0 2 3 6.0 """ - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -9845,13 +9855,9 @@ def pivot( sort : bool, default True Specifies if the result should be sorted. - .. versionadded:: 1.3.0 - **kwargs : dict Optional keyword arguments to pass to ``aggfunc``. - .. versionadded:: 3.0.0 - Returns ------- DataFrame @@ -10200,9 +10206,6 @@ def explode( be str or tuple, and all specified columns their list-like data on same row of the frame must have matching length. - .. versionadded:: 1.3.0 - Multi-column explode - ignore_index : bool, default False If True, the resulting index will be labeled 0, 1, …, n - 1. @@ -11234,8 +11237,6 @@ def join( * "many_to_one" or "m:1": check if join keys are unique in right dataset. * "many_to_many" or "m:m": allowed, but does not result in checks. - .. versionadded:: 1.5.0 - Returns ------- DataFrame @@ -11631,8 +11632,6 @@ def corr( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 The default value of ``numeric_only`` is now ``False``. @@ -11758,8 +11757,6 @@ def cov( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 The default value of ``numeric_only`` is now ``False``. @@ -11898,8 +11895,6 @@ def corrwith( min_periods : int, optional Minimum number of observations needed to have a valid result. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 The default value of ``numeric_only`` is now ``False``. @@ -13420,8 +13415,6 @@ def idxmin( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. 
versionadded:: 1.5.0 - Returns ------- Series @@ -13525,8 +13518,6 @@ def idxmax( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - Returns ------- Series diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1385d48e0bb4a..25e0aa6b8f072 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -82,10 +82,9 @@ WriteExcelBuffer, npt, ) -from pandas.compat import PYPY from pandas.compat._constants import ( + CHAINED_WARNING_DISABLED_INPLACE_METHOD, REF_COUNT, - WARNING_CHECK_DISABLED, ) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -2075,7 +2074,6 @@ def __getstate__(self) -> dict[str, Any]: **meta, } - @final def __setstate__(self, state) -> None: if isinstance(state, BlockManager): self._mgr = state @@ -2876,8 +2874,6 @@ def to_sql( `sqlite3 `__ or `SQLAlchemy `__. - .. versionadded:: 1.4.0 - Raises ------ ValueError @@ -4263,8 +4259,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: result = result.__finalize__(self) return result + # def __delitem__() is implemented in SetitemMixin and dispatches to this method @final - def __delitem__(self, key) -> None: + def _delitem(self, key) -> None: """ Delete item """ @@ -5857,8 +5854,6 @@ def sample( ignore_index : bool, default False If True, the resulting index will be labeled 0, 1, …, n - 1. - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -7081,7 +7076,7 @@ def fillna( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7328,7 +7323,7 @@ def ffill( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7468,7 +7463,7 @@ def bfill( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7553,7 +7548,7 @@ def replace( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7916,7 +7911,7 @@ def interpolate( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -8571,7 +8566,7 @@ def clip( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -8954,8 +8949,6 @@ def resample( - 'end': `origin` is the last value of the timeseries - 'end_day': `origin` is the ceiling midnight of the last day - .. versionadded:: 1.3.0 - .. note:: Only takes effect for Tick-frequencies (i.e. 
fixed frequencies like @@ -8967,12 +8960,6 @@ def resample( Whether to include the group keys in the result index when using ``.apply()`` on the resampled object. - .. versionadded:: 1.5.0 - - Not specifying ``group_keys`` will retain values-dependent behavior - from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes - ` for examples). - .. versionchanged:: 2.0.0 ``group_keys`` now defaults to ``False``. @@ -10214,7 +10201,7 @@ def where( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -10278,7 +10265,7 @@ def mask( ) -> Self | None: inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7240db129b246..5918d0f263379 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -920,8 +920,6 @@ def value_counts( """ Return a Series or DataFrame containing counts of unique rows. - .. versionadded:: 1.4.0 - Parameters ---------- normalize : bool, default False @@ -2629,8 +2627,6 @@ def idxmax( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - Returns ------- DataFrame @@ -2703,8 +2699,6 @@ def idxmin( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - Returns ------- DataFrame @@ -2775,8 +2769,6 @@ def value_counts( """ Return a Series or DataFrame containing counts of unique rows. - .. versionadded:: 1.4.0 - Parameters ---------- subset : list-like, optional @@ -2913,7 +2905,7 @@ def take( Returns ------- DataFrame - An DataFrame containing the elements taken from each group. + A DataFrame containing the elements taken from each group. See Also -------- @@ -3352,8 +3344,6 @@ def corrwith( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 The default value of ``numeric_only`` is now ``False``. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1f9a24dae96f5..f7868b9e46c37 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2180,16 +2180,12 @@ def mean( skipna : bool, default True Exclude NA/null values. If an entire group is NA, the result will be NA. - .. versionadded:: 3.0.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2197,8 +2193,6 @@ def mean( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` - .. versionadded:: 1.4.0 - Returns ------- pandas.Series or pandas.DataFrame @@ -2390,8 +2384,6 @@ def std( * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. 
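The checks rewritten above to use ``CHAINED_WARNING_DISABLED_INPLACE_METHOD`` all guard the same refcount heuristic: an inplace method called on a temporary object cannot update the parent under copy-on-write. An illustrative sketch of the pattern being detected (not taken from the diff):

    import pandas as pd

    df = pd.DataFrame({"a": [1.0, None, 3.0]})
    # chained form: fillna runs on a temporary Series, triggers the
    # ChainedAssignmentError warning, and df itself is never modified
    # df["a"].fillna(0, inplace=True)
    # supported form: assign the result back
    df["a"] = df["a"].fillna(0)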
versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2399,13 +2391,9 @@ def std( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` - .. versionadded:: 1.4.0 - numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. @@ -2508,8 +2496,6 @@ def var( * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2517,13 +2503,9 @@ def var( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` - .. versionadded:: 1.4.0 - numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. @@ -2739,8 +2721,6 @@ def sem( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. @@ -4393,8 +4373,6 @@ def quantile( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a45ce1f385e4d..70e3d45a02305 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -113,8 +113,6 @@ class Grouper: - 'end': `origin` is the last value of the timeseries - 'end_day': `origin` is the ceiling midnight of the last day - .. versionadded:: 1.3.0 - offset : Timedelta or str, default is None An offset timedelta added to the origin. diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py index 036b32b3feac2..425c515a9e657 100644 --- a/pandas/core/indexers/__init__.py +++ b/pandas/core/indexers/__init__.py @@ -3,6 +3,7 @@ check_key_length, check_setitem_lengths, disallow_ndim_indexing, + getitem_returns_view, is_empty_indexer, is_list_like_indexer, is_scalar_indexer, @@ -19,6 +20,7 @@ "check_key_length", "check_setitem_lengths", "disallow_ndim_indexing", + "getitem_returns_view", "is_empty_indexer", "is_list_like_indexer", "is_scalar_indexer", diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 2c2413c74f2fa..6fa973c1599c5 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -94,7 +94,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -134,7 +133,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -189,7 +187,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. 
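The ``engine``/``engine_kwargs`` pairs documented in these groupby reductions switch between the cython and numba code paths. A small usage sketch, assuming the optional numba dependency is installed:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1.0, 2.0, 3.0]})
    # JIT-compiled reduction; the engine_kwargs mirror the documented defaults
    out = df.groupby("key")["val"].mean(
        engine="numba",
        engine_kwargs={"nopython": True, "nogil": False, "parallel": False},
    )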
versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -316,7 +313,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -422,7 +418,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -509,7 +504,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -599,7 +593,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -681,7 +674,6 @@ def get_window_bounds( closed passed from the top level rolling API step : int, default None step passed from the top level rolling API - .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index 85c298d8c3a48..e5b7586fbddbf 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -126,7 +126,7 @@ def check_setitem_lengths(indexer, value, values) -> bool: """ Validate that value and indexer are the same length. - An special-case is allowed for when the indexer is a boolean array + A special-case is allowed for when the indexer is a boolean array and the number of true values equals the length of ``value``. In this case, no exception is raised. @@ -414,6 +414,27 @@ def unpack_tuple_and_ellipses(item: tuple): return item +def getitem_returns_view(arr, key) -> bool: + """ + Check if an ``arr.__getitem__`` call with given ``key`` would return a view + or not. + """ + if not isinstance(key, tuple): + key = (key,) + + # filter out Ellipsis and np.newaxis + key = tuple(k for k in key if k is not Ellipsis and k is not np.newaxis) + if not key: + return True + # single integer gives view if selecting subset of 2D array + if arr.ndim == 2 and lib.is_integer(key[0]): + return True + # slices always give views + if all(isinstance(k, slice) for k in key): + return True + return False + + # ----------------------------------------------------------- # Public indexer validation diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4186a13926c6f..67e03ec791e2c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5023,6 +5023,8 @@ def array(self) -> ExtensionArray: from pandas.core.arrays.numpy_ import NumpyExtensionArray array = NumpyExtensionArray(array) + array = array.view() + array._readonly = True return array @property diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 382b3678da75b..ec0c2c896423b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -922,8 +922,6 @@ def date_range( Name of the resulting DatetimeIndex. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. - - .. 
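The new ``getitem_returns_view`` helper classifies an indexer by whether numpy's ``__getitem__`` would hand back a view of the original buffer. Illustrative checks traced from the implementation above (the helper is internal, exported from ``pandas.core.indexers`` per this diff):

    import numpy as np
    from pandas.core.indexers import getitem_returns_view

    arr = np.arange(12).reshape(3, 4)
    assert getitem_returns_view(arr, slice(1, 3))  # slices always give views
    assert getitem_returns_view(arr, 0)  # single int on a 2D array selects a row: view
    assert not getitem_returns_view(arr, [0, 2])  # fancy indexing copies
    assert not getitem_returns_view(arr, arr[:, 0] > 1)  # boolean masks copy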
versionadded:: 1.4.0 unit : {'s', 'ms', 'us', 'ns'}, default 'ns' Specify the desired resolution of the result. @@ -1124,8 +1122,6 @@ def bdate_range( are passed. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. - - .. versionadded:: 1.4.0 **kwargs For compatibility. Has no effect on the result. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9cfe27ff81f2c..a4a6230337add 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -6,7 +6,6 @@ le, lt, ) -import textwrap from typing import ( TYPE_CHECKING, Any, @@ -31,7 +30,6 @@ ) from pandas.errors import InvalidIndexError from pandas.util._decorators import ( - Appender, cache_readonly, set_module, ) @@ -66,14 +64,11 @@ from pandas.core.arrays.datetimelike import validate_periods from pandas.core.arrays.interval import ( IntervalArray, - _interval_shared_docs, ) import pandas.core.common as com from pandas.core.indexers import is_valid_positional_slice -import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, - _index_shared_docs, ensure_index, maybe_extract_name, ) @@ -100,21 +95,6 @@ IntervalClosedType, npt, ) -_index_doc_kwargs = dict(ibase._index_doc_kwargs) - -_index_doc_kwargs.update( - { - "klass": "IntervalIndex", - "qualname": "IntervalIndex", - "target_klass": "IntervalIndex or list of Intervals", - "name": textwrap.dedent( - """\ - name : object, optional - Name to be stored in the index. - """ - ), - } -) def _get_next_label(label): @@ -159,35 +139,6 @@ def _new_IntervalIndex(cls, d): return cls.from_arrays(**d) -@Appender( - _interval_shared_docs["class"] - % { - "klass": "IntervalIndex", - "summary": "Immutable index of intervals that are closed on the same side.", - "name": _index_doc_kwargs["name"], - "extra_attributes": "is_overlapping\nvalues\n", - "extra_methods": "", - "examples": textwrap.dedent( - """\ - Examples - -------- - A new ``IntervalIndex`` is typically constructed using - :func:`interval_range`: - - >>> pd.interval_range(start=0, end=5) - IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], - dtype='interval[int64, right]') - - It may also be constructed using one of the constructor - methods: :meth:`IntervalIndex.from_arrays`, - :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. - - See further examples in the doc strings of ``interval_range`` and the - mentioned constructor methods. - """ - ), - } -) @inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) @inherit_names( [ @@ -205,6 +156,79 @@ def _new_IntervalIndex(cls, d): @inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) @set_module("pandas") class IntervalIndex(ExtensionIndex): + """ + Immutable index of intervals that are closed on the same side. + + Parameters + ---------- + data : array-like (1-dimensional) + Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`) containing + Interval objects from which to build the IntervalIndex. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both or + neither. + dtype : dtype or None, default None + If None, dtype will be inferred. + copy : bool, default False + Copy the input data. + name : object, optional + Name to be stored in the index. + verify_integrity : bool, default True + Verify that the IntervalIndex is valid.
+ + Attributes + ---------- + left + right + closed + mid + length + is_empty + is_non_overlapping_monotonic + is_overlapping + values + + Methods + ------- + from_arrays + from_tuples + from_breaks + contains + overlaps + set_closed + to_tuples + + See Also + -------- + Index : The base pandas Index type. + Interval : A bounded slice-like interval; the elements of an IntervalIndex. + interval_range : Function to create a fixed frequency IntervalIndex. + cut : Bin values into discrete Intervals. + qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + A new ``IntervalIndex`` is typically constructed using + :func:`interval_range`: + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], + dtype='interval[int64, right]') + + It may also be constructed using one of the constructor + methods: :meth:`IntervalIndex.from_arrays`, + :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. + + See further examples in the doc strings of ``interval_range`` and the + mentioned constructor methods. + """ + _typ = "intervalindex" # annotate properties pinned via inherit_names @@ -246,26 +270,6 @@ def __new__( return cls._simple_new(array, name) @classmethod - @Appender( - _interval_shared_docs["from_breaks"] - % { - "klass": "IntervalIndex", - "name": textwrap.dedent( - """ - name : str, optional - Name of the resulting IntervalIndex.""" - ), - "examples": textwrap.dedent( - """\ - Examples - -------- - >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) - IntervalIndex([(0, 1], (1, 2], (2, 3]], - dtype='interval[int64, right]') - """ - ), - } - ) def from_breaks( cls, breaks, @@ -274,6 +278,39 @@ def from_breaks( copy: bool = False, dtype: Dtype | None = None, ) -> IntervalIndex: + """ + Construct an IntervalIndex from an array of splits. + + Parameters + ---------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + name : str, optional + Name of the resulting IntervalIndex. + copy : bool, default False + Copy the data. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + IntervalIndex + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + IntervalIndex.from_arrays : Construct from a left and right array. + IntervalIndex.from_tuples : Construct from a sequence of tuples. 
+ + Examples + -------- + >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_breaks( breaks, closed=closed, copy=copy, dtype=dtype @@ -281,26 +318,6 @@ def from_breaks( return cls._simple_new(array, name=name) @classmethod - @Appender( - _interval_shared_docs["from_arrays"] - % { - "klass": "IntervalIndex", - "name": textwrap.dedent( - """ - name : str, optional - Name of the resulting IntervalIndex.""" - ), - "examples": textwrap.dedent( - """\ - Examples - -------- - >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) - IntervalIndex([(0, 1], (1, 2], (2, 3]], - dtype='interval[int64, right]') - """ - ), - } - ) def from_arrays( cls, left, @@ -310,6 +327,58 @@ def from_arrays( copy: bool = False, dtype: Dtype | None = None, ) -> IntervalIndex: + """ + Construct from two arrays defining the left and right bounds. + + Parameters + ---------- + left : array-like (1-dimensional) + Left bounds for each interval. + right : array-like (1-dimensional) + Right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + name : str, optional + Name of the resulting IntervalIndex. + copy : bool, default False + Copy the data. + dtype : dtype, optional + If None, dtype will be inferred. + + Returns + ------- + IntervalIndex + + Raises + ------ + ValueError + When a value is missing in only one of `left` or `right`. + When a value in `left` is greater than the corresponding value + in `right`. + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits. + IntervalIndex.from_tuples : Construct an IntervalIndex from an + array-like of tuples. + + Notes + ----- + Each element of `left` must be less than or equal to the `right` + element at the same position. If an element is missing, it must be + missing in both `left` and `right`. A TypeError is raised when + using an unsupported type for `left` or `right`. At the moment, + 'category', 'object', and 'string' subtypes are not supported. + + Examples + -------- + >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_arrays( left, right, closed, copy=copy, dtype=dtype @@ -317,26 +386,6 @@ def from_arrays( return cls._simple_new(array, name=name) @classmethod - @Appender( - _interval_shared_docs["from_tuples"] - % { - "klass": "IntervalIndex", - "name": textwrap.dedent( - """ - name : str, optional - Name of the resulting IntervalIndex.""" - ), - "examples": textwrap.dedent( - """\ - Examples - -------- - >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) - IntervalIndex([(0, 1], (1, 2]], - dtype='interval[int64, right]') - """ - ), - } - ) def from_tuples( cls, data, @@ -345,6 +394,41 @@ def from_tuples( copy: bool = False, dtype: Dtype | None = None, ) -> IntervalIndex: + """ + Construct an IntervalIndex from an array-like of tuples. + + Parameters + ---------- + data : array-like (1-dimensional) + Array of tuples. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. 
+ name : str, optional + Name of the resulting IntervalIndex. + copy : bool, default False + Whether to copy the data. This argument is kept for compatibility only and is ignored. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + IntervalIndex + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + IntervalIndex.from_arrays : Construct an IntervalIndex from a left and + right array. + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits. + + Examples + -------- + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) + IntervalIndex([(0, 1], (1, 2]], + dtype='interval[int64, right]') + """ with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) return cls._simple_new(arr, name=name) @@ -411,9 +495,37 @@ def inferred_type(self) -> str: """Return a string of the type inferred from the values""" return "interval" - # Cannot determine type of "memory_usage" - @Appender(Index.memory_usage.__doc__) # type: ignore[has-type] def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of the values. + + Parameters + ---------- + deep : bool, default False + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption. + + Returns + ------- + bytes used + Returns memory usage of the values in the Index in bytes. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False or if used on PyPy. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3]) + >>> idx.memory_usage() + 24 + """ # we don't use an explicit engine # so return the bytes here return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) @@ -729,10 +841,61 @@ def _get_indexer( return ensure_platform_int(indexer) - @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique( self, target: Index ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + Compute indexer and mask for new index given the current index. + + The indexer should then be used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : IntervalIndex or list of Intervals + An iterable containing the values to be used for computing the indexer. + + Returns + ------- + indexer : np.ndarray[np.intp] + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + missing : np.ndarray[np.intp] + An indexer into the target of the values not found. + These correspond to the -1 in the indexer array. + + See Also + -------- + Index.get_indexer : Computes indexer and mask for new index given + the current index. + Index.get_indexer_for : Returns an indexer even when non-unique. + + Examples + -------- + >>> index = pd.Index(["c", "b", "a", "b", "b"]) + >>> index.get_indexer_non_unique(["b", "b"]) + (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64)) + + In the example below there are no matched values. + + >>> index = pd.Index(["c", "b", "a", "b", "b"]) + >>> index.get_indexer_non_unique(["q", "r", "t"]) + (array([-1, -1, -1]), array([0, 1, 2])) + + For this reason, the returned ``indexer`` contains only integers equal to -1. + It demonstrates that there's no match between the index and the ``target`` + values at these positions. The mask [0, 1, 2] in the return value shows that + the first, second, and third elements are missing. + + Notice that the return value is a tuple containing two items. In the example + below the first item is an array of locations in ``index``. The second + item is a mask showing that the first and third elements are missing. + + >>> index = pd.Index(["c", "b", "a", "b", "b"]) + >>> index.get_indexer_non_unique(["f", "b", "s"]) + (array([-1, 1, 3, 4, -1]), array([0, 2])) + """ target = ensure_index(target) if not self._should_compare(target) and not self._should_partial_index(target): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1cc1928136da1..43e6469e078f0 100644 @@ -1900,8 +1900,6 @@ def to_frame( allow_duplicates : bool, optional default False Allow duplicate column labels to be created. - .. versionadded:: 1.5.0 - Returns ------- DataFrame diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3f9749f1f7a99..6487bd449f222 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,10 +15,9 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim -from pandas.compat import PYPY from pandas.compat._constants import ( + CHAINED_WARNING_DISABLED, REF_COUNT, - WARNING_CHECK_DISABLED, ) from pandas.errors import ( AbstractMethodError, @@ -920,7 +919,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED: if sys.getrefcount(self.obj) <= REF_COUNT: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -2024,7 +2023,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): ) elif lplane_indexer == 0 and len(value) == len(self.obj.index): - # We get here in one case via .loc with a all-False mask + # We get here in one case via .loc with an all-False mask pass elif self._is_scalar_access(indexer) and is_object_dtype( @@ -2588,7 +2587,7 @@ def __getitem__(self, key): return super().__getitem__(key) def __setitem__(self, key, value) -> None: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED: if sys.getrefcount(self.obj) <= REF_COUNT: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -2619,7 +2618,7 @@ def _convert_key(self, key): return key def __setitem__(self, key, value) -> None: - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED: if sys.getrefcount(self.obj) <= REF_COUNT: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 645e8d9b217b2..32d6c8f738851 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2385,7 +2385,9 @@ def external_values(values: ArrayLike) -> ArrayLike: if isinstance(values, np.ndarray): values = values.view() values.flags.writeable = False - - # TODO(CoW) we should also mark our ExtensionArrays as read-only + else: + # ExtensionArrays + values = values.view() + values._readonly = True return values diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py index 75003675dc173..1c9ca5f4d0d3c 100644 --- a/pandas/core/methods/to_dict.py +++
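The ``external_values`` change just above extends the existing ndarray read-only protection to ExtensionArrays via the internal ``_readonly`` flag from this diff. For the ndarray branch the contract is plain numpy behavior, sketched here:

    import numpy as np

    values = np.arange(5)
    view = values.view()
    view.flags.writeable = False
    try:
        view[0] = 99  # mutating the exported view is rejected
    except ValueError:
        print("read-only view; the block's buffer stays intact")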
b/pandas/core/methods/to_dict.py @@ -126,9 +126,6 @@ def to_dict( [{column -> value}, ... , {column -> value}] - 'index' : dict like {index -> {column -> value}} - .. versionadded:: 1.4.0 - 'tight' as an allowed value for the ``orient`` argument - into : class, default dict The collections.abc.MutableMapping subclass used for all Mappings in the return value. Can be the actual class or an empty diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f84bedda8d00c..033d654889e91 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1545,8 +1545,6 @@ def std( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. @@ -1604,8 +1602,6 @@ def var( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. @@ -1670,8 +1666,6 @@ def sem( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. - .. versionadded:: 1.5.0 - .. versionchanged:: 2.0.0 numeric_only now defaults to ``False``. diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 0dfd721dee312..35fed5a24fb30 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -185,7 +185,7 @@ def check_len(item, name: str) -> None: check_len(prefix_sep, "prefix_sep") if isinstance(prefix, str): - prefix = itertools.cycle([prefix]) + prefix = itertools.repeat(prefix, len(data_to_encode.columns)) if isinstance(prefix, dict): prefix = [prefix[col] for col in data_to_encode.columns] @@ -194,7 +194,7 @@ def check_len(item, name: str) -> None: # validate separators if isinstance(prefix_sep, str): - prefix_sep = itertools.cycle([prefix_sep]) + prefix_sep = itertools.repeat(prefix_sep, len(data_to_encode.columns)) elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] @@ -211,7 +211,9 @@ def check_len(item, name: str) -> None: # columns to prepend to result. with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] - for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep): + for col, pre, sep in zip( + data_to_encode.items(), prefix, prefix_sep, strict=True + ): # col is (column_name, column), use just column data here dummy = _get_dummies_1d( col[1], @@ -377,8 +379,6 @@ def from_dummies( Inverts the operation performed by :func:`~pandas.get_dummies`. - .. 
versionadded:: 1.5.0 - Parameters ---------- data : DataFrame @@ -538,7 +538,11 @@ def from_dummies( raise ValueError(len_msg) elif isinstance(default_category, Hashable): default_category = dict( - zip(variables_slice, [default_category] * len(variables_slice)) + zip( + variables_slice, + [default_category] * len(variables_slice), + strict=True, + ) ) else: raise TypeError( diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 5d4b15c9a0ca3..aeab833878583 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -199,7 +199,7 @@ def melt( missing = idx == -1 if missing.any(): missing_labels = [ - lab for lab, not_found in zip(labels, missing) if not_found + lab for lab, not_found in zip(labels, missing, strict=True) if not_found ] raise KeyError( "The following id_vars or value_vars are not present in " diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 04c584c226aed..90324044c237c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -124,8 +124,6 @@ def pivot_table( sort : bool, default True Specifies if the result should be sorted. - .. versionadded:: 1.3.0 - **kwargs : dict Optional keyword arguments to pass to ``aggfunc``. @@ -1098,8 +1096,8 @@ def crosstab( from pandas import DataFrame data = { - **dict(zip(unique_rownames, index)), - **dict(zip(unique_colnames, columns)), + **dict(zip(unique_rownames, index, strict=True)), + **dict(zip(unique_colnames, columns, strict=True)), } df = DataFrame(data, index=common_idx) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c14389d753aac..d78e97c6845fe 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -158,7 +158,7 @@ def __init__( # Bug fix GH 20601 # If the data frame is too big, the number of unique index combination # will cause int32 overflow on windows environments. - # We want to check and raise an warning before this happens + # We want to check and raise a warning before this happens num_rows = max(index_level.size for index_level in self.new_index_levels) num_columns = self.removed_level.size diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdbbd6c41f34..9bbcfe0c913c9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -33,11 +33,11 @@ properties, reshape, ) +from pandas._libs.internals import SetitemMixin from pandas._libs.lib import is_range_indexer -from pandas.compat import PYPY from pandas.compat._constants import ( + CHAINED_WARNING_DISABLED_INPLACE_METHOD, REF_COUNT, - WARNING_CHECK_DISABLED, ) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -48,7 +48,6 @@ ) from pandas.errors.cow import ( _chained_assignment_method_msg, - _chained_assignment_msg, ) from pandas.util._decorators import ( Appender, @@ -232,7 +231,7 @@ # class "NDFrame") # definition in base class "NDFrame" @set_module("pandas") -class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] +class Series(SetitemMixin, base.IndexOpsMixin, NDFrame): # type: ignore[misc] """ One-dimensional ndarray with axis labels (including time series). 
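The ``itertools.cycle`` to ``itertools.repeat`` switches in encoding.py work hand in hand with the ``strict=True`` zips added there and in melt.py and pivot.py: ``zip(..., strict=True)`` (Python 3.10+) raises when its arguments exhaust unevenly, so an infinite ``cycle`` would always trip the check, while ``repeat(x, n)`` yields exactly ``n`` items. A minimal demonstration:

    import itertools

    cols = ["a", "b", "c"]
    # bounded iterator: zips cleanly under strict=True
    print(list(zip(cols, itertools.repeat("col", len(cols)), strict=True)))
    try:
        # unbounded iterator: strict zip raises once cols is exhausted
        list(zip(cols, itertools.cycle(["col"]), strict=True))
    except ValueError as err:
        print(err)  # zip() argument 2 is longer than argument 1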
@@ -358,6 +357,11 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] ) _mgr: SingleBlockManager + # override those to avoid inheriting from SetitemMixin (cython generates + # them by default) + __reduce__ = object.__reduce__ + __setstate__ = NDFrame.__setstate__ + # ---------------------------------------------------------------------- # Constructors @@ -818,7 +822,10 @@ def _references(self) -> BlockValuesRefs: @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator] @property def array(self) -> ExtensionArray: - return self._mgr.array_values() + arr = self._mgr.array_values() + arr = arr.view() + arr._readonly = True + return arr def __len__(self) -> int: """ @@ -1054,13 +1061,8 @@ def _get_value(self, label, takeable: bool = False): else: return self.iloc[loc] - def __setitem__(self, key, value) -> None: - if not PYPY and not WARNING_CHECK_DISABLED: - if sys.getrefcount(self) <= REF_COUNT + 1: - warnings.warn( - _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 - ) - + # def __setitem__() is implemented in SetitemMixin and dispatches to this method + def _setitem(self, key, value) -> None: check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) @@ -1306,8 +1308,6 @@ def reset_index( allow_duplicates : bool, default False Allow duplicate column labels to be created. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame or None @@ -2139,7 +2139,7 @@ def unique(self) -> ArrayLike: ['2016-01-01 00:00:00-05:00'] Length: 1, dtype: datetime64[s, US/Eastern] - An Categorical will return categories in the order of + A Categorical will return categories in the order of appearance and with the same dtype. >>> pd.Series(pd.Categorical(list("baabc"))).unique() @@ -3028,8 +3028,6 @@ def compare( result_names : tuple, default ('self', 'other') Set the dataframes names in the comparison. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -3353,7 +3351,7 @@ def update(self, other: Series | Sequence | Mapping) -> None: 2 3 dtype: int64 """ - if not PYPY and not WARNING_CHECK_DISABLED: + if not CHAINED_WARNING_DISABLED_INPLACE_METHOD: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -5660,8 +5658,6 @@ def info( This method prints information about a Series including the index dtype, non-NA values and memory usage. - .. versionadded:: 1.4.0 - Parameters ---------- verbose : bool, optional diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 5c6a029bed7c4..3f91443b8cda6 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -79,8 +79,6 @@ result_names : tuple, default ('self', 'other') Set the dataframes names in the comparison. - - .. versionadded:: 1.5.0 """ _shared_docs["groupby"] = """ diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ff3a17e4d2d5b..110473be5d27c 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -12,7 +12,7 @@ import numpy as np -from pandas._config import get_option +from pandas._config import using_string_dtype from pandas._libs import lib from pandas._typing import ( @@ -2123,7 +2123,7 @@ def decode( """ if dtype is not None and not is_string_dtype(dtype): raise ValueError(f"dtype must be string or object, got {dtype=}") - if dtype is None and get_option("future.infer_string"): + if dtype is None and using_string_dtype(): dtype = "str" # TODO: Add a similar _bytes interface. 
if encoding in _cpython_optimized_decoders: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 1ea05e24d0db5..0d6eb230714c0 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -215,8 +215,6 @@ class ExponentialMovingWindow(BaseWindow): If 1-D array like, a sequence with the same shape as the observations. method : str {'single', 'table'}, default 'single' - .. versionadded:: 1.4.0 - Execute the rolling operation per single column or row (``'single'``) or over the entire object (``'table'``). @@ -426,8 +424,6 @@ def online( Return an ``OnlineExponentialMovingWindow`` object to calculate exponentially moving window aggregations in an online method. - .. versionadded:: 1.3.0 - Parameters ---------- engine: str, default ``'numba'`` diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 2527a5dd508d8..2ef0b4676b06d 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -57,8 +57,6 @@ class Expanding(RollingAndExpandingMixin): This argument is only implemented when specifying ``engine='numba'`` in the method call. - .. versionadded:: 1.3.0 - Returns ------- pandas.api.typing.Expanding @@ -215,8 +213,6 @@ def count(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -436,16 +432,12 @@ def sum( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -453,8 +445,6 @@ def sum( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -502,16 +492,12 @@ def max( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -519,8 +505,6 @@ def max( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -568,16 +552,12 @@ def min( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. 
versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -585,8 +565,6 @@ def min( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -634,16 +612,12 @@ def mean( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -651,8 +625,6 @@ def mean( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -700,16 +672,12 @@ def median( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -717,8 +685,6 @@ def median( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -771,16 +737,12 @@ def std( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -788,8 +750,6 @@ def std( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.4.0 - Returns ------- Series or DataFrame @@ -850,16 +810,12 @@ def var( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -867,8 +823,6 @@ def var( ``False``. 
The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}`` - .. versionadded:: 1.4.0 - Returns ------- Series or DataFrame @@ -923,8 +877,6 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -963,8 +915,6 @@ def skew(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1004,8 +954,6 @@ def kurt(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1054,8 +1002,6 @@ def first(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1091,8 +1037,6 @@ def last(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1149,8 +1093,6 @@ def quantile( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1191,8 +1133,6 @@ def rank( """ Calculate the expanding rank. - .. versionadded:: 1.4.0 - Parameters ---------- method : {'average', 'min', 'max'}, default 'average' @@ -1210,8 +1150,6 @@ def rank( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1275,8 +1213,6 @@ def nunique( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1333,8 +1269,6 @@ def cov( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -1394,8 +1328,6 @@ def corr( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e6f84941f6b1a..17b189e222299 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -934,12 +934,8 @@ class Window(BaseWindow): ``[::step]``. ``window`` must be an integer. Using a step argument other than None or 1 will produce a result with a different shape than the input. - .. versionadded:: 1.5.0 - method : str {'single', 'table'}, default 'single' - .. versionadded:: 1.3.0 - Execute the rolling operation per single column or row (``'single'``) or over the entire object (``'table'``). @@ -1307,8 +1303,6 @@ def sum(self, numeric_only: bool = False, **kwargs): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - **kwargs Keyword arguments to configure the ``SciPy`` weighted window type. @@ -1367,8 +1361,6 @@ def mean(self, numeric_only: bool = False, **kwargs): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - **kwargs Keyword arguments to configure the ``SciPy`` weighted window type. @@ -1429,8 +1421,6 @@ def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs): numeric_only : bool, default False Include only float, int, boolean columns. 
- .. versionadded:: 1.5.0 - **kwargs Keyword arguments to configure the ``SciPy`` weighted window type. @@ -1484,8 +1474,6 @@ def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - **kwargs Keyword arguments to configure the ``SciPy`` weighted window type. @@ -2137,8 +2125,6 @@ def count(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -2374,16 +2360,12 @@ def sum( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2391,8 +2373,6 @@ def sum( ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -2479,8 +2459,6 @@ def max( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - *args : iterable, optional Positional arguments passed into ``func``. @@ -2490,8 +2468,6 @@ def max( * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2501,8 +2477,6 @@ def max( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.3.0 - **kwargs : mapping, optional A dictionary of keyword arguments passed into ``func``. @@ -2554,16 +2528,12 @@ def min( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2573,8 +2543,6 @@ def min( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -2626,16 +2594,12 @@ def mean( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. 
versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2645,8 +2609,6 @@ def mean( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -2705,16 +2667,12 @@ def median( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.3.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2724,8 +2682,6 @@ def median( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.3.0 - Returns ------- Series or DataFrame @@ -2782,16 +2738,12 @@ def std( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2801,8 +2753,6 @@ def std( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.4.0 - Returns ------- Series or DataFrame @@ -2862,16 +2812,12 @@ def var( numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. * ``'numba'`` : Runs the operation through JIT compiled code from numba. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - .. versionadded:: 1.4.0 - engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` @@ -2881,8 +2827,6 @@ def var( The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. - .. versionadded:: 1.4.0 - Returns ------- Series or DataFrame @@ -2932,8 +2876,6 @@ def skew(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -2979,8 +2921,6 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. versionadded:: 1.5.0 - Returns ------- Series or DataFrame @@ -3022,8 +2962,6 @@ def kurt(self, numeric_only: bool = False): numeric_only : bool, default False Include only float, int, boolean columns. - .. 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index d1ae59e0e5866..0732ba87de30c 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -259,9 +259,6 @@
     Note that this parameter is only necessary for columns stored as TEXT in Excel,
     any numeric columns will automatically be parsed, regardless of display
     format.(e.g. use ',' for European data).
-
-    .. versionadded:: 1.4.0
-
 comment : str, default None
     Comments out remainder of line. Pass a character or characters to this
     argument to indicate comments in the input file. Any data between the
@@ -1006,8 +1003,6 @@ class ExcelWriter(Generic[_WorkbookT]):
         * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``
         * odf: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``

-        .. versionadded:: 1.3.0
-
     See Also
     --------
     read_excel : Read an Excel sheet values (xlsx) file into DataFrame.
@@ -1505,12 +1500,9 @@ class ExcelFile:
       ``xlrd`` will be used.
     - Otherwise if ``path_or_buffer`` is in xlsb format, `pyxlsb `_ will be used.
-
-      .. versionadded:: 1.3.0
-
-      - Otherwise if `openpyxl `_ is installed,
-        then ``openpyxl`` will be used.
-      - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
+    - Otherwise if `openpyxl `_ is installed,
+      then ``openpyxl`` will be used.
+    - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.

     .. warning::
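The ``engine_kwargs`` mapping described in the ``ExcelWriter`` hunk above is forwarded verbatim to the selected engine's workbook constructor. A hedged sketch (it assumes ``openpyxl`` is installed and that ``iso_dates`` is a valid ``openpyxl.Workbook`` option; the file name is arbitrary):

>>> import pandas as pd
>>> df = pd.DataFrame({"a": [1, 2]})
>>> with pd.ExcelWriter(
...     "out.xlsx", engine="openpyxl", engine_kwargs={"iso_dates": True}
... ) as writer:  # write mode: roughly openpyxl.Workbook(**engine_kwargs)
...     df.to_excel(writer, sheet_name="data")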
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index d72b6cd89b940..cd1810a96d49e 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -358,8 +358,6 @@ def get_dataframe_repr_params() -> dict[str, Any]:
     Supplying these parameters to DataFrame.to_string is equivalent to calling
     ``repr(DataFrame)``. This is useful if you want to adjust the repr output.

-    .. versionadded:: 1.4.0
-
     Example
     -------
     >>> import pandas as pd
@@ -391,8 +389,6 @@ def get_series_repr_params() -> dict[str, Any]:
     Supplying these parameters to Series.to_string is equivalent to calling
     ``repr(series)``. This is useful if you want to adjust the series repr output.

-    .. versionadded:: 1.4.0
-
     Example
     -------
     >>> import pandas as pd
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 1132981915177..5039bd0d4210a 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -155,15 +155,9 @@ class Styler(StylerRenderer):
     decimal : str, optional
         Character used as decimal separator for floats, complex and integers.
         If not given uses ``pandas.options.styler.format.decimal``.
-
-        .. versionadded:: 1.3.0
-
     thousands : str, optional, default None
         Character used as thousands separator for floats, complex and integers.
         If not given uses ``pandas.options.styler.format.thousands``.
-
-        .. versionadded:: 1.3.0
-
     escape : str, optional
         Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
         in cell display string with HTML-safe sequences.
@@ -175,14 +169,10 @@ class Styler(StylerRenderer):
         which either are surrounded by two characters ``$`` or start with
         the character ``\(`` and end with ``\)``.
         If not given uses ``pandas.options.styler.format.escape``.
-
-        .. versionadded:: 1.3.0
     formatter : str, callable, dict, optional
         Object to define how values are displayed. See ``Styler.format``.
         If not given uses ``pandas.options.styler.format.formatter``.

-        .. versionadded:: 1.4.0
-
     Attributes
     ----------
     index : data.index Index
@@ -302,8 +292,6 @@ def concat(self, other: Styler) -> Styler:
     """
     Append another Styler to combine the output into a single table.

-    .. versionadded:: 1.5.0
-
     Parameters
     ----------
     other : Styler
@@ -438,8 +426,6 @@ def set_tooltips(
     These string based tooltips are only applicable to ``<td>`` HTML elements,
     and cannot be used for column or index headers.

-    .. versionadded:: 1.3.0
-
     Parameters
     ----------
     ttips : DataFrame
@@ -684,8 +670,6 @@ def to_latex(
     r"""
     Write Styler to a file, buffer or string in LaTeX format.

-    .. versionadded:: 1.3.0
-
     Parameters
     ----------
     buf : str, path object, file-like object, or None, default None
@@ -731,7 +715,6 @@ def to_latex(
         - `"skip-last;index"`: as above with lines extending only the width of
           the index entries.

-        .. versionadded:: 1.4.0
     label : str, optional
         The LaTeX label included as: \\label{
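The ``decimal`` and ``thousands`` options touched by the ``Styler`` hunks above are also accepted per-call by ``Styler.format``, alongside ``precision``, which is used below to pin the float output. A minimal sketch of the documented behavior (it assumes ``jinja2`` is installed; the values and separators are arbitrary):

>>> import pandas as pd
>>> df = pd.DataFrame({"v": [1234.5, 6789.1]})
>>> styled = df.style.format(precision=2, thousands=",")
>>> "1,234.50" in styled.to_html()
True

The same ``Styler`` object can then be written out with the ``to_latex`` method shown in the final hunks.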