diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 9afc93bcc..000000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -.gitignore export-ignore -.gitattributes export-ignore diff --git a/.github/workflows/close-pulls.yml b/.github/workflows/close-pulls.yml new file mode 100644 index 000000000..8a95bccc0 --- /dev/null +++ b/.github/workflows/close-pulls.yml @@ -0,0 +1,21 @@ +--- + name: Auto Close Pull Requests + + on: + schedule: + - cron: '0 * * * *' + + jobs: + close-fork-pulls: + runs-on: ubuntu-latest + + steps: + - name: Close Pull Requests + uses: peter-evans/close-fork-pulls@v2 + with: + comment: | + As of 2023-11-30 we have stopped the 2-way sync between this repository and our internal repository, so that our internal repository becomes the source of truth for the backup-utils source code. With the 2-way sync stopped, this public repository will be used to host documentation about backup-utils and to publish new versions of backup-utils. You will be able to access a specific version of backup-utils (which includes the full source code) from the [release page](https://github.com/github/backup-utils/releases) of this repository. + + Customers should no longer open pull requests in this repository. These pull requests will not be reviewed or merged. We will automatically close all PRs opened in this repository. + + Customers cannot open issues in this repository. Instead, customers will need to follow the standard support process and open a support ticket for any questions/concerns/problems with backup-utils. This will ensure all customer requests are handled consistently. 
\ No newline at end of file diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 246384181..000000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/backup.config -/data -/dist diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 82488dcf2..000000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -script: - - debuild -uc -us -before_install: - - sudo apt-get update -qq - - sudo apt-get install -y devscripts diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index fec2ed2e3..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,10 +0,0 @@ -# Contributing - -Looking to contribute something to this project? That is great, we always appreciate pull requests! Here's how you can help: - -1. Fork the project to your account. -2. Clone the fork (`git clone https://github.com/[username]/backup-utils.git`). -3. Create a new feature branch (`git checkout -b my-feature-branch`). -4. Add and then commit your changes (`git commit -am "Add a new backup endpoint."`). -5. Push your feature branch to GitHub.com (`git push -u origin my-feature-branch`). -6. Open a [Pull Request](https://github.com/github/backup-utils/compare/) and wait for our feedback. diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 4d231b456..000000000 --- a/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -Copyright (c) 2014 GitHub Inc. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile deleted file mode 100644 index 33528374e..000000000 --- a/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -SHELL = /bin/sh - -test: info - @script/cibuild --no-package - -info: - @echo This is github/backup-utils - @echo shell is $(shell ls -l $(SHELL) | sed 's@.*/bin/sh@/bin/sh@') - @rsync --version | head -1 - @echo - -dist: - @script/package-tarball - -clean: - rm -rf dist - -.PHONY: test info dist clean diff --git a/README.md b/README.md index 6859addcc..c0ceedb0e 100644 --- a/README.md +++ b/README.md @@ -1,246 +1,126 @@ -GitHub Enterprise Backup Utilities -================================== - -This repository includes backup and recovery utilities for [GitHub Enterprise][1]. 
- -- **[Features](#features)** -- **[Requirements](#requirements)** - - **[Backup host requirements](#backup-host-requirements)** - - **[Storage requirements](#storage-requirements)** - - **[GitHub Enterprise version requirements](#github-enterprise-version-requirements)** -- **[Getting started](#getting-started)** -- **[Migrating from GitHub Enterprise v11.10.34x to v2.0](#migrating-from-github-enterprise-v111034x-to-v20)** -- **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** -- **[Scheduling backups](#scheduling-backups)** -- **[Backup snapshot file structure](#backup-snapshot-file-structure)** -- **[Support](#support)** - -### Features - -The backup utilities implement a number of advanced capabilities for backup +# GitHub Enterprise Server Backup Utilities + +This repository includes backup and recovery utilities for +[GitHub Enterprise Server][1]. + +**Note**: The parallel backup and restore feature will require [GNU awk](https://www.gnu.org/software/gawk) and [moreutils](https://joeyh.name/code/moreutils) to be installed. Note that on some distributions/platforms, the `moreutils-parallel` package is separate from `moreutils` and must be installed on its own. + +**Note**: the [GitHub Enterprise Server version requirements][2] have +changed starting with Backup Utilities v2.13.0, released on 27 March 2018. + +## Features + +Backup Utilities implement a number of advanced capabilities for backup hosts, built on top of the backup and restore features already included in -GitHub Enterprise. +GitHub Enterprise Server. - - Complete GitHub Enterprise backup and recovery system via two simple utilities:
- `ghe-backup` and `ghe-restore`. - - Online backups. The GitHub appliance need not be put in maintenance mode for +- Complete GitHub Enterprise Server backup and recovery system via two simple + utilities:
`ghe-backup` and `ghe-restore`. +- Online backups. The GitHub appliance need not be put in maintenance mode for the duration of the backup run. - - Incremental backup of Git repository data. Only changes since the last +- Incremental backup of Git repository data. Only changes since the last snapshot are transferred, leading to faster backup runs and lower network bandwidth and machine utilization. - - Efficient snapshot storage. Only data added since the previous snapshot +- Efficient snapshot storage. Only data added since the previous snapshot consumes new space on the backup host. - - Multiple backup snapshots with configurable retention periods. - - Backup commands run under the lowest CPU/IO priority on the GitHub appliance, +- Multiple backup snapshots with configurable retention periods. +- Backup commands run under the lowest CPU/IO priority on the GitHub appliance, reducing performance impact while backups are in progress. - - Runs under most Linux/Unix environments. - - MIT licensed, open source software maintained by GitHub, Inc. - -### Requirements +- Runs under most Linux/Unix environments. +- MIT licensed, open source software maintained by GitHub, Inc. -The backup utilities should be run on a host dedicated to long-term permanent -storage and must have network connectivity with the GitHub Enterprise appliance. 
+## Documentation -##### Backup host requirements +- **[Requirements](docs/requirements.md)** + - **[Backup host requirements](docs/requirements.md#backup-host-requirements)** + - **[Storage requirements](docs/requirements.md#storage-requirements)** + - **[GitHub Enterprise Server version requirements](docs/requirements.md#github-enterprise-version-requirements)** +- **[Getting started](docs/getting-started.md)** +- **[Using the backup and restore commands](docs/usage.md)** +- **[Scheduling backups](docs/scheduling-backups.md)** +- **[Backup snapshot file structure](docs/backup-snapshot-file-structure.md)** +- **[How does Backup Utilities differ from a High Availability replica?](docs/faq.md)** +- **[Docker](docs/docker.md)** +- **[Releases](https://github.com/github/enterprise-releases/blob/master/docs/release-backup-utils.md)** -Backup host software requirements are modest: Linux or other modern Unix -operating system with [rsync][4] v2.6.4 or newer. +## Support -The backup host must be able to establish network connections outbound to the -GitHub appliance over SSH (port 22). +If you have a question related to your specific GitHub Enterprise Server setup, would like assistance with +backup site setup or recovery, or would like to report a bug or a feature request, please contact our [Enterprise support team][3]. -##### Storage requirements -Storage requirements vary based on current Git repository disk usage and growth -patterns of the GitHub appliance. We recommend allocating at least 5x the amount -of storage allocated to the primary GitHub appliance for historical snapshots -and growth over time. +## Repository updates - November 2023 -##### GitHub Enterprise version requirements +In October 2023 we announced a number of changes to this repository. +These changes will improve our (GitHub’s) ability to ship enhancements and new features to backup-utils, +as well as simplify how GitHub Enterprise Server customers interact with backup-utils. 
-The backup utilities are fully supported under GitHub Enterprise 2.0 or -greater. +Our process for shipping new versions of backup-utils prior to November 2023 involved a 2-way sync between this repository and an internal repository. +This 2-way sync became significantly more problematic once we started regularly shipping patches in alignment with GitHub Enterprise Server. -The previous release series (11.10.34x) is also supported but must meet minimum -version requirements. For online and incremental backup support, the GitHub -Enterprise instance must be running version 11.10.342 or above. +As of 2023-11-30 we have stopped this 2-way sync so that our internal repository becomes the source of truth for the backup-utils source code. +With the 2-way sync stopped, this public repository will be used to host documentation about backup-utils and to publish new versions of backup-utils. +You will be able to access a specific version of backup-utils (which includes the full source code) from the [release page](https://github.com/github/backup-utils/releases) of this repository. -Earlier versions are supported, but online and incremental backups are not -supported. We strongly recommend upgrading to the latest release if you're -running a version prior to 11.10.342. Visit [enterprise.github.com][5] to -download the most recent GitHub Enterprise version. +This change has not affected the functionality of the backup-utils tool or a customer’s ability to back up or restore their GitHub Enterprise Server instance. -### Getting started +### Details - 1. [Download the latest release version][release] and extract *or* clone the - repository using Git: +There are three specific areas that have been affected by us stopping the 2-way sync between our internal repository and this public repository on 2023-11-30: - `git clone -b stable https://github.com/github/backup-utils.git` +1. **Pull requests**: Customers should no longer open pull requests in this repository. 
+These pull requests will not be reviewed or merged. +This is necessary because we will no longer be syncing changes between this repository and our internal repository. +2. **Issues**: Customers cannot open issues in this repository. +Instead, customers will need to follow the standard support process and open a support ticket for any questions/concerns/problems with backup-utils. +This will ensure all customer requests are handled consistently. +3. **Installing/upgrading backup-utils**: Customers will not be able to use a clone of the repository to install and upgrade backup-utils. +Customers will need to download a specific version of backup-utils from the [release page](https://github.com/github/backup-utils/releases) +(either as a Debian package or as an archive file - see below for details on how to incorporate this change). - 2. Copy the [`backup.config-example`][2] file to `backup.config` and modify as - necessary. The `GHE_HOSTNAME` value must be set to the GitHub Enterprise - host name. Additional options are available and documented in the - configuration file but none are required for basic backup functionality. +### Timeline - 3. Add the backup host's SSH key to the GitHub appliance as an *Authorized SSH - key*. See [Adding an SSH key for shell access][3] for instructions. +Below is the two phase timeline we will follow to roll out the changes described above: - 4. Run `bin/ghe-host-check` to verify SSH connectivity with the GitHub - appliance. +* **Phase 1 (rolled out on 2023-11-30):** We have closed all open pull requests and issues (after reviewing each one and porting them to our internal repository if merited), +and updated the repository settings so that new issues cannot be opened. Also, we have stopped syncing code from our internal repository to this repository. + * As of 2023-11-30, you can still get a working copy of backup-utils by cloning the repository. 
+ But the code will not be updated in the repository; you can access updated versions of backup-utils via the [release page](https://github.com/github/backup-utils/releases). +* **Phase 2 (rolling out 2024-02-20):** The backup-utils code will be removed and the repository will be used to host documentation for backup-utils. +After this date, you will no longer be able to clone a working copy of backup-utils from the repository. +Instead, you will need to download a specific version of backup-utils from the [release page](https://github.com/github/backup-utils/releases). - 5. Run `bin/ghe-backup` to perform an initial full backup. +### Updating your backup-utils upgrade process -[release]: https://github.com/github/backup-utils/releases +#### Clone of repository -### Migrating from GitHub Enterprise v11.10.34x to v2.0 +If your current process for upgrading backup-utils involves a clone of the repository, you will need to modify your process to download a new version of backup-utils and set it up. -If you are migrating from GitHub Enterprise version 11.10.34x to 2.0 or greater, -please see the [Migrating from GitHub Enterprise v11.10.34x][10] documentation -in the [GitHub Enterprise System Administrator's Guide][11]. It includes -important information on using the backup utilities to migrate data from your -v11.10.34x instance to v2.0. +For example, you could download the v3.10.0 (github-backup-utils-v3.10.0.tar.gz) artifact from the [releases page](https://github.com/github/backup-utils/releases/tag/v3.10.0) with: -### Using the backup and restore commands +```shell +\$ wget https://github.com/github/backup-utils/releases/download/v3.10.0/github-backup-utils-v3.10.0.tar.gz +``` +And then extract it: -After the initial backup, use the following commands: +```shell +\$ tar xzvf github-backup-utils-v3.10.0.tar.gz +``` - - The `ghe-backup` command creates incremental snapshots of repository data, - along with full snapshots of all other pertinent data stores. 
- - The `ghe-restore` command restores snapshots to the same or separate GitHub - Enterprise appliance. +This will give you a new folder, `github-backup-utils-v3.10.0`, which contains the code for version 3.10.0 of backup-utils. Once you copy over your backup.config file from a previous installation of backup-utils your new version of backup-utils will be ready to use. -##### Example backup and restore usage +#### Docker -The following assumes that `GHE_HOSTNAME` is set to "github.example.com" in -`backup.config`. +For customers that currently use Docker to create a backup-utils image, their existing process may need updating as a result of this change. Previously customers could execute this command to build a Docker image of backup-utils: -Creating a backup snapshot: +```shell +\$ docker build github.com/github/backup-utils +``` - $ ghe-backup - Starting backup of github.example.com in snapshot 20140727T224148 - Connect github.example.com OK (v11.10.343) - Backing up GitHub settings ... - Backing up SSH authorized keys ... - Backing up SSH host keys ... - Backing up MySQL database ... - Backing up Redis database ... - Backing up Git repositories ... - Backing up GitHub Pages ... - Backing up Elasticsearch indices ... - Completed backup of github.example.com in snapshot 20140727T224148 at 23:01:58 +This will not work after phase 2 rolls out. You will need to update your process to first download an archive from the [release page](https://github.com/github/backup-utils/releases), extract it, and then build the Dockerfile inside the extracted directory. -Restoring from last successful snapshot to a newly provisioned GitHub Enterprise -appliance at IP "5.5.5.5": - $ ghe-restore 5.5.5.5 - Starting rsync restore of 5.5.5.5 from snapshot 20140727T224148 - Connect 5.5.5.5 OK (v11.10.343) - Enabling maintenance mode on 5.5.5.5 ... - Restoring Git repositories ... - Restoring GitHub Pages ... - Restoring MySQL database ... - Restoring Redis database ... 
- Restoring SSH authorized keys ... - Restoring Elasticsearch indices ... - Restoring SSH host keys ... - Completed restore of 5.5.5.5 from snapshot 20140817T174152 - Visit https://5.5.5.5/setup/settings to configure the recovered appliance. +[1]: https://github.com/enterprise +[2]: docs/requirements.md#github-enterprise-version-requirements +[3]: https://support.github.com/ -A different backup snapshot may be selected by passing the `-s` argument and the -datestamp-named directory from the backup location. - -The `ghe-backup` and `ghe-restore` commands also have a verbose output mode -(`-v`) that lists files as they're being transferred. It's often useful to -enable when output is logged to a file. - -### Scheduling backups - -Regular backups should be scheduled using `cron(8)` or similar command -scheduling service on the backup host. The backup frequency will dictate the -worst case recovery point objective (RPO) in your backup plan. We recommend the -following: - - - **Hourly backups** for GitHub Enterprise versions 11.10.342 or greater (due to - improved online and incremental backup support) - - **Daily backups** for versions prior to 11.10.342. - -Note: the time required to do full offline backups of large datasets under -GitHub Enterprise versions prior to 11.10.342 may prohibit the use of daily -backups. We strongly recommend upgrading to 11.10.342 or greater in that case. - -##### Example scheduling usage - -The following examples assume the backup utilities are installed under -`/opt/backup-utils`. The crontab entry should be made under the same user that -manual backup/recovery commands will be issued under and must have write access -to the configured `GHE_DATA_DIR` directory. - -Note that the `GHE_NUM_SNAPSHOTS` option in `backup.config` should be tuned -based on the frequency of backups. The ten most recent snapshots are retained by -default. The number should be adjusted based on backup frequency and available -storage. 
- -To schedule hourly backup snapshots with verbose informational output written to -a log file and errors generating an email: - - MAILTO=admin@example.com - - 0 * * * * /opt/backup-utils/bin/ghe-backup -v 1>>/opt/backup-utils/backup.log - -To schedule nightly backup snapshots instead, use: - - MAILTO=admin@example.com - - 0 0 * * * /opt/backup-utils/bin/ghe-backup -v 1>>/opt/backup-utils/backup.log - -### Backup snapshot file structure - -Backup snapshots are stored in rotating increment directories named after the -date and time the snapshot was taken. Each snapshot directory contains a full -backup snapshot of all relevant data stores. Repository, Search, and Pages data -is stored efficiently via hard links. - -The following example shows a snapshot file hierarchy for hourly frequency. -There are five snapshot directories, with the `current` symlink pointing to the -most recent successful snapshot: - - ./data - |- 20140724T010000 - |- 20140725T010000 - |- 20140726T010000 - |- 20140727T010000 - |- 20140728T010000 - |- authorized-keys.json - |- elasticsearch/ - |- enterprise.ghl - |- mysql.sql.gz - |- pages/ - |- redis.rdb - |- repositories/ - |- settings.json - |- ssh-host-keys.tar - |- strategy - |- version - |- current -> 20140727T010000 - -Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change -the disk location where snapshots are written. - -### Support - -If you find a bug or would like to request a feature in backup-utils, please -open an issue or pull request on this repository. If you have a question related -to your specific GitHub Enterprise setup or would like assistance with backup -site setup or recovery, please contact our [Enterprise support team][7] instead. 
- -[1]: https://enterprise.github.com -[2]: https://github.com/github/enterprise-backup-site/blob/master/backup.config-example -[3]: https://enterprise.github.com/help/articles/adding-an-ssh-key-for-shell-access -[4]: http://rsync.samba.org/ -[5]: https://enterprise.github.com/download -[6]: https://enterprise.github.com/help/articles/upgrading-to-a-newer-release -[7]: https://enterprise.github.com/support/ -[8]: https://enterprise.github.com/help/articles/backing-up-enterprise-data -[9]: https://enterprise.github.com/help/articles/restoring-enterprise-data -[10]: https://help.github.com/enterprise/2.0/admin-guide/migrating-to-a-different-platform-or-from-github-enterprise-11-10-34x/ -[11]: https://help.github.com/enterprise/2.0/admin-guide/ diff --git a/RELEASING.md b/RELEASING.md deleted file mode 100644 index 69a45cb41..000000000 --- a/RELEASING.md +++ /dev/null @@ -1,12 +0,0 @@ -# Making a backup-utils release - - 1. Add a new version and release notes to the `debian/changelog` file. - 2. Rev the `share/github-backup-utils/version` file. - 3. Tag the release: `git tag v2.0.2` - 4. Build that tarball package: `make dist` - 5. Install the debian devscripts package if necessary: - `sudo apt-get install devscripts` - 6. Build the deb package: `debuild -uc -us` - 7. Draft a new release at https://github.com/github/backup-utils/releases, - including the release notes and attaching the tarball and deb packages. - 8. Push the head of the release to the 'stable' branch. diff --git a/backup.config-example b/backup.config-example deleted file mode 100644 index 54b221187..000000000 --- a/backup.config-example +++ /dev/null @@ -1,28 +0,0 @@ -# GitHub Enterprise backup configuration file - -# The hostname of the GitHub Enterprise appliance to back up. The host -# must be reachable via SSH from the backup host. -GHE_HOSTNAME="github.example.com" - -# Path to where backup data is stored. 
By default this is the "data" -# directory next to this file but can be set to an absolute path -# elsewhere for backing up to a separate partition / mount point. -GHE_DATA_DIR="data" - -# The number of backup snapshots to retain. Old snapshots are pruned after each -# successful ghe-backup run. This option should be tuned based on the frequency -# of scheduled backup runs. If backups are scheduled hourly, snapshots will be -# available for the past N hours; if backups are scheduled daily, snapshots will -# be available for the past N days ... -GHE_NUM_SNAPSHOTS=10 - -# The hostname of the GitHub appliance to restore. If you've set up a separate -# GitHub appliance to act as a standby for recovery, specify its IP or hostname -# here. The host to restore to may also be specified directly when running -# ghe-restore so use of this variable isn't strictly required. -# -#GHE_RESTORE_HOST="github-standby.example.com" - -# Any extra options passed to the SSH command. Nothing required by default -# -#GHE_EXTRA_SSH_OPTS="" diff --git a/bin/ghe-backup b/bin/ghe-backup deleted file mode 100755 index bc94bc2b6..000000000 --- a/bin/ghe-backup +++ /dev/null @@ -1,163 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup [-v] -#/ Take snapshots of all GitHub Enterprise data, including Git repository data, -#/ the MySQL database, instance settings, GitHub Pages data, etc. -#/ -#/ With -v, enable verbose output and show more information about what's being -#/ transferred. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/.. -. share/github-backup-utils/ghe-backup-config - -# Used to record failed backup steps -failures= - -# Create the timestamped snapshot directory where files for this run will live, -# change into it, and mark the snapshot as incomplete by touching the -# 'incomplete' file. If the backup succeeds, this file will be removed -# signifying that the snapshot is complete. 
-mkdir -p "$GHE_SNAPSHOT_DIR" -cd "$GHE_SNAPSHOT_DIR" -touch "incomplete" - -# This is toggled true once we've successfully re-enabled maintenance mode -# on the remote appliance. This is used to avoid trying to re-enable in the exit -# trap again on successful backup runs. -GHE_MAINTENANCE_MODE_ENABLED=false - -# To prevent multiple backup runs happening at the same time, we create a -# in-progress symlink pointing to the snapshot directory. This will fail if -# another backup is already in progress, giving us a form of locking. -# -# Set up a trap to remove the in-progress symlink if we exit for any reason but -# verify that we own the in-progress symlink before doing so. -# -# The cleanup trap also handles disabling maintenance mode on the appliance if -# it was automatically enabled. -cleanup () { - if [ "$(readlink ../in-progress)" = "$GHE_SNAPSHOT_TIMESTAMP" ]; then - unlink ../in-progress - fi - - if $GHE_MAINTENANCE_MODE_ENABLED; then - ghe-maintenance-mode-disable "$GHE_HOSTNAME" - fi -} - -# Setup exit traps -trap 'cleanup' EXIT -trap 'exit $?' INT # ^C always terminate - -# Mark the snapshot as in-progress by creating the symlink. If this fails, it -# means another ghe-backup run is already in progress and we should exit. -# NOTE: The -n argument to ln is non-POSIX but widely supported. -if ! ln -sn "$GHE_SNAPSHOT_TIMESTAMP" ../in-progress 2>/dev/null; then - snapshot="$(readlink ../in-progress)" - echo "Error: backup of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 1>&2 - exit 1 -fi - -echo "Starting backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP" - -# Perform a host connection check and establish the remote appliance version. -# The version is available in the GHE_REMOTE_VERSION variable and also written -# to a version file in the snapshot directory itself. 
-ghe_remote_version_required -echo "$GHE_REMOTE_VERSION" > version - -# Determine whether to use the rsync or tarball backup strategy based on the -# remote appliance version. The tarball strategy must be used with GitHub -# Enterprise versions prior to 11.10.340 since rsync is not available. -# The tarball strategy may be forced for newer versions by setting -# GHE_BACKUP_STRATEGY=tarball in backup.config but this is not recommended. -: ${GHE_BACKUP_STRATEGY:=rsync} -if [ $GHE_VERSION_MAJOR -eq 1 -a $GHE_VERSION_PATCH -lt 340 ]; then - GHE_BACKUP_STRATEGY="tarball" -fi - -# Record the strategy with the snapshot so we will know how to restore. -echo "$GHE_BACKUP_STRATEGY" > strategy - -# If we're using the tarball backup strategy, put the appliance in maintenance -# mode and wait for all writing processes to bleed out. -if [ "$GHE_BACKUP_STRATEGY" = "tarball" ]; then - ghe-maintenance-mode-enable "$GHE_HOSTNAME" - GHE_MAINTENANCE_MODE_ENABLED=true -fi - -echo "Backing up GitHub settings ..." -ghe-backup-settings || -failures="$failures settings" - -echo "Backing up SSH authorized keys ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-authorized-keys' > authorized-keys.json || -failures="$failures authorized-keys" - -echo "Backing up SSH host keys ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-ssh-host-keys' > ssh-host-keys.tar || -failures="$failures ssh-host-keys" - -echo "Backing up MySQL database ..." -echo 'set -o pipefail; ghe-export-mysql | gzip' | -ghe-ssh "$GHE_HOSTNAME" -- /bin/bash > mysql.sql.gz || -failures="$failures mysql" - -echo "Backing up Redis database ..." -ghe-backup-redis > redis.rdb || -failures="$failures redis" - -echo "Backing up Git repositories ..." -ghe-backup-repositories-${GHE_BACKUP_STRATEGY} || -failures="$failures repositories" - -echo "Backing up GitHub Pages ..." -ghe-backup-pages-${GHE_BACKUP_STRATEGY} || -failures="$failures pages" - -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "Backing up asset attachments ..." 
- ghe-backup-userdata alambic_assets || - failures="$failures alambic_assets" - - echo "Backing up hook deliveries ..." - ghe-backup-userdata hookshot || - failures="$failures hookshot" -fi - -echo "Backing up Elasticsearch indices ..." -ghe-backup-es-${GHE_BACKUP_STRATEGY} || -failures="$failures elasticsearch" - -# If we're using the tarball backup strategy, bring the appliance out of -# maintenance mode now instead of waiting until after pruning stale snapshots. -if $GHE_MAINTENANCE_MODE_ENABLED; then - ghe-maintenance-mode-disable "$GHE_HOSTNAME" || - echo "Warning: Disabling maintenance mode on $GHE_HOSTNAME failed." - GHE_MAINTENANCE_MODE_ENABLED=false -fi - -# If everything was successful, mark the snapshot as complete, update the -# current symlink to point to the new snapshot and prune expired and failed -# snapshots. -if [ -z "$failures" ]; then - rm "incomplete" - - rm -f "../current" - ln -s "$GHE_SNAPSHOT_TIMESTAMP" "../current" - - ghe-prune-snapshots -fi - -echo "Completed backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP at $(date +"%H:%M:%S")" - -# Exit non-zero and list the steps that failed. -if [ -n "$failures" ]; then - steps="$(echo $failures | sed 's/ /, /g')" - echo "Error: Snapshot incomplete. Some steps failed: ${steps}. " - exit 1 -fi - -# Make sure we exit zero after the conditional -true diff --git a/bin/ghe-host-check b/bin/ghe-host-check deleted file mode 100755 index d5be624ff..000000000 --- a/bin/ghe-host-check +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-host-check [] -#/ Verify connectivity with the GitHub Enterprise host. When no is -#/ provided, the $GHE_HOSTNAME configured in backup.config is assumed. -set -e - -# Bring in the backup configuration. -cd $(dirname "$0")/.. -. share/github-backup-utils/ghe-backup-config - -# Use the host provided on the command line if provided, or fallback on the -# $GHE_HOSTNAME configured in backup.config when not present. 
-host="${1:-$GHE_HOSTNAME}" - -# Options to pass to SSH during connection check -options=" - -o PasswordAuthentication=no - -o ConnectTimeout=5 - -o ConnectionAttempts=1 -" - -# Split host:port into parts -port=$(ssh_port_part "$host") -hostname=$(ssh_host_part "$host") - -set +e -output=$(echo "ghe-negotiate-version backup-utils $BACKUP_UTILS_VERSION" | ghe-ssh -o BatchMode=no $options $host -- /bin/sh 2>&1) -rc=$? -if [ $rc = 127 ]; then - # ghe-negotiate-version not found, fallback to reading version file - legacy_version_output="1" - output=$(echo "cat \"$GHE_REMOTE_METADATA_FILE\" 2>/dev/null || exit 101" | ghe-ssh -o BatchMode=no $options $host -- /bin/sh 2>&1) - rc=$? -fi -set -e - -if [ $rc -ne 0 ]; then - case $rc in - 255) - if echo "$output" | grep -i "port 22: connection refused" >/dev/null; then - exec "bin/$(basename $0)" "$hostname:122" - fi - - echo "$output" 1>&2 - echo "Error: ssh connection with '$host' failed" 1>&2 - echo "Note that your SSH key needs to be setup on $host as described in:" 1>&2 - echo "* https://enterprise.github.com/help/articles/adding-an-ssh-key-for-shell-access" 1>&2 - ;; - 101) - echo "Error: couldn't read GitHub Enterprise fingerprint on '$host' or this isn't a GitHub appliance." 1>&2 - ;; - 1) - if [ "${port:-22}" -eq 22 ] && echo "$output" | grep "use port 122" >/dev/null; then - exec "bin/$(basename $0)" "$hostname:122" - else - echo "$output" 1>&2 - fi - ;; - - esac - exit $rc -fi - -if [ -z "$legacy_version_output" ]; then - version=$(echo "$output" | sed -n 's/GitHub Enterprise version \(.*\)/\1/p') -else - version=$(echo "$output" | grep version | cut -d'"' -f4) -fi - -if [ -z "$version" ]; then - echo "Error: failed to parse version on '$host' or this isn't a GitHub appliance." 
1>&2 - exit 2 -fi - -echo "Connect $hostname:$port OK (v$version)" diff --git a/bin/ghe-restore b/bin/ghe-restore deleted file mode 100755 index 4ac6691fc..000000000 --- a/bin/ghe-restore +++ /dev/null @@ -1,228 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore [-v] [-s ] [] -#/ Restores a GitHub instance from local backup snapshots. The is the -#/ hostname or IP of the GitHub instance. The may be omitted when -#/ the GHE_RESTORE_HOST config variable is set in backup.config. When a -#/ argument is provided, it always overrides the configured restore host. -#/ -#/ Options: -#/ -f Don't prompt for confirmation before restoring. -#/ -c Restore appliance settings and license in addition to -#/ datastores. Settings are not restored by default to -#/ prevent overwriting different configuration on the -#/ restore host. -#/ -s Restore from the snapshot with the given id. Available -#/ snapshots may be listed under the data directory. -#/ -v Enable verbose output. -#/ -#/ Note that the host must be reachable and your SSH key must be setup as -#/ described in the following help article: -#/ -#/ -set -e - -# Bring in the backup configuration. -cd $(dirname "$0")/.. -. 
share/github-backup-utils/ghe-backup-config - -# Parse arguments -restore_settings=false -force=false -while true; do - case "$1" in - -f|--force) - force=true - shift - ;; - -s) - snapshot_id="$(basename "$2")" - shift 2 - ;; - -c) - restore_settings=true - shift - ;; - -*) - echo "Error: invalid argument: '$1'" 1>&2 - exit 1 - ;; - *) - break - ;; - esac -done - -# Grab the host arg -GHE_HOSTNAME="${1:-$GHE_RESTORE_HOST}" - -# Hostname without any port suffix -hostname=$(echo "$GHE_HOSTNAME" | cut -f 1 -d :) - -# Show usage with no -[ -z "$GHE_HOSTNAME" ] && print_usage - -# ghe-restore-snapshot-path validates it exists, determines what current is, -# and if there's any problem, exit for us -GHE_RESTORE_SNAPSHOT_PATH="$(ghe-restore-snapshot-path "$snapshot_id")" -GHE_RESTORE_SNAPSHOT=$(basename "$GHE_RESTORE_SNAPSHOT_PATH") -export GHE_RESTORE_SNAPSHOT - -# Figure out whether to use the tarball or rsync restore strategy based on the -# strategy file written in the snapshot directory. -GHE_BACKUP_STRATEGY=$(cat "$GHE_RESTORE_SNAPSHOT_PATH/strategy") - -# Perform a host-check and establish the remote version in GHE_REMOTE_VERSION. -ghe_remote_version_required "$GHE_HOSTNAME" - -# Keep other processes on the VM in the loop about the restore status. -# -# Other processes will look for these states: -# "restoring" - restore is currently in progress -# "failed" - restore has failed -# "complete" - restore has completed successfully -update_restore_status () { - if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "$1" | - ghe-ssh "$GHE_HOSTNAME" -- "sudo dd of='$GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status' 2>/dev/null" - fi -} - -# Figure out if this instance has been configured or is entirely new. 
-instance_configured=false -if ghe-ssh "$GHE_HOSTNAME" -- \ - "[ -f '$GHE_REMOTE_DATA_DIR/enterprise/dna.json' -o \ - -f '$GHE_REMOTE_ROOT_DIR/etc/github/configured' ]"; then - instance_configured=true -elif [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - restore_settings=true -fi - -# Figure out if this instance is in a replication pair -if ghe-ssh "$GHE_HOSTNAME" -- "ghe-repl-status -r 2>/dev/null" \ - | grep -Eq "replica|primary"; then - instance_configured=true - echo "WARNING: Restoring to a server with replication enabled interrupts replication." - echo " You will need to reconfigure replication after the restore completes." -fi - -# Prompt to verify the restore host given is correct. Restoring overwrites -# important data on the destination appliance that cannot be recovered. This is -# mostly to prevent accidents where the backup host is given to restore instead -# of a separate restore host since they're used in such close proximity. -if $instance_configured && ! $force; then - echo - echo "WARNING: All data on GitHub Enterprise appliance $hostname ($GHE_REMOTE_VERSION)" - echo " will be overwritten with data from snapshot ${GHE_RESTORE_SNAPSHOT}." - echo "Please verify that this is the correct restore host before continuing." - printf "Type 'yes' to continue: " - - while read -r response; do - case $response in - yes|Yes|YES) - break - ;; - '') - printf "Type 'yes' to continue: " - ;; - *) - echo "Restore aborted." 1>&2 - exit 1 - ;; - esac - done - echo -fi - -echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -trap "update_restore_status failed" EXIT -update_restore_status "restoring" - -# Verify the host has been fully configured at least once if when running -# against v11.10.x appliances and the -c option wasn't specified. -if [ "$GHE_VERSION_MAJOR" -le 1 ] && ! $restore_settings && ! $instance_configured; then - echo "Error: $hostname not configured." 
1>&2 - echo "Please visit https://$hostname/setup/settings to configure base appliance settings before continuing." 1>&2 - exit 1 -fi - -# Restoring Elasticsearch to 11.10.3x via rsync requires GNU tar -if [ "$GHE_VERSION_MAJOR" -le 1 ] && [ "$GHE_BACKUP_STRATEGY" = "rsync" ]; then - if ! tar --version | grep GNU >/dev/null; then - if ! command -v gtar >/dev/null 2>&1; then - echo "GNU tar is required. Aborting." >&2 - exit 1 - fi - fi -fi - -# Make sure the GitHub appliance is in maintenance mode. -if $instance_configured; then - if ! ghe-maintenance-mode-status "$GHE_HOSTNAME"; then - echo "Error: $GHE_HOSTNAME must be put in maintenance mode before restoring. Aborting." 1>&2 - exit 1 - fi -fi - -# Restore settings and license if restoring to an unconfigured appliance or when -# specified manually. -if $restore_settings; then - ghe-restore-settings "$GHE_HOSTNAME" -fi - -# Make sure mysql and elasticsearch are prep'd and running before restoring into -# appliances v2.x or greater. These services will not have been started on appliances -# that have not been configured yet. -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "sudo ghe-service-ensure-mysql && sudo ghe-service-ensure-elasticsearch" | - ghe-ssh "$GHE_HOSTNAME" -- /bin/sh 1>&3 -fi - -echo "Restoring Git repositories ..." -ghe-restore-repositories-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 - -echo "Restoring GitHub Pages ..." -ghe-restore-pages-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 - -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "Restoring asset attachments ..." - ghe-restore-userdata alambic_assets "$GHE_HOSTNAME" 1>&3 - - echo "Restoring hook deliveries ..." - ghe-restore-userdata hookshot "$GHE_HOSTNAME" 1>&3 -fi - -echo "Restoring MySQL database ..." -gzip -dc "$GHE_RESTORE_SNAPSHOT_PATH/mysql.sql.gz" | ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-mysql' 1>&3 - -echo "Restoring Redis database ..." 
-ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-redis' < "$GHE_RESTORE_SNAPSHOT_PATH/redis.rdb" 1>&3 - -echo "Restoring SSH authorized keys ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-authorized-keys' < "$GHE_RESTORE_SNAPSHOT_PATH/authorized-keys.json" 1>&3 - -echo "Restoring Elasticsearch indices ..." -ghe-restore-es-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 - -# When restoring to a host that has already been configured, kick off a -# config run to perform data migrations. -if $instance_configured; then - echo "Configuring storage ..." - if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - ghe-ssh "$GHE_HOSTNAME" -- "sudo ghe-config-apply --full" 1>&3 - else - echo " This will take several minutes to complete..." - ghe-ssh "$GHE_HOSTNAME" -- "sudo enterprise-configure" 1>&3 2>&3 - fi -fi - -# Update the remote status to "complete". This has to happen before importing -# ssh host keys because subsequent commands will fail due to the host key -# changing otherwise. -trap "" EXIT -update_restore_status "complete" - -echo "Restoring SSH host keys ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-ssh-host-keys' < "$GHE_RESTORE_SNAPSHOT_PATH/ssh-host-keys.tar" 1>&3 - -echo "Completed restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -echo "Visit https://$hostname/setup/settings to review appliance configuration." diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index 537d66986..000000000 --- a/debian/changelog +++ /dev/null @@ -1,87 +0,0 @@ -github-backup-utils (2.2.0) UNRELEASED; urgency=medium - - * Adds support for the new repositories filesystem layout include in - GitHub Enterprise v2.2. #122, #124 - * ghe-restore now performs a config run on the instance after an incremental - restore to 11.10.x and 2.x instances. #100 - * ghe-restore now fails fast when run against a GHE instance with replication - enabled. Replication should be disabled during a restore and then setup - after the restore completes. 
#121 - * Fixes an issue with special port 122 detection failing when port is - overridden in an ssh config file. #102 - * Removes a warning message when running ghe-backup against an instance with - GitHub Pages disabled. #117 - * backup-utils release version numbers now track GitHub Enterprise releases - to ease the process of determining which version of backup-utils is - required for a given GitHub Enterprise version. - - -- Ryan Tomayko Wed, 29 Apr 2015 07:29:04 +0000 - -github-backup-utils (2.0.2) UNRELEASED; urgency=medium - - * ghe-restore now requires that an already-configured appliance be put into - maintenance mode manually. This is a safeguard against accidentally - overwriting data on the wrong instance. #62, #84 - * ghe-backup and ghe-restore now run a ghe-negotiate-version program on the - appliance to determine whether the backup-utils and GHE versions are - compatible. #91 - * Various portability fixes for problems surfaced when running on Solaris - and FreeBSD. #86, #87 - * Fixes an issue in ghe-backup where mysqldump failures weren't being - reported properly. #90 - * Automated builds are now run on Travis CI. #77 - - -- Ryan Tomayko Tue, 20 Jan 2015 16:00:00 +0000 - -github-backup-utils (2.0.1) UNRELEASED; urgency=medium - - * Adds /etc/github-backup-utils/backup.config as a default config file search - location for deb / system installs. - * Enables SSH BatchMode for all remote command invocation except initial host - check / version identification. - * Fixes a bug in ghe-backup where Git GC process detection would misclassify - long-running server processes matching /git.*gc/, causing the backup operation - to timeout. - * Adds a note and link to the Migrating from GitHub Enterprise v11.10.34x to - v2.0 documentation in the README. - * Adds example / documentation for the GHE_EXTRA_SSH_OPTS config value to the - backup.config-example file. 
- - -- Ryan Tomayko Mon, 17 Nov 2014 12:47:22 +0000 - -github-backup-utils (2.0.0) UNRELEASED; urgency=medium - - * Support for GitHub Enterprise 2.0. - * Support for migrating from GitHub Enterprise 11.10.34x to 2.0 (including from - VMware to AWS). - * ghe-backup retains hardlinks present on VM in backup snapshots, saving space. - * ghe-restore retains hardlinks present in backup snapshot when restoring to VM. - * backup-utils now includes debian packaging support. - * Fixes an issue with ghe-restore -s not using the snapshot specified. - * Fixes an issue with ghe-backup not waiting for nw-repack processes to finish - in some instances. - - -- Ryan Tomayko Mon, 10 Nov 2014 10:48:36 +0000 - -github-backup-utils (1.1.0) UNRELEASED; urgency=medium - - * Updated documentation on minimum GitHub Enterprise version requirements for - online and incremental backups from v11.10.341 to at least v11.10.342. - * The ghe-restore command now prompts for confirmation of the host to restore to - before performing any destructive operation. This is to reduce the chances of - restoring to the wrong host. The prompt may be bypassed in automated scenarios - by providing the --force option. - * Added a -c option to ghe-restore for restoring base appliance settings in - addition to primary datastores. See ghe-restore --help for more information. - * Added a note about disabling maintenance mode on the appliance after a - successful ghe-restore operation. - * Added support for filesystem layout changes and upgraded server components in - * future versions of GitHub Enterprise. - - -- Twan Wolthof Sat, 18 Oct 2014 19:14:47 +0000 - -github-backup-utils (1.0.1) UNRELEASED; urgency=medium - - * Initial release. 
- - -- Twan Wolthof Tue, 23 Sep 2014 08:34:55 +0000 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144f..000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index c7a42cc30..000000000 --- a/debian/control +++ /dev/null @@ -1,32 +0,0 @@ -Source: github-backup-utils -Maintainer: Twan Wolthof -Section: misc -Priority: optional -Standards-Version: 3.9.2 -Build-Depends: debhelper (>= 9), git - -Package: github-backup-utils -Architecture: any -Depends: ${misc:Depends}, rsync (>= 2.6.4) -Description: Backup and recovery utilities for GitHub Enterprise - The backup utilities implement a number of advanced capabilities for backup - hosts, built on top of the backup and restore features already included in - GitHub Enterprise. - . - These advanced features include: - . - Complete GitHub Enterprise backup and recovery system via two simple utilities: - `ghe-backup` and `ghe-restore`. - Online backups. The GitHub appliance need not be put in maintenance mode for - the duration of the backup run. - Incremental backup of Git repository data. Only changes since the last - snapshot are transferred, leading to faster backup runs and lower network - bandwidth and machine utilization. - Efficient snapshot storage. Only data added since the previous snapshot - consumes new space on the backup host. - Multiple backup snapshots with configurable retention periods. - Backup commands run under the lowest CPU/IO priority on the GitHub appliance, - reducing performance impact while backups are in progress. - Runs under most Linux/Unix environments. - MIT licensed, open source software maintained by GitHub, Inc. 
- diff --git a/debian/copyright b/debian/copyright deleted file mode 120000 index ea5b60640..000000000 --- a/debian/copyright +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/debian/install b/debian/install deleted file mode 100644 index bc992fc49..000000000 --- a/debian/install +++ /dev/null @@ -1,2 +0,0 @@ -bin/* usr/bin -share/* usr/share diff --git a/debian/rules b/debian/rules deleted file mode 100755 index 290fed548..000000000 --- a/debian/rules +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/make -f -build-indep: - -override_dh_auto_build: - -%: - dh $@ diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..e84d313df --- /dev/null +++ b/docs/README.md @@ -0,0 +1,12 @@ +# GitHub Enterprise Server Backup Utilities Documentation + +- **[Requirements](requirements.md)** + - **[Backup host requirements](requirements.md#backup-host-requirements)** + - **[Storage requirements](requirements.md#storage-requirements)** + - **[GitHub Enterprise Server version requirements](requirements.md#github-enterprise-version-requirements)** +- **[Getting started](getting-started.md)** +- **[Using the backup and restore commands](usage.md)** +- **[Scheduling backups & snapshot pruning](scheduling-backups.md)** +- **[Backup snapshot file structure](backup-snapshot-file-structure.md)** +- **[How does Backup Utilities differ from a High Availability replica?](faq.md)** +- **[Docker](docker.md)** diff --git a/docs/backup-snapshot-file-structure.md b/docs/backup-snapshot-file-structure.md new file mode 100644 index 000000000..0a7032397 --- /dev/null +++ b/docs/backup-snapshot-file-structure.md @@ -0,0 +1,92 @@ +# Backup snapshot file structure + +Backup snapshots are stored in rotating increment directories named after the +date and time the snapshot was taken. Each snapshot directory contains a full +backup snapshot of all relevant data stores. Repository, Search, and Pages data +is stored efficiently via hard links. 
+ +*Please note* Symlinks must be maintained when archiving backup snapshots. +Dereferencing or excluding symlinks, or storing the snapshot contents on a +filesystem which does not support symlinks will result in operational +problems when the data is restored. + +The following example shows a snapshot file hierarchy for hourly frequency. +There are five snapshot directories, with the `current` symlink pointing to the +most recent successful snapshot: + + ./data + |- 20180124T010000 + |- 20180125T010000 + |- 20180126T010000 + |- 20180127T010000 + |- 20180128T010000 + |- audit-log + |- benchmarks + |- elasticsearch + |- git-hooks + |- hookshot + |- pages + |- repositories + |- storage + |- authorized-keys.json + |- enterprise.ghl + |- es-scan-complete + |- manage-password + |- mssql + |- mysql.sql.gz + |- redis.rdb + |- settings.json + |- ssh-host-keys.tar + |- ssl-ca-certificates.tar + |- strategy + |- uuid + |- version + |- current -> 20180128T010000 + +Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change +the disk location where snapshots are written. + +## MS SQL Server backup structure +Actions service uses MS SQL Server as backend data store. Each snapshot includes a suite of backup files for MS SQL Server database(s). + +To save time in backup, a three-level backup strategy is implemented. Based on the `GHE_MSSQL_BACKUP_CADENCE` setting, at each snapshot, either a (**F**)ull backup, a (**D**)ifferential or a (**T**)ransaction log backup is taken. + +As a result, a suite always contains following for each database: a full backup, possibly a differential backup and at least one transaction log backup. Their relationship with timeline is demonstrated below: + +```text +M---8:00--16:00---T---8:00--16:00---W... (timeline) + +F-----------------F-----------------F... (full backup) +#-----D-----D-----#-----D-----D-----#... (differential backup) +T--T--T--T--T--T--T--T--T--T--T--T--T... 
(transaction log backup) +``` + +To save disk space, at each snapshot, hard links are created to point to previous backup files. Only newly-created backup files are transferred from appliance to backup host. When a new full/differential backup is created, they become the new source for hard links and new base line for transaction log backups, for subsequent snapshots. + +During restore, a suite of backup files are restored in the sequence of full -> differential -> chronological transaction log. + +## Benchmark data + +Benchmark data for each snapshot is stored as a log file within the `benchmarks` directory within a snapshot directory. The benchmark log can be used to determine the duration of each backup step. For example: + +```text +ghe-backup-store-version took 0s +ghe-backup-settings took 2s +ghe-export-authorized-keys took 0s +ghe-export-ssh-host-keys took 0s +ghe-backup-mysql-binary took 9s +ghe-backup-mysql took 9s +ghe-backup-minio took 0s +ghe-backup-redis took 1s +ghe-backup-es-audit-log took 1s +ghe-backup-repositories - Generating routes took 3s +ghe-backup-repositories - Fetching routes took 0s +ghe-backup-repositories - Processing routes took 0s +ghe-backup-pages - hostname took 1s +ghe-backup-pages took 1s +ghe-backup-storage - Generating routes took 2s +ghe-backup-storage - Fetching routes took 0s +ghe-backup-storage - Processing routes took 0s +ghe-backup-git-hooks took 0s +ghe-backup-es-rsync took 2s +``` diff --git a/docs/backup.config-example b/docs/backup.config-example new file mode 100644 index 000000000..39a4d417f --- /dev/null +++ b/docs/backup.config-example @@ -0,0 +1,140 @@ +# GitHub Enterprise Server backup configuration file + +# The hostname of the GitHub Enterprise Server appliance to back up. The host +# must be reachable via SSH from the backup host. +GHE_HOSTNAME="github.example.com" + +# Path to where backup data is stored. 
By default this is the "data" +# directory next to this file but can be set to an absolute path +# elsewhere for backing up to a separate partition / mount point. +GHE_DATA_DIR="data" + +# The number of backup snapshots to retain. Old snapshots are pruned after each +# successful ghe-backup run. This option should be tuned based on the frequency +# of scheduled backup runs. If backups are scheduled hourly, snapshots will be +# available for the past N hours; if backups are scheduled daily, snapshots will +# be available for the past N days ... +GHE_NUM_SNAPSHOTS=10 + +# Pruning snapshots can be scheduled outside of the backup process. +# If set to 'yes', snapshots will not be pruned by ghe-backup. +# Instead, ghe-prune-snapshots will need to be invoked separately via cron +#GHE_PRUNING_SCHEDULED=yes + +# If GHE_ROUTE_VERIFICATION is set to true then ghe-repository-backup and +# ghe-storage-backup will issue a warning if the repositories and objects in +# the backup do not match the pre-backup inventory of routes. +#GHE_ROUTE_VERIFICATION=false + +# If GHE_MANAGE_CONSOLE_PW_RESTORE is set to false then management-console password +# will not be restored from backed-up snapshot data, it is restored by default +#GHE_MANAGE_CONSOLE_PW_RESTORE=true + +# If GHE_SKIP_CHECKS is set to true (or if --skip-checks is used with ghe-backup) then ghe-host-check +# disk space validation and software version checks on the backup-host will be disabled. +#GHE_SKIP_CHECKS=false + +# Cluster filesystem to check if it's writable as part of ghe-host-check +# By default it is /data/user/tmp but can be updated if needed +#GHE_FILE_SYSTEM_WRITE_CHECK="/data/user/tmp" + +# The hostname of the GitHub appliance to restore. If you've set up a separate +# GitHub appliance to act as a standby for recovery, specify its IP or hostname +# here. The host to restore to may also be specified directly when running +# ghe-restore so use of this variable isn't strictly required. 
+# +#GHE_RESTORE_HOST="github-standby.example.com" + +# If set to 'yes', ghe-restore will omit the restore of audit logs. +# +#GHE_RESTORE_SKIP_AUDIT_LOGS=no + +# If set to 'yes', backup and restore of Elasticsearch indices will be skipped +# +#GHE_SKIP_SEARCH_INDICES=no + +# When verbose output is enabled with `-v`, it's written to stdout by default. If +# you'd prefer it to be written to a separate file, set this option. +# +#GHE_VERBOSE_LOG="/var/log/backup-verbose.log" + +# Any extra options passed to the SSH command. +# In a single instance environment, nothing is required by default. +# In a clustering environment, "-i abs-path-to-ssh-private-key" is required. +# +#GHE_EXTRA_SSH_OPTS="" +# +# All backup processes are run with the lowest priority for scheduling by default. +# To change throttling behaviour/allow higher priority for backup processes, set higher values for the following variables. +# default value for GHE_NICE=nice -n 19 +# default value for GHE_IONICE=ionice -c 3 +#GHE_NICE="" +#GHE_IONICE="" + +# Any extra options passed to the rsync command. Nothing required by default. +# +#GHE_EXTRA_RSYNC_OPTS="" + +# If set to 'yes', rsync will be set to use compression during backups and restores transfers. Defaults to 'no'. +# +#GHE_RSYNC_COMPRESSION_ENABLED=yes + +# If enabled and set to 'no', rsync warning message during backups will be suppressed. +#RSYNC_WARNING=no + + +# If set to 'yes', logging output will be colorized. +# +#OUTPUT_COLOR=no + +# If set to 'no', GHE_DATA_DIR will not be created automatically +# and restore/backup will exit 8 +# +#GHE_CREATE_DATA_DIR=yes + +# If set to 'yes', git fsck will run on the repositories +# and print some additional info. +# +# WARNING: do not enable this, only useful for debugging/development +#GHE_BACKUP_FSCK=no + +# Cadence of MSSQL backups +# <full>,<differential>,<transactionlog> all in minutes +# e.g.
+# - Full backup every week (10080 minutes) +# - Differential backup every day (1440 minutes) +# - Transactionlog backup every 15 minutes +# +#GHE_MSSQL_BACKUP_CADENCE=10080,1440,15 + +# If set to 'yes', ghe-backup jobs will run in parallel. Defaults to 'no'. +# +#GHE_PARALLEL_ENABLED=yes + +# Sets the maximum number of jobs to run in parallel. Defaults to the number +# of available processing units on the machine. +# +#GHE_PARALLEL_MAX_JOBS=2 + +# Sets the maximum number of rsync jobs to run in parallel. Defaults to the +# configured GHE_PARALLEL_MAX_JOBS, or the number of available processing +# units on the machine. +# +# GHE_PARALLEL_RSYNC_MAX_JOBS=3 + +# When jobs are running in parallel wait as needed to avoid starting new jobs +# when the system's load average is not below the specified percentage. Defaults to +# unrestricted. +# +#GHE_PARALLEL_MAX_LOAD=50 + +# When running an external mysql database, run this script to trigger a MySQL backup +# rather than attempting to backup via backup-utils directly. +#EXTERNAL_DATABASE_BACKUP_SCRIPT="/bin/false" + +# When running an external mysql database, run this script to trigger a MySQL restore +# rather than attempting to restore via backup-utils directly. +#EXTERNAL_DATABASE_RESTORE_SCRIPT="/bin/false" + +# If set to 'yes', Pages data will be included in backup and restore. Defaults to 'yes' +#GHE_BACKUP_PAGES=no diff --git a/docs/docker.md b/docs/docker.md new file mode 100644 index 000000000..6e0785716 --- /dev/null +++ b/docs/docker.md @@ -0,0 +1,158 @@ +# Docker + +## Building the image + +``` +docker build -t github/backup-utils .
+``` + +## Setting configuration options at runtime +The `backup.config` file is dynamically populated at runtime with all `GHE_` +environment variables that are part of the run command or Docker environment: + +``` +$ docker run -it -e "GHE_HOSTNAME=hostname" \ +-e "GHE_DATA_DIR=/data" \ +-e "GHE_EXTRA_SSH_OPTS=-i /ghe-ssh/id_rsa -o UserKnownHostsFile=/ghe-ssh/known_hosts" \ +-e "GHE_NUM_SNAPSHOTS=15" \ +-v "ghe-backup-data:/data" \ +-v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" \ +-v "$HOME/.ssh/id_rsa:/ghe-ssh/id_rsa" \ +--rm \ +github/backup-utils ghe-backup +``` + +It is also possible to specify a `-e GHE_BACKUP_CONFIG` flag and volume mount in +a local `backup.config` file rather than specify the variables individually at +run time, as long as `GHE_HOSTNAME` and `GHE_EXTRA_SSH_OPTS` variables are configured : + +``` +$ docker run -it -e "GHE_BACKUP_CONFIG=/mnt/backup.config" \ +-v "ghe-backup-data:/data" \ +-v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" \ +-v "$HOME/.ssh/id_rsa:/ghe-ssh/id_rsa" \ +-v "$HOME/backup-utils/backup.config:/mnt/backup.config" \ +--rm \ +github/backup-utils ghe-backup +``` + +## SSH Keys + +A SSH private key that has been added to the GitHub Enterprise Server [Management Console +for administrative SSH access][1] needs to be mounted into the container from the +host system. It is also recommended to mount a SSH `.ssh/known_hosts` file into +the container. + +``` +$ docker run -it -e "GHE_HOSTNAME=hostname" \ +-e "GHE_DATA_DIR=/data" \ +-e "GHE_EXTRA_SSH_OPTS=-i /ghe-ssh/id_rsa -o UserKnownHostsFile=/ghe-ssh/known_hosts" \ +-v "ghe-backup-data:/data" \ +-v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" \ +-v "$HOME/.ssh/id_rsa:/ghe-ssh/id_rsa" \ +--rm \ +github/backup-utils ghe-backup +``` + +### Using ssh-agent + +If your SSH private key is protected with a passphrase, you can mount the `ssh-agent` +socket from the Docker host into the GitHub Enterprise Server Backup Utilities image. + +1. Start the ssh-agent in the background. 
+ + ``` + $ eval "$(ssh-agent -s)" + Agent pid 59566 + ``` + +2. Add your SSH private key to the ssh-agent. If you created your key with a + different name, or if you are adding an existing key that has a different name, + replace *id_rsa* in the command with the name of your private key file. + + ``` + $ ssh-add ~/.ssh/id_rsa + ``` + +3. Run the container setting the `SSH_AUTH_SOCK` environment variable, and + mounting the socket into the container as a volume: + + ``` + docker run -it -e "GHE_HOSTNAME=hostname" \ + -e "GHE_DATA_DIR=/data" \ + -e "GHE_EXTRA_SSH_OPTS=-i /ghe-ssh/id_rsa -o UserKnownHostsFile=/ghe-ssh/known_hosts" \ + -e "GHE_NUM_SNAPSHOTS=15" \ + -v "ghe-backup-data:/data" \ + -v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" \ + -v "$HOME/.ssh/id_rsa:/ghe-ssh/id_rsa" \ + -v "$(dirname $SSH_AUTH_SOCK):$(dirname $SSH_AUTH_SOCK)" \ + -e "SSH_AUTH_SOCK=$SSH_AUTH_SOCK" \ + --rm \ + github/backup-utils ghe-backup + ``` + +## Managing backup data + +Data persistence is achieved by using [Docker volumes][2], which are managed with +[`docker volume` commands][3]. Prior to running the container for the first time, +a volume can be created if you need to specify additional options. 
The named +volume will be automatically created at runtime if it does not exist: + +``` +docker volume create ghe-backup-data +``` + +The named Docker volume can be mounted and accessed from other containers, using +any image you like: + +``` +# Accessing backups using the backup-utils image: + +$ docker run -it -v ghe-backup-data:/data --rm github/backup-utils ls -l /data/ +total 8 +drwxr-xr-x 11 root root 4096 Oct 24 19:46 20171024T194650 +drwxr-xr-x 11 root root 4096 Oct 24 19:49 20171024T194921 +lrwxrwxrwx 1 root root 15 Oct 24 19:49 current -> 20171024T194921 + +# Accessing backups using the busybox library image: + +$ docker run --rm -v ghe-backup-data:/data busybox ls -l /data +total 8 +drwxr-xr-x 11 root root 4096 Oct 24 19:46 20171024T194650 +drwxr-xr-x 11 root root 4096 Oct 24 19:49 20171024T194921 +lrwxrwxrwx 1 root root 15 Oct 24 19:49 current -> 20171024T194921 +``` + +* The volume's filesystem must support hard links. + +* Bind mounting a volume is supported, as long as the Docker host supports them + and allows hard links. + +## Scheduling backups using crontab with Docker + +Designed to be a "one shot" type container, scheduling backup runs with the Docker +image is similar to the non-Docker scheduling. Run the container with all the same +variables options and volume mounts on `crontab`. This avoids needing to run +`crond` or an init system inside the container, and allows for the container to +be disposable (enabling the use of Docker's `--rm` flag). 
+ +To schedule hourly backup snapshots with verbose informational output written to +a log file and errors generating an email: + +``` +MAILTO=admin@example.com + +0 * * * * /usr/local/bin/docker run -i -e "GHE_HOSTNAME=hostname" -e "GHE_DATA_DIR=/data" -e "GHE_EXTRA_SSH_OPTS=-i /ghe-ssh/ghelocal -o UserKnownHostsFile=/ghe-ssh/known_hosts" -v "ghe-backup-data:/data" -v "$HOME/.ssh/ghelocal:/ghe-ssh/ghelocal" -v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" --rm github/backup-utils ghe-backup -v 1>>/opt/backup-utils/backup.log 2>&1 +``` + +To schedule nightly backup snapshots instead, use: + +``` +MAILTO=admin@example.com + +0 0 * * * /usr/local/bin/docker run -i -e "GHE_HOSTNAME=hostname" -e "GHE_DATA_DIR=/data" -e "GHE_EXTRA_SSH_OPTS=-i /ghe-ssh/ghelocal -o UserKnownHostsFile=/ghe-ssh/known_hosts" -v "ghe-backup-data:/data" -v "$HOME/.ssh/ghelocal:/ghe-ssh/ghelocal" -v "$HOME/.ssh/known_hosts:/ghe-ssh/known_hosts" --rm github/backup-utils ghe-backup -v 1>>/opt/backup-utils/backup.log 2>&1 +``` + +[1]: https://help.github.com/enterprise/admin/guides/installation/administrative-shell-ssh-access/ +[2]: https://docs.docker.com/engine/admin/volumes/volumes/ +[3]: https://docs.docker.com/engine/reference/commandline/volume/ diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 000000000..ee3b07c24 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,29 @@ +# Frequently Asked Questions + +## How does Backup Utilities differ from a High Availability replica? +It is recommended that both Backup Utilities and a [High Availability replica][1] +are used as part of a GitHub Enterprise Server deployment but they serve different roles. + +### The purpose of the High Availability replica +The High Availability replica is a fully redundant secondary GitHub Enterprise Server +instance, kept in sync with the primary instance via replication of all major +datastores.
This active/passive cluster configuration is designed to minimize +service disruption in the event of hardware failure or major network outage +affecting the primary instance. Because some forms of data corruption or loss may +be replicated immediately from primary to replica, it is not a replacement for +Backup Utilities as part of your disaster recovery plan. + +### The purpose of Backup Utilities +Backup Utilities are a disaster recovery tool. This tool takes date-stamped +snapshots of all major datastores. These snapshots are used to restore an instance +to a prior state or set up a new instance without having another always-on GitHub +Enterprise instance (like the High Availability replica). + +### Does taking or restoring a backup impact the GitHub Enterprise Server's performance or operation? + +Git background maintenance and garbage collection jobs become paused during the repositories stage of a backup and restore, and the storage stage of a backup. This may result in a backlog of queued maintenance or storage jobs observable in the GitHub Enterprise Server metrics for the duration of those steps. We suggest allowing any backlog to process and drain to 0 before starting another backup run. Repositories that are frequently pushed to may experience performance degradation over time if queued maintenance jobs are not processed. + +Backup processes triggered by `backup-utils` running on the GitHub Enterprise Server instance run at a low CPU and IO priority to reduce any user facing impact. You may observe elevated levels of CPU usage, disk IO, and network IO for the duration of a backup run. + + +[1]: https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/ diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 000000000..f8806e616 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,41 @@ +# Getting started + + 1. 
[Download the latest version of backup-utils][1] and extract the repository using `tar`: + + `tar -xzvf /path/to/github-backup-utils-vMAJOR.MINOR.PATCH.tar.gz` + + **Note**: you will need to use [Backup Utilities v2.11.x][2] or the `legacy` branch to + back up and restore GitHub Enterprise Server 2.10 and earlier. + + 2. Copy the [`backup.config-example`][3] file to `backup.config` and modify as + necessary. The `GHE_HOSTNAME` value must be set to the primary GitHub Enterprise Server + hostname. Additional options are available and documented in the + configuration file but none are required for basic backup functionality. + + As the data on a High Availability replica may be in a transient state at the time of backup, + Backup Utilities should not be used to back up data from a High Availability replica. + + * Backup Utilities will attempt to load the backup configuration from the following + locations, in this order: + + ```bash + $GHE_BACKUP_CONFIG (User configurable environment variable) + $GHE_BACKUP_ROOT/backup.config (Root directory of backup-utils install) + $HOME/.github-backup-utils/backup.config + /etc/github-backup-utils/backup.config + ``` + * In a clustering environment, the `GHE_EXTRA_SSH_OPTS` key must be configured + with the `-i <abs path to private key>` SSH option. + + 3. Add the backup host's SSH public key to the GitHub Enterprise Server appliance, in order to grant it administrative shell access. + See [Accessing the GitHub Enterprise Server administrative shell (SSH)][4] for instructions. + + 4. Run `bin/ghe-host-check` to verify SSH connectivity with the GitHub + appliance. + + 5. Run `bin/ghe-backup` to perform an initial full backup. 
+ +[1]: https://github.com/github/backup-utils/releases +[2]: https://github.com/github/backup-utils/releases/tag/v2.11.4 +[3]: https://github.com/github/backup-utils/blob/master/docs/backup.config-example +[4]: https://docs.github.com/enterprise-server/admin/configuration/configuring-your-enterprise/accessing-the-administrative-shell-ssh diff --git a/docs/requirements.md b/docs/requirements.md new file mode 100644 index 000000000..c87a0775a --- /dev/null +++ b/docs/requirements.md @@ -0,0 +1,109 @@ +# Requirements + +Backup Utilities should be run on a host dedicated to long-term permanent +storage and must have network connectivity with the GitHub Enterprise Server appliance. + +## Backup host requirements + +Backup host software requirements are modest: Linux or other modern Unix operating system (Ubuntu is highly recommended) with [bash][1], [git][2] 1.7.6 or newer, [OpenSSH][3] 5.6 or newer, [rsync][4] v2.6.4 or newer* (see [below](#april-2023-update-of-rsync-requirements) for exceptions), [jq][11] v1.5 or newer and [bc][12] v1.0.7 or newer. See below for an update on rsync. + +The parallel backup and restore feature will require [GNU awk][10] and [moreutils][9] to be installed. + +We encourage the use of [Docker](docker.md), as it ensures compatible versions of the aforementioned software are available to backup-utils. + +The backup host must be able to establish outbound network connections to the GitHub appliance over SSH. TCP port 122 is used to back up GitHub Enterprise Server. + +CPU and memory requirements are dependent on the size of the GitHub Enterprise Server appliance. We recommend a minimum of 4 cores and 8GB of RAM for the host running [GitHub Enterprise Backup Utilities](https://github.com/github/backup-utils). We recommend monitoring the backup host's CPU and memory usage to ensure it is sufficient for your environment. 
+ +### April 2023 Update of Rsync Requirements + +The [fix in rsync `3.2.5`](https://github.com/WayneD/rsync/blob/master/NEWS.md#news-for-rsync-325-14-aug-2022) for [CVE-2022-29154](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29154) can cause severe performance degradation to `backup-utils`. + +If you encounter this degradation you can mitigate it by using the `--trust-sender` flag, which is available in rsync >= v3.2.5. +**Note**: If you are using backup-utils 3.9 or greater, `--trust-sender` is automatically used if your rsync version supports it and no further changes are needed. + +If your backup host is running rsync < v3.2.5 you may or may not need to make changes to your rsync package, depending on whether your rsync package has backported the fix for CVE-2022-29154 without also backporting the `--trust-sender` flag. + +If your rsync package has backported the CVE fix _and_ the `--trust-sender` flag then you don't need to change anything. + +However, if your rsync package has backported the CVE fix without backporting the `--trust-sender` flag then you have three options: + +1. Downgrade (using the package manager on your host) the rsync package to a version before the CVE fix was backported +2. Upgrade (using the package manager on your host) the rsync package to v3.2.5 or newer +3. Manually download rsync v3.2.5 or newer and build the rsync binary + +Option #3 is required if your operating system's package manager does not have access to rsync v3.2.5 or later (e.g. Ubuntu Focal). + +Please note that some operating systems have their own versioning scheme for packages (including `rsync`). +If your backup host is using one of these operating systems, you will not be able to rely on a version check to determine whether you are +affected by the `rsync` performance degradation described above. + +## Storage requirements + +Storage requirements vary based on current Git repository disk usage and growth +patterns of the GitHub appliance. 
We recommend allocating at least 5x the amount +of storage allocated to the primary GitHub appliance for historical snapshots +and growth over time. + +Backup Utilities use [hard links][5] to store data efficiently, and the +repositories on GitHub Enterprise Server use [symbolic links][6] so the backup snapshots +must be written to a filesystem with support for symbolic and hard links. + +To check if your filesystem supports creating hardlinks of symbolic links, you can run the following within your backup destination directory: + +```bash +touch file +ln -s file symlink +ln symlink hardlink +ls -la +``` + +Using a [case sensitive][7] file system is also required to avoid conflicts. + +Performance of backup and restore operations is also dependent on the backup host's storage. We recommend using a high performance storage system with low latency and high IOPS. + +Please avoid using an NFS mount for the data directory (where backup data is stored) as this can cause performance issues and timeouts during backups. + +## GitHub Enterprise Server version requirements + +Starting with Backup Utilities v2.13.0, version support is in line with that of the +[GitHub Enterprise Server upgrade requirements][8] and as such, support is limited to +three versions of GitHub Enterprise Server: the version that corresponds with the version +of Backup Utilities, and the two versions prior to it. + +For example, Backup Utilities v2.13.0 can be used to back up and restore all patch +versions from 2.11.0 to the latest patch version of GitHub Enterprise Server 2.13. +Backup Utilities v2.14.0 will be released when GitHub Enterprise Server 2.14.0 is released +and will then be used to back up all versions of GitHub Enterprise Server from 2.12.0 +to the latest patch version of GitHub Enterprise Server 2.14. + +Backup Utilities v2.11.4 and earlier offer support for GitHub Enterprise Server 2.10 +and earlier versions up to GitHub Enterprise Server 2.2.0. 
Backup Utilities v2.11.0 and earlier +offer support for GitHub Enterprise Server 2.1.0 and earlier. + +**Note**: You can restore a snapshot that's at most two feature versions behind +the restore target's version of GitHub Enterprise Server. For example, to restore a +snapshot of GitHub Enterprise Server 2.11, the target GitHub Enterprise Server appliance must +be running GitHub Enterprise Server 2.12.x or 2.13.x. You can't restore a snapshot from +2.10 to 2.13, because that's three versions ahead. + +**Note**: You _cannot_ restore a backup created from a newer version of GitHub Enterprise Server to an older version. For example, an attempt to restore a snapshot of GitHub Enterprise Server 2.21 to a GitHub Enterprise Server 2.20 environment will fail with an error of `Error: Snapshot can not be restored to an older release of GitHub Enterprise Server.`. + +## Multiple backup hosts + +Using multiple backup hosts or backup configurations is not currently recommended. + +Due to how some components of Backup Utilities (e.g. MSSQL) take incremental backups, running another instance of Backup Utilities may result in unrestorable snapshots as data may be split across backup hosts. If you still wish to have multiple instances of Backup Utilities for redundancy purposes or to run at different frequencies, ensure that they share the same `GHE_DATA_DIR` backup directory. 
+ +[1]: https://www.gnu.org/software/bash/ +[2]: https://git-scm.com/ +[3]: https://www.openssh.com/ +[4]: http://rsync.samba.org/ +[5]: https://en.wikipedia.org/wiki/Hard_link +[6]: https://en.wikipedia.org/wiki/Symbolic_link +[7]: https://en.wikipedia.org/wiki/Case_sensitivity +[8]: https://docs.github.com/enterprise-server/admin/monitoring-managing-and-updating-your-instance/updating-the-virtual-machine-and-physical-resources/upgrade-requirements +[9]: https://joeyh.name/code/moreutils +[10]: https://www.gnu.org/software/gawk +[11]: https://stedolan.github.io/jq/ +[12]: https://www.gnu.org/software/bc/ diff --git a/docs/scheduling-backups.md b/docs/scheduling-backups.md new file mode 100644 index 000000000..3a7b0d12c --- /dev/null +++ b/docs/scheduling-backups.md @@ -0,0 +1,63 @@ +# Scheduling backups & snapshot pruning + +Regular backups should be scheduled using `cron(8)` or similar command +scheduling service on the backup host. The backup frequency will dictate the +worst case [recovery point objective (RPO)][1] in your backup plan. We recommend +hourly backups as a starting point. + +It's important to consider the duration of each backup operation on the +GitHub Enterprise Server (GHES) appliance. Backups of large datasets or +over slow network links can take more than an hour. Additionally, +maintenance queues are paused during a portion of a backup run. +We recommend scheduling backups to allow sufficient time for jobs +waiting in maintenance queues to process between backup runs. + +Only one backup may be in progress at a time. + +## Example scheduling of backups + +The following examples assume the Backup Utilities are installed under +`/opt/backup-utils`. The crontab entry should be made under the same user that +manual backup/recovery commands will be issued under and must have write access +to the configured `GHE_DATA_DIR` directory. + +Note that the `GHE_NUM_SNAPSHOTS` option in `backup.config` should be tuned +based on the frequency of backups. 
The ten most recent snapshots are retained by +default. The number should be adjusted based on backup frequency and available +storage. + +To schedule hourly backup snapshots with verbose informational output written to +a log file and errors generating an email: + +```shell +MAILTO=admin@example.com + +0 * * * * /opt/backup-utils/bin/ghe-backup -v 1>>/opt/backup-utils/backup.log 2>&1 +``` + +To schedule nightly backup snapshots instead, use: + +```shell +MAILTO=admin@example.com + +0 0 * * * /opt/backup-utils/bin/ghe-backup -v 1>>/opt/backup-utils/backup.log 2>&1 +``` + +## Example snapshot pruning + +By default all expired and incomplete snapshots are deleted at the end of the main +backup process `ghe-backup`. If pruning these snapshots takes a long time you can +choose to disable the pruning process from the backup run and schedule it separately. +This can be achieved by enabling the `GHE_PRUNING_SCHEDULED` option in `backup.config`. +Please note that this option is only available for `backup-utils` >= `v3.10.0`. +If this option is enabled you will need to schedule the pruning script `ghe-prune-snapshots` using `cron` or a similar command scheduling service on the backup host. + +To schedule daily snapshot pruning, use: + +```shell +MAILTO=admin@example.com + +0 3 * * * /opt/backup-utils/share/github-backup-utils/ghe-prune-snapshots 1>>/opt/backup-utils/prune-snapshots.log 2>&1 +``` + +[1]: https://en.wikipedia.org/wiki/Recovery_point_objective diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 000000000..58a816bc8 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,123 @@ +# Using the backup and restore commands + +After the initial backup, use the following commands: + +- The `ghe-backup` command creates incremental snapshots of repository data, along with full snapshots of all other pertinent data stores. +- The `ghe-restore` command restores snapshots to the same or separate GitHub +Enterprise appliance. 
You must add the backup host's SSH key to the target +GitHub Enterprise Server appliance before using this command. + +These commands are run on the host you [installed][1] Backup Utilities on. + +## Configuring backup and restore behavior + +You can supply your own configuration file or use the example configuration file as a template where you can set up your environment for backing up and restoring. + +An example configuration file with documentation on possible settings can be found in [backup.config-example](backup.config-example). + +There are a number of command-line options that can also be passed to the `ghe-restore` command. Of particular note, if you use an external MySQL service but are restoring from a snapshot prior to enabling this, or vice versa, you must migrate the MySQL data outside of the context of backup-utils first, then pass the `--skip-mysql` flag to `ghe-restore`. + +## Example backup and restore usage + +The following assumes that `GHE_HOSTNAME` is set to "github.example.com" in +`backup.config`. + +Creating a backup snapshot: + + $ ghe-backup + Starting backup of github.example.com in snapshot 20180326T020444 + Connect github.example.com:122 OK (v2.13.0) + Backing up GitHub settings ... + Backing up SSH authorized keys ... + Backing up SSH host keys ... + Backing up MySQL database ... + Backing up Redis database ... + Backing up audit log ... + Backing up hookshot logs ... + Backing up Git repositories ... + Backing up GitHub Pages ... + Backing up storage data ... + Backing up custom Git hooks ... + Backing up Elasticsearch indices ... + Completed backup of github.example.com:122 in snapshot 20180326T020444 at 02:05:12 + Checking for leaked ssh keys ... + * No leaked keys found + +Restoring from last successful snapshot to a newly provisioned GitHub Enterprise Server +appliance at IP "5.5.5.5": + + $ ghe-restore 5.5.5.5 + Checking for leaked keys in the backup snapshot that is being restored ... 
+ * No leaked keys found + Connect 5.5.5.5:122 OK (v2.13.0) + Starting restore of 5.5.5.5:122 from snapshot 20180326T020444 + Stopping cron and github-timerd ... + Restoring settings ... + Restoring license ... + Restoring management console password ... + Restoring CA certificates ... + --> Importing custom CA certificates... + Restoring UUID ... + Restoring MySQL database ... + --> Importing MySQL data... + Restoring Redis database ... + Restoring Git repositories and Gists ... + Restoring GitHub Pages ... + Restoring SSH authorized keys ... + Restoring storage data ... + Restoring custom Git hooks ... + Restoring Elasticsearch indices ... + Starting cron ... + Restoring SSH host keys ... + Restore of 5.5.5.5:122 from snapshot 20180326T020444 finished. + To complete the restore process, please visit https://5.5.5.5/setup/settings to review and save the appliance configuration. + +A different backup snapshot may be selected by passing the `-s` argument to `ghe-restore` and specifying the +datestamp-named directory from the backup location as the value. + +The `ghe-backup` and `ghe-restore` commands also have a verbose output mode +(`-v`) that lists files as they're being transferred. It's often useful to +enable when output is logged to a file. + +Every time you execute `ghe-backup` we verify the storage and software setup of the host +you [installed][1] Backup Utilities on, to make sure our [requirements][2] for the host are +met. You can disable this check using the `--skip-checks` argument or by +adding `GHE_SKIP_CHECKS=true` to your configuration file. + +### Restoring settings, TLS certificate, and license + +When restoring to a new GitHub Enterprise Server instance, settings, certificate, and +license data *are* restored. These settings must be reviewed and saved before +using the GitHub Enterprise Server to ensure all migrations take place and all required +services are started. 
+ +When restoring to an already configured GitHub Enterprise Server instance, settings, certificate, and license data +are *not* restored to prevent overwriting manual configuration on the restore +host. This behavior can be overridden by passing the `-c` argument to `ghe-restore`, +forcing settings, certificate, and license data to be overwritten with the backup copy's data. + +## Backup and restore with GitHub Actions enabled + +GitHub Actions data on your external storage provider is not included in regular GitHub Enterprise Server +backups, and must be backed up separately. When restoring a GitHub Enterprise Server backup with +GitHub Actions enabled, the following steps are required: + +1. Enable GitHub Actions on the replacement appliance and configure it to use the same GitHub Actions + external storage configuration as the original appliance. +2. Put replacement appliance into maintenance mode. +3. Use `ghe-restore` to restore the backup. +4. Re-register your self-hosted runners on the replacement appliance. + +Please refer to [GHES Documentation](https://docs.github.com/en/enterprise-server/admin/github-actions/advanced-configuration-and-troubleshooting/backing-up-and-restoring-github-enterprise-server-with-github-actions-enabled) for more details. + +## Incremental MySQL Backups and Restores + +Incremental MySQL backup has been deprecated since 3.17 due to data integrity concerns. Restoring backups created with incremental backups remains supported for compatibility reasons. +## Rsync compression + +From backup-utils v3.11.0 onwards, we have disabled rsync compression by default to improve transfer speed and reduce CPU usage during the transfer process. + +If you would like to use compression with rsync, you can add `GHE_RSYNC_COMPRESSION_ENABLED=true` in your `backup.config` file. 
+ +[1]: https://github.com/github/backup-utils/blob/master/docs/getting-started.md +[2]: requirements.md diff --git a/script/cibuild b/script/cibuild deleted file mode 100755 index 553a8d035..000000000 --- a/script/cibuild +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh -# Usage: script/cibuild [--no-package] -set -e - -# GHE appliance versions to run tests against. Remote metadata files are put in -# place with this version at the beginning of each test and many commands have -# conditional logic based on the remote version. Running the suite against -# different major versions ensures we're covering these conditional paths. -REMOTE_VERSIONS=" - 11.10.344 - 2.0.0 - 2.2.0 -" - -# Enable verbose logging of ssh commands -export GHE_VERBOSE_SSH=true - -# Run over all remote versions and run entire test suite against each -res=true -for version in $REMOTE_VERSIONS -do - echo "==> Running testsuite with GHE_TEST_REMOTE_VERSION=$version" - export GHE_TEST_REMOTE_VERSION="$version" - if ! ls -1 test/test-*.sh | xargs -P 4 -n 1 /bin/sh; then - res=false - fi - echo -done - -# If any of the version tests failed, exit non-zero -$res - -# Bail out when --no-package given -[ "$1" = "--no-package" ] && exit 0 - -# files we'll md5sum at the end -pkg_files= - -# Build the tarball -echo "Building tar.gz package ..." -if script/package-tarball 1>package-tarball.txt 2>&1; then - pkg_files=$(grep '^Package ' < package-tarball.txt | cut -f 2 -d ' ') -else - echo "Packaging tar.gz failed:" - cat package-tarball.txt | sed 's/^/ /' 1>&2 - exit 1 -fi - -# Skip deb packaging if debuild not installed -if ! type debuild 1>/dev/null 2>&1; then - echo "debuild not installed, skipping deb packaging ..." - exit 0 -fi - -# Build the deb related packages -echo "Building deb package ..." 
-if script/package-deb 1>package-deb-out.txt 2>package-deb-err.txt; then - pkg_files="$pkg_files $(cat package-deb-out.txt)" -else - echo "Package build failed:" - cat package-tarball.txt | sed 's/^/ /' 1>&2 - exit 1 -fi - -# Publish package files on wcat.io -echo "Uploading packages ..." -for f in $pkg_files; do - printf "%-32s %-s\n" "$(curl -sT- https://wcat.io <"$f" || true)" "$f" -done - -# Generate md5sums -md5sum $pkg_files diff --git a/script/package-deb b/script/package-deb deleted file mode 100755 index 6e3d9e829..000000000 --- a/script/package-deb +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -# Usage: script/package-deb -# Script to build a deb release package from the current HEAD version. -# The package version comes from the debian/changelog file so that should -# be edited before running this. -set -e - -# Change into project root -cd "$(dirname "$0")"/.. - -# Basic package name and version. -PKG_BASE="github-backup-utils" -PKG_VERS="$(git describe --tags)" -PKG_NAME="${PKG_BASE}-${PKG_VERS}" -PKG_HEAD="$(git rev-parse HEAD)" - -# Run git-archive to generate tarball -rm -rf dist/debuild -trap "rm -rf dist/debuild" EXIT -mkdir -p dist/debuild - -distdir="$(pwd)/dist/debuild/$PKG_NAME" -git clone -q . "$distdir" -cd "$distdir" -git checkout -q "$PKG_HEAD" - -debuild -uc -us 1>&2 -cd .. -files=$(ls -1 *.deb *.tar.gz *.dsc *.changes) -mv $files ../ -for f in $files; do echo "dist/$f"; done diff --git a/script/package-tarball b/script/package-tarball deleted file mode 100755 index 6229cc33d..000000000 --- a/script/package-tarball +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh -# Usage: script/package-tarball -# Script to build a tarball release package from the current HEAD version. -# The package version comes from `git-describe --tags' so the release tag should -# be in place before this command is run. -set -e - -# Change into project root -cd "$(dirname "$0")"/.. - -# Basic package name and version. 
-PKG_BASE="github-backup-utils" -PKG_VERS="$(git describe --tags)" -PKG_NAME="${PKG_BASE}-${PKG_VERS}" - -# Run git-archive to generate tarball -echo "Creating ${PKG_NAME}.tar.gz ..." -mkdir -p dist -git archive \ - --format=tar.gz \ - --prefix="$PKG_NAME/" \ - --output="dist/${PKG_NAME}.tar.gz" \ - HEAD - -# List archive contents for review -gzip -dc < "dist/${PKG_NAME}.tar.gz" | tar tf - - -# Output location -echo "Package dist/${PKG_NAME}.tar.gz OK" diff --git a/share/github-backup-utils/ghe-backup-config b/share/github-backup-utils/ghe-backup-config deleted file mode 100755 index 969f1155d..000000000 --- a/share/github-backup-utils/ghe-backup-config +++ /dev/null @@ -1,235 +0,0 @@ -#!/bin/sh -# Usage: . ghe-backup-config -# GitHub Enterprise backup shell configuration. -# -# This file is sourced by the various utilities under bin and share/github-backup-utils to -# load in backup configuration and ensure things are configured properly. -# -# All commands should start with the following: -# -# cd $(dirname "$0")/../.. -# . share/github-backup-utils/ghe-backup-config -# - -# Assume the current directory is the root. This should be fine so long as all -# scripts source us in according to the instructions above. -GHE_BACKUP_ROOT="$(pwd)" - -# Get the version from the version file. -BACKUP_UTILS_VERSION="$(cat share/github-backup-utils/version)" - -# Add the bin and share/github-backup-utils dirs to PATH -PATH="$GHE_BACKUP_ROOT/bin:$GHE_BACKUP_ROOT/share/github-backup-utils:$PATH" - -# The backup config file. This may be set in the environment. -: ${GHE_BACKUP_CONFIG:="$GHE_BACKUP_ROOT/backup.config"} - -# Parse out -v (verbose) argument -if [ "$1" = "-v" ]; then - GHE_VERBOSE=true - shift -fi -export GHE_VERBOSE - -# If verbose logging is enabled, redirect fd 3 to stdout; otherwise, redirect it -# to /dev/null. Write verbose output to fd 3. 
-if [ -n "$GHE_VERBOSE" ]; then - exec 3>&1 -else - exec 3>/dev/null -fi - -# Save off GHE_HOSTNAME from the environment since we want it to override the -# backup.config value when set. -GHE_HOSTNAME_PRESERVE="$GHE_HOSTNAME" - -# Source in the backup config file from the local working copy location first -# and then falling back to the system location. -config_found=false -for f in "$GHE_BACKUP_CONFIG" "/etc/github-backup-utils/backup.config"; do - if [ -f "$f" ]; then - GHE_BACKUP_CONFIG="$f" - . "$GHE_BACKUP_CONFIG" - config_found=true - break - fi -done - -# Check that the config file exists before we source it in. -if ! $config_found; then - echo "Error: No backup configuration file found. Tried:" 1>&2 - echo " - $GHE_BACKUP_CONFIG" 1>&2 - echo " - /etc/github-backup-utils/backup.config" 1>&2 - exit 2 -fi - -# Restore saved off hostname. -[ -n "$GHE_HOSTNAME_PRESERVE" ] && GHE_HOSTNAME="$GHE_HOSTNAME_PRESERVE" - -# Check that the GHE hostname is set. -if [ -z "$GHE_HOSTNAME" ]; then - echo "Error: GHE_HOSTNAME not set in config file." 1>&2 - exit 2 -fi - -# Check that the data directory is set and create it if it doesn't exist. -if [ ! -d "$GHE_DATA_DIR" ]; then - echo "Creating the backup data directory ..." 1>&3 - mkdir -p "$GHE_DATA_DIR" -fi - -# Set some defaults if needed. -: ${GHE_NUM_SNAPSHOTS:=10} - -# Generate a backup timestamp if one has not already been generated. -# We export the variable so the process group shares the same value. -: ${GHE_SNAPSHOT_TIMESTAMP:=$(date +"%Y%m%dT%H%M%S")} -export GHE_SNAPSHOT_TIMESTAMP - -# Set the current snapshot directory to /. This is where -# all backups should be written for the current invocation. -GHE_SNAPSHOT_DIR="$GHE_DATA_DIR"/"$GHE_SNAPSHOT_TIMESTAMP" - -# The root filesystem location. This must be used so that tests can override -# the root as a local directory location. -: ${GHE_REMOTE_ROOT_DIR:=""} - -# The root location of persistent data and applications on the remote side. 
This -# is always "/data" for GitHub instances. Use of this variable allows -# the location to be overridden in tests. -: ${GHE_REMOTE_DATA_DIR:="/data"} - -# The root location of user data stores such as git repositories, pages sites, -# elasticsearch indices, etc. This is "/data" under 1.x filesystem layouts and -# "/data/user" under the 2.x filesystem layout. The location is adjusted -# dynamically in ghe_remote_version_config() immediately after obtaining the -# remote version. Utilities that transfer data in and out of the appliance -# should use this variable to ensure proper behavior under different versions. -: ${GHE_REMOTE_DATA_USER_DIR:="$GHE_REMOTE_DATA_DIR"} - -# The location of the license file on the remote side. This is always -# "/data/enterprise/enterprise.ghl" for GitHub instances. Use of this variable -# allows the location to be overridden in tests. -: ${GHE_REMOTE_LICENSE_FILE:="$GHE_REMOTE_DATA_DIR/enterprise/enterprise.ghl"} - -# The legacy location of the metadata file on the remote side. Only used if -# the newer "ghe-negotiate-version" script cannot be found or fails. This was -# "/data/enterprise/metadata.json" for GitHub instances. Use of this variable -# allows the location to be overridden in tests. -: ${GHE_REMOTE_METADATA_FILE:="$GHE_REMOTE_DATA_DIR/enterprise/chef_metadata.json"} - -# CPU and IO throttling to keep backups and restores from thrashing around. -: ${GHE_NICE:="nice -n 19"} -: ${GHE_IONICE:="ionice -c 3"} - -# The number of seconds to wait for in progress git-gc processes to complete -# before starting the sync of git data. See share/github-backup-utils/ghe-backup-repositories-rsync -# for more information. Default: 10 minutes. 
-: ${GHE_GIT_COOLDOWN_PERIOD:=600} - -# Set "true" to get verbose logging of all ssh commands on stderr -: ${GHE_VERBOSE_SSH:=false} - - -############################################################################### -### Dynamic remote version config - -# Adjusts remote paths based on the version of the remote appliance. This is -# called immediately after the remote version is obtained by -# ghe_remote_version_required(). Child processes inherit the values set here. -ghe_remote_version_config () { - if [ "$GHE_VERSION_MAJOR" -gt 1 ]; then - GHE_REMOTE_DATA_USER_DIR="$GHE_REMOTE_DATA_DIR/user" - fi - export GHE_REMOTE_DATA_DIR GHE_REMOTE_DATA_USER_DIR - export GHE_REMOTE_LICENSE_FILE GHE_REMOTE_METADATA_FILE -} - -############################################################################### -### Utility functions - -# Function to print usage embedded in a script's opening doc comments. -print_usage () { - grep '^#/' <"$0" | cut -c 4- - exit ${1:-1} -} - -# Check for a "--help" arg and show usage -for a in "$@"; do - if [ "$a" = "--help" ]; then - print_usage - fi -done - -# If we don't have a readlink command, parse ls -l output. -if ! type readlink 1>/dev/null 2>&1; then - readlink () { - if [ -x "$1" ]; then - ls -ld "$1" | sed 's/.*-> //' - else - return 1 - fi - } -fi - -# Run ghe-host-check and establish the version of the remote GitHub instance in -# the exported GHE_REMOTE_VERSION variable. If the remote version has already -# been established then don't perform the host check again. Utilities in share/github-backup-utils -# that need the remote version should use this function instead of calling -# ghe-host-check directly to reduce ssh roundtrips. The top-level ghe-backup and -# ghe-restore commands establish the version for all subcommands. 
-ghe_remote_version_required () { - if [ -z "$GHE_REMOTE_VERSION" ]; then - _out=$(ghe-host-check "$@") - echo "$_out" - - # override hostname w/ ghe-host-check output because the port could have - # been autodetected to 122. - GHE_HOSTNAME=$(echo "$_out" | sed 's/Connect \(.*:[0-9]*\) OK.*/\1/') - export GHE_HOSTNAME - - GHE_REMOTE_VERSION=$(echo "$_out" | sed 's/.*(\(.*\))/\1/') - export GHE_REMOTE_VERSION - - ghe_parse_remote_version "$GHE_REMOTE_VERSION" - ghe_remote_version_config "$GHE_REMOTE_VERSION" - fi - true -} - -# Parse major, minor, and patch parts of the remote appliance version and store -# in GHE_VERSION_MAJOR, GHE_VERSION_MINOR, and GHE_VERSION_PATCH variables. All -# parts are numeric. This is called automatically from -# ghe_remote_version_required so shouldn't be used directly. -# -# Scripts use these variables to alter behavior based on what's supported on the -# appliance version. The version parts are modified somewhat to make dealing -# with the 11.10.x version scheme more sane. The "11.10" part of the remote -# version is normalized to "1.0" so "11.10.340" would have parts "1.0.340". -ghe_parse_remote_version () { - GHE_VERSION_MAJOR=$(echo "${1#v}" | cut -f 1 -d .) - GHE_VERSION_MINOR=$(echo "$1" | cut -f 2 -d .) - GHE_VERSION_PATCH=$(echo "$1" | cut -f 3 -d .) - GHE_VERSION_PATCH=${GHE_VERSION_PATCH%%[a-zA-Z]*} - - if [ "$GHE_VERSION_MAJOR.$GHE_VERSION_MINOR" = "11.10" ]; then - GHE_VERSION_MAJOR=1 - GHE_VERSION_MINOR=0 - fi - - export GHE_VERSION_MAJOR GHE_VERSION_MINOR GHE_VERSION_PATCH -} - -# Parses the part out of a ":" or just "" string. -# This is used primarily to break hostspecs with non-standard ports down for -# rsync commands. -ssh_host_part () { - [ "${1##*:}" = "$1" ] && echo "$1" || echo "${1%:*}" -} - -# Parses the part out of a ":" or just "" string. -# This is used primarily to break hostspecs with non-standard ports down for -# rsync commands. 
-ssh_port_part () { - [ "${1##*:}" = "$1" ] && echo 22 || echo "${1##*:}" -} diff --git a/share/github-backup-utils/ghe-backup-es-rsync b/share/github-backup-utils/ghe-backup-es-rsync deleted file mode 100755 index c3d31cca5..000000000 --- a/share/github-backup-utils/ghe-backup-es-rsync +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-es-rsync -#/ Take an online, incremental snapshot of Elasticsearch indices. -#/ -#/ Note: This command typically isn't called directly. It's invoked by -#/ ghe-backup when the rsync strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Set up remote host and root elastic backup directory based on config -host="$GHE_HOSTNAME" - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$host" - -# Verify rsync is available. -if ! rsync --version 1>/dev/null 2>&1; then - echo "Error: rsync not found." 1>&2 - exit 1 -fi - -# Make sure root backup dir exists if this is the first run -mkdir -p "$GHE_SNAPSHOT_DIR/elasticsearch" - -# Verify that the /data/elasticsearch directory exists. -if ! ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' ]"; then - echo "* The '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' directory doesn't exist." 1>&3 - exit 0 -fi - -# Grab the elasticsearch.yml file which is root owned and mode -rw------- so -# can't be read via rsync or cat. We use the root allowed grep -F as a cat -# replacement. This is necessary on v11.10.x appliances only. -if [ "$GHE_VERSION_MAJOR" -lt 2 ]; then - echo "* Retrieving elasticsearch.yml config file ..." 
1>&3 - ghe-ssh "$host" -- "sudo grep -F '' '$GHE_REMOTE_DATA_USER_DIR/elasticsearch/elasticsearch.yml'" \ - > "$GHE_SNAPSHOT_DIR/elasticsearch/elasticsearch.yml" - chmod 0600 "$GHE_SNAPSHOT_DIR/elasticsearch/elasticsearch.yml" -fi - -# If we have a previous increment, avoid transferring existing files via rsync's -# --link-dest support. This also decreases physical space usage considerably. -if [ -d "$GHE_DATA_DIR/current/elasticsearch" ]; then - link_dest="--link-dest=../../current/elasticsearch" -fi - -# Determine which user to run the rsync operation under. This is the git user on -# v11.10.34x appliances and the elasticsearch user under >= v2.x appliances. -if [ "$GHE_VERSION_MAJOR" -eq 1 ]; then - rsync_user=git -elif [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - rsync_user=elasticsearch -else - echo "Error: invalid remote version: $GHE_REMOTE_VERSION" 1>&2 - exit 1 -fi - -# Transfer ES indices from a GitHub instance to the current snapshot -# directory, using a previous snapshot to avoid transferring files that have -# already been transferred. -echo "* Performing initial sync of ES indices ..." 1>&3 -ghe-rsync -avz \ - -e "ghe-ssh -p $(ssh_port_part "$host")" \ - --rsync-path="sudo -u $rsync_user rsync" \ - $link_dest \ - --exclude='elasticsearch.yml' \ - "$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \ - "$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3 - -# Set up a trap to re-enable flushing on exit -cleanup () { - echo "* Enabling ES index flushing ..." 1>&3 - echo '{"index":{"translog.disable_flush":false}}' | - ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- >/dev/null -} -trap 'cleanup' EXIT -trap 'exit $?' INT # ^C always terminate - -# Disable ES flushing and force a flush right now -echo "* Disabling ES index flushing ..." 
1>&3 -echo '{"index":{"translog.disable_flush":true}}' | -ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- >/dev/null -ghe-ssh "$host" -- curl -s -XPOST "localhost:9200/_flush" >/dev/null - -# Transfer all ES indices again -echo "* Performing follow-up sync of ES indices ..." 1>&3 -ghe-rsync -avz \ - -e "ghe-ssh -p $(ssh_port_part "$host")" \ - --rsync-path="sudo -u $rsync_user rsync" \ - $link_dest \ - --exclude='elasticsearch.yml' \ - "$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \ - "$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3 diff --git a/share/github-backup-utils/ghe-backup-es-tarball b/share/github-backup-utils/ghe-backup-es-tarball deleted file mode 100755 index ebce38c47..000000000 --- a/share/github-backup-utils/ghe-backup-es-tarball +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-es-tarball -#/ Take a tarball snapshot of all Elasticsearch data. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-backup when the tarball strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Snapshot all Elasticsearch data or fake it when no /data/elasticsearch -# directory exists. -echo " - if [ -d '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' ]; then - ghe-export-es-indices - else - tar cvf - --files-from /dev/null - fi -" | ghe-ssh "$GHE_HOSTNAME" /bin/sh > "$GHE_SNAPSHOT_DIR"/elasticsearch.tar diff --git a/share/github-backup-utils/ghe-backup-pages-rsync b/share/github-backup-utils/ghe-backup-pages-rsync deleted file mode 100755 index 9de913183..000000000 --- a/share/github-backup-utils/ghe-backup-pages-rsync +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-pages-rsync -#/ Take an online, incremental snapshot of all Pages data. -#/ -#/ Note: This command typically isn't called directly. It's invoked by -#/ ghe-backup when the rsync strategy is used. 
-set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Make sure root backup dir exists if this is the first run -mkdir -p "$GHE_SNAPSHOT_DIR/pages" - -# Use the common user data rsync backup utility. -ghe-backup-userdata pages diff --git a/share/github-backup-utils/ghe-backup-pages-tarball b/share/github-backup-utils/ghe-backup-pages-tarball deleted file mode 100755 index 7e8bda265..000000000 --- a/share/github-backup-utils/ghe-backup-pages-tarball +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-pages-tarball -#/ Take a tarball snapshot of all Pages data. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-backup command when the tarball strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Snapshot all Pages data or fake it when no /data/pages directory exists. -echo ' - if [ -d /data/pages ]; then - ghe-export-pages - else - tar cvf - --files-from /dev/null - fi -' | ghe-ssh "$GHE_HOSTNAME" /bin/sh > "$GHE_SNAPSHOT_DIR"/pages.tar diff --git a/share/github-backup-utils/ghe-backup-redis b/share/github-backup-utils/ghe-backup-redis deleted file mode 100755 index dbd1e796f..000000000 --- a/share/github-backup-utils/ghe-backup-redis +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-redis -#/ Take a snapshot of all Redis data. This is needed because older versions of -#/ the remote side ghe-export-redis command use a blocking SAVE instead of a -#/ non-blocking BGSAVE. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-backup command. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Perform a host-check and establish GHE_REMOTE_XXX variables. 
-ghe_remote_version_required "$GHE_HOSTNAME" - -# Force a redis BGSAVE, and wait for it to complete. -sudo= -[ "$GHE_VERSION_MAJOR" -ge 2 ] && sudo="sudo" -ghe-ssh "$GHE_HOSTNAME" /bin/sh </dev/null - while [ \$(redis-cli LASTSAVE) -eq \$timestamp ]; do - sleep 1 - done - $sudo cat '$GHE_REMOTE_DATA_USER_DIR/redis/dump.rdb' -EOF diff --git a/share/github-backup-utils/ghe-backup-repositories-rsync b/share/github-backup-utils/ghe-backup-repositories-rsync deleted file mode 100755 index be94160bb..000000000 --- a/share/github-backup-utils/ghe-backup-repositories-rsync +++ /dev/null @@ -1,269 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-repositories-rsync -#/ Take an online, incremental snapshot of all Git repository data. -#/ -#/ Note: This command typically isn't called directly. It's invoked by -#/ ghe-backup when the rsync strategy is used. -set -e - -# This command is designed to allow for transferring active Git repository data -# from a GitHub instance to a backup site in a way that ensures data is -# captured in a consistent state even when being written to. -# -# - All Git GC operations are disabled on the GitHub instance for the duration of -# the backup. This removes the possibly of objects or packs being removed -# while the backup is in progress. -# -# - In progress Git GC operations are given a cooldown window to complete. The -# script will sleep for up to 60 seconds waiting for GC operations to finish. -# -# - Git repository data is transferred in a specific order: auxiliary files, -# packed refs, loose refs, reflogs, and finally objects and pack files in that -# order. This ensures that all referenced objects are captured. -# -# - Git GC operations are re-enabled on the GitHub instance. -# -# The script uses multiple runs of rsync to transfer repository files. Each run -# includes a list of filter rules that ensure only specific types of files are -# transferred. 
-# -# See the "FILTER RULES" and "INCLUDE/EXCLUDE PATTERN RULES" sections of the -# rsync(1) manual for more information: -# - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Set up remote host and root backup snapshot directory based on config -host="$GHE_HOSTNAME" -backup_dir="$GHE_SNAPSHOT_DIR/repositories" - -# Location of last good backup for rsync --link-dest -backup_current="$GHE_DATA_DIR/current/repositories" - -# Verify rsync is available. -if ! rsync --version 1>/dev/null 2>&1; then - echo "Error: rsync not found." 1>&2 - exit 1 -fi - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$host" - -# Remote sync-in-progress file location. When this file exists, Git GC -# operations are disabled on the GitHub instance. -sync_in_progress_file="$GHE_REMOTE_DATA_USER_DIR/repositories/.sync_in_progress" - -# Make sure root backup dir exists if this is the first run -mkdir -p "$backup_dir" - -# Removes the remote sync-in-progress file on exit, re-enabling GC operations -# on the remote instance. -cleanup() { - ghe-ssh "$host" -- "sudo rm -f '$sync_in_progress_file'" -} -trap 'cleanup' EXIT -trap 'exit $?' INT # ^C always terminate - -# Touch the sync-in-progress file, disabling GC operations, and wait for all -# active GC processes to finish on the remote side. -echo " - set -e - sudo -u git touch '$sync_in_progress_file' - - sanity=0 - while [ \$sanity -lt $GHE_GIT_COOLDOWN_PERIOD ]; do - # note: the bracket synta[x] below is to prevent matches against the - # grep process itself. - if ps axo args | grep -E -e '^git( -.*)? nw-repac[k]( |$)' -e '^git( -.*)? g[c]( |$)' >/dev/null; then - sleep 1 - sanity=\$(( sanity + 1 )) - else - exit 0 - fi - done - exit 7 -" | ghe-ssh "$host" -- /bin/sh || { - res=$? - if [ $res = 7 ]; then - echo "Error: Git GC processes remain after $GHE_GIT_COOLDOWN_PERIOD seconds. Aborting..." 
1>&2 - fi - exit $res -} - -# Transfer repository data from a GitHub instance to the current snapshot -# directory, using a previous snapshot to avoid transferring files that have -# already been transferred. A set of rsync filter rules are provided on stdin -# for each invocation. -rsync_repository_data () { - ghe-rsync -av \ - -e "ghe-ssh -p $(ssh_port_part "$host")" \ - $link_dest "$@" \ - --rsync-path='sudo -u git rsync' \ - --include-from=- --exclude=\* \ - "$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/repositories/" \ - "$backup_dir" 1>&3 -} - -# If we have a previous increment, avoid transferring existing files via rsync's -# --link-dest support. This also decreases physical space usage considerably. -if [ -d "$backup_current" ]; then - link_dest="--link-dest=../../current/repositories" -fi - -# Sync all auxiliary repository data. This includes files and directories like -# HEAD, audit_log, config, description, info/, etc. No refs or object data -# should be transferred here. -echo 1>&3 -echo "* Transferring auxiliary files ..." 1>&3 -rsync_repository_data -z <&3 -echo "* Transferring packed-refs files ..." 1>&3 -rsync_repository_data -z <&3 -echo "* Transferring refs and reflogs ..." 1>&3 -rsync_repository_data -z <&3 -echo "* Transferring objects and packs ..." 1>&3 -rsync_repository_data -H <&3 -echo "* Transferring special data directories ..." 1>&3 -rsync_repository_data <&3 diff --git a/share/github-backup-utils/ghe-backup-repositories-tarball b/share/github-backup-utils/ghe-backup-repositories-tarball deleted file mode 100755 index 09ee15ac4..000000000 --- a/share/github-backup-utils/ghe-backup-repositories-tarball +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-repositories-tarball -#/ Take a tarball snapshot of all Git repository data. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-backup command when the tarball strategy is used. 
-set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Snapshot all Git repository data -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-repositories' > "$GHE_SNAPSHOT_DIR"/repositories.tar diff --git a/share/github-backup-utils/ghe-backup-settings b/share/github-backup-utils/ghe-backup-settings deleted file mode 100755 index 4d618c12a..000000000 --- a/share/github-backup-utils/ghe-backup-settings +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-settings -#/ Restore settings from a snapshot to the given . -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$host" - -# Grab the host -host="$GHE_HOSTNAME" - -# Create the snapshot directory if needed and change into it. -mkdir -p "$GHE_SNAPSHOT_DIR" -cd "$GHE_SNAPSHOT_DIR" - -echo "* Transferring settings data ..." 1>&3 -ghe-ssh "$host" -- 'ghe-export-settings' > settings.json - -echo "* Transferring license data ..." 1>&3 -comm="cat '$GHE_REMOTE_LICENSE_FILE'" -[ "$GHE_VERSION_MAJOR" -ge 2 ] && comm="sudo $comm" -ghe-ssh "$host" -- "$comm" > enterprise.ghl - -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "* Transferring management console password ..." 
1>&3 - manage_password_file="$GHE_REMOTE_DATA_USER_DIR/common/manage-password" - if echo "sudo cat '$manage_password_file' 2>/dev/null || true" | - ghe-ssh "$host" -- /bin/sh > manage-password+ - then - if [ -n "$(cat manage-password+)" ]; then - mv manage-password+ manage-password - fi - else - unlink manage-password+ - fi -fi diff --git a/share/github-backup-utils/ghe-backup-userdata b/share/github-backup-utils/ghe-backup-userdata deleted file mode 100755 index 187982511..000000000 --- a/share/github-backup-utils/ghe-backup-userdata +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-backup-userdata -#/ Take an online, incremental snapshot of a user data directory. This is used -#/ for a number of different simple datastores kept under /data/user on the -#/ remote appliance, including: hookshot, alambic_assets, and pages data. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Verify rsync is available. -if ! rsync --version 1>/dev/null 2>&1; then - echo "Error: rsync not found." 1>&2 - exit 1 -fi - -# Grab the host and /data/user directory name. -host="$GHE_HOSTNAME" -dirname="$1" - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$host" - -# Verify that the user data directory exists. Bail out if not, which may be due -# to an older version of GHE or no data has been added to this directory yet. -ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/$dirname' ]" || exit 0 - -# If we have a previous increment, avoid transferring existing files via rsync's -# --link-dest support. This also decreases physical space usage considerably. -if [ -d "$GHE_DATA_DIR/current/$dirname" ]; then - link_dest="--link-dest=../../current/$dirname" -fi - -# Transfer all data from the user data directory using rsync. 
-ghe-rsync -avz \ - -e "ghe-ssh -p $(ssh_port_part "$host")" \ - --rsync-path='sudo -u git rsync' \ - $link_dest \ - "$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/$dirname/" \ - "$GHE_SNAPSHOT_DIR/$dirname" 1>&3 diff --git a/share/github-backup-utils/ghe-maintenance-mode-disable b/share/github-backup-utils/ghe-maintenance-mode-disable deleted file mode 100755 index 5df69d785..000000000 --- a/share/github-backup-utils/ghe-maintenance-mode-disable +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-maintenance-mode-disable -#/ Disable maintenance mode on GitHub appliance at . This opens up access -#/ to the appliance. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -host="$1" - -# SSH to the appliance and run the remote maintenance mode enable command -echo "Disabling maintenance mode on $host ..." -ghe-ssh "$host" -- "ghe-maintenance -u" diff --git a/share/github-backup-utils/ghe-maintenance-mode-enable b/share/github-backup-utils/ghe-maintenance-mode-enable deleted file mode 100755 index eee86cd3d..000000000 --- a/share/github-backup-utils/ghe-maintenance-mode-enable +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-maintenance-mode-enable [-w] -#/ Enable maintenance mode on GitHub appliance at . This locks down all -#/ access to the appliance to prevent writes to datastores and waits for all -#/ currently running processes to bleed out. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. 
share/github-backup-utils/ghe-backup-config - -# Parse args -wait_procs=true -while true; do - case "$1" in - -n|--no-wait) - wait_procs=false - shift - ;; - -*) - echo "ghe-maintenance-mode-enable: illegal argument: $1" 1>&2 - exit 1 - ;; - *) - break - ;; - esac -done - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -host="$1" - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$host" - -# Never wait on processes to complete under versions >= 2.x. -# TODO need wait procs support under versions >= 2.x. -if [ "$GHE_VERSION_MAJOR" -gt 1 ]; then - wait_procs=false -fi - -# SSH to the appliance and run the remote maintenance mode enable command -echo "Enabling maintenance mode on $host ..." -ghe-ssh "$host" -- "/usr/bin/env GHEBUVER=2 ghe-maintenance -s" - -# Bail out early if --no-wait was given. -$wait_procs || exit 0 - -# Wait for all writing processes to complete -ghe-ssh "$host" -- /bin/sh < -#/ Checks the status of maintenance mode on GitHub appliance at . -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Parse args -while true; do - case "$1" in - -*) - echo "ghe-maintenance-mode-enable: illegal argument: $1" 1>&2 - exit 1 - ;; - *) - break - ;; - esac -done - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -host="$1" - -# Check if the maintenance page is present -ghe-ssh "$host" -- test -e "$GHE_REMOTE_DATA_DIR/github/current/public/system/maintenance.html" diff --git a/share/github-backup-utils/ghe-prune-snapshots b/share/github-backup-utils/ghe-prune-snapshots deleted file mode 100755 index 78a30e2e4..000000000 --- a/share/github-backup-utils/ghe-prune-snapshots +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-prune-snapshots -#/ Keep N latest backup snapshots. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. 
share/github-backup-utils/ghe-backup-config - -# First prune all incomplete / failed snapshot directories -prune_dirs="$(ls -1 "$GHE_DATA_DIR"/[0-9]*/incomplete 2>/dev/null || true)" -prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l) - -if [ $prune_num -gt 0 ]; then - echo Pruning $prune_num "failed snapshot(s) ..." - echo "$prune_dirs" | sed 's@/incomplete$@@' | xargs rm -rf -fi - -# Now prune all expired snapshots. Keep GHE_NUM_SNAPSHOTS around. -snapshot_count=$(ls -1d "$GHE_DATA_DIR"/[0-9]* 2>/dev/null | wc -l) - -if [ "$snapshot_count" -gt "$GHE_NUM_SNAPSHOTS" ]; then - prune_dirs="$(ls -1d "$GHE_DATA_DIR"/[0-9]* | sort -r | awk "NR>$GHE_NUM_SNAPSHOTS")" - prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l) - echo Pruning $prune_num "expired snapshot(s) ..." - echo "$prune_dirs" | xargs rm -rf -fi diff --git a/share/github-backup-utils/ghe-restore-es-rsync b/share/github-backup-utils/ghe-restore-es-rsync deleted file mode 100755 index d1b769339..000000000 --- a/share/github-backup-utils/ghe-restore-es-rsync +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-es-rsync -#/ Restore an rsync snapshot of all Elasticsearch data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the rsync strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -GHE_HOSTNAME="$1" - -# Perform a host-check and establish the remote version in GHE_REMOTE_VERSION. -ghe_remote_version_required "$GHE_HOSTNAME" - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. 
-: ${GHE_RESTORE_SNAPSHOT:=current} - -# The directory holding the snapshot to restore -snapshot_dir="$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT" - -# Transfer all ES data from the latest snapshot to the GitHub instance. -if [ ! -d "$snapshot_dir/elasticsearch" ]; then - echo "Warning: Elasticsearch backup missing. Skipping ..." - exit 0 - -# restoring v11.10.x ES snapshot into a v2.0 appliance -elif [ "$GHE_VERSION_MAJOR" -gt 1 -a -f "$snapshot_dir/elasticsearch/elasticsearch.yml" ]; then - ghe-ssh "$GHE_HOSTNAME" -- "sudo mkdir -p '$GHE_REMOTE_DATA_USER_DIR/elasticsearch-legacy'" 1>&3 - ghe-ssh "$GHE_HOSTNAME" -- "sudo chown elasticsearch:elasticsearch '$GHE_REMOTE_DATA_USER_DIR/elasticsearch-legacy'" 1>&3 - - ghe-rsync -avz --delete \ - -e "ghe-ssh -p $(ssh_port_part "$GHE_HOSTNAME")" \ - --rsync-path="sudo -u elasticsearch rsync" \ - "$snapshot_dir/elasticsearch/" \ - "$(ssh_host_part "$GHE_HOSTNAME"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch-legacy" 1>&3 - -# restoring v2.0 ES snapshot into a v2.0 appliance -elif [ "$GHE_VERSION_MAJOR" -gt 1 ]; then - ghe-ssh "$GHE_HOSTNAME" -- "sudo mkdir -p '$GHE_REMOTE_DATA_USER_DIR/elasticsearch-restore'" 1>&3 - ghe-ssh "$GHE_HOSTNAME" -- "sudo chown elasticsearch:elasticsearch '$GHE_REMOTE_DATA_USER_DIR/elasticsearch-restore'" 1>&3 - - ghe-rsync -avz --delete \ - -e "ghe-ssh -p $(ssh_port_part "$GHE_HOSTNAME")" \ - --rsync-path="sudo -u elasticsearch rsync" \ - "$snapshot_dir/elasticsearch/" \ - "$(ssh_host_part "$GHE_HOSTNAME"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch-restore" 1>&3 - -# restoring v11.10.x ES snapshot into a v11.10.x appliance -else - # Use GNU tar on BSDs. - TAR=tar - if ! 
tar --version | grep GNU >/dev/null; then - TAR=gtar - fi - cd "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT" - $TAR -cf - --owner=root --group=root elasticsearch | - ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-es-indices' 1>&3 -fi diff --git a/share/github-backup-utils/ghe-restore-es-tarball b/share/github-backup-utils/ghe-restore-es-tarball deleted file mode 100755 index df5e0e91a..000000000 --- a/share/github-backup-utils/ghe-restore-es-tarball +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-es-tarball -#/ Restore a tarball snapshot of all ES data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the tarball strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Grab the host arg -GHE_HOSTNAME="$1" - -# Show usage and bail with no -[ -z "$GHE_HOSTNAME" ] && print_usage - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Restore ElasticSearch indices from tarball snapshot. -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-es-indices' \ - < "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/elasticsearch.tar" 1>&3 diff --git a/share/github-backup-utils/ghe-restore-pages-rsync b/share/github-backup-utils/ghe-restore-pages-rsync deleted file mode 100755 index ecda7113b..000000000 --- a/share/github-backup-utils/ghe-restore-pages-rsync +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-pages-rsync -#/ Restore an rsync snapshot of all Pages data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the rsync strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. 
share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Restore all pages data via rsync -ghe-restore-userdata pages "$1" diff --git a/share/github-backup-utils/ghe-restore-pages-tarball b/share/github-backup-utils/ghe-restore-pages-tarball deleted file mode 100755 index 9a4daf5e1..000000000 --- a/share/github-backup-utils/ghe-restore-pages-tarball +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-pages-tarball -#/ Restore a tarball snapshot of all Pages data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the tarball strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Grab the host arg -GHE_HOSTNAME="$1" - -# Show usage and bail with no -[ -z "$GHE_HOSTNAME" ] && print_usage - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Restore Pages data from tarball snapshot. -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-pages' \ - < "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/pages.tar" 1>&3 diff --git a/share/github-backup-utils/ghe-restore-repositories-rsync b/share/github-backup-utils/ghe-restore-repositories-rsync deleted file mode 100755 index d0ae56aeb..000000000 --- a/share/github-backup-utils/ghe-restore-repositories-rsync +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-repositories-rsync -#/ Restore an rsync snapshot of all Git repository data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the rsync strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. 
share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -GHE_HOSTNAME="$1" - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$GHE_HOSTNAME" - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Transfer all git repository data from the latest snapshot to the GitHub -# instance in a single rsync invocation. -ghe-rsync -avH --delete \ - -e "ghe-ssh -p $(ssh_port_part "$GHE_HOSTNAME")" \ - --rsync-path="sudo -u git rsync" \ - "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/" \ - "$(ssh_host_part "$GHE_HOSTNAME"):$GHE_REMOTE_DATA_USER_DIR/repositories" 1>&3 diff --git a/share/github-backup-utils/ghe-restore-repositories-tarball b/share/github-backup-utils/ghe-restore-repositories-tarball deleted file mode 100755 index 3b41830a6..000000000 --- a/share/github-backup-utils/ghe-restore-repositories-tarball +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-repositories-tarball -#/ Restore a tarball snapshot of all Git repository data to a GitHub instance. -#/ -#/ Note: This script typically isn't called directly. It's invoked by the -#/ ghe-restore command when the tarball strategy is used. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Grab the host arg -GHE_HOSTNAME="$1" - -# Show usage and bail with no -[ -z "$GHE_HOSTNAME" ] && print_usage - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Restore Git repository data from tarball snapshot. 
-ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-repositories' \ - < "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories.tar" 1>&3 diff --git a/share/github-backup-utils/ghe-restore-settings b/share/github-backup-utils/ghe-restore-settings deleted file mode 100755 index 0c5aaa054..000000000 --- a/share/github-backup-utils/ghe-restore-settings +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-settings -#/ Restore settings from a snapshot to the given . -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ -z "$*" ] && print_usage - -# Grab host arg -GHE_HOSTNAME="$1" - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$GHE_HOSTNAME" - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Path to snapshot dir we're restoring from -GHE_RESTORE_SNAPSHOT_PATH="$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT" - -echo "Restoring settings ..." -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - # work around issue importing settings with bad storage mode values - ( cat "$GHE_RESTORE_SNAPSHOT_PATH/settings.json" && echo ) | - sed 's/"storage_mode": "device"/"storage_mode": "rootfs"/' | - ghe-ssh "$GHE_HOSTNAME" -- '/usr/bin/env GHEBUVER=2 ghe-import-settings' 1>&3 -else - ( cat "$GHE_RESTORE_SNAPSHOT_PATH/settings.json" && echo ) | - ghe-ssh "$GHE_HOSTNAME" -- '/usr/bin/env GHEBUVER=2 ghe-import-settings' 1>&3 -fi - -# Bail out if we're restoring against a pre-2.x appliance. Everything below is -# supported by v2.0 appliances only. -if [ "$GHE_VERSION_MAJOR" -lt 2 ]; then - exit 0 -fi - -echo "Restoring license ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-license' < "$GHE_RESTORE_SNAPSHOT_PATH/enterprise.ghl" 1>&3 - -# Restore management console password hash if present. 
-if [ -f "$GHE_RESTORE_SNAPSHOT_PATH/manage-password" ]; then - echo "Restoring management console password ..." - cat "$GHE_RESTORE_SNAPSHOT_PATH/manage-password" | - ghe-ssh "$GHE_HOSTNAME" -- "ghe-import-passwords" -fi diff --git a/share/github-backup-utils/ghe-restore-snapshot-path b/share/github-backup-utils/ghe-restore-snapshot-path deleted file mode 100755 index a5a0db8db..000000000 --- a/share/github-backup-utils/ghe-restore-snapshot-path +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-snapshot-path [snapshot] -#/ -#/ Print the path to the given snapshot. Defaults to current if no argument given. -#/ Exits with non-0 if the snapshot doesn't exist in GHE_DATA_DIR - -set -e - -# Bring in the backup configuration. -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - - -if [ -n "$1" ]; then - GHE_RESTORE_SNAPSHOT="$(basename "$1")" -else - GHE_RESTORE_SNAPSHOT="current" -fi - -# Resolve the snapshot id if we're restoring from current. This is mostly -# just for logging. -if [ "$GHE_RESTORE_SNAPSHOT" = "current" ]; then - GHE_RESTORE_SNAPSHOT=$(readlink "$GHE_DATA_DIR"/current || true) -fi - -# Bail out if we don't have a good snapshot. -if [ ! -d "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT" ]; then - echo "Error: Snapshot '$GHE_RESTORE_SNAPSHOT' doesn't exist." 1>&2 - exit 1 -fi - -echo "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT" diff --git a/share/github-backup-utils/ghe-restore-userdata b/share/github-backup-utils/ghe-restore-userdata deleted file mode 100755 index 3ee5942fc..000000000 --- a/share/github-backup-utils/ghe-restore-userdata +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-restore-userdata -#/ Restore a special user data directory via rsync. This is used -#/ for a number of different simple datastores kept under /data/user on the -#/ remote appliance, including: hookshot, alambic_assets, and pages data. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. 
share/github-backup-utils/ghe-backup-config - -# Show usage and bail with no arguments -[ $# -lt 2 ] && print_usage - -# Grab userdata directory name and host args -dirname="$1" -GHE_HOSTNAME="$2" - -# Verify rsync is available. -if ! rsync --version 1>/dev/null 2>&1; then - echo "Error: rsync not found." 1>&2 - exit 1 -fi - -# Perform a host-check and establish GHE_REMOTE_XXX variables. -ghe_remote_version_required "$GHE_HOSTNAME" - -# The snapshot to restore should be set by the ghe-restore command but this lets -# us run this script directly. -: ${GHE_RESTORE_SNAPSHOT:=current} - -# Transfer data from the latest snapshot to the GitHub instance in a single -# rsync invocation. -if [ -d "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/$dirname" ]; then - ghe-rsync -avz --delete \ - -e "ghe-ssh -p $(ssh_port_part "$GHE_HOSTNAME")" \ - --rsync-path="sudo -u git rsync" \ - "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/$dirname/" \ - "$(ssh_host_part "$GHE_HOSTNAME"):$GHE_REMOTE_DATA_USER_DIR/$dirname" 1>&3 -fi diff --git a/share/github-backup-utils/ghe-rsync b/share/github-backup-utils/ghe-rsync deleted file mode 100755 index 7a234dccc..000000000 --- a/share/github-backup-utils/ghe-rsync +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -#/ Usage: ghe-rsync -#/ Run rsync with silenced vanished file warnings (non-critical). -# -# Based on the rsync-no-vanished support script included with rsync: -# https://bugzilla.samba.org/show_bug.cgi?id=10356 - -set -o pipefail - -# Filter vanished file warnings from both stdout (rsync versions < 3.x) and -# stderr (rsync versions >= 3.x). The complex redirections are necessary to -# filter stderr while also keeping stdout and stderr separated. -IGNOREOUT='^(file has vanished: |rsync warning: some files vanished before they could be transferred)' -(rsync "${@}" 3>&1 1>&2 2>&3 3>&- | - (egrep -v "$IGNOREOUT" || true)) 3>&1 1>&2 2>&3 3>&- | - (egrep -v "$IGNOREOUT" || true) -res=$? - -# rsync exits with 24 when vanished files are detected. 
-if [ $res = 24 ]; then - res=0 -fi - -exit $res diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup deleted file mode 100755 index ef9838f0f..000000000 --- a/share/github-backup-utils/ghe-s3-backup +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-s3-backup-all -#/ Take snapshots of all GitHub Enterprise data, including the mysql database -#/ and backup to S3. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -if [ -e $HOME/.s3cfg ]; then - echo "Using existing ~/.s3cfg ..." -else - # Configure s3cmd if there isn't a config file already. - s3cmd --configure -fi - -# Run the backup script. -ghe-backup - -# Create the bucket if it doesn't exist. -s3cmd mb s3://$GHE_S3_BUCKET - -# Upload to S3. -cd "$GHE_DATA_DIR"/current -s3cmd --preserve put * s3://$GHE_S3_BUCKET diff --git a/share/github-backup-utils/ghe-s3-restore b/share/github-backup-utils/ghe-s3-restore deleted file mode 100755 index 2554da8da..000000000 --- a/share/github-backup-utils/ghe-s3-restore +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-s3-restore -#/ Restores backup files from S3. -set -e - -# Bring in the backup configuration. -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -if [ -e $HOME/.s3cfg ]; then - echo "Using existing ~/.s3cfg ..." -else - # Configure s3cmd if there isn't a config file already. - s3cmd --configure -fi - -# Restore from S3 into a new snapshot directory. -mkdir -p "$GHE_SNAPSHOT_DIR" -s3cmd --preserve get s3://$GHE_S3_BUCKET/* "$GHE_SNAPSHOT_DIR" - -# Run the restore script. -ghe-restore -s "$GHE_SNAPSHOT_DIR" diff --git a/share/github-backup-utils/ghe-ssh b/share/github-backup-utils/ghe-ssh deleted file mode 100755 index 19e6e4029..000000000 --- a/share/github-backup-utils/ghe-ssh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-ssh [