From ecb5d0b4d5e1ba2093f5d118090369d6546b9fbe Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Thu, 6 Aug 2015 18:55:22 +0100 Subject: [PATCH 0001/2652] Only use --link-dest when backing up populated directories --- share/github-backup-utils/ghe-backup-userdata | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/github-backup-utils/ghe-backup-userdata b/share/github-backup-utils/ghe-backup-userdata index b2466747c..ba4efd491 100755 --- a/share/github-backup-utils/ghe-backup-userdata +++ b/share/github-backup-utils/ghe-backup-userdata @@ -26,9 +26,9 @@ ghe_remote_version_required "$host" # to an older version of GHE or no data has been added to this directory yet. ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/$dirname' ]" || exit 0 -# If we have a previous increment, avoid transferring existing files via rsync's +# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's # --link-dest support. This also decreases physical space usage considerably. -if [ -d "$GHE_DATA_DIR/current/$dirname" ]; then +if [ -d "$GHE_DATA_DIR/current/$dirname" ] && [ "$(ls -A $GHE_DATA_DIR/current/$dirname)" ]; then link_dest="--link-dest=../../current/$dirname" fi From ee189ce50c23acf1a3c7315a0d83635e2703e2de Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Thu, 6 Aug 2015 18:55:22 +0100 Subject: [PATCH 0002/2652] Only use --link-dest when backing up populated directories --- share/github-backup-utils/ghe-backup-userdata | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/github-backup-utils/ghe-backup-userdata b/share/github-backup-utils/ghe-backup-userdata index 187982511..2c39a9827 100755 --- a/share/github-backup-utils/ghe-backup-userdata +++ b/share/github-backup-utils/ghe-backup-userdata @@ -26,9 +26,9 @@ ghe_remote_version_required "$host" # to an older version of GHE or no data has been added to this directory yet. 
ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/$dirname' ]" || exit 0 -# If we have a previous increment, avoid transferring existing files via rsync's +# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's # --link-dest support. This also decreases physical space usage considerably. -if [ -d "$GHE_DATA_DIR/current/$dirname" ]; then +if [ -d "$GHE_DATA_DIR/current/$dirname" ] && [ "$(ls -A $GHE_DATA_DIR/current/$dirname)" ]; then link_dest="--link-dest=../../current/$dirname" fi From 4caccda2e349e23d97ba7c016af9cc7c010c209d Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Fri, 7 Aug 2015 15:13:36 +0100 Subject: [PATCH 0003/2652] Added a test for --link-dest warning --- test/test-ghe-backup.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test-ghe-backup.sh b/test/test-ghe-backup.sh index 2077a1e44..bc961a778 100755 --- a/test/test-ghe-backup.sh +++ b/test/test-ghe-backup.sh @@ -251,3 +251,16 @@ begin_test "ghe-backup without manage-password file" [ ! -f "$GHE_DATA_DIR/current/manage-password" ] ) end_test + +begin_test "ghe-backup emtpy hookshot directory" +( + set -e + + rm -rf $GHE_REMOTE_DATA_USER_DIR/hookshot/repository-* + rm -rf $GHE_DATA_DIR/current/hookshot/repository-* + ghe-backup + + # Check that the "--link-dest arg does not exist" message hasn't occurred. + [ ! "$(grep "[l]ink-dest arg does not exist" $TRASHDIR/out)" ] +) +end_test From 94ebad85a5783babb35b0051df7b351500f92b39 Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 08:56:13 -0600 Subject: [PATCH 0004/2652] how it differs from HA instance A customer asked why they needed to use the backup-utils utility while we were onsite last week. Their assumption was that because all data is replicated to the High Availability instance the duplication of this data in the form of the backup-utils utility was unnecessary. I wanted to define the separate use cases here so we could point customers to it. 
--- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 2e8470f33..940e94fc5 100644 --- a/README.md +++ b/README.md @@ -231,6 +231,13 @@ most recent successful snapshot: Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. +### How does this backup data differ from the HA (High Availability) backup data? +These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. In that sense the HA backup has all the data and configuration of the primary instance. + +The backup-utils data is meant to be used to restore an istance or set up a new instance. This backup data is more lightweight than having a replica instance. It does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. + +As an example, if a company wanted to create a QA server to test upgrading GitHub Enterprise they could use the backup-utils utility to populate the QA server with data and settings from a snapshot and be up and running in a few minutes. The process is more complex to setup an HA instance and do the conversion after a data set has been copied over. The latter use case is not recommended. 
+ ### Support If you find a bug or would like to request a feature in backup-utils, please From 2592c8e4eb3e93f3cc7ad4c1d1886fca491a34cb Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 09:02:09 -0600 Subject: [PATCH 0005/2652] clarified HA data is not usable without backup-utils Made the clarification that the backup-utils data is made to populate other instances with data. This is not the purpose of the HA data and in order to transfer that data to another instance you would need to use backup-utils. Other ways are possible but not recommended. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 940e94fc5..cfdcc55b3 100644 --- a/README.md +++ b/README.md @@ -231,12 +231,12 @@ most recent successful snapshot: Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. -### How does this backup data differ from the HA (High Availability) backup data? -These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. In that sense the HA backup has all the data and configuration of the primary instance. +### How does this data differ from the HA (High Availability) backup data? +These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. 
In that sense the HA backup has all the data and configuration of the primary instance. This data on the HA instance is not usable by other instances without being extracted by the backup-utils. -The backup-utils data is meant to be used to restore an istance or set up a new instance. This backup data is more lightweight than having a replica instance. It does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. +The backup-utils data is meant to be used to restore an instance or set up a new instance. This backup data does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. -As an example, if a company wanted to create a QA server to test upgrading GitHub Enterprise they could use the backup-utils utility to populate the QA server with data and settings from a snapshot and be up and running in a few minutes. The process is more complex to setup an HA instance and do the conversion after a data set has been copied over. The latter use case is not recommended. +As an example, if a company wanted to create a QA server to test upgrading GitHub Enterprise they could use the backup-utils utility to populate the QA server with data and settings from a snapshot and be up and running in a very short amount of time. The process is more complex to setup an HA instance and do the conversion after a data set has been copied over. The latter use case is not recommended. 
### Support From 2b996928b97aac619c037fb2c922659537c775ba Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 09:03:44 -0600 Subject: [PATCH 0006/2652] testing H5 headers to make it more readable --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index cfdcc55b3..27b12bf5b 100644 --- a/README.md +++ b/README.md @@ -232,8 +232,10 @@ Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. ### How does this data differ from the HA (High Availability) backup data? +##### The purpose of High Avaibility backup These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. In that sense the HA backup has all the data and configuration of the primary instance. This data on the HA instance is not usable by other instances without being extracted by the backup-utils. +##### The purpose of the backup-utils The backup-utils data is meant to be used to restore an instance or set up a new instance. This backup data does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. As an example, if a company wanted to create a QA server to test upgrading GitHub Enterprise they could use the backup-utils utility to populate the QA server with data and settings from a snapshot and be up and running in a very short amount of time. 
The process is more complex to setup an HA instance and do the conversion after a data set has been copied over. The latter use case is not recommended. From eb465c5ef819b22bb505179723b8b38a12d86446 Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 09:04:12 -0600 Subject: [PATCH 0007/2652] fixed typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27b12bf5b..0055bd26b 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. ### How does this data differ from the HA (High Availability) backup data? -##### The purpose of High Avaibility backup +##### The purpose of the High Avaibility backup These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. In that sense the HA backup has all the data and configuration of the primary instance. This data on the HA instance is not usable by other instances without being extracted by the backup-utils. ##### The purpose of the backup-utils From 7c47b7b74dac82e112ac155f216a9a564b5e727e Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 09:12:18 -0600 Subject: [PATCH 0008/2652] changed HA backup to HA replica @nathos had a great recommendation to not use the term HA backup as it confuses things and instead use HA replica. I think he was spot on and made the changes. 
--- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0055bd26b..db5fc0154 100644 --- a/README.md +++ b/README.md @@ -232,8 +232,9 @@ Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. ### How does this data differ from the HA (High Availability) backup data? -##### The purpose of the High Avaibility backup -These two backups serve different purposes. The [HA backup](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a backup instance that is ready in case the primary instance becomes unavailable. The HA backup pulls data from the primary instance to ensure it is up to date in case it needs to be promoted to the primary instance. In that sense the HA backup has all the data and configuration of the primary instance. This data on the HA instance is not usable by other instances without being extracted by the backup-utils. + +##### The purpose of the High Avaibility replica +These two tools serve different purposes. The [HA replica](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a replica instance that is ready in case the primary instance becomes unavailable. The HA replica pulls data from the primary instance to ensure it's data is current in the case it needs to be promoted to the primary instance. In that sense the HA replica has all the data and configuration of the primary instance. However, this data on the HA instance is not usable by other instances without being extracted by the backup-utils. ##### The purpose of the backup-utils The backup-utils data is meant to be used to restore an instance or set up a new instance. This backup data does not require the overhead of having a VM that can support running GitHub Enterprise. 
The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. From 5935e78a289750206d8c1ed66dbeceb9d1aae983 Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Fri, 28 Aug 2015 09:38:06 -0600 Subject: [PATCH 0009/2652] added note saying both tools should be used @nathos recommended having a note saying that we *do* recommend using both tools. it is in the first sentence now of this new section. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index db5fc0154..60f1d7f33 100644 --- a/README.md +++ b/README.md @@ -234,7 +234,7 @@ the disk location where snapshots are written. ### How does this data differ from the HA (High Availability) backup data? ##### The purpose of the High Avaibility replica -These two tools serve different purposes. The [HA replica](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a replica instance that is ready in case the primary instance becomes unavailable. The HA replica pulls data from the primary instance to ensure it's data is current in the case it needs to be promoted to the primary instance. In that sense the HA replica has all the data and configuration of the primary instance. However, this data on the HA instance is not usable by other instances without being extracted by the backup-utils. +These two tools serve different purposes and are best used separately as part of the recommended GitHub Enterprise setup. The [HA replica](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a replica instance that is ready in case the primary instance becomes unavailable. 
The HA replica pulls data from the primary instance to ensure it's data is current in the case it needs to be promoted to the primary instance. In that sense the HA replica has all the data and configuration of the primary instance. However, this data on the HA instance is not usable by other instances without being extracted by the backup-utils. ##### The purpose of the backup-utils The backup-utils data is meant to be used to restore an instance or set up a new instance. This backup data does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. From bab312cb53f2f79c4020102b4f0d9e8faddf11eb Mon Sep 17 00:00:00 2001 From: Nathan Henderson Date: Fri, 28 Aug 2015 15:51:37 -0400 Subject: [PATCH 0010/2652] Linked to HA anchor; edit HA description language - Link HA section in the TOC - borrowed some descriptive language directly from the help docs - Made "backup utilities" naming consistent with the rest of the README - Remove version from help doc link to HA Replica article - Removed `backup-utils` example --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 60f1d7f33..c92099220 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** +- **[How do backup snapshots differ from a HA (High Availability) replica?](#how-do-backup-snapshots-differ-from-a-ha-high-availability-replica) - **[Support](#support)** ### Features @@ -231,15 +232,15 @@ most recent successful snapshot: Note: the 
`GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. -### How does this data differ from the HA (High Availability) backup data? +### How do backup snapshots differ from the HA (High Availability) replica? +The backup utilities and [HA replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) serve different yet complementary purposes as part of a recommended GitHub Enterprise deployment. ##### The purpose of the High Avaibility replica -These two tools serve different purposes and are best used separately as part of the recommended GitHub Enterprise setup. The [HA replica](https://help.github.com/enterprise/2.0/admin-guide/high-availability-cluster-configuration/) has the primary purpose of being another a replica instance that is ready in case the primary instance becomes unavailable. The HA replica pulls data from the primary instance to ensure it's data is current in the case it needs to be promoted to the primary instance. In that sense the HA replica has all the data and configuration of the primary instance. However, this data on the HA instance is not usable by other instances without being extracted by the backup-utils. +The HA replica is a fully redundant secondary GitHub Enterprise instance, kept in sync with the primary instance via replication of all major datastores. This active/passive cluster configuration is designed to minimize service disruption in the event of hardware failure or major network outage affecting the primary instance. Because some forms of data corruption or loss may be replicated immediately from primary to replica, it is not a replacement for the backup utilities as part of your disaster recovery plan. -##### The purpose of the backup-utils -The backup-utils data is meant to be used to restore an instance or set up a new instance. 
This backup data does not require the overhead of having a VM that can support running GitHub Enterprise. The backup-utils utility only copies over repository data, along with full snapshots of all other pertinent data stores. and formats this data so it can be reused in the restoration or setup of an instance. +##### The purpose of the backup utilities +Backup utility snapshots can be used to restore a GitHub Enterprise instance to a prior state, or set up a new instance. The backup host does not require the overhead of running another GitHub Enterprise instance. The backup utilities copy repository data, along with full snapshots of all other pertinent data stores. Backup snapshots are date-stamped, making rollback to previous states possible. -As an example, if a company wanted to create a QA server to test upgrading GitHub Enterprise they could use the backup-utils utility to populate the QA server with data and settings from a snapshot and be up and running in a very short amount of time. The process is more complex to setup an HA instance and do the conversion after a data set has been copied over. The latter use case is not recommended. 
### Support From 99a0ab7325c8a6de0555d06df4d008d81cc1c363 Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Tue, 1 Sep 2015 10:19:39 +0100 Subject: [PATCH 0011/2652] Correct typo --- test/test-ghe-backup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-ghe-backup.sh b/test/test-ghe-backup.sh index bc961a778..b36e036b2 100755 --- a/test/test-ghe-backup.sh +++ b/test/test-ghe-backup.sh @@ -252,7 +252,7 @@ begin_test "ghe-backup without manage-password file" ) end_test -begin_test "ghe-backup emtpy hookshot directory" +begin_test "ghe-backup empty hookshot directory" ( set -e From 7fbe268f7775f21c9b2df375da5b9f197d6b7bea Mon Sep 17 00:00:00 2001 From: Nathan Henderson Date: Tue, 1 Sep 2015 10:54:01 -0400 Subject: [PATCH 0012/2652] Update current datetimestamp in directory structure example The "current" symlink in the directory structure wasn't pointing to the latest snapshot. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2e8470f33..f58045529 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ most recent successful snapshot: |- ssh-host-keys.tar |- strategy |- version - |- current -> 20140727T010000 + |- current -> 20140728T010000 Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. 
From 3720f16da841542e3734ced7702796b14b6f4589 Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Wed, 2 Sep 2015 13:36:34 -0600 Subject: [PATCH 0013/2652] updated title and first paragraph --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c92099220..f1612aa8f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** -- **[How do backup snapshots differ from a HA (High Availability) replica?](#how-do-backup-snapshots-differ-from-a-ha-high-availability-replica) +- **[How does backup utilities differ from an HA (High Availability) replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica) - **[Support](#support)** ### Features @@ -232,8 +232,8 @@ most recent successful snapshot: Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. -### How do backup snapshots differ from the HA (High Availability) replica? -The backup utilities and [HA replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) serve different yet complementary purposes as part of a recommended GitHub Enterprise deployment. +### How does backup utilities differ from an HA (High Availability) replica? +It is recommended that both backup utilities and an [HA replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) are used as part of a GitHub Enterprise deployment but they serve different roles. 
##### The purpose of the High Avaibility replica The HA replica is a fully redundant secondary GitHub Enterprise instance, kept in sync with the primary instance via replication of all major datastores. This active/passive cluster configuration is designed to minimize service disruption in the event of hardware failure or major network outage affecting the primary instance. Because some forms of data corruption or loss may be replicated immediately from primary to replica, it is not a replacement for the backup utilities as part of your disaster recovery plan. From b59e3cfddeb2d2b22aab4d2348f41676010f76b0 Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Wed, 2 Sep 2015 13:39:14 -0600 Subject: [PATCH 0014/2652] Made backup utilities paragraph more succinct also fixed a typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f1612aa8f..88f9ce160 100644 --- a/README.md +++ b/README.md @@ -235,11 +235,11 @@ the disk location where snapshots are written. ### How does backup utilities differ from an HA (High Availability) replica? It is recommended that both backup utilities and an [HA replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) are used as part of a GitHub Enterprise deployment but they serve different roles. -##### The purpose of the High Avaibility replica +##### The purpose of the High Availability replica The HA replica is a fully redundant secondary GitHub Enterprise instance, kept in sync with the primary instance via replication of all major datastores. This active/passive cluster configuration is designed to minimize service disruption in the event of hardware failure or major network outage affecting the primary instance. Because some forms of data corruption or loss may be replicated immediately from primary to replica, it is not a replacement for the backup utilities as part of your disaster recovery plan. 
##### The purpose of the backup utilities -Backup utility snapshots can be used to restore a GitHub Enterprise instance to a prior state, or set up a new instance. The backup host does not require the overhead of running another GitHub Enterprise instance. The backup utilities copy repository data, along with full snapshots of all other pertinent data stores. Backup snapshots are date-stamped, making rollback to previous states possible. +Backup utilities takes date-stamped snapshots of all major datastores. These snapshots are used to restore an instance to a prior state or set up a new instance without having another always-on GitHub Enterprise instance (like the HA replica). ### Support From 2eed112eaace7074e55e1e4b302d0c72287d75be Mon Sep 17 00:00:00 2001 From: Matt Duff Date: Thu, 3 Sep 2015 11:36:17 -0600 Subject: [PATCH 0015/2652] changed HA to High Availability. Referenced disaster recovery --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 88f9ce160..c1182a63d 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** -- **[How does backup utilities differ from an HA (High Availability) replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica) +- **[How does backup utilities differ from a High Availability replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica) - **[Support](#support)** ### Features @@ -232,14 +232,14 @@ most recent successful snapshot: Note: the `GHE_DATA_DIR` variable set in `backup.config` can be used to change the disk location where snapshots are written. -### How does backup utilities differ from an HA (High Availability) replica? 
-It is recommended that both backup utilities and an [HA replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) are used as part of a GitHub Enterprise deployment but they serve different roles. +### How does backup utilities differ from a High Availability replica? +It is recommended that both backup utilities and an [High Availability replica](https://help.github.com/enterprise/admin/guides/installation/high-availability-cluster-configuration/) are used as part of a GitHub Enterprise deployment but they serve different roles. ##### The purpose of the High Availability replica -The HA replica is a fully redundant secondary GitHub Enterprise instance, kept in sync with the primary instance via replication of all major datastores. This active/passive cluster configuration is designed to minimize service disruption in the event of hardware failure or major network outage affecting the primary instance. Because some forms of data corruption or loss may be replicated immediately from primary to replica, it is not a replacement for the backup utilities as part of your disaster recovery plan. +The High Availability replica is a fully redundant secondary GitHub Enterprise instance, kept in sync with the primary instance via replication of all major datastores. This active/passive cluster configuration is designed to minimize service disruption in the event of hardware failure or major network outage affecting the primary instance. Because some forms of data corruption or loss may be replicated immediately from primary to replica, it is not a replacement for the backup utilities as part of your disaster recovery plan. ##### The purpose of the backup utilities -Backup utilities takes date-stamped snapshots of all major datastores. These snapshots are used to restore an instance to a prior state or set up a new instance without having another always-on GitHub Enterprise instance (like the HA replica). 
+Backup utilities are a disaster recovery tool. This tool takes date-stamped snapshots of all major datastores. These snapshots are used to restore an instance to a prior state or set up a new instance without having another always-on GitHub Enterprise instance (like the High Availability replica). ### Support From b2dff3f34f31b92c831d3b43f7935555fe49e2cb Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 7 Sep 2015 10:53:31 +0900 Subject: [PATCH 0016/2652] Fix rendering of "How does backup utilities..." link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 72b406baf..272a609af 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** -- **[How does backup utilities differ from a High Availability replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica) +- **[How does backup utilities differ from a High Availability replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica)** - **[Support](#support)** ### Features From a0b6c976d72b4366b34a24954ab039b1d4b9d694 Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 7 Sep 2015 10:56:11 +0900 Subject: [PATCH 0017/2652] Fix the link too --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 272a609af..a73d4d23c 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** -- **[How does backup 
utilities differ from a High Availability replica?](#how-does-backup-utilities-differ-from-an-ha-high-availability-replica)** +- **[How does backup utilities differ from a High Availability replica?](#how-does-backup-utilities-differ-from-a-high-availability-replica)** - **[Support](#support)** ### Features From bc97d5aa4b8b2a0bf942038bf6df71f52755271c Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Mon, 28 Sep 2015 15:56:57 +0200 Subject: [PATCH 0018/2652] Move in progress detection to separate file with pid This changes the logic of detecting if a backup is in progress by writing out the snapshot and PID of the process making the backup. This improves the situation where a previous backup was killed in a non-clean way. Clean failures in the sense of a proper exit (also non 0 exit statuses) or using ctrl-c was already handled by traps. This just removes the in-progress file if the process is no longer running after the backup. It then cleans up the in-progress file and lets the rest of the system clean up the failed backup. Fixes #99 --- bin/ghe-backup | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/bin/ghe-backup b/bin/ghe-backup index f1323518d..27211a33d 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -28,37 +28,56 @@ touch "incomplete" GHE_MAINTENANCE_MODE_ENABLED=false # To prevent multiple backup runs happening at the same time, we create a -# in-progress symlink pointing to the snapshot directory. This will fail if -# another backup is already in progress, giving us a form of locking. +# in-progress file with the timestamp and pid of the backup process, +# giving us a form of locking. # -# Set up a trap to remove the in-progress symlink if we exit for any reason but -# verify that we own the in-progress symlink before doing so. +# Set up a trap to remove the in-progress file if we exit for any reason but +# verify that we are the same process before doing so. 
# # The cleanup trap also handles disabling maintenance mode on the appliance if # it was automatically enabled. cleanup () { - if [ "$(readlink ../in-progress)" = "$GHE_SNAPSHOT_TIMESTAMP" ]; then - unlink ../in-progress + if [ -f ../in-progress ]; then + progress=$(cat ../in-progress) + snapshot=$(echo "$progress" | cut -d ' ' -f 1) + pid=$(echo "$progress" | cut -d ' ' -f 2) + if [ "$snapshot" = "$GHE_SNAPSHOT_TIMESTAMP" -a "$$" = $pid ]; then + unlink ../in-progress fi + fi - if $GHE_MAINTENANCE_MODE_ENABLED; then - ghe-maintenance-mode-disable "$GHE_HOSTNAME" - fi + if $GHE_MAINTENANCE_MODE_ENABLED; then + ghe-maintenance-mode-disable "$GHE_HOSTNAME" + fi } # Setup exit traps trap 'cleanup' EXIT trap 'exit $?' INT # ^C always terminate -# Mark the snapshot as in-progress by creating the symlink. If this fails, it -# means another ghe-backup run is already in progress and we should exit. -# NOTE: The -n argument to ln is non-POSIX but widely supported. -if ! ln -sn "$GHE_SNAPSHOT_TIMESTAMP" ../in-progress 2>/dev/null; then - snapshot="$(readlink ../in-progress)" - echo "Error: backup of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 1>&2 +if [ -h ../in-progress ]; then + echo "Error: in progress backup from previous version detected." 1>&2 + echo "If there is no backup in progress anymore, please remove" 1>&2 + echo "the $GHE_DATA_DIR/in-progress symlink. This is only needed once." 1>&2 + exit 1 +fi + +if [ -f ../in-progress ]; then + progress=$(cat ../in-progress) + snapshot=$(echo "$progress" | cut -d ' ' -f 1) + pid=$(echo "$progress" | cut -d ' ' -f 2) + if ! ps -p $pid -o command= | grep ghe-backup; then + # We can safely remove in-progress, ghe-prune-snapshots + # will clean up the failed backup. + unlink ../in-progress + else + echo "Error: backup process $pid of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 
1>&2 exit 1 + fi fi +echo "$GHE_SNAPSHOT_TIMESTAMP $$" > ../in-progress + echo "Starting backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP" # Perform a host connection check and establish the remote appliance version. From 4d6e261b7c6cd57058503c35ab9fccdd4bcb6135 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Mon, 28 Sep 2015 16:05:39 +0200 Subject: [PATCH 0019/2652] Add test for cleanup process --- test/test-ghe-backup.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test-ghe-backup.sh b/test/test-ghe-backup.sh index b36e036b2..1643481ad 100755 --- a/test/test-ghe-backup.sh +++ b/test/test-ghe-backup.sh @@ -230,7 +230,7 @@ begin_test "ghe-backup tarball strategy" ) end_test -begin_test "ghe-backup fails fast when other run in progress" +begin_test "ghe-backup fails fast when old style run in progress" ( set -e @@ -241,6 +241,18 @@ begin_test "ghe-backup fails fast when other run in progress" ) end_test +begin_test "ghe-backup cleans up stale in-progress file" +( + set -e + + echo "20150928T153353 99999" > "$GHE_DATA_DIR/in-progress" + ghe-backup + + [ ! -f "$GHE_DATA_DIR/in-progress" ] +) +end_test + + begin_test "ghe-backup without manage-password file" ( set -e From d301fd32f35b76b04fd5c8475a0351260ef24900 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Thu, 1 Oct 2015 10:43:36 +0200 Subject: [PATCH 0020/2652] Update wording of when old version backup progress is detected --- bin/ghe-backup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/ghe-backup b/bin/ghe-backup index 27211a33d..e46e89fcf 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -56,9 +56,9 @@ trap 'cleanup' EXIT trap 'exit $?' INT # ^C always terminate if [ -h ../in-progress ]; then - echo "Error: in progress backup from previous version detected." 1>&2 + echo "Error: detected a backup already in progress from a previous version of ghe-backup." 
1>&2 echo "If there is no backup in progress anymore, please remove" 1>&2 - echo "the $GHE_DATA_DIR/in-progress symlink. This is only needed once." 1>&2 + echo "the $GHE_DATA_DIR/in-progress symlink." 1>&2 exit 1 fi From 0a2fc9e5652ac5bec9e29746de95f4e1582abf3f Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 19 Oct 2015 17:03:41 -0700 Subject: [PATCH 0021/2652] Update changelog --- debian/changelog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/debian/changelog b/debian/changelog index 537d66986..5b05947e8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,25 @@ +github-backup-utils (2.4.0) UNRELEASED; urgency=medium + + * Moves the in-progress detection to a separate file with PID which is + removed if the process is no longer running after the backup. #145, #99 + * Updates the README to explain why backup-utils is useful even if you have + the high availability replica running. #140 + * Changes the use of the --link-dest option to only occur when backing up + populated directories. #138 + * Adds logging to /var/log/syslog on the remote GitHub Enterprise appliance + to both ghe-backup and ghe-restore. #131 + * Restarts memcached after restoring to an already configured appliance to + ensure it doesn't contain out-of-sync information. #130 + * Removes the temporary /data/user/repositories-nw-backup directory that + remains after successfully migrating the repository storage layout to the + new format used on GitHub Enterprise 2.2.0 and later after restoring a + backup from an older release of GitHub Enterprise. #129 + * Add devscripts to Debian's build-depends for checkbashisms. #101 + * Documents the -c option which forces the restoration of the configuration + information to an already configured appliance. 
#96 + + -- Colin Seymour Tue, 20 Oct 2015 00:02:37 +0000 + github-backup-utils (2.2.0) UNRELEASED; urgency=medium * Adds support for the new repositories filesystem layout include in From 217b9459e8a394b7ec1684f2c31413200d69efdb Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 19 Oct 2015 17:09:44 -0700 Subject: [PATCH 0022/2652] 2.4.0 release --- share/github-backup-utils/version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/version b/share/github-backup-utils/version index ccbccc3dc..197c4d5c2 100644 --- a/share/github-backup-utils/version +++ b/share/github-backup-utils/version @@ -1 +1 @@ -2.2.0 +2.4.0 From 54f3deb625f56ba2eb8193aa8003e55b4c240317 Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 19 Oct 2015 17:14:29 -0700 Subject: [PATCH 0023/2652] Update changelog --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 5b05947e8..26e45dd09 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,7 +18,7 @@ github-backup-utils (2.4.0) UNRELEASED; urgency=medium * Documents the -c option which forces the restoration of the configuration information to an already configured appliance. #96 - -- Colin Seymour Tue, 20 Oct 2015 00:02:37 +0000 + -- Colin Seymour Tue, 20 Oct 2015 00:02:37 +0000 github-backup-utils (2.2.0) UNRELEASED; urgency=medium From d686981c2c278cd90ca73a9181679228b2b12757 Mon Sep 17 00:00:00 2001 From: Tim Littlemore Date: Tue, 3 Nov 2015 10:02:03 +0000 Subject: [PATCH 0024/2652] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a73d4d23c..969aa42f1 100644 --- a/README.md +++ b/README.md @@ -93,13 +93,14 @@ download the most recent GitHub Enterprise version. 
[release]: https://github.com/github/backup-utils/releases -### Migrating from GitHub Enterprise v11.10.34x to v2.0 +### Migrating from GitHub Enterprise v11.10.34x to v2.0, or v2.1 -If you are migrating from GitHub Enterprise version 11.10.34x to 2.0 or greater, +If you are migrating from GitHub Enterprise version 11.10.34x to 2.0 or 2.1 +(note, migrations to versions greater than 2.1 are not officially supported), please see the [Migrating from GitHub Enterprise v11.10.34x][10] documentation in the [GitHub Enterprise System Administrator's Guide][11]. It includes important information on using the backup utilities to migrate data from your -v11.10.34x instance to v2.0. +v11.10.34x instance to v2.0 or v2.1. ### Using the backup and restore commands From a0d9253b58024759330b71ec761390352d2db71e Mon Sep 17 00:00:00 2001 From: Tim Littlemore Date: Tue, 3 Nov 2015 10:05:09 +0000 Subject: [PATCH 0025/2652] Update README.md Fix the link to the edited title of the migration section. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 969aa42f1..5f76cf42e 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ This repository includes backup and recovery utilities for [GitHub Enterprise][1 - **[Storage requirements](#storage-requirements)** - **[GitHub Enterprise version requirements](#github-enterprise-version-requirements)** - **[Getting started](#getting-started)** -- **[Migrating from GitHub Enterprise v11.10.34x to v2.0](#migrating-from-github-enterprise-v111034x-to-v20)** +- **[Migrating from GitHub Enterprise v11.10.34x to v2.0](#migrating-from-github-enterprise-v111034x-to-v20-or-v21)** - **[Using the backup and restore commands](#using-the-backup-and-restore-commands)** - **[Scheduling backups](#scheduling-backups)** - **[Backup snapshot file structure](#backup-snapshot-file-structure)** From 221c4d540cf27cc51ce8008b30cf9234448aa9f9 Mon Sep 17 00:00:00 2001 From: Jason Massey Date: Thu, 3 Dec 2015 01:20:28 
-0800 Subject: [PATCH 0026/2652] change s3cmd from put to sync --- share/github-backup-utils/ghe-s3-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup index ef9838f0f..6f8079b68 100755 --- a/share/github-backup-utils/ghe-s3-backup +++ b/share/github-backup-utils/ghe-s3-backup @@ -23,4 +23,4 @@ s3cmd mb s3://$GHE_S3_BUCKET # Upload to S3. cd "$GHE_DATA_DIR"/current -s3cmd --preserve put * s3://$GHE_S3_BUCKET +s3cmd sync --delete-removed --follow-symlinks * s3://$GHE_S3_BUCKET From 2471358109bc950c8976b38fa59a2f565e40aba5 Mon Sep 17 00:00:00 2001 From: Jason Massey Date: Thu, 3 Dec 2015 10:42:42 -0800 Subject: [PATCH 0027/2652] make path to backup data dir explicit --- share/github-backup-utils/ghe-s3-backup | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup index 6f8079b68..8311c503c 100755 --- a/share/github-backup-utils/ghe-s3-backup +++ b/share/github-backup-utils/ghe-s3-backup @@ -22,5 +22,4 @@ ghe-backup s3cmd mb s3://$GHE_S3_BUCKET # Upload to S3. 
-cd "$GHE_DATA_DIR"/current -s3cmd sync --delete-removed --follow-symlinks * s3://$GHE_S3_BUCKET +s3cmd sync --delete-removed --follow-symlinks "$GHE_DATA_DIR"/current/ s3://$GHE_S3_BUCKET From 53edcd09810218cc3ceccd27d45a179b2401052b Mon Sep 17 00:00:00 2001 From: Jason Massey Date: Thu, 3 Dec 2015 13:03:04 -0800 Subject: [PATCH 0028/2652] Remove --follow-symlinks S3 does not support symlinks, so this command was actually creating a new copy of the linked file --- share/github-backup-utils/ghe-s3-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup index 8311c503c..b7101018c 100755 --- a/share/github-backup-utils/ghe-s3-backup +++ b/share/github-backup-utils/ghe-s3-backup @@ -22,4 +22,4 @@ ghe-backup s3cmd mb s3://$GHE_S3_BUCKET # Upload to S3. -s3cmd sync --delete-removed --follow-symlinks "$GHE_DATA_DIR"/current/ s3://$GHE_S3_BUCKET +s3cmd sync --delete-removed "$GHE_DATA_DIR"/current/ s3://$GHE_S3_BUCKET From ed52c6b18683e81920475916d1eb0f7abf535964 Mon Sep 17 00:00:00 2001 From: Brent Beer Date: Thu, 3 Dec 2015 13:04:10 -0800 Subject: [PATCH 0029/2652] describe what the new command line flag would do --- share/github-backup-utils/ghe-s3-backup | 1 + 1 file changed, 1 insertion(+) diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup index b7101018c..63501a793 100755 --- a/share/github-backup-utils/ghe-s3-backup +++ b/share/github-backup-utils/ghe-s3-backup @@ -22,4 +22,5 @@ ghe-backup s3cmd mb s3://$GHE_S3_BUCKET # Upload to S3. 
+# --delete-removed is to delete items in s3 that have been removed on the host s3cmd sync --delete-removed "$GHE_DATA_DIR"/current/ s3://$GHE_S3_BUCKET From 01869ee78b96030c2f53ed36c8b9682dc03d01a3 Mon Sep 17 00:00:00 2001 From: Brent Beer Date: Thu, 3 Dec 2015 14:19:54 -0800 Subject: [PATCH 0030/2652] add GHE_S3_BUCKET variable for user's backup config --- backup.config-example | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backup.config-example b/backup.config-example index 54b221187..17e43074a 100644 --- a/backup.config-example +++ b/backup.config-example @@ -26,3 +26,7 @@ GHE_NUM_SNAPSHOTS=10 # Any extra options passed to the SSH command. Nothing required by default # #GHE_EXTRA_SSH_OPTS="" + +# Add s3 bucket for configuring which bucket to use in ghe-s3-backup and +# ghe-s3-restore +# GHE_S3_BUCKET="" From e12a23c1a6bf5b0197e059c2c3c9af4411811878 Mon Sep 17 00:00:00 2001 From: Ronald Chmara Date: Mon, 11 Jan 2016 12:44:15 -0800 Subject: [PATCH 0031/2652] Old port 22 reference changed Newer software versions utilize port 122, not port 22, for SSH --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5f76cf42e..7c3b17c12 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Backup host software requirements are modest: Linux or other modern Unix operating system with [rsync][4] v2.6.4 or newer. The backup host must be able to establish network connections outbound to the -GitHub appliance over SSH (port 22). +GitHub appliance over SSH (port 22, or 122 (see below)). 
##### Storage requirements From 6497f296d68caace37998107388fe4932769a032 Mon Sep 17 00:00:00 2001 From: Ronald Chmara Date: Fri, 29 Jan 2016 08:24:17 -0800 Subject: [PATCH 0032/2652] Update verbiage per Sergio Rubio (rubiojr) Commit on PR https://github.com/github/backup-utils/pull/153#issuecomment-176767435 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c3b17c12..1e97a926d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Backup host software requirements are modest: Linux or other modern Unix operating system with [rsync][4] v2.6.4 or newer. The backup host must be able to establish network connections outbound to the -GitHub appliance over SSH (port 22, or 122 (see below)). +GitHub appliance over SSH. TCP port 122 is used to backup GitHub Enterprise 2.0 or newer instances and TCP port 22 is used for older versions (11.10.34X). ##### Storage requirements From d08cbb3a3ece6cac47b0ed691393cb88d81ecc5d Mon Sep 17 00:00:00 2001 From: Ronald Chmara Date: Fri, 29 Jan 2016 09:08:25 -0800 Subject: [PATCH 0033/2652] Minor comma added for grammar. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e97a926d..b47c0e7d5 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Backup host software requirements are modest: Linux or other modern Unix operating system with [rsync][4] v2.6.4 or newer. The backup host must be able to establish network connections outbound to the -GitHub appliance over SSH. TCP port 122 is used to backup GitHub Enterprise 2.0 or newer instances and TCP port 22 is used for older versions (11.10.34X). +GitHub appliance over SSH. TCP port 122 is used to backup GitHub Enterprise 2.0 or newer instances, and TCP port 22 is used for older versions (11.10.34X). 
##### Storage requirements From bc9136ffccb91ab5520141f4f999e7346980d478 Mon Sep 17 00:00:00 2001 From: Fermentedbeats Date: Wed, 3 Feb 2016 14:00:48 -0800 Subject: [PATCH 0034/2652] add cert to -c flag description --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b47c0e7d5..8936c1a81 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ download the most recent GitHub Enterprise version. ### Migrating from GitHub Enterprise v11.10.34x to v2.0, or v2.1 -If you are migrating from GitHub Enterprise version 11.10.34x to 2.0 or 2.1 +If you are migrating from GitHub Enterprise version 11.10.34x to 2.0 or 2.1 (note, migrations to versions greater than 2.1 are not officially supported), please see the [Migrating from GitHub Enterprise v11.10.34x][10] documentation in the [GitHub Enterprise System Administrator's Guide][11]. It includes @@ -158,7 +158,7 @@ enable when output is logged to a file. When restoring to an already configured GHE instance, settings and license data are *not* restored to prevent overwriting manual configuration on the restore host. This behavior can be overriden by passing the `-c` argument to `ghe-restore`, -forcing settings and license data to be overwritten with the backup copy's data. +forcing settings, certificate, and license data to be overwritten with the backup copy's data. ### Scheduling backups From ea9a126325a50f1a4f5a13a6c13141be2cfb92bb Mon Sep 17 00:00:00 2001 From: CJ Johnson Date: Thu, 4 Feb 2016 09:06:05 -0800 Subject: [PATCH 0035/2652] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8936c1a81..bdd373901 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ The `ghe-backup` and `ghe-restore` commands also have a verbose output mode (`-v`) that lists files as they're being transferred. It's often useful to enable when output is logged to a file. 
-When restoring to an already configured GHE instance, settings and license data +When restoring to an already configured GHE instance, settings, certificate, and license data are *not* restored to prevent overwriting manual configuration on the restore host. This behavior can be overriden by passing the `-c` argument to `ghe-restore`, forcing settings, certificate, and license data to be overwritten with the backup copy's data. From 9ce99bac112290a4f74018c719485820b4e2e8c7 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 9 Feb 2016 03:25:43 +0100 Subject: [PATCH 0036/2652] GitHub Enterprise Cluster support + SAML fixes * Adds GitHub Enterpise 2.5 support * Adds GitHub Enterprise Cluster support, mainly contributed by @dbussink * Backups and restores SAML keypairs --- bin/ghe-backup | 61 +++- bin/ghe-host-check | 2 +- bin/ghe-restore | 93 +++-- .../ghe-backup-alambic-cluster | 86 +++++ share/github-backup-utils/ghe-backup-config | 11 + .../ghe-backup-es-audit-log | 32 ++ .../ghe-backup-es-hookshot | 32 ++ .../ghe-backup-pages-cluster | 86 +++++ .../ghe-backup-redis-cluster | 37 ++ .../ghe-backup-repositories-cluster | 332 ++++++++++++++++++ share/github-backup-utils/ghe-backup-settings | 13 + .../ghe-restore-alambic-cluster | 121 +++++++ .../ghe-restore-es-audit-log | 30 ++ .../ghe-restore-pages-dpages | 120 +++++++ .../ghe-restore-repositories-dgit | 132 +++++++ .../ghe-restore-repositories-gist | 120 +++++++ .../github-backup-utils/ghe-restore-settings | 15 +- share/github-backup-utils/ghe-ssh | 2 +- share/github-backup-utils/version | 2 +- test/bin/ghe-export-es-audit-log | 1 + 20 files changed, 1287 insertions(+), 41 deletions(-) create mode 100644 share/github-backup-utils/ghe-backup-alambic-cluster create mode 100644 share/github-backup-utils/ghe-backup-es-audit-log create mode 100644 share/github-backup-utils/ghe-backup-es-hookshot create mode 100644 share/github-backup-utils/ghe-backup-pages-cluster create mode 100644 
share/github-backup-utils/ghe-backup-redis-cluster create mode 100644 share/github-backup-utils/ghe-backup-repositories-cluster create mode 100644 share/github-backup-utils/ghe-restore-alambic-cluster create mode 100644 share/github-backup-utils/ghe-restore-es-audit-log create mode 100644 share/github-backup-utils/ghe-restore-pages-dpages create mode 100644 share/github-backup-utils/ghe-restore-repositories-dgit create mode 100644 share/github-backup-utils/ghe-restore-repositories-gist create mode 120000 test/bin/ghe-export-es-audit-log diff --git a/bin/ghe-backup b/bin/ghe-backup index e46e89fcf..564597dfe 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -86,6 +86,13 @@ echo "Starting backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP" ghe_remote_version_required echo "$GHE_REMOTE_VERSION" > version +# Figure out if we're restoring into cluster +cluster=false +if ghe-ssh "$GHE_HOSTNAME" -- \ + "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/cluster' ]"; then + cluster=true +fi + # Log backup start message in /var/log/syslog on remote instance ghe_remote_logger "Starting backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP ..." @@ -99,8 +106,13 @@ if [ $GHE_VERSION_MAJOR -eq 1 -a $GHE_VERSION_PATCH -lt 340 ]; then GHE_BACKUP_STRATEGY="tarball" fi +if $cluster; then + GHE_BACKUP_STRATEGY="cluster" +fi + # Record the strategy with the snapshot so we will know how to restore. echo "$GHE_BACKUP_STRATEGY" > strategy +export GHE_BACKUP_STRATEGY # If we're using the tarball backup strategy, put the appliance in maintenance # mode and wait for all writing processes to bleed out. @@ -127,8 +139,21 @@ ghe-ssh "$GHE_HOSTNAME" -- /bin/bash > mysql.sql.gz || failures="$failures mysql" echo "Backing up Redis database ..." 
-ghe-backup-redis > redis.rdb || -failures="$failures redis" +if $cluster; then + ghe-backup-redis-cluster > redis.rdb || + failures="$failures redis" +else + ghe-backup-redis > redis.rdb || + failures="$failures redis" +fi + +echo "Backing up audit log ..." +ghe-backup-es-audit-log || +failures="$failures audit-log" + +echo "Backing up hookshot logs ..." +ghe-backup-es-hookshot || +failures="$failures hookshot" echo "Backing up Git repositories ..." ghe-backup-repositories-${GHE_BACKUP_STRATEGY} || @@ -139,18 +164,30 @@ ghe-backup-pages-${GHE_BACKUP_STRATEGY} || failures="$failures pages" if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "Backing up asset attachments ..." - ghe-backup-userdata alambic_assets || - failures="$failures alambic_assets" - - echo "Backing up hook deliveries ..." - ghe-backup-userdata hookshot || - failures="$failures hookshot" + if $cluster; then + echo "Backing up asset attachments ..." + ghe-backup-alambic-cluster || + failures="$failures alambic_assets" + else + echo "Backing up asset attachments ..." + ghe-backup-userdata alambic_assets || + failures="$failures alambic_assets" + + echo "Backing up storage data ..." + ghe-backup-userdata storage || + failures="$failures storage" + + echo "Backing up hook deliveries ..." + ghe-backup-userdata hookshot || + failures="$failures hookshot" + fi fi -echo "Backing up Elasticsearch indices ..." -ghe-backup-es-${GHE_BACKUP_STRATEGY} || -failures="$failures elasticsearch" +if ! $cluster; then + echo "Backing up Elasticsearch indices ..." + ghe-backup-es-${GHE_BACKUP_STRATEGY} || + failures="$failures elasticsearch" +fi # If we're using the tarball backup strategy, bring the appliance out of # maintenance mode now instead of waiting until after pruning stale snapshots. 
diff --git a/bin/ghe-host-check b/bin/ghe-host-check index d5be624ff..039ce168e 100755 --- a/bin/ghe-host-check +++ b/bin/ghe-host-check @@ -37,7 +37,7 @@ set -e if [ $rc -ne 0 ]; then case $rc in 255) - if echo "$output" | grep -i "port 22: connection refused" >/dev/null; then + if echo "$output" | grep -i "port 22: connection refused\|Connection timed out during banner exchange" >/dev/null; then exec "bin/$(basename $0)" "$hostname:122" fi diff --git a/bin/ghe-restore b/bin/ghe-restore index 4f80b5c46..53742873c 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -97,6 +97,15 @@ elif [ "$GHE_VERSION_MAJOR" -ge 2 ]; then restore_settings=true fi +# Figure out if we're restoring into cluster +cluster=false +if ghe-ssh "$GHE_HOSTNAME" -- \ + "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/cluster' ]"; then + cluster=true + instance_configured=true + restore_settings=false +fi + # Figure out if this instance is in a replication pair if ghe-ssh "$GHE_HOSTNAME" -- "ghe-repl-status -r 2>/dev/null" \ | grep -Eq "replica|primary"; then @@ -176,40 +185,66 @@ fi # Make sure mysql and elasticsearch are prep'd and running before restoring into # appliances v2.x or greater. These services will not have been started on appliances # that have not been configured yet. -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then +if ! $cluster; then + if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then echo "sudo ghe-service-ensure-mysql && sudo ghe-service-ensure-elasticsearch" | ghe-ssh "$GHE_HOSTNAME" -- /bin/sh 1>&3 + fi fi -# Remove temporary 2.2 storage migration directory if it exists -echo "if [ -d /data/user/repositories-nw-backup ]; then sudo rm -rf /data/user/repositories-nw-backup; fi" | -ghe-ssh "$GHE_HOSTNAME" -- /bin/sh 1>&3 +echo "Restoring MySQL database ..." +gzip -dc "$GHE_RESTORE_SNAPSHOT_PATH/mysql.sql.gz" | ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-mysql' -echo "Restoring Git repositories ..." -ghe-restore-repositories-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 +echo "Restoring Redis database ..." 
+ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-redis' < "$GHE_RESTORE_SNAPSHOT_PATH/redis.rdb" 1>&3 -echo "Restoring GitHub Pages ..." -ghe-restore-pages-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 +if $cluster; then + echo "Restoring Git repositories into cluster ..." + ghe-restore-repositories-dgit "$GHE_HOSTNAME" 1>&3 -if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - echo "Restoring asset attachments ..." - ghe-restore-userdata alambic_assets "$GHE_HOSTNAME" 1>&3 + echo "Restoring Gists into cluster ..." + ghe-restore-repositories-gist "$GHE_HOSTNAME" 1>&3 +else + # Remove temporary 2.2 storage migration directory if it exists + echo "if [ -d /data/user/repositories-nw-backup ]; then sudo rm -rf /data/user/repositories-nw-backup; fi" | + ghe-ssh "$GHE_HOSTNAME" -- /bin/sh 1>&3 - echo "Restoring hook deliveries ..." - ghe-restore-userdata hookshot "$GHE_HOSTNAME" 1>&3 + echo "Restoring Git repositories ..." + ghe-restore-repositories-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 fi -echo "Restoring MySQL database ..." -gzip -dc "$GHE_RESTORE_SNAPSHOT_PATH/mysql.sql.gz" | ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-mysql' 1>&3 - -echo "Restoring Redis database ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-redis' < "$GHE_RESTORE_SNAPSHOT_PATH/redis.rdb" 1>&3 +if $cluster; then + echo "Restoring GitHub Pages into DPages..." + ghe-restore-pages-dpages "$GHE_HOSTNAME" 1>&3 +else + echo "Restoring GitHub Pages ..." + ghe-restore-pages-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 +fi echo "Restoring SSH authorized keys ..." ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-authorized-keys' < "$GHE_RESTORE_SNAPSHOT_PATH/authorized-keys.json" 1>&3 -echo "Restoring Elasticsearch indices ..." -ghe-restore-es-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 +if $cluster; then + echo "Restoring storage data ..." + ghe-restore-alambic-cluster "$GHE_HOSTNAME" 1>&3 +elif [ "$GHE_VERSION_MAJOR" -ge 2 ]; then + echo "Restoring asset attachments ..." 
+ ghe-restore-userdata alambic_assets "$GHE_HOSTNAME" 1>&3 + + echo "Restoring storage data ..." + ghe-restore-userdata storage "$GHE_HOSTNAME" 1>&3 + + echo "Restoring hook deliveries ..." + ghe-restore-userdata hookshot "$GHE_HOSTNAME" 1>&3 +fi + +if $cluster; then + echo "Restoring ElasticSearch Audit logs" + ghe-restore-es-audit-log "$GHE_HOSTNAME" 1>&3 +else + echo "Restoring Elasticsearch indices ..." + ghe-restore-es-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3 +fi # Restart an already running memcached to reset the cache after restore if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then @@ -220,10 +255,13 @@ fi # When restoring to a host that has already been configured, kick off a # config run to perform data migrations. -if $instance_configured; then +if $cluster; then + echo "Configuring cluster ..." + ghe-ssh "$GHE_HOSTNAME" -- "ghe-cluster-config-apply" 1>&3 2>&3 +elif $instance_configured; then echo "Configuring storage ..." if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - ghe-ssh "$GHE_HOSTNAME" -- "sudo ghe-config-apply --full" 1>&3 + ghe-ssh "$GHE_HOSTNAME" -- "ghe-config-apply --full" 1>&3 2>&3 else echo " This will take several minutes to complete..." ghe-ssh "$GHE_HOSTNAME" -- "sudo enterprise-configure" 1>&3 2>&3 @@ -239,8 +277,13 @@ update_restore_status "complete" # Log restore complete message in /var/log/syslog on remote instance ghe_remote_logger "Completed restore from $(hostname) / snapshot ${GHE_SNAPSHOT_TIMESTAMP}." -echo "Restoring SSH host keys ..." -ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-ssh-host-keys' < "$GHE_RESTORE_SNAPSHOT_PATH/ssh-host-keys.tar" 1>&3 +if ! $cluster; then + echo "Restoring SSH host keys ..." + ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-ssh-host-keys' < "$GHE_RESTORE_SNAPSHOT_PATH/ssh-host-keys.tar" 1>&3 +fi echo "Completed restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -echo "Visit https://$hostname/setup/settings to review appliance configuration." + +if ! 
$cluster; then + echo "Visit https://$hostname/setup/settings to review appliance configuration." +fi diff --git a/share/github-backup-utils/ghe-backup-alambic-cluster b/share/github-backup-utils/ghe-backup-alambic-cluster new file mode 100644 index 000000000..7d4276692 --- /dev/null +++ b/share/github-backup-utils/ghe-backup-alambic-cluster @@ -0,0 +1,86 @@ +#!/bin/sh +#/ Usage: ghe-backup-alambic-cluster +#/ Take an online, incremental snapshot of all Alambic Storage data +#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup when the cluster strategy is used. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Set up remote host and root backup snapshot directory based on config +host="$GHE_HOSTNAME" +backup_dir="$GHE_SNAPSHOT_DIR/storage" + +# Verify rsync is available. +if ! rsync --version 1>/dev/null 2>&1; then + echo "Error: rsync not found." 1>&2 + exit 1 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$host" + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +# git server hostnames +hostnames=$(ghe_cluster_online_nodes "storage-server") + +for hostname in $hostnames; do + config="$config +Host $hostname + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p + StrictHostKeyChecking=no +" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +# Make sure root backup dir exists if this is the first run +mkdir -p "$backup_dir" + +# Removes the remote sync-in-progress file on exit, re-enabling GC operations +# on the remote instance. +cleanup() { + rm -f $config_file +} +trap 'cleanup' EXIT INT + +# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's +# --link-dest support. This also decreases physical space usage considerably. +if [ -d "$GHE_DATA_DIR/current/storage" ] && [ "$(ls -A $GHE_DATA_DIR/current/storage)" ]; then + link_dest="--link-dest=../../current/storage" +fi + +for hostname in $hostnames; do + echo 1>&3 + echo "* Starting backup for host: $hostname" + # Sync all auxiliary repository data. This includes files and directories like + # HEAD, audit_log, config, description, info/, etc. No refs or object data + # should be transferred here. + echo 1>&3 + echo "* Transferring storage files ..." 1>&3 + + # Transfer all data from the user data directory using rsync. 
+ ghe-rsync -az \ + -e "ssh -q $opts -p 122 -F $config_file -l $user" \ + --rsync-path='sudo -u git rsync' \ + $link_dest \ + "$hostname:$GHE_REMOTE_DATA_USER_DIR/storage/" \ + "$GHE_SNAPSHOT_DIR/storage" 1>&3 +done diff --git a/share/github-backup-utils/ghe-backup-config b/share/github-backup-utils/ghe-backup-config index cbe0051eb..521a91d2d 100755 --- a/share/github-backup-utils/ghe-backup-config +++ b/share/github-backup-utils/ghe-backup-config @@ -130,6 +130,10 @@ GHE_SNAPSHOT_DIR="$GHE_DATA_DIR"/"$GHE_SNAPSHOT_TIMESTAMP" # Set "true" to get verbose logging of all ssh commands on stderr : ${GHE_VERBOSE_SSH:=false} +# The location of the cluster configuration file file on the remote side. +# This is always "/data/user/common/cluster.conf" for GitHub Cluster instances. +# Use of this variable allows the location to be overridden in tests. +: ${GHE_REMOTE_CLUSTER_CONF_FILE:="$GHE_REMOTE_DATA_DIR/user/common/cluster.conf"} ############################################################################### ### Dynamic remote version config @@ -241,3 +245,10 @@ ghe_remote_logger () { echo "$@" | ghe-ssh "$GHE_HOSTNAME" -- logger -t backup-utils || true } + +# Usage: ghe_cluster_online_nodes role +# Returns the online nodes with a certain role in cluster +ghe_cluster_online_nodes () { + role=$1 + echo "ghe-config --get-regexp cluster.*.$role | egrep 'true$' | awk '{ print \$1; }' | awk 'BEGIN { FS=\".\" }; { print \$2 };' | xargs -I{} -n1 bash -c 'if [ \"\$(ghe-config cluster.\$hostname.offline)\" != true ]; then ghe-config cluster.{}.hostname; fi'" | ghe-ssh "$GHE_HOSTNAME" /bin/bash +} diff --git a/share/github-backup-utils/ghe-backup-es-audit-log b/share/github-backup-utils/ghe-backup-es-audit-log new file mode 100644 index 000000000..7ef6e40e3 --- /dev/null +++ b/share/github-backup-utils/ghe-backup-es-audit-log @@ -0,0 +1,32 @@ +#!/bin/sh +#/ Usage: ghe-backup-es-audit-log +#/ Take a backup of audit logs in ElasticSearch. 
+#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Set up remote host and root elastic backup directory based on config +host="$GHE_HOSTNAME" + +# Perform a host-check and establish GHE_REMOTE_XXX variables. +ghe_remote_version_required "$host" + +# Make sure root backup dir exists if this is the first run +mkdir -p "$GHE_SNAPSHOT_DIR/audit-log" + +indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/audit_log*"' | cut -d ' ' -f 3) +current_index=audit_log-$(ghe-ssh "$host" 'date +"%Y-%m"') + +for index in $indices; do + if [ -f $GHE_DATA_DIR/current/audit-log/$index.gz -a $index \< $current_index ]; then + # Hard link any older indices since they are read only and won't change + ln $GHE_DATA_DIR/current/audit-log/$index.gz $GHE_SNAPSHOT_DIR/audit-log/$index.gz + else + ghe-ssh "$host" "/usr/local/share/enterprise/ghe-es-dump-json 'http://localhost:9201/$index'" | gzip > $GHE_SNAPSHOT_DIR/audit-log/$index.gz + fi +done diff --git a/share/github-backup-utils/ghe-backup-es-hookshot b/share/github-backup-utils/ghe-backup-es-hookshot new file mode 100644 index 000000000..7a69a7e52 --- /dev/null +++ b/share/github-backup-utils/ghe-backup-es-hookshot @@ -0,0 +1,32 @@ +#!/bin/sh +#/ Usage: ghe-backup-es-hookshot +#/ Take a backup of hookshot logs in ElasticSearch. +#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Set up remote host and root elastic backup directory based on config +host="$GHE_HOSTNAME" + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$host" + +# Make sure root backup dir exists if this is the first run +mkdir -p "$GHE_SNAPSHOT_DIR/hookshot" + +indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/hookshot-logs-*"' | cut -d ' ' -f 3) +current_index=hookshot-logs-$(ghe-ssh "$host" 'date +"%Y-%m-%d"') + +for index in $indices; do + if [ -f $GHE_DATA_DIR/current/hookshot/$index.gz -a $index \< $current_index ]; then + # Hard link any older indices since they are read only and won't change + ln $GHE_DATA_DIR/current/hookshot/$index.gz $GHE_SNAPSHOT_DIR/hookshot/$index.gz + else + ghe-ssh "$host" "/usr/local/share/enterprise/ghe-es-dump-json 'http://localhost:9201/$index'" | gzip > $GHE_SNAPSHOT_DIR/hookshot/$index.gz + fi +done diff --git a/share/github-backup-utils/ghe-backup-pages-cluster b/share/github-backup-utils/ghe-backup-pages-cluster new file mode 100644 index 000000000..a0807fe5f --- /dev/null +++ b/share/github-backup-utils/ghe-backup-pages-cluster @@ -0,0 +1,86 @@ +#!/bin/sh +#/ Usage: ghe-backup-pages-cluster +#/ Take an online, incremental snapshot of all Pages data +#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup when the cluster strategy is used. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Set up remote host and root backup snapshot directory based on config +host="$GHE_HOSTNAME" +backup_dir="$GHE_SNAPSHOT_DIR/pages" + +# Verify rsync is available. +if ! rsync --version 1>/dev/null 2>&1; then + echo "Error: rsync not found." 1>&2 + exit 1 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$host" + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +# git server hostnames +hostnames=$(ghe_cluster_online_nodes "pages-server") + +for hostname in $hostnames; do + config="$config +Host $hostname + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p + StrictHostKeyChecking=no +" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +# Make sure root backup dir exists if this is the first run +mkdir -p "$backup_dir" + +# Removes the remote sync-in-progress file on exit, re-enabling GC operations +# on the remote instance. +cleanup() { + rm -f $config_file +} +trap 'cleanup' EXIT INT + +# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's +# --link-dest support. This also decreases physical space usage considerably. +if [ -d "$GHE_DATA_DIR/current/pages" ] && [ "$(ls -A $GHE_DATA_DIR/current/pages)" ]; then + link_dest="--link-dest=../../current/pages" +fi + +for hostname in $hostnames; do + echo 1>&3 + echo "* Starting backup for host: $hostname" + # Sync all auxiliary repository data. This includes files and directories like + # HEAD, audit_log, config, description, info/, etc. No refs or object data + # should be transferred here. + echo 1>&3 + echo "* Transferring pages files ..." 1>&3 + + # Transfer all data from the user data directory using rsync. 
+ ghe-rsync -az \ + -e "ssh -q $opts -p 122 -F $config_file -l $user" \ + --rsync-path='sudo -u git rsync' \ + $link_dest \ + "$hostname:$GHE_REMOTE_DATA_USER_DIR/pages/" \ + "$GHE_SNAPSHOT_DIR/pages" 1>&3 +done diff --git a/share/github-backup-utils/ghe-backup-redis-cluster b/share/github-backup-utils/ghe-backup-redis-cluster new file mode 100644 index 000000000..d571adb8d --- /dev/null +++ b/share/github-backup-utils/ghe-backup-redis-cluster @@ -0,0 +1,37 @@ +#!/bin/sh +#/ Usage: ghe-backup-redis-cluster +#/ Take a snapshot of all Redis data. This is needed because older versions of +#/ the remote side ghe-export-redis command use a blocking SAVE instead of a +#/ non-blocking BGSAVE. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-backup command. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Perform a host-check and establish GHE_REMOTE_XXX variables. +ghe_remote_version_required "$GHE_HOSTNAME" + +# Force a redis BGSAVE, and wait for it to complete. +sudo= +[ "$GHE_VERSION_MAJOR" -ge 2 ] && sudo="sudo" +ghe-ssh "$GHE_HOSTNAME" /bin/sh </dev/null || echo "localhost") + timestamp=\$(redis-cli -h \$redis_host LASTSAVE) + redis-cli -h \$redis_host BGSAVE 1>/dev/null + + while [ \$(redis-cli -h \$redis_host LASTSAVE) -eq \$timestamp ]; do + sleep 1 + done + + if [ "\$redis_host" != "localhost" ]; then + ssh \$redis_host $sudo cat '$GHE_REMOTE_DATA_USER_DIR/redis/dump.rdb' + else + $sudo cat '$GHE_REMOTE_DATA_USER_DIR/redis/dump.rdb' + fi +EOF diff --git a/share/github-backup-utils/ghe-backup-repositories-cluster b/share/github-backup-utils/ghe-backup-repositories-cluster new file mode 100644 index 000000000..54a6931cc --- /dev/null +++ b/share/github-backup-utils/ghe-backup-repositories-cluster @@ -0,0 +1,332 @@ +#!/bin/sh +#/ Usage: ghe-backup-repositories-cluster +#/ Take an online, incremental snapshot of all Git repository data. 
+#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup when the cluster strategy is used. +set -e + +# This command is designed to allow for transferring active Git repository data +# from a GitHub instance to a backup site in a way that ensures data is +# captured in a consistent state even when being written to. +# +# - All Git GC operations are disabled on the GitHub instance for the duration of +# the backup. This removes the possibly of objects or packs being removed +# while the backup is in progress. +# +# - In progress Git GC operations are given a cooldown window to complete. The +# script will sleep for up to 60 seconds waiting for GC operations to finish. +# +# - Git repository data is transferred in a specific order: auxiliary files, +# packed refs, loose refs, reflogs, and finally objects and pack files in that +# order. This ensures that all referenced objects are captured. +# +# - Git GC operations are re-enabled on the GitHub instance. +# +# The script uses multiple runs of rsync to transfer repository files. Each run +# includes a list of filter rules that ensure only specific types of files are +# transferred. +# +# See the "FILTER RULES" and "INCLUDE/EXCLUDE PATTERN RULES" sections of the +# rsync(1) manual for more information: +# + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +backup_dir="$GHE_SNAPSHOT_DIR/repositories" + +# Location of last good backup for rsync --link-dest +backup_current="$GHE_DATA_DIR/current/repositories" + +# Verify rsync is available. +if ! rsync --version 1>/dev/null 2>&1; then + echo "Error: rsync not found." 1>&2 + exit 1 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$host" + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +# git server hostnames +hostnames=$(ghe_cluster_online_nodes "git-server") +for hostname in $hostnames; do + config="$config +Host $hostname + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p + StrictHostKeyChecking=no +" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +# Remote sync-in-progress file location. When this file exists, Git GC +# operations are disabled on the GitHub instance. +sync_in_progress_file="$GHE_REMOTE_DATA_USER_DIR/repositories/.sync_in_progress" + +# Make sure root backup dir exists if this is the first run +mkdir -p "$backup_dir" + +# Removes the remote sync-in-progress file on exit, re-enabling GC operations +# on the remote instance. +cleanup() { + for hostname in $hostnames; do + ghe-ssh -F $config_file "$hostname:$port" -- "sudo rm -f '$sync_in_progress_file'" + done + rm -f $config_file +} +trap 'cleanup' EXIT +trap 'exit $?' INT # ^C always terminate + +wait_pids="" +for hostname in $hostnames; do + # Touch the sync-in-progress file, disabling GC operations, and wait for all + # active GC processes to finish on the remote side. + echo " + set -e + sudo -u git touch '$sync_in_progress_file' + + sanity=0 + while [ \$sanity -lt $GHE_GIT_COOLDOWN_PERIOD ]; do + # note: the bracket synta[x] below is to prevent matches against the + # grep process itself. + if ps axo args | grep -E -e '^git( -.*)? nw-repac[k]( |$)' -e '^git( -.*)? 
g[c]( |$)' >/dev/null; then + sleep 1 + sanity=\$(( sanity + 1 )) + else + exit 0 + fi + done + exit 7 + " | ssh -q $opts -p $port -F $config_file -l $user $hostname -- /bin/sh + wait_pids="$wait_pids $hostname:$!" +done + +# strip leading space +wait_pids="$(echo -e "${wait_pids}" | sed -e 's/^[[:space:]]*//')" + +# wait for jobs one by one, bail on the first one +for pid in $wait_pids; do + # pull out pid:hostname pairs into vars + wait_hostname="$(echo $pid | awk 'BEGIN {FS = ":" }; { print $1; }')" + wait_pid="$(echo $pid | awk 'BEGIN {FS = ":" }; { print $2; }')" + wait $wait_pid || { + res=$? + if [ $res = 7 ]; then + echo "Error: Git GC processes remain after $GHE_GIT_COOLDOWN_PERIOD seconds on host: $wait_hostname. Aborting..." 1>&2 + fi + exit $res + } + unset wait_hostname wait_pid +done + +unset wait_pids + +# If we have a previous increment, avoid transferring existing files via rsync's +# --link-dest support. This also decreases physical space usage considerably. +if [ -d "$backup_current" ]; then + link_dest="--link-dest=../../current/repositories" +fi + +# Transfer repository data from a GitHub instance to the current snapshot +# directory, using a previous snapshot to avoid transferring files that have +# already been transferred. A set of rsync filter rules are provided on stdin +# for each invocation. +rsync_repository_data () { + port=$(ssh_port_part "$1") + host=$(ssh_host_part "$1") + + shift + ghe-rsync -a \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + $link_dest "$@" \ + --rsync-path='sudo -u git rsync' \ + --include-from=- --exclude=\* \ + "$host:$GHE_REMOTE_DATA_USER_DIR/repositories/" \ + "$backup_dir" 1>&3 +} + + +for hostname in $hostnames; do + echo 1>&3 + echo "* Starting backup for host: $hostname" + # Sync all auxiliary repository data. This includes files and directories like + # HEAD, audit_log, config, description, info/, etc. No refs or object data + # should be transferred here. 
+ echo 1>&3 + echo "* Transferring auxiliary files ..." 1>&3 + rsync_repository_data $hostname:122 -z <&3 + echo "* Transferring packed-refs files ..." 1>&3 + rsync_repository_data $hostname:122 -z <&3 +echo "* Transferring refs and reflogs ..." 1>&3 +rsync_repository_data $hostname:122 -z <&3 + echo "* Transferring objects and packs ..." 1>&3 + rsync_repository_data $hostname:122 -H <&3 + echo "* Transferring special data directories ..." 1>&3 + rsync_repository_data $hostname:122 <&3 +done diff --git a/share/github-backup-utils/ghe-backup-settings b/share/github-backup-utils/ghe-backup-settings index 4d618c12a..a6691c740 100755 --- a/share/github-backup-utils/ghe-backup-settings +++ b/share/github-backup-utils/ghe-backup-settings @@ -37,4 +37,17 @@ if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then else unlink manage-password+ fi + + if ghe-ssh "$host" -- "test -f $GHE_REMOTE_DATA_USER_DIR/common/idp.crt"; then + echo "* Transferring SAML keys ..." 1>&3 + ghe-ssh $host -- sudo tar -C $GHE_REMOTE_DATA_USER_DIR/common/ -cf - "idp.crt saml-sp.p12" > saml-keys.tar + fi +fi + +if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then + echo "* Transferring cluster configuration ..." 1>&3 + if ! ghe-ssh "$host" -- "sudo cat $GHE_REMOTE_CLUSTER_CONF_FILE 2>/dev/null" > cluster.conf; then + echo "Error: Enterprise Cluster is not configured yet, backup will fail" >&2 + exit 1 + fi fi diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster new file mode 100644 index 000000000..368c545e3 --- /dev/null +++ b/share/github-backup-utils/ghe-restore-alambic-cluster @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +#/ Usage: ghe-restore-alambic-cluster +#/ Restore Alambic assets from an rsync snapshot of all assets to a GitHub cluster. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-restore command when restoring into a cluster. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. 
share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ -z "$*" ] && print_usage + +# Grab host arg +GHE_HOSTNAME="$1" + +# Perform a host-check and establish GHE_REMOTE_XXX variables. +ghe_remote_version_required "$GHE_HOSTNAME" + +# The snapshot to restore should be set by the ghe-restore command but this lets +# us run this script directly. +: ${GHE_RESTORE_SNAPSHOT:=current} + +# Find the binary blobs to restore +storage_paths=$(cd $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/ && find storage -mindepth 4 -maxdepth 4 | cut -d / -f2-) + +# No need to restore anything, early exit +if [ -z "$storage_paths" ]; then + echo "Warning: Storage backup missing. Skipping ..." + exit 0 +fi + +# Generate SSH config for forwarding +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | cut -d ' ' -f 2) +for hostname in $hostnames; do + config="$config +Host $hostname + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +cleanup() { + for pid in $(jobs -p); do + kill -KILL $pid > /dev/null 1>&2 || true + done + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +} + +trap 'cleanup' INT TERM EXIT + +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +mkfifo ssh_routes_in +mkfifo ssh_routes_out +mkfifo ssh_finalize_in +mkfifo ssh_finalize_out + +echo "Setting up storage processes" + +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-routes - < ssh_routes_out > ssh_routes_in & +ssh_routes_pid=$! 
+ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ssh_finalize_pid=$! + +echo "Set up storage processes" + +exec 4> ssh_routes_out +exec 5> ssh_finalize_out + +for storage_path in $storage_paths; do + oid=$(echo $storage_path | awk -F/ '{print $(NF)}') + echo "$oid" >&4 + read routes < ssh_routes_in + + for route in $routes; do + ghe-rsync -aHR --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/storage/./$storage_path" \ + "$route:$GHE_REMOTE_DATA_USER_DIR/storage" & + done + + for pid in $(jobs -p); do + if [ $pid = $ssh_routes_pid -o $pid = $ssh_finalize_pid ]; then + continue + fi + wait $pid + ret_code=$? + if [ "$ret_code" != "0" ]; then + echo "$pid exited $ret_code" + exit $ret_code + fi + done + + echo "$oid $routes" >&5 +done + +exec 4>&- +exec 5>&- + +# Ensure to flush these and close the pipes +cat ssh_routes_in > /dev/null +cat ssh_finalize_in > /dev/null + +wait $ssh_routes_pid > /dev/null 1>&2 || true +wait $ssh_finalize_pid > /dev/null 1>&2 || true diff --git a/share/github-backup-utils/ghe-restore-es-audit-log b/share/github-backup-utils/ghe-restore-es-audit-log new file mode 100644 index 000000000..e224dcfc6 --- /dev/null +++ b/share/github-backup-utils/ghe-restore-es-audit-log @@ -0,0 +1,30 @@ +#!/bin/sh +#/ Usage: ghe-restore-es-audit-log +#/ Take a backup of audit logs in ElasticSearch. +#/ +#/ Note: This command typically isn't called directly. It's invoked by +#/ ghe-backup. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ $# -lt 1 ] && print_usage + +GHE_HOSTNAME="$1" + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$GHE_HOSTNAME" + +last_index=$(ghe-ssh "$GHE_HOSTNAME" 'curl -s "localhost:9201/_cat/indices/audit_log*"' | cut -d ' ' -f 3 | sort | tail -1) + +indices=$(ls -1 $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/audit-log/*.gz | xargs -I{} -n1 basename {} .gz) + +for index in $indices; do + if [ -z "$last_index" ] || ! [ $index \< $last_index ]; then + echo "Restoring $index" + gzip -dc $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/audit-log/$index.gz | ghe-ssh "$GHE_HOSTNAME" "/usr/local/share/enterprise/ghe-es-load-json 'http://localhost:9201/$index'" + fi +done diff --git a/share/github-backup-utils/ghe-restore-pages-dpages b/share/github-backup-utils/ghe-restore-pages-dpages new file mode 100644 index 000000000..ff2746f47 --- /dev/null +++ b/share/github-backup-utils/ghe-restore-pages-dpages @@ -0,0 +1,120 @@ +#!/usr/bin/env bash +#/ Usage: ghe-restore-pages-dpages +#/ Restore repositories fron an rsync snapshot of all Git repository data to a GitHub cluster. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-restore command when restoring into a cluster. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ -z "$*" ] && print_usage + +# Grab host arg +GHE_HOSTNAME="$1" + +# The snapshot to restore should be set by the ghe-restore command but this lets +# us run this script directly. +: ${GHE_RESTORE_SNAPSHOT:=current} + +# Find the pages to restore +pages_paths=$(cd $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/ && find pages -mindepth 5 -maxdepth 5 | cut -d / -f2-) + +# No need to restore anything, early exit +if [ -z "$pages_paths" ]; then + echo "Warning: Pages backup missing. Skipping ..." + exit 0 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$GHE_HOSTNAME" + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | cut -d ' ' -f 2) +for hostname in $hostnames; do + config="$config +Host $hostname + ServerAliveInterval 60 + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +cleanup() { + for pid in $(jobs -p); do + kill -KILL $pid > /dev/null 2>&1 || true + done + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +} + +trap 'cleanup' INT TERM EXIT + +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +mkfifo ssh_routes_in +mkfifo ssh_routes_out +mkfifo ssh_finalize_in +mkfifo ssh_finalize_out + +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/dpages-cluster-import-routes - < ssh_routes_out > ssh_routes_in & +ssh_routes_pid=$! +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/dpages-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ssh_finalize_pid=$! 
+ +exec 4> ssh_routes_out +exec 5> ssh_finalize_out + +for pages_path in $pages_paths; do + page_id=$(echo $pages_path | awk -F/ '{print $(NF)}') + echo "$page_id" >&4 + read routes < ssh_routes_in + + for route in $routes; do + ghe-rsync -aHR --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/pages/./$pages_path" \ + "$route:$GHE_REMOTE_DATA_USER_DIR/pages" & + done + + for pid in $(jobs -p); do + if [ $pid = $ssh_routes_pid -o $pid = $ssh_finalize_pid ]; then + continue + fi + wait $pid + ret_code=$? + if [ "$ret_code" != "0" ]; then + echo "$pid exited $ret_code" + exit $ret_code + fi + done + + echo "$page_id $routes" >&5 + read output < ssh_finalize_in +done + +exec 4>&- +exec 5>&- + +# Ensure to flush these and close the pipes +cat ssh_routes_in > /dev/null & +cat ssh_finalize_in > /dev/null & + +wait $ssh_routes_pid > /dev/null 2>&1 || true +wait $ssh_finalize_pid > /dev/null 2>&1 || true diff --git a/share/github-backup-utils/ghe-restore-repositories-dgit b/share/github-backup-utils/ghe-restore-repositories-dgit new file mode 100644 index 000000000..b90129c40 --- /dev/null +++ b/share/github-backup-utils/ghe-restore-repositories-dgit @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +#/ Usage: ghe-restore-repositories-dgit +#/ Restore repositories fron an rsync snapshot of all Git repository data to a GitHub cluster. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-restore command when restoring into a cluster. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ -z "$*" ] && print_usage + +# Grab host arg +GHE_HOSTNAME="$1" + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$GHE_HOSTNAME" + +# The snapshot to restore should be set by the ghe-restore command but this lets +# us run this script directly. +: ${GHE_RESTORE_SNAPSHOT:=current} + +network_paths=$(cd $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/ && find repositories -mindepth 6 -maxdepth 7 -name \*.git -exec dirname {} \; | uniq | grep nw | cut -d / -f2-) + +if [ -z "$network_paths" ]; then + echo "Warning: Repositories backup missing. Skipping ..." + exit 0 +fi + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | cut -d ' ' -f 2) +for hostname in $hostnames; do + config="$config +Host $hostname + ServerAliveInterval 60 + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +cleanup() { + for pid in $(jobs -p); do + kill -KILL $pid > /dev/null 2>&1 || true + done + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +} + +trap 'cleanup' INT TERM EXIT + +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +mkfifo ssh_routes_in +mkfifo ssh_routes_out +mkfifo ssh_finalize_in +mkfifo ssh_finalize_out + +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/dgit-cluster-import-routes - < ssh_routes_out > ssh_routes_in & +ssh_routes_pid=$! +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/dgit-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ssh_finalize_pid=$! 
+ +exec 4> ssh_routes_out +exec 5> ssh_finalize_out + +for network_path in $network_paths; do + network_id=$(echo $network_path | awk -F/ '{print $(NF)}') + echo "$network_id" >&4 + read routes < ssh_routes_in + + for route in $routes; do + ghe-rsync -aHR --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/./$network_path" \ + "$route:$GHE_REMOTE_DATA_USER_DIR/repositories" & + done + + for pid in $(jobs -p); do + if [ $pid = $ssh_routes_pid -o $pid = $ssh_finalize_pid ]; then + continue + fi + wait $pid + ret_code=$? + if [ "$ret_code" != "0" ]; then + echo "$pid exited $ret_code" + exit $ret_code + fi + done + + echo "$network_id /data/repositories/$network_path $routes" >&5 + read output < ssh_finalize_in +done + +exec 4>&- +exec 5>&- + +# Ensure to flush output and close pipes +cat ssh_routes_in > /dev/null & +cat ssh_finalize_in > /dev/null & + +wait $ssh_routes_pid > /dev/null 2>&1 || true +wait $ssh_finalize_pid > /dev/null 2>&1 || true + +if [ -d $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/info ]; then + for route in `ghe-ssh "$GHE_HOSTNAME" ghe-cluster-each -r git -p`; do + if ! 
ghe-rsync -a --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/info/" \ + "$route:$GHE_REMOTE_DATA_USER_DIR/repositories/info"; then + echo "Error restoring /data/repositories/info to $route" + fi + done +else + ghe-ssh "$GHE_HOSTNAME" ghe-cluster-each -r git -- rm -f /data/repositories/info/* +fi diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist new file mode 100644 index 000000000..a4337bcce --- /dev/null +++ b/share/github-backup-utils/ghe-restore-repositories-gist @@ -0,0 +1,120 @@ +#!/usr/bin/env bash +#/ Usage: ghe-restore-repositories-gist +#/ Restore repositories fron an rsync snapshot of all Git repository data to a GitHub cluster. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-restore command when restoring into a cluster. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ -z "$*" ] && print_usage + +# Grab host arg +GHE_HOSTNAME="$1" + +# The snapshot to restore should be set by the ghe-restore command but this lets +# us run this script directly. +: ${GHE_RESTORE_SNAPSHOT:=current} + +# Find the gists to restore +gist_paths=$(cd $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/ && find repositories -mindepth 6 -maxdepth 7 -name \*.git | grep gist | cut -d / -f2-) + +# No need to restore anything, early exit +if [ -z "$gist_paths" ]; then + echo "Warning: Gist backup missing. Skipping ..." + exit 0 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. 
+ghe_remote_version_required "$GHE_HOSTNAME" + +# Generate SSH config for forwarding + +config="" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | cut -d ' ' -f 2) +for hostname in $hostnames; do + config="$config +Host $hostname + ServerAliveInterval 60 + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" + +cleanup() { + for pid in $(jobs -p); do + kill -KILL $pid > /dev/null 2>&1 || true + done + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +} + +trap 'cleanup' INT TERM EXIT + +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +mkfifo ssh_routes_in +mkfifo ssh_routes_out +mkfifo ssh_finalize_in +mkfifo ssh_finalize_out + +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/gist-cluster-import-routes - < ssh_routes_out > ssh_routes_in & +ssh_routes_pid=$! +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/gist-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ssh_finalize_pid=$! 
+ +exec 4> ssh_routes_out +exec 5> ssh_finalize_out + +for gist_path in $gist_paths; do + gist_id=$(basename $(echo $gist_path | awk -F/ '{print $(NF)}') .git) + echo "$gist_id" >&4 + read routes < ssh_routes_in + + for route in $routes; do + ghe-rsync -aHR --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/./$gist_path" \ + "$route:$GHE_REMOTE_DATA_USER_DIR/repositories" & + done + + for pid in $(jobs -p); do + if [ $pid = $ssh_routes_pid -o $pid = $ssh_finalize_pid ]; then + continue + fi + wait $pid + ret_code=$? + if [ "$ret_code" != "0" ]; then + echo "$pid exited $ret_code" + exit $ret_code + fi + done + + echo "$gist_id /data/repositories/$gist_path $routes" >&5 + read output < ssh_finalize_in +done + +exec 4>&- +exec 5>&- + +# Ensure to flush these and close the pipes +cat ssh_routes_in > /dev/null & +cat ssh_finalize_in > /dev/null & + +wait $ssh_routes_pid > /dev/null 2>&1 || true +wait $ssh_finalize_pid > /dev/null 2>&1 || true diff --git a/share/github-backup-utils/ghe-restore-settings b/share/github-backup-utils/ghe-restore-settings index 0c5aaa054..e3cfe46c9 100755 --- a/share/github-backup-utils/ghe-restore-settings +++ b/share/github-backup-utils/ghe-restore-settings @@ -37,7 +37,7 @@ fi # Bail out if we're restoring against a pre-2.x appliance. Everything below is # supported by v2.0 appliances only. if [ "$GHE_VERSION_MAJOR" -lt 2 ]; then - exit 0 + exit 0 fi echo "Restoring license ..." @@ -49,3 +49,16 @@ if [ -f "$GHE_RESTORE_SNAPSHOT_PATH/manage-password" ]; then cat "$GHE_RESTORE_SNAPSHOT_PATH/manage-password" | ghe-ssh "$GHE_HOSTNAME" -- "ghe-import-passwords" fi + +if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then + echo "Restoring cluster configuration ..." + cat "$GHE_RESTORE_SNAPSHOT_PATH/cluster.conf" | + ghe-ssh "$GHE_HOSTNAME" -- "sudo tee $GHE_REMOTE_CLUSTER_CONF_FILE >/dev/null" +fi + +# Restore SAML keys if present. 
+if [ -f "$GHE_RESTORE_SNAPSHOT_PATH/saml-keys.tar" ]; then + echo "Restoring SAML keys ..." + cat "$GHE_RESTORE_SNAPSHOT_PATH/saml-keys.tar" | + ghe-ssh "$GHE_HOSTNAME" -- "sudo tar -C $GHE_REMOTE_DATA_USER_DIR/common/ -xf -" +fi diff --git a/share/github-backup-utils/ghe-ssh b/share/github-backup-utils/ghe-ssh index 19e6e4029..67f7e0fff 100755 --- a/share/github-backup-utils/ghe-ssh +++ b/share/github-backup-utils/ghe-ssh @@ -13,7 +13,7 @@ cd $(dirname "$0")/../.. opts="$GHE_EXTRA_SSH_OPTS" while true; do case "$1" in - -p|-l|-o) + -p|-l|-o|-F) opts="$opts $1 $2" shift 2 ;; diff --git a/share/github-backup-utils/version b/share/github-backup-utils/version index 197c4d5c2..437459cd9 100644 --- a/share/github-backup-utils/version +++ b/share/github-backup-utils/version @@ -1 +1 @@ -2.4.0 +2.5.0 diff --git a/test/bin/ghe-export-es-audit-log b/test/bin/ghe-export-es-audit-log new file mode 120000 index 000000000..a772e4ad9 --- /dev/null +++ b/test/bin/ghe-export-es-audit-log @@ -0,0 +1 @@ +ghe-fake-export-command \ No newline at end of file From 7468a6633cdb6be48f0484874e3155ba5ebae725 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 9 Feb 2016 03:36:45 +0100 Subject: [PATCH 0037/2652] Make new scripts executable --- share/github-backup-utils/ghe-backup-alambic-cluster | 0 share/github-backup-utils/ghe-backup-es-audit-log | 0 share/github-backup-utils/ghe-backup-es-hookshot | 0 share/github-backup-utils/ghe-backup-pages-cluster | 0 share/github-backup-utils/ghe-backup-redis-cluster | 0 share/github-backup-utils/ghe-backup-repositories-cluster | 0 share/github-backup-utils/ghe-restore-alambic-cluster | 0 share/github-backup-utils/ghe-restore-es-audit-log | 0 share/github-backup-utils/ghe-restore-pages-dpages | 0 share/github-backup-utils/ghe-restore-repositories-dgit | 0 share/github-backup-utils/ghe-restore-repositories-gist | 0 11 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 share/github-backup-utils/ghe-backup-alambic-cluster 
mode change 100644 => 100755 share/github-backup-utils/ghe-backup-es-audit-log mode change 100644 => 100755 share/github-backup-utils/ghe-backup-es-hookshot mode change 100644 => 100755 share/github-backup-utils/ghe-backup-pages-cluster mode change 100644 => 100755 share/github-backup-utils/ghe-backup-redis-cluster mode change 100644 => 100755 share/github-backup-utils/ghe-backup-repositories-cluster mode change 100644 => 100755 share/github-backup-utils/ghe-restore-alambic-cluster mode change 100644 => 100755 share/github-backup-utils/ghe-restore-es-audit-log mode change 100644 => 100755 share/github-backup-utils/ghe-restore-pages-dpages mode change 100644 => 100755 share/github-backup-utils/ghe-restore-repositories-dgit mode change 100644 => 100755 share/github-backup-utils/ghe-restore-repositories-gist diff --git a/share/github-backup-utils/ghe-backup-alambic-cluster b/share/github-backup-utils/ghe-backup-alambic-cluster old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-backup-es-audit-log b/share/github-backup-utils/ghe-backup-es-audit-log old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-backup-es-hookshot b/share/github-backup-utils/ghe-backup-es-hookshot old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-backup-pages-cluster b/share/github-backup-utils/ghe-backup-pages-cluster old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-backup-redis-cluster b/share/github-backup-utils/ghe-backup-redis-cluster old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-backup-repositories-cluster b/share/github-backup-utils/ghe-backup-repositories-cluster old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-restore-es-audit-log b/share/github-backup-utils/ghe-restore-es-audit-log old 
mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-restore-pages-dpages b/share/github-backup-utils/ghe-restore-pages-dpages old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-restore-repositories-dgit b/share/github-backup-utils/ghe-restore-repositories-dgit old mode 100644 new mode 100755 diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist old mode 100644 new mode 100755 From 7cc66965637c22a11021c7b3ebea55cac7e80e15 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 9 Feb 2016 03:42:01 +0100 Subject: [PATCH 0038/2652] Bump Debian changelog --- debian/changelog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/debian/changelog b/debian/changelog index 26e45dd09..cbac875e1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +github-backup-utils (2.5.0) UNRELEASED; urgency=medium + + * Adds GitHub Enterpise 2.5 support + * Adds GitHub Enterprise Cluster support + * Backups and restores SAML keypairs + + -- Sergio Rubio Tue, 20 Oct 2015 00:02:37 +0000 + github-backup-utils (2.4.0) UNRELEASED; urgency=medium * Moves the in-progress detection to a separate file with PID which is From 71efc055c44f7d6258b948b0a5d79d635af1e8ba Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 9 Feb 2016 19:35:44 +0100 Subject: [PATCH 0039/2652] Bump debian changelog date --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index cbac875e1..1aaefd5cb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,7 +4,7 @@ github-backup-utils (2.5.0) UNRELEASED; urgency=medium * Adds GitHub Enterprise Cluster support * Backups and restores SAML keypairs - -- Sergio Rubio Tue, 20 Oct 2015 00:02:37 +0000 + -- Sergio Rubio Tue, 9 Feb 2016 00:02:37 +0000 github-backup-utils (2.4.0) UNRELEASED; urgency=medium From 23c41cc215d51e6e6b743a452d8b16bab5013efa Mon Sep 17 00:00:00 2001 
From: Sergio Rubio Date: Tue, 9 Feb 2016 20:03:18 +0100 Subject: [PATCH 0040/2652] :lipstick: Debian changelog --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 1aaefd5cb..fc9bbbefd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ github-backup-utils (2.5.0) UNRELEASED; urgency=medium * Adds GitHub Enterpise 2.5 support - * Adds GitHub Enterprise Cluster support + * Adds GitHub Enterprise Clustering support * Backups and restores SAML keypairs -- Sergio Rubio Tue, 9 Feb 2016 00:02:37 +0000 From 2308904635eacca3f8b09bd4933916f457d3a1b5 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Wed, 10 Feb 2016 17:05:05 +0100 Subject: [PATCH 0041/2652] Minor changes to the relase docs Also added a new Makefile target that wraps the `script/package-deb` script. --- Makefile | 3 +++ RELEASING.md | 17 +++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 33528374e..136411a16 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,9 @@ info: dist: @script/package-tarball +deb: + @script/package-deb + clean: rm -rf dist diff --git a/RELEASING.md b/RELEASING.md index 69a45cb41..692c74bfc 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -1,12 +1,13 @@ # Making a backup-utils release - 1. Add a new version and release notes to the `debian/changelog` file. - 2. Rev the `share/github-backup-utils/version` file. - 3. Tag the release: `git tag v2.0.2` - 4. Build that tarball package: `make dist` - 5. Install the debian devscripts package if necessary: + 1. Install the debian devscripts package: `sudo apt-get install devscripts` - 6. Build the deb package: `debuild -uc -us` - 7. Draft a new release at https://github.com/github/backup-utils/releases, - including the release notes and attaching the tarball and deb packages. + 2. 
Add a new version and release notes to the `debian/changelog` file: + `dch --newversion 2.6.0 --release-heuristic log` + 3. Rev the `share/github-backup-utils/version` file. + 4. Tag the release: `git tag v2.0.2` + 5. Build that tarball package: `make dist` + 6. Build the deb package: `make deb`. All the tests should pass. + 7. Draft a new release at https://github.com/github/backup-utils/releases, including the release notes and attaching the tarball and deb packages. + The dist tarball you should upload has the git revision in the file name, i.e. something like `github-backup-utils-v2.5.0-1-g23c41cc.tar.gz` 8. Push the head of the release to the 'stable' branch. From 251c13bb8a883be255b693b78e9f9d9c60813cad Mon Sep 17 00:00:00 2001 From: Melissa Smith Date: Thu, 11 Feb 2016 17:06:12 -0800 Subject: [PATCH 0042/2652] add note on adding SSH to target instance --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bdd373901..819d32ca7 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ After the initial backup, use the following commands: - The `ghe-backup` command creates incremental snapshots of repository data, along with full snapshots of all other pertinent data stores. - The `ghe-restore` command restores snapshots to the same or separate GitHub - Enterprise appliance. + Enterprise appliance. You must add the backup host's SSH key to the target GitHub Enterprise appliance before using this command. 
##### Example backup and restore usage From bdeac3fa88b7b970be33742d88c2760c1c39a8ab Mon Sep 17 00:00:00 2001 From: Melissa Smith Date: Thu, 11 Feb 2016 17:35:55 -0800 Subject: [PATCH 0043/2652] add line break --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 819d32ca7..d11838d3a 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,8 @@ After the initial backup, use the following commands: - The `ghe-backup` command creates incremental snapshots of repository data, along with full snapshots of all other pertinent data stores. - The `ghe-restore` command restores snapshots to the same or separate GitHub - Enterprise appliance. You must add the backup host's SSH key to the target GitHub Enterprise appliance before using this command. + Enterprise appliance. You must add the backup host's SSH key to the target + GitHub Enterprise appliance before using this command. ##### Example backup and restore usage From 027359bc66710d8eeb8fe0c9ccf6cc5fe693f831 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Fri, 12 Feb 2016 02:45:20 +0100 Subject: [PATCH 0044/2652] :lipstick: format --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d11838d3a..dd143f167 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ After the initial backup, use the following commands: along with full snapshots of all other pertinent data stores. - The `ghe-restore` command restores snapshots to the same or separate GitHub Enterprise appliance. You must add the backup host's SSH key to the target - GitHub Enterprise appliance before using this command. + GitHub Enterprise appliance before using this command. 
##### Example backup and restore usage From d8e88ee9444837b997ea28fa2423cc67df6e8747 Mon Sep 17 00:00:00 2001 From: Tomoaki Nosaka Date: Wed, 11 Nov 2015 22:12:42 +0900 Subject: [PATCH 0045/2652] add GHE_EXTRA_RSYNC_OPTS variable for backup.config --- backup.config-example | 4 ++++ share/github-backup-utils/ghe-rsync | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backup.config-example b/backup.config-example index 17e43074a..fa7e1b0e7 100644 --- a/backup.config-example +++ b/backup.config-example @@ -27,6 +27,10 @@ GHE_NUM_SNAPSHOTS=10 # #GHE_EXTRA_SSH_OPTS="" +# Any extra options passed to the rsync command. Nothing required by default +# +#GHE_EXTRA_RSYNC_OPTS="" + # Add s3 bucket for configuring which bucket to use in ghe-s3-backup and # ghe-s3-restore # GHE_S3_BUCKET="" diff --git a/share/github-backup-utils/ghe-rsync b/share/github-backup-utils/ghe-rsync index 7a234dccc..7b4151d54 100755 --- a/share/github-backup-utils/ghe-rsync +++ b/share/github-backup-utils/ghe-rsync @@ -7,11 +7,15 @@ set -o pipefail +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + # Filter vanished file warnings from both stdout (rsync versions < 3.x) and # stderr (rsync versions >= 3.x). The complex redirections are necessary to # filter stderr while also keeping stdout and stderr separated. IGNOREOUT='^(file has vanished: |rsync warning: some files vanished before they could be transferred)' -(rsync "${@}" 3>&1 1>&2 2>&3 3>&- | +(rsync $GHE_EXTRA_RSYNC_OPTS "${@}" 3>&1 1>&2 2>&3 3>&- | (egrep -v "$IGNOREOUT" || true)) 3>&1 1>&2 2>&3 3>&- | (egrep -v "$IGNOREOUT" || true) res=$? From 2ca97c84ac7ecbef8eefbe98d395dc8666305fcb Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Thu, 18 Feb 2016 12:29:56 +1100 Subject: [PATCH 0046/2652] Update ghe-host-check to detect extra port 22 error Treat `port 22: No route to host` errors from `ssh` the same as `port 22: Connection refused` errors. 
--- bin/ghe-host-check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ghe-host-check b/bin/ghe-host-check index 039ce168e..7d5833411 100755 --- a/bin/ghe-host-check +++ b/bin/ghe-host-check @@ -37,7 +37,7 @@ set -e if [ $rc -ne 0 ]; then case $rc in 255) - if echo "$output" | grep -i "port 22: connection refused\|Connection timed out during banner exchange" >/dev/null; then + if echo "$output" | grep -i "port 22: connection refused\|port 22: no route to host\|Connection timed out during banner exchange" >/dev/null; then exec "bin/$(basename $0)" "$hostname:122" fi From f350b6fec34393d36c49e97d970f263271f40bd7 Mon Sep 17 00:00:00 2001 From: Daniel Hwang Date: Thu, 18 Feb 2016 14:07:22 -0800 Subject: [PATCH 0047/2652] clustering: Add notes for GHE_EXTRA_SSH_OPTS --- README.md | 2 ++ backup.config-example | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dd143f167..032d96723 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,8 @@ download the most recent GitHub Enterprise version. host name. Additional options are available and documented in the configuration file but none are required for basic backup functionality. + * In a clustering environment, the `GHE_EXTRA_SSH_OPTS` key must be configured. + 3. Add the backup host's SSH key to the GitHub appliance as an *Authorized SSH key*. See [Adding an SSH key for shell access][3] for instructions. diff --git a/backup.config-example b/backup.config-example index fa7e1b0e7..ab71140bc 100644 --- a/backup.config-example +++ b/backup.config-example @@ -23,11 +23,13 @@ GHE_NUM_SNAPSHOTS=10 # #GHE_RESTORE_HOST="github-standby.example.com" -# Any extra options passed to the SSH command. Nothing required by default +# Any extra options passed to the SSH command. +# In a single instance environment, nothing is required by default. +# In a clustering environment, "-i path-to-ssh-private-key" is required. 
# #GHE_EXTRA_SSH_OPTS="" -# Any extra options passed to the rsync command. Nothing required by default +# Any extra options passed to the rsync command. Nothing required by default. # #GHE_EXTRA_RSYNC_OPTS="" From a5b27f357b8531f78ce8751d9519d90406ed68e4 Mon Sep 17 00:00:00 2001 From: Daniel Hwang Date: Thu, 18 Feb 2016 14:29:54 -0800 Subject: [PATCH 0048/2652] clustering: Explicitly use absolute path --- backup.config-example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backup.config-example b/backup.config-example index ab71140bc..239c8d37b 100644 --- a/backup.config-example +++ b/backup.config-example @@ -25,7 +25,7 @@ GHE_NUM_SNAPSHOTS=10 # Any extra options passed to the SSH command. # In a single instance environment, nothing is required by default. -# In a clustering environment, "-i path-to-ssh-private-key" is required. +# In a clustering environment, "-i abs-path-to-ssh-private-key" is required. # #GHE_EXTRA_SSH_OPTS="" From 4cccc7bf70ff7033b2fceb836c503454c5d14b73 Mon Sep 17 00:00:00 2001 From: Daniel Hwang Date: Thu, 18 Feb 2016 15:46:13 -0800 Subject: [PATCH 0049/2652] clustering: Be explicit in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 032d96723..d29ba9e43 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ download the most recent GitHub Enterprise version. host name. Additional options are available and documented in the configuration file but none are required for basic backup functionality. - * In a clustering environment, the `GHE_EXTRA_SSH_OPTS` key must be configured. + * In a clustering environment, the `GHE_EXTRA_SSH_OPTS` key must be configured with the `-i ` SSH option. 3. Add the backup host's SSH key to the GitHub appliance as an *Authorized SSH key*. See [Adding an SSH key for shell access][3] for instructions. 
From ab1bdda371d526850e773f93b74605443255f8e3 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Mon, 22 Feb 2016 20:26:58 +1100 Subject: [PATCH 0050/2652] Prevent multiple simultaneous restores --- bin/ghe-restore | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/ghe-restore b/bin/ghe-restore index 53742873c..c4ca82a9b 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -146,10 +146,6 @@ fi echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP ..." -# Update remote restore state file and setup failure trap -trap "update_restore_status failed" EXIT -update_restore_status "restoring" - # Verify the host has been fully configured at least once if when running # against v11.10.x appliances and the -c option wasn't specified. if [ "$GHE_VERSION_MAJOR" -le 1 ] && ! $restore_settings && ! $instance_configured; then @@ -176,6 +172,16 @@ if $instance_configured; then fi fi +# Make sure the appliance doesn't already have a restore underway +if [ "$GHE_VERSION_MAJOR" -ge 2 ] && ghe-ssh "$GHE_HOSTNAME" -- "sudo grep -q 'restoring' '$GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status' 2>/dev/null"; then + echo "Error: $GHE_HOSTNAME already has a restore underway. Aborting." 1>&2 + exit 1 +fi + +# Update remote restore state file and setup failure trap +trap "update_restore_status failed" EXIT +update_restore_status "restoring" + # Restore settings and license if restoring to an unconfigured appliance or when # specified manually. 
if $restore_settings; then From a4ab951b0325263863bc39b0ffa1efa325a22228 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Mon, 22 Feb 2016 20:57:42 +1100 Subject: [PATCH 0051/2652] Tests for simultaneous restores --- test/test-ghe-restore.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/test-ghe-restore.sh b/test/test-ghe-restore.sh index 5ca5e5f0f..f761d0f85 100755 --- a/test/test-ghe-restore.sh +++ b/test/test-ghe-restore.sh @@ -410,3 +410,37 @@ begin_test "ghe-restore with tarball strategy" echo "$output" | grep -q 'fake ghe-export-repositories data' ) end_test + +begin_test "ghe-restore aborts when another restore is underway" +( + set -e + # This test is only valid for version 2 and above + if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then + rm -rf "$GHE_REMOTE_ROOT_DIR" + setup_remote_metadata + + # create file used to determine if instance has been configured. + touch "$GHE_REMOTE_ROOT_DIR/etc/github/configured" + + # create file used to determine if instance is in maintenance mode. + mkdir -p "$GHE_REMOTE_DATA_DIR/github/current/public/system" + touch "$GHE_REMOTE_DATA_DIR/github/current/public/system/maintenance.html" + + # create file to indicate restore is underway + echo "restoring" > "$GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status" + + # set restore host environ var + GHE_RESTORE_HOST=127.0.0.1 + export GHE_RESTORE_HOST + + # run ghe-restore and write output to file for asserting against + # this should fail due to the appliance being in an unconfigured state + ! 
ghe-restore -v > "$TRASHDIR/restore-out" 2>&1 + + cat $TRASHDIR/restore-out + + # verify that ghe-restore failed due to the appliance not being configured + grep -q -e "already has a restore underway" "$TRASHDIR/restore-out" + fi +) +end_test From 2143e95daf97b2517514c242b32a769d29693950 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Mon, 22 Feb 2016 21:18:10 +1100 Subject: [PATCH 0052/2652] Fix test comment --- test/test-ghe-restore.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-ghe-restore.sh b/test/test-ghe-restore.sh index f761d0f85..b467eaf4e 100755 --- a/test/test-ghe-restore.sh +++ b/test/test-ghe-restore.sh @@ -439,7 +439,7 @@ begin_test "ghe-restore aborts when another restore is underway" cat $TRASHDIR/restore-out - # verify that ghe-restore failed due to the appliance not being configured + # verify that ghe-restore failed due a restore already being underway grep -q -e "already has a restore underway" "$TRASHDIR/restore-out" fi ) From f77595b8063a4f3338c614f255dbbdb7aaa84689 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Mon, 22 Feb 2016 21:33:02 +1100 Subject: [PATCH 0053/2652] Minor style tidy up --- bin/ghe-restore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ghe-restore b/bin/ghe-restore index c4ca82a9b..b7776ee9d 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -173,7 +173,7 @@ if $instance_configured; then fi # Make sure the appliance doesn't already have a restore underway -if [ "$GHE_VERSION_MAJOR" -ge 2 ] && ghe-ssh "$GHE_HOSTNAME" -- "sudo grep -q 'restoring' '$GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status' 2>/dev/null"; then +if [ "$GHE_VERSION_MAJOR" -ge 2 ] && ghe-ssh "$GHE_HOSTNAME" -- "sudo grep -q restoring $GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status 2>/dev/null"; then echo "Error: $GHE_HOSTNAME already has a restore underway. Aborting." 
1>&2 exit 1 fi From 0925b39ef602669903dd3517c0ad890e265f5294 Mon Sep 17 00:00:00 2001 From: Daniel Hwang Date: Tue, 23 Feb 2016 10:01:59 -0800 Subject: [PATCH 0054/2652] Remove experimental s3 support --- backup.config-example | 4 ---- share/github-backup-utils/ghe-s3-backup | 26 ------------------------ share/github-backup-utils/ghe-s3-restore | 22 -------------------- 3 files changed, 52 deletions(-) delete mode 100755 share/github-backup-utils/ghe-s3-backup delete mode 100755 share/github-backup-utils/ghe-s3-restore diff --git a/backup.config-example b/backup.config-example index 239c8d37b..89dd3ce85 100644 --- a/backup.config-example +++ b/backup.config-example @@ -32,7 +32,3 @@ GHE_NUM_SNAPSHOTS=10 # Any extra options passed to the rsync command. Nothing required by default. # #GHE_EXTRA_RSYNC_OPTS="" - -# Add s3 bucket for configuring which bucket to use in ghe-s3-backup and -# ghe-s3-restore -# GHE_S3_BUCKET="" diff --git a/share/github-backup-utils/ghe-s3-backup b/share/github-backup-utils/ghe-s3-backup deleted file mode 100755 index 63501a793..000000000 --- a/share/github-backup-utils/ghe-s3-backup +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-s3-backup-all -#/ Take snapshots of all GitHub Enterprise data, including the mysql database -#/ and backup to S3. -set -e - -# Bring in the backup configuration -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -if [ -e $HOME/.s3cfg ]; then - echo "Using existing ~/.s3cfg ..." -else - # Configure s3cmd if there isn't a config file already. - s3cmd --configure -fi - -# Run the backup script. -ghe-backup - -# Create the bucket if it doesn't exist. -s3cmd mb s3://$GHE_S3_BUCKET - -# Upload to S3. 
-# --delete-removed is to delete items in s3 that have been removed on the host -s3cmd sync --delete-removed "$GHE_DATA_DIR"/current/ s3://$GHE_S3_BUCKET diff --git a/share/github-backup-utils/ghe-s3-restore b/share/github-backup-utils/ghe-s3-restore deleted file mode 100755 index 2554da8da..000000000 --- a/share/github-backup-utils/ghe-s3-restore +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -#/ Usage: ghe-s3-restore -#/ Restores backup files from S3. -set -e - -# Bring in the backup configuration. -cd $(dirname "$0")/../.. -. share/github-backup-utils/ghe-backup-config - -if [ -e $HOME/.s3cfg ]; then - echo "Using existing ~/.s3cfg ..." -else - # Configure s3cmd if there isn't a config file already. - s3cmd --configure -fi - -# Restore from S3 into a new snapshot directory. -mkdir -p "$GHE_SNAPSHOT_DIR" -s3cmd --preserve get s3://$GHE_S3_BUCKET/* "$GHE_SNAPSHOT_DIR" - -# Run the restore script. -ghe-restore -s "$GHE_SNAPSHOT_DIR" From 6c5dc101d566dfdc66e97918f1e9e1442c4d1e01 Mon Sep 17 00:00:00 2001 From: Keeran Raj Hawoldar Date: Tue, 23 Feb 2016 23:31:17 +0000 Subject: [PATCH 0055/2652] Highlight symlink importance in snapshots --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index d29ba9e43..4c53d0845 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,11 @@ date and time the snapshot was taken. Each snapshot directory contains a full backup snapshot of all relevant data stores. Repository, Search, and Pages data is stored efficiently via hard links. +*Please note* Symlinks must be maintained when archiving backup snapshots. +Dereferencing or excluding symlinks, or storing the snapshot contents on a +filesystem which does not support symlinks (e.g. Amazon S3) will result in operational +problems when the data is restored. + The following example shows a snapshot file hierarchy for hourly frequency. 
There are five snapshot directories, with the `current` symlink pointing to the most recent successful snapshot: From 777bb4e66af904c6ac11cf73fc7c58ba87522334 Mon Sep 17 00:00:00 2001 From: Keeran Raj Hawoldar Date: Wed, 24 Feb 2016 00:24:48 +0000 Subject: [PATCH 0056/2652] Remove unnecessary S3 reference --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c53d0845..9fcb034f2 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ is stored efficiently via hard links. *Please note* Symlinks must be maintained when archiving backup snapshots. Dereferencing or excluding symlinks, or storing the snapshot contents on a -filesystem which does not support symlinks (e.g. Amazon S3) will result in operational +filesystem which does not support symlinks will result in operational problems when the data is restored. The following example shows a snapshot file hierarchy for hourly frequency. From 540460bbe63c15444067573af7d68b4f69108973 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Wed, 24 Feb 2016 16:24:05 +1100 Subject: [PATCH 0057/2652] Fix logging output --- bin/ghe-backup | 2 +- bin/ghe-restore | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/ghe-backup b/bin/ghe-backup index 564597dfe..5b990db27 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -94,7 +94,7 @@ if ghe-ssh "$GHE_HOSTNAME" -- \ fi # Log backup start message in /var/log/syslog on remote instance -ghe_remote_logger "Starting backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP ..." +ghe_remote_logger "Starting backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP ..." # Determine whether to use the rsync or tarball backup strategy based on the # remote appliance version. 
The tarball strategy must be used with GitHub diff --git a/bin/ghe-restore b/bin/ghe-restore index 53742873c..9d427fff5 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -144,7 +144,7 @@ fi # Log restore start message locally and in /var/log/syslog on remote instance echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP ..." +ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT ..." # Update remote restore state file and setup failure trap trap "update_restore_status failed" EXIT @@ -275,7 +275,7 @@ trap "" EXIT update_restore_status "complete" # Log restore complete message in /var/log/syslog on remote instance -ghe_remote_logger "Completed restore from $(hostname) / snapshot ${GHE_SNAPSHOT_TIMESTAMP}." +ghe_remote_logger "Completed restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT successfully." if ! $cluster; then echo "Restoring SSH host keys ..." From 50f34334cfda8d57a1f16a7d94c1558556eecf09 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Wed, 24 Feb 2016 17:54:01 +1100 Subject: [PATCH 0058/2652] Remote logging wording --- bin/ghe-backup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/ghe-backup b/bin/ghe-backup index 5b990db27..5b297d7b2 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -94,7 +94,7 @@ if ghe-ssh "$GHE_HOSTNAME" -- \ fi # Log backup start message in /var/log/syslog on remote instance -ghe_remote_logger "Starting backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP ..." +ghe_remote_logger "Starting backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP ..." # Determine whether to use the rsync or tarball backup strategy based on the # remote appliance version. 
The tarball strategy must be used with GitHub @@ -213,10 +213,10 @@ echo "Completed backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP at $ # Exit non-zero and list the steps that failed. if [ -z "$failures" ]; then - ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP successfully." + ghe_remote_logger "Completed backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP successfully." else steps="$(echo $failures | sed 's/ /, /g')" - ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}." + ghe_remote_logger "Completed backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}." echo "Error: Snapshot incomplete. Some steps failed: ${steps}. " exit 1 fi From 5fec8c0223dba3458820a6a238928517cf7625a3 Mon Sep 17 00:00:00 2001 From: Daniel Hwang Date: Tue, 23 Feb 2016 22:57:09 -0800 Subject: [PATCH 0059/2652] Use consistent logging format as ghe-backup --- bin/ghe-restore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/ghe-restore b/bin/ghe-restore index 9d427fff5..0244d5c51 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -144,7 +144,7 @@ fi # Log restore start message locally and in /var/log/syslog on remote instance echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT ..." +ghe_remote_logger "Starting restore from $(hostname) in snapshot $GHE_RESTORE_SNAPSHOT ..." # Update remote restore state file and setup failure trap trap "update_restore_status failed" EXIT @@ -275,7 +275,7 @@ trap "" EXIT update_restore_status "complete" # Log restore complete message in /var/log/syslog on remote instance -ghe_remote_logger "Completed restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT successfully." +ghe_remote_logger "Completed restore from $(hostname) in snapshot $GHE_RESTORE_SNAPSHOT successfully." 
if ! $cluster; then echo "Restoring SSH host keys ..." From 8c7cbe7bf201702af5e4cc3834ace4e436672e52 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Thu, 25 Feb 2016 09:41:57 +1100 Subject: [PATCH 0060/2652] Revert log message format --- bin/ghe-backup | 4 ++-- bin/ghe-restore | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/ghe-backup b/bin/ghe-backup index 5b297d7b2..564597dfe 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -213,10 +213,10 @@ echo "Completed backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP at $ # Exit non-zero and list the steps that failed. if [ -z "$failures" ]; then - ghe_remote_logger "Completed backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP successfully." + ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP successfully." else steps="$(echo $failures | sed 's/ /, /g')" - ghe_remote_logger "Completed backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}." + ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}." echo "Error: Snapshot incomplete. Some steps failed: ${steps}. " exit 1 fi diff --git a/bin/ghe-restore b/bin/ghe-restore index 0244d5c51..52b650138 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -144,7 +144,7 @@ fi # Log restore start message locally and in /var/log/syslog on remote instance echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" -ghe_remote_logger "Starting restore from $(hostname) in snapshot $GHE_RESTORE_SNAPSHOT ..." +ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT ..." 
# Update remote restore state file and setup failure trap trap "update_restore_status failed" EXIT @@ -275,7 +275,7 @@ trap "" EXIT update_restore_status "complete" # Log restore complete message in /var/log/syslog on remote instance -ghe_remote_logger "Completed restore from $(hostname) in snapshot $GHE_RESTORE_SNAPSHOT successfully." +ghe_remote_logger "Completed restore from $(hostname) / snapshot ${GHE_RESTORE_SNAPSHOT}." if ! $cluster; then echo "Restoring SSH host keys ..." From f36f431386c3128512566a25040fa5f1bdf17dae Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 16 Feb 2016 17:28:25 +0100 Subject: [PATCH 0061/2652] Ignore archived gists The server will send a gist-not-found line when the gist is not in the gists database table. --- share/github-backup-utils/ghe-restore-repositories-gist | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist index a4337bcce..07eafa0ed 100755 --- a/share/github-backup-utils/ghe-restore-repositories-gist +++ b/share/github-backup-utils/ghe-restore-repositories-gist @@ -85,6 +85,11 @@ for gist_path in $gist_paths; do echo "$gist_id" >&4 read routes < ssh_routes_in + if [ "$routes" = 'gist-not-found' ]; then + echo " Warning: gist $gist_id not found in the database (deleted?), ignoring." 
+ continue + fi + for route in $routes; do ghe-rsync -aHR --delete \ -e "ssh -q $opts -p $port -F $config_file -l $user" \ From 716e792f291c28bb72c9dcd6e56efd9c6d037cfc Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Mon, 22 Feb 2016 13:58:43 +0100 Subject: [PATCH 0062/2652] Ignore invalid page IDs Needs https://github.com/github/github/pull/51274 --- share/github-backup-utils/ghe-restore-pages-dpages | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/share/github-backup-utils/ghe-restore-pages-dpages b/share/github-backup-utils/ghe-restore-pages-dpages index ff2746f47..e22fa31de 100755 --- a/share/github-backup-utils/ghe-restore-pages-dpages +++ b/share/github-backup-utils/ghe-restore-pages-dpages @@ -85,6 +85,11 @@ for pages_path in $pages_paths; do echo "$page_id" >&4 read routes < ssh_routes_in + if [ "$routes" = 'page-id-not-found' ]; then + echo " Warning: page ID $page_id not found in the database, ignoring." + continue + fi + for route in $routes; do ghe-rsync -aHR --delete \ -e "ssh -q $opts -p $port -F $config_file -l $user" \ From 5aa5f6dd9337aa387d316484f1537ec21cb752c1 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 23 Feb 2016 12:00:54 +0100 Subject: [PATCH 0063/2652] Read output from storage-cluster-import-finalize Needs #XXXXX --- share/github-backup-utils/ghe-restore-alambic-cluster | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster index 368c545e3..c91b9e731 100755 --- a/share/github-backup-utils/ghe-restore-alambic-cluster +++ b/share/github-backup-utils/ghe-restore-alambic-cluster @@ -108,14 +108,15 @@ for storage_path in $storage_paths; do done echo "$oid $routes" >&5 + read output < ssh_finalize_in done exec 4>&- exec 5>&- # Ensure to flush these and close the pipes -cat ssh_routes_in > /dev/null -cat ssh_finalize_in > /dev/null +cat ssh_routes_in > /dev/null & +cat ssh_finalize_in > /dev/null & wait 
$ssh_routes_pid > /dev/null 1>&2 || true wait $ssh_finalize_pid > /dev/null 1>&2 || true From fd8d9de2f13156b576acdda8fa9224c21395b853 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Tue, 23 Feb 2016 14:18:03 +0100 Subject: [PATCH 0064/2652] Add ServerAliveInterval Mostly for consistency with other scripts. --- share/github-backup-utils/ghe-restore-alambic-cluster | 1 + 1 file changed, 1 insertion(+) diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster index c91b9e731..a20998a6b 100755 --- a/share/github-backup-utils/ghe-restore-alambic-cluster +++ b/share/github-backup-utils/ghe-restore-alambic-cluster @@ -47,6 +47,7 @@ hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | for hostname in $hostnames; do config="$config Host $hostname + ServerAliveInterval 60 ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" done From c94486190f2e0f0880d7490e8356d6fae729af5e Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Wed, 24 Feb 2016 14:28:27 +0100 Subject: [PATCH 0065/2652] ssh_finalize_in pipe not required Also fixed some stderr redirections. 
--- .../github-backup-utils/ghe-restore-alambic-cluster | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster index a20998a6b..a20f5f6b5 100755 --- a/share/github-backup-utils/ghe-restore-alambic-cluster +++ b/share/github-backup-utils/ghe-restore-alambic-cluster @@ -60,22 +60,21 @@ cleanup() { for pid in $(jobs -p); do kill -KILL $pid > /dev/null 1>&2 || true done - rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_out } trap 'cleanup' INT TERM EXIT -rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_out mkfifo ssh_routes_in mkfifo ssh_routes_out -mkfifo ssh_finalize_in mkfifo ssh_finalize_out echo "Setting up storage processes" ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-routes - < ssh_routes_out > ssh_routes_in & ssh_routes_pid=$! -ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-finalize - < ssh_finalize_out & ssh_finalize_pid=$! 
echo "Set up storage processes" @@ -109,7 +108,6 @@ for storage_path in $storage_paths; do done echo "$oid $routes" >&5 - read output < ssh_finalize_in done exec 4>&- @@ -117,7 +115,6 @@ exec 5>&- # Ensure to flush these and close the pipes cat ssh_routes_in > /dev/null & -cat ssh_finalize_in > /dev/null & -wait $ssh_routes_pid > /dev/null 1>&2 || true -wait $ssh_finalize_pid > /dev/null 1>&2 || true +wait $ssh_routes_pid > /dev/null 2>&1 || true +wait $ssh_finalize_pid > /dev/null 2>&1 || true From 0b1c4d0ace1575f332ba86b282982fbe166147d1 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Thu, 25 Feb 2016 18:15:01 +0100 Subject: [PATCH 0066/2652] Clustering: ghe-restore fixes More robust restores when restoring gists, pages and storage data into a cluster. --- .../ghe-restore-alambic-cluster | 15 +++++++-------- .../github-backup-utils/ghe-restore-pages-dpages | 5 +++++ .../ghe-restore-repositories-gist | 5 +++++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster b/share/github-backup-utils/ghe-restore-alambic-cluster index 368c545e3..a20f5f6b5 100755 --- a/share/github-backup-utils/ghe-restore-alambic-cluster +++ b/share/github-backup-utils/ghe-restore-alambic-cluster @@ -47,6 +47,7 @@ hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | for hostname in $hostnames; do config="$config Host $hostname + ServerAliveInterval 60 ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" done @@ -59,22 +60,21 @@ cleanup() { for pid in $(jobs -p); do kill -KILL $pid > /dev/null 1>&2 || true done - rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out + rm -rf $config_file ssh_routes_in ssh_routes_out ssh_finalize_out } trap 'cleanup' INT TERM EXIT -rm -rf ssh_routes_in ssh_routes_out ssh_finalize_in ssh_finalize_out +rm -rf ssh_routes_in ssh_routes_out ssh_finalize_out mkfifo ssh_routes_in mkfifo ssh_routes_out -mkfifo 
ssh_finalize_in mkfifo ssh_finalize_out echo "Setting up storage processes" ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-routes - < ssh_routes_out > ssh_routes_in & ssh_routes_pid=$! -ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-finalize - < ssh_finalize_out > ssh_finalize_in & +ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-import-finalize - < ssh_finalize_out & ssh_finalize_pid=$! echo "Set up storage processes" @@ -114,8 +114,7 @@ exec 4>&- exec 5>&- # Ensure to flush these and close the pipes -cat ssh_routes_in > /dev/null -cat ssh_finalize_in > /dev/null +cat ssh_routes_in > /dev/null & -wait $ssh_routes_pid > /dev/null 1>&2 || true -wait $ssh_finalize_pid > /dev/null 1>&2 || true +wait $ssh_routes_pid > /dev/null 2>&1 || true +wait $ssh_finalize_pid > /dev/null 2>&1 || true diff --git a/share/github-backup-utils/ghe-restore-pages-dpages b/share/github-backup-utils/ghe-restore-pages-dpages index ff2746f47..e22fa31de 100755 --- a/share/github-backup-utils/ghe-restore-pages-dpages +++ b/share/github-backup-utils/ghe-restore-pages-dpages @@ -85,6 +85,11 @@ for pages_path in $pages_paths; do echo "$page_id" >&4 read routes < ssh_routes_in + if [ "$routes" = 'page-id-not-found' ]; then + echo " Warning: page ID $page_id not found in the database, ignoring." + continue + fi + for route in $routes; do ghe-rsync -aHR --delete \ -e "ssh -q $opts -p $port -F $config_file -l $user" \ diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist index a4337bcce..07eafa0ed 100755 --- a/share/github-backup-utils/ghe-restore-repositories-gist +++ b/share/github-backup-utils/ghe-restore-repositories-gist @@ -85,6 +85,11 @@ for gist_path in $gist_paths; do echo "$gist_id" >&4 read routes < ssh_routes_in + if [ "$routes" = 'gist-not-found' ]; then + echo " Warning: gist $gist_id not found in the database (deleted?), ignoring." 
+ continue + fi + for route in $routes; do ghe-rsync -aHR --delete \ -e "ssh -q $opts -p $port -F $config_file -l $user" \ From 11385242fcf4e60f9aebdede3a018b59158c2088 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Thu, 25 Feb 2016 18:19:56 +0100 Subject: [PATCH 0067/2652] Consistent warning message --- share/github-backup-utils/ghe-restore-repositories-gist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist index 07eafa0ed..a76866b06 100755 --- a/share/github-backup-utils/ghe-restore-repositories-gist +++ b/share/github-backup-utils/ghe-restore-repositories-gist @@ -86,7 +86,7 @@ for gist_path in $gist_paths; do read routes < ssh_routes_in if [ "$routes" = 'gist-not-found' ]; then - echo " Warning: gist $gist_id not found in the database (deleted?), ignoring." + echo " Warning: gist $gist_id not found in the database, ignoring." continue fi From 011bdd8f0ddc488abd7607b4e563fffc29cbace8 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Thu, 25 Feb 2016 18:47:49 +0100 Subject: [PATCH 0068/2652] :lipstick: warning message --- share/github-backup-utils/ghe-restore-repositories-gist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-restore-repositories-gist b/share/github-backup-utils/ghe-restore-repositories-gist index a76866b06..ea399e786 100755 --- a/share/github-backup-utils/ghe-restore-repositories-gist +++ b/share/github-backup-utils/ghe-restore-repositories-gist @@ -86,7 +86,7 @@ for gist_path in $gist_paths; do read routes < ssh_routes_in if [ "$routes" = 'gist-not-found' ]; then - echo " Warning: gist $gist_id not found in the database, ignoring." + echo " Warning: gist ID $gist_id not found in the database, ignoring." 
continue fi From 0a078b4a598e639d32fa6e4c2f395deaa2fb745d Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Thu, 25 Feb 2016 19:50:53 +0100 Subject: [PATCH 0069/2652] Revert "Prevent multiple restores from running at the same time" --- bin/ghe-restore | 14 ++++---------- test/test-ghe-restore.sh | 34 ---------------------------------- 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/bin/ghe-restore b/bin/ghe-restore index 0ba3ff90d..52b650138 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -146,6 +146,10 @@ fi echo "Starting restore of $GHE_HOSTNAME from snapshot $GHE_RESTORE_SNAPSHOT" ghe_remote_logger "Starting restore from $(hostname) / snapshot $GHE_RESTORE_SNAPSHOT ..." +# Update remote restore state file and setup failure trap +trap "update_restore_status failed" EXIT +update_restore_status "restoring" + # Verify the host has been fully configured at least once if when running # against v11.10.x appliances and the -c option wasn't specified. if [ "$GHE_VERSION_MAJOR" -le 1 ] && ! $restore_settings && ! $instance_configured; then @@ -172,16 +176,6 @@ if $instance_configured; then fi fi -# Make sure the appliance doesn't already have a restore underway -if [ "$GHE_VERSION_MAJOR" -ge 2 ] && ghe-ssh "$GHE_HOSTNAME" -- "sudo grep -q restoring $GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status 2>/dev/null"; then - echo "Error: $GHE_HOSTNAME already has a restore underway. Aborting." 1>&2 - exit 1 -fi - -# Update remote restore state file and setup failure trap -trap "update_restore_status failed" EXIT -update_restore_status "restoring" - # Restore settings and license if restoring to an unconfigured appliance or when # specified manually. 
if $restore_settings; then diff --git a/test/test-ghe-restore.sh b/test/test-ghe-restore.sh index b467eaf4e..5ca5e5f0f 100755 --- a/test/test-ghe-restore.sh +++ b/test/test-ghe-restore.sh @@ -410,37 +410,3 @@ begin_test "ghe-restore with tarball strategy" echo "$output" | grep -q 'fake ghe-export-repositories data' ) end_test - -begin_test "ghe-restore aborts when another restore is underway" -( - set -e - # This test is only valid for version 2 and above - if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then - rm -rf "$GHE_REMOTE_ROOT_DIR" - setup_remote_metadata - - # create file used to determine if instance has been configured. - touch "$GHE_REMOTE_ROOT_DIR/etc/github/configured" - - # create file used to determine if instance is in maintenance mode. - mkdir -p "$GHE_REMOTE_DATA_DIR/github/current/public/system" - touch "$GHE_REMOTE_DATA_DIR/github/current/public/system/maintenance.html" - - # create file to indicate restore is underway - echo "restoring" > "$GHE_REMOTE_DATA_USER_DIR/common/ghe-restore-status" - - # set restore host environ var - GHE_RESTORE_HOST=127.0.0.1 - export GHE_RESTORE_HOST - - # run ghe-restore and write output to file for asserting against - # this should fail due to the appliance being in an unconfigured state - ! 
ghe-restore -v > "$TRASHDIR/restore-out" 2>&1 - - cat $TRASHDIR/restore-out - - # verify that ghe-restore failed due a restore already being underway - grep -q -e "already has a restore underway" "$TRASHDIR/restore-out" - fi -) -end_test From 02b4552675a1dc3d4e8298a0c23f108c6887cc55 Mon Sep 17 00:00:00 2001 From: Ben Gollmer Date: Thu, 25 Feb 2016 14:42:23 -0700 Subject: [PATCH 0070/2652] Ask for index field instead of using cut --- share/github-backup-utils/ghe-backup-es-audit-log | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-backup-es-audit-log b/share/github-backup-utils/ghe-backup-es-audit-log index 7ef6e40e3..c67e7c7dd 100755 --- a/share/github-backup-utils/ghe-backup-es-audit-log +++ b/share/github-backup-utils/ghe-backup-es-audit-log @@ -19,7 +19,7 @@ ghe_remote_version_required "$host" # Make sure root backup dir exists if this is the first run mkdir -p "$GHE_SNAPSHOT_DIR/audit-log" -indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/audit_log*"' | cut -d ' ' -f 3) +indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/audit_log*?h=index"') || true current_index=audit_log-$(ghe-ssh "$host" 'date +"%Y-%m"') for index in $indices; do From 061905edcef6a03f62284c1e28c152d7f43bf734 Mon Sep 17 00:00:00 2001 From: Ben Gollmer Date: Thu, 25 Feb 2016 16:09:03 -0700 Subject: [PATCH 0071/2652] Return empty index list for ghe-backup-es-audit-log --- test/bin/curl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/bin/curl b/test/bin/curl index 64545c39a..d1234b872 100755 --- a/test/bin/curl +++ b/test/bin/curl @@ -3,6 +3,11 @@ # Fake curl command stub for tests. 
set -e +# Return empty list of indexes for ghe-backup-es-audit-log +if echo "$@" | grep -q '_cat/indices/audit_log\*?h=index$'; then + exit 0 +fi + # Write args to stdout echo "$@" From d4c6de828bf0f167696ca4ba739f955dab109574 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Fri, 26 Feb 2016 20:23:42 +1100 Subject: [PATCH 0072/2652] Use .sync_in_progress lock file during restore --- .../ghe-restore-repositories-rsync | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/share/github-backup-utils/ghe-restore-repositories-rsync b/share/github-backup-utils/ghe-restore-repositories-rsync index d0ae56aeb..b99c74756 100755 --- a/share/github-backup-utils/ghe-restore-repositories-rsync +++ b/share/github-backup-utils/ghe-restore-repositories-rsync @@ -19,13 +19,52 @@ GHE_HOSTNAME="$1" # Perform a host-check and establish GHE_REMOTE_XXX variables. ghe_remote_version_required "$GHE_HOSTNAME" +# Remote sync-in-progress file location. When this file exists, Git GC +# operations are disabled on the GitHub instance. +sync_in_progress_file="$GHE_REMOTE_DATA_USER_DIR/repositories/.sync_in_progress" + # The snapshot to restore should be set by the ghe-restore command but this lets # us run this script directly. : ${GHE_RESTORE_SNAPSHOT:=current} +# Removes the remote sync-in-progress file on exit, re-enabling GC operations +# on the remote instance. +cleanup() { + ghe-ssh "$GHE_HOSTNAME" -- "sudo rm -f '$sync_in_progress_file'" +} +trap 'cleanup' EXIT +trap 'exit $?' INT # ^C always terminate + +# Touch the sync-in-progress file, disabling GC operations, and wait for all +# active GC processes to finish on the remote side. +echo " + set -e + sudo -u git touch '$sync_in_progress_file' + + sanity=0 + while [ \$sanity -lt $GHE_GIT_COOLDOWN_PERIOD ]; do + # note: the bracket synta[x] below is to prevent matches against the + # grep process itself. + if ps axo args | grep -E -e '^git( -.*)? nw-repac[k]( |$)' -e '^git( -.*)? 
g[c]( |$)' >/dev/null; then + sleep 1 + sanity=\$(( sanity + 1 )) + else + exit 0 + fi + done + exit 7 +" | ghe-ssh "$GHE_HOSTNAME" -- /bin/sh || { + res=$? + if [ $res = 7 ]; then + echo "Error: Git GC processes remain after $GHE_GIT_COOLDOWN_PERIOD seconds. Aborting..." 1>&2 + fi + exit $res +} + # Transfer all git repository data from the latest snapshot to the GitHub # instance in a single rsync invocation. ghe-rsync -avH --delete \ + --exclude ".sync_in_progress" \ -e "ghe-ssh -p $(ssh_port_part "$GHE_HOSTNAME")" \ --rsync-path="sudo -u git rsync" \ "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/" \ From a89ec70c61377904a643fea1f0a13241617c87b5 Mon Sep 17 00:00:00 2001 From: Sergio Rubio Date: Mon, 29 Feb 2016 13:18:52 +0100 Subject: [PATCH 0073/2652] Exit 1 if ssh/curl fail to retrieve the indices --- share/github-backup-utils/ghe-backup-es-audit-log | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/share/github-backup-utils/ghe-backup-es-audit-log b/share/github-backup-utils/ghe-backup-es-audit-log index c67e7c7dd..014b66fb9 100755 --- a/share/github-backup-utils/ghe-backup-es-audit-log +++ b/share/github-backup-utils/ghe-backup-es-audit-log @@ -19,7 +19,11 @@ ghe_remote_version_required "$host" # Make sure root backup dir exists if this is the first run mkdir -p "$GHE_SNAPSHOT_DIR/audit-log" -indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/audit_log*?h=index"') || true +if ! indices=$(ghe-ssh "$host" 'curl -s "localhost:9201/_cat/indices/audit_log*?h=index"'); then + echo "Error: failed to retrieve audit log indices." 
1>&2 + exit 1 +fi + current_index=audit_log-$(ghe-ssh "$host" 'date +"%Y-%m"') for index in $indices; do From 5e3c9096d6fa7653136a3f9965ee0639d7670817 Mon Sep 17 00:00:00 2001 From: Steven Honson Date: Tue, 1 Mar 2016 17:44:55 +1100 Subject: [PATCH 0074/2652] Change to bash --- README.md | 6 +++--- bin/ghe-backup | 2 +- bin/ghe-host-check | 2 +- bin/ghe-restore | 2 +- script/cibuild | 2 +- script/package-deb | 2 +- script/package-tarball | 2 +- share/github-backup-utils/ghe-backup-alambic-cluster | 2 +- share/github-backup-utils/ghe-backup-config | 2 +- share/github-backup-utils/ghe-backup-es-audit-log | 2 +- share/github-backup-utils/ghe-backup-es-hookshot | 2 +- share/github-backup-utils/ghe-backup-es-rsync | 2 +- share/github-backup-utils/ghe-backup-es-tarball | 2 +- share/github-backup-utils/ghe-backup-pages-cluster | 2 +- share/github-backup-utils/ghe-backup-pages-rsync | 2 +- share/github-backup-utils/ghe-backup-pages-tarball | 2 +- share/github-backup-utils/ghe-backup-redis | 2 +- share/github-backup-utils/ghe-backup-redis-cluster | 2 +- share/github-backup-utils/ghe-backup-repositories-cluster | 2 +- share/github-backup-utils/ghe-backup-repositories-rsync | 2 +- share/github-backup-utils/ghe-backup-repositories-tarball | 2 +- share/github-backup-utils/ghe-backup-settings | 2 +- share/github-backup-utils/ghe-backup-userdata | 2 +- share/github-backup-utils/ghe-maintenance-mode-disable | 2 +- share/github-backup-utils/ghe-maintenance-mode-enable | 2 +- share/github-backup-utils/ghe-maintenance-mode-status | 2 +- share/github-backup-utils/ghe-prune-snapshots | 2 +- share/github-backup-utils/ghe-restore-es-audit-log | 2 +- share/github-backup-utils/ghe-restore-es-rsync | 2 +- share/github-backup-utils/ghe-restore-es-tarball | 2 +- share/github-backup-utils/ghe-restore-pages-rsync | 2 +- share/github-backup-utils/ghe-restore-pages-tarball | 2 +- share/github-backup-utils/ghe-restore-repositories-rsync | 2 +- 
share/github-backup-utils/ghe-restore-repositories-tarball | 2 +- share/github-backup-utils/ghe-restore-settings | 2 +- share/github-backup-utils/ghe-restore-snapshot-path | 2 +- share/github-backup-utils/ghe-restore-userdata | 2 +- share/github-backup-utils/ghe-ssh | 2 +- test/bin/chown | 2 +- test/bin/curl | 2 +- test/bin/enterprise-configure | 2 +- test/bin/ghe-config-apply | 2 +- test/bin/ghe-es-snapshot | 2 +- test/bin/ghe-fake-export-command | 2 +- test/bin/ghe-fake-import-command | 2 +- test/bin/ghe-maintenance | 2 +- test/bin/ghe-service-ensure-elasticsearch | 2 +- test/bin/ghe-service-ensure-mysql | 2 +- test/bin/ionice-stub | 2 +- test/bin/python | 2 +- test/bin/redis-cli | 2 +- test/bin/ssh | 2 +- test/test-bashisms.sh | 2 +- test/test-ghe-backup-config.sh | 2 +- test/test-ghe-backup-repositories-rsync-nw.sh | 2 +- test/test-ghe-backup-repositories-rsync.sh | 2 +- test/test-ghe-backup.sh | 2 +- test/test-ghe-host-check.sh | 2 +- test/test-ghe-prune-snapshots.sh | 2 +- test/test-ghe-restore.sh | 2 +- test/test-ghe-ssh.sh | 2 +- test/testlib.sh | 2 +- 62 files changed, 64 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 9fcb034f2..1f527009a 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ storage and must have network connectivity with the GitHub Enterprise appliance. ##### Backup host requirements Backup host software requirements are modest: Linux or other modern Unix -operating system with [rsync][4] v2.6.4 or newer. +operating system with bash and [rsync][4] v2.6.4 or newer. The backup host must be able to establish network connections outbound to the GitHub appliance over SSH. TCP port 122 is used to backup GitHub Enterprise 2.0 or newer instances, and TCP port 22 is used for older versions (11.10.34X). @@ -211,8 +211,8 @@ backup snapshot of all relevant data stores. Repository, Search, and Pages data is stored efficiently via hard links. *Please note* Symlinks must be maintained when archiving backup snapshots. 
-Dereferencing or excluding symlinks, or storing the snapshot contents on a -filesystem which does not support symlinks will result in operational +Dereferencing or excluding symlinks, or storing the snapshot contents on a +filesystem which does not support symlinks will result in operational problems when the data is restored. The following example shows a snapshot file hierarchy for hourly frequency. diff --git a/bin/ghe-backup b/bin/ghe-backup index 564597dfe..39048db1e 100755 --- a/bin/ghe-backup +++ b/bin/ghe-backup @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup [-v] #/ Take snapshots of all GitHub Enterprise data, including Git repository data, #/ the MySQL database, instance settings, GitHub Pages data, etc. diff --git a/bin/ghe-host-check b/bin/ghe-host-check index 7d5833411..b75a3b59d 100755 --- a/bin/ghe-host-check +++ b/bin/ghe-host-check @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-host-check [] #/ Verify connectivity with the GitHub Enterprise host. When no is #/ provided, the $GHE_HOSTNAME configured in backup.config is assumed. diff --git a/bin/ghe-restore b/bin/ghe-restore index 52b650138..7146de8ec 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore [-v] [-s ] [] #/ Restores a GitHub instance from local backup snapshots. The is the #/ hostname or IP of the GitHub instance. The may be omitted when diff --git a/script/cibuild b/script/cibuild index 553a8d035..28839c30c 100755 --- a/script/cibuild +++ b/script/cibuild @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # Usage: script/cibuild [--no-package] set -e diff --git a/script/package-deb b/script/package-deb index 6e3d9e829..b42f0d5fd 100755 --- a/script/package-deb +++ b/script/package-deb @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # Usage: script/package-deb # Script to build a deb release package from the current HEAD version. 
# The package version comes from the debian/changelog file so that should diff --git a/script/package-tarball b/script/package-tarball index 6229cc33d..65f4ed51d 100755 --- a/script/package-tarball +++ b/script/package-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # Usage: script/package-tarball # Script to build a tarball release package from the current HEAD version. # The package version comes from `git-describe --tags' so the release tag should diff --git a/share/github-backup-utils/ghe-backup-alambic-cluster b/share/github-backup-utils/ghe-backup-alambic-cluster index 7d4276692..f92a7180e 100755 --- a/share/github-backup-utils/ghe-backup-alambic-cluster +++ b/share/github-backup-utils/ghe-backup-alambic-cluster @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-alambic-cluster #/ Take an online, incremental snapshot of all Alambic Storage data #/ diff --git a/share/github-backup-utils/ghe-backup-config b/share/github-backup-utils/ghe-backup-config index 521a91d2d..c2ff85059 100755 --- a/share/github-backup-utils/ghe-backup-config +++ b/share/github-backup-utils/ghe-backup-config @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # Usage: . ghe-backup-config # GitHub Enterprise backup shell configuration. # diff --git a/share/github-backup-utils/ghe-backup-es-audit-log b/share/github-backup-utils/ghe-backup-es-audit-log index 7ef6e40e3..454349f59 100755 --- a/share/github-backup-utils/ghe-backup-es-audit-log +++ b/share/github-backup-utils/ghe-backup-es-audit-log @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-es-audit-log #/ Take a backup of audit logs in ElasticSearch. 
#/ diff --git a/share/github-backup-utils/ghe-backup-es-hookshot b/share/github-backup-utils/ghe-backup-es-hookshot index 7a69a7e52..9ec41f46a 100755 --- a/share/github-backup-utils/ghe-backup-es-hookshot +++ b/share/github-backup-utils/ghe-backup-es-hookshot @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-es-hookshot #/ Take a backup of hookshot logs in ElasticSearch. #/ diff --git a/share/github-backup-utils/ghe-backup-es-rsync b/share/github-backup-utils/ghe-backup-es-rsync index c3d31cca5..50cb3606c 100755 --- a/share/github-backup-utils/ghe-backup-es-rsync +++ b/share/github-backup-utils/ghe-backup-es-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-es-rsync #/ Take an online, incremental snapshot of Elasticsearch indices. #/ diff --git a/share/github-backup-utils/ghe-backup-es-tarball b/share/github-backup-utils/ghe-backup-es-tarball index ebce38c47..febfdb718 100755 --- a/share/github-backup-utils/ghe-backup-es-tarball +++ b/share/github-backup-utils/ghe-backup-es-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-es-tarball #/ Take a tarball snapshot of all Elasticsearch data. #/ diff --git a/share/github-backup-utils/ghe-backup-pages-cluster b/share/github-backup-utils/ghe-backup-pages-cluster index a0807fe5f..9cdd54e43 100755 --- a/share/github-backup-utils/ghe-backup-pages-cluster +++ b/share/github-backup-utils/ghe-backup-pages-cluster @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-pages-cluster #/ Take an online, incremental snapshot of all Pages data #/ diff --git a/share/github-backup-utils/ghe-backup-pages-rsync b/share/github-backup-utils/ghe-backup-pages-rsync index 9de913183..318562a91 100755 --- a/share/github-backup-utils/ghe-backup-pages-rsync +++ b/share/github-backup-utils/ghe-backup-pages-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-pages-rsync #/ Take an online, incremental snapshot of all Pages data. 
#/ diff --git a/share/github-backup-utils/ghe-backup-pages-tarball b/share/github-backup-utils/ghe-backup-pages-tarball index 7e8bda265..9c56620ef 100755 --- a/share/github-backup-utils/ghe-backup-pages-tarball +++ b/share/github-backup-utils/ghe-backup-pages-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-pages-tarball #/ Take a tarball snapshot of all Pages data. #/ diff --git a/share/github-backup-utils/ghe-backup-redis b/share/github-backup-utils/ghe-backup-redis index dbd1e796f..2ebe3cd3a 100755 --- a/share/github-backup-utils/ghe-backup-redis +++ b/share/github-backup-utils/ghe-backup-redis @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-redis #/ Take a snapshot of all Redis data. This is needed because older versions of #/ the remote side ghe-export-redis command use a blocking SAVE instead of a diff --git a/share/github-backup-utils/ghe-backup-redis-cluster b/share/github-backup-utils/ghe-backup-redis-cluster index d571adb8d..fb4e7e13f 100755 --- a/share/github-backup-utils/ghe-backup-redis-cluster +++ b/share/github-backup-utils/ghe-backup-redis-cluster @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-redis-cluster #/ Take a snapshot of all Redis data. This is needed because older versions of #/ the remote side ghe-export-redis command use a blocking SAVE instead of a diff --git a/share/github-backup-utils/ghe-backup-repositories-cluster b/share/github-backup-utils/ghe-backup-repositories-cluster index 54a6931cc..2ccb1dfed 100755 --- a/share/github-backup-utils/ghe-backup-repositories-cluster +++ b/share/github-backup-utils/ghe-backup-repositories-cluster @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-repositories-cluster #/ Take an online, incremental snapshot of all Git repository data. 
#/ diff --git a/share/github-backup-utils/ghe-backup-repositories-rsync b/share/github-backup-utils/ghe-backup-repositories-rsync index be94160bb..9cc31f02f 100755 --- a/share/github-backup-utils/ghe-backup-repositories-rsync +++ b/share/github-backup-utils/ghe-backup-repositories-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-repositories-rsync #/ Take an online, incremental snapshot of all Git repository data. #/ diff --git a/share/github-backup-utils/ghe-backup-repositories-tarball b/share/github-backup-utils/ghe-backup-repositories-tarball index 09ee15ac4..87f695608 100755 --- a/share/github-backup-utils/ghe-backup-repositories-tarball +++ b/share/github-backup-utils/ghe-backup-repositories-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-repositories-tarball #/ Take a tarball snapshot of all Git repository data. #/ diff --git a/share/github-backup-utils/ghe-backup-settings b/share/github-backup-utils/ghe-backup-settings index a6691c740..25e6d7ae7 100755 --- a/share/github-backup-utils/ghe-backup-settings +++ b/share/github-backup-utils/ghe-backup-settings @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-settings #/ Restore settings from a snapshot to the given . set -e diff --git a/share/github-backup-utils/ghe-backup-userdata b/share/github-backup-utils/ghe-backup-userdata index 2c39a9827..0800b54d5 100755 --- a/share/github-backup-utils/ghe-backup-userdata +++ b/share/github-backup-utils/ghe-backup-userdata @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-backup-userdata #/ Take an online, incremental snapshot of a user data directory. 
This is used #/ for a number of different simple datastores kept under /data/user on the diff --git a/share/github-backup-utils/ghe-maintenance-mode-disable b/share/github-backup-utils/ghe-maintenance-mode-disable index 5df69d785..9c04cff2c 100755 --- a/share/github-backup-utils/ghe-maintenance-mode-disable +++ b/share/github-backup-utils/ghe-maintenance-mode-disable @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-maintenance-mode-disable #/ Disable maintenance mode on GitHub appliance at . This opens up access #/ to the appliance. diff --git a/share/github-backup-utils/ghe-maintenance-mode-enable b/share/github-backup-utils/ghe-maintenance-mode-enable index eee86cd3d..74673e402 100755 --- a/share/github-backup-utils/ghe-maintenance-mode-enable +++ b/share/github-backup-utils/ghe-maintenance-mode-enable @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-maintenance-mode-enable [-w] #/ Enable maintenance mode on GitHub appliance at . This locks down all #/ access to the appliance to prevent writes to datastores and waits for all diff --git a/share/github-backup-utils/ghe-maintenance-mode-status b/share/github-backup-utils/ghe-maintenance-mode-status index 679ebb894..2922989cc 100755 --- a/share/github-backup-utils/ghe-maintenance-mode-status +++ b/share/github-backup-utils/ghe-maintenance-mode-status @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-maintenance-mode-status #/ Checks the status of maintenance mode on GitHub appliance at . set -e diff --git a/share/github-backup-utils/ghe-prune-snapshots b/share/github-backup-utils/ghe-prune-snapshots index 78a30e2e4..051fd7d75 100755 --- a/share/github-backup-utils/ghe-prune-snapshots +++ b/share/github-backup-utils/ghe-prune-snapshots @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-prune-snapshots #/ Keep N latest backup snapshots. 
set -e diff --git a/share/github-backup-utils/ghe-restore-es-audit-log b/share/github-backup-utils/ghe-restore-es-audit-log index e224dcfc6..7014d0332 100755 --- a/share/github-backup-utils/ghe-restore-es-audit-log +++ b/share/github-backup-utils/ghe-restore-es-audit-log @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-es-audit-log #/ Take a backup of audit logs in ElasticSearch. #/ diff --git a/share/github-backup-utils/ghe-restore-es-rsync b/share/github-backup-utils/ghe-restore-es-rsync index d1b769339..f8d93af88 100755 --- a/share/github-backup-utils/ghe-restore-es-rsync +++ b/share/github-backup-utils/ghe-restore-es-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-es-rsync #/ Restore an rsync snapshot of all Elasticsearch data to a GitHub instance. #/ diff --git a/share/github-backup-utils/ghe-restore-es-tarball b/share/github-backup-utils/ghe-restore-es-tarball index df5e0e91a..e876b6666 100755 --- a/share/github-backup-utils/ghe-restore-es-tarball +++ b/share/github-backup-utils/ghe-restore-es-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-es-tarball #/ Restore a tarball snapshot of all ES data to a GitHub instance. #/ diff --git a/share/github-backup-utils/ghe-restore-pages-rsync b/share/github-backup-utils/ghe-restore-pages-rsync index ecda7113b..927459f44 100755 --- a/share/github-backup-utils/ghe-restore-pages-rsync +++ b/share/github-backup-utils/ghe-restore-pages-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-pages-rsync #/ Restore an rsync snapshot of all Pages data to a GitHub instance. 
#/ diff --git a/share/github-backup-utils/ghe-restore-pages-tarball b/share/github-backup-utils/ghe-restore-pages-tarball index 9a4daf5e1..e7a40c840 100755 --- a/share/github-backup-utils/ghe-restore-pages-tarball +++ b/share/github-backup-utils/ghe-restore-pages-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-pages-tarball #/ Restore a tarball snapshot of all Pages data to a GitHub instance. #/ diff --git a/share/github-backup-utils/ghe-restore-repositories-rsync b/share/github-backup-utils/ghe-restore-repositories-rsync index d0ae56aeb..418f50a23 100755 --- a/share/github-backup-utils/ghe-restore-repositories-rsync +++ b/share/github-backup-utils/ghe-restore-repositories-rsync @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-repositories-rsync #/ Restore an rsync snapshot of all Git repository data to a GitHub instance. #/ diff --git a/share/github-backup-utils/ghe-restore-repositories-tarball b/share/github-backup-utils/ghe-restore-repositories-tarball index 3b41830a6..1345c9f74 100755 --- a/share/github-backup-utils/ghe-restore-repositories-tarball +++ b/share/github-backup-utils/ghe-restore-repositories-tarball @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-repositories-tarball #/ Restore a tarball snapshot of all Git repository data to a GitHub instance. #/ diff --git a/share/github-backup-utils/ghe-restore-settings b/share/github-backup-utils/ghe-restore-settings index e3cfe46c9..20116ee87 100755 --- a/share/github-backup-utils/ghe-restore-settings +++ b/share/github-backup-utils/ghe-restore-settings @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-settings #/ Restore settings from a snapshot to the given . 
set -e diff --git a/share/github-backup-utils/ghe-restore-snapshot-path b/share/github-backup-utils/ghe-restore-snapshot-path index a5a0db8db..18f155917 100755 --- a/share/github-backup-utils/ghe-restore-snapshot-path +++ b/share/github-backup-utils/ghe-restore-snapshot-path @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-snapshot-path [snapshot] #/ #/ Print the path to the given snapshot. Defaults to current if no argument given. diff --git a/share/github-backup-utils/ghe-restore-userdata b/share/github-backup-utils/ghe-restore-userdata index 3ee5942fc..7757546e3 100755 --- a/share/github-backup-utils/ghe-restore-userdata +++ b/share/github-backup-utils/ghe-restore-userdata @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-restore-userdata #/ Restore a special user data directory via rsync. This is used #/ for a number of different simple datastores kept under /data/user on the diff --git a/share/github-backup-utils/ghe-ssh b/share/github-backup-utils/ghe-ssh index 67f7e0fff..7cc3811ae 100755 --- a/share/github-backup-utils/ghe-ssh +++ b/share/github-backup-utils/ghe-ssh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash #/ Usage: ghe-ssh [