Skip to content

Commit 2e9c55f

Browse files
committed
[Issue #240] Handle SIGPIPE. Also, when the remote agent encounters an error condition during execution of asynchronous tasks, the error message is saved in a global variable and reported when the agent status is checked
1 parent 88cb60f commit 2e9c55f

File tree

9 files changed

+330
-41
lines changed

9 files changed

+330
-41
lines changed

src/archive.c

+23-2
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,8 @@ push_file_internal_uncompressed(const char *wal_file_name, const char *pg_xlog_d
421421
int partial_try_count = 0;
422422
int partial_file_size = 0;
423423
bool partial_is_stale = true;
424+
/* remote agent error message */
425+
char *errmsg = NULL;
424426

425427
/* from path */
426428
join_path_components(from_fullpath, pg_xlog_dir, wal_file_name);
@@ -579,7 +581,7 @@ push_file_internal_uncompressed(const char *wal_file_name, const char *pg_xlog_d
579581
from_fullpath, strerror(errno));
580582
}
581583

582-
if (read_len > 0 && fio_write(out, buf, read_len) != read_len)
584+
if (read_len > 0 && fio_write_async(out, buf, read_len) != read_len)
583585
{
584586
fio_unlink(to_fullpath_part, FIO_BACKUP_HOST);
585587
elog(ERROR, "Cannot write to destination temp file \"%s\": %s",
@@ -593,6 +595,14 @@ push_file_internal_uncompressed(const char *wal_file_name, const char *pg_xlog_d
593595
/* close source file */
594596
fclose(in);
595597

598+
/* Writing is asynchronous in case of push in remote mode, so check agent status */
599+
if (fio_check_error_fd(out, &errmsg))
600+
{
601+
fio_unlink(to_fullpath_part, FIO_BACKUP_HOST);
602+
elog(ERROR, "Cannot write to the remote file \"%s\": %s",
603+
to_fullpath_part, errmsg);
604+
}
605+
596606
/* close temp file */
597607
if (fio_close(out) != 0)
598608
{
@@ -652,6 +662,8 @@ push_file_internal_gz(const char *wal_file_name, const char *pg_xlog_dir,
652662
int partial_try_count = 0;
653663
int partial_file_size = 0;
654664
bool partial_is_stale = true;
665+
/* remote agent error message */
666+
char *errmsg = NULL;
655667

656668
/* from path */
657669
join_path_components(from_fullpath, pg_xlog_dir, wal_file_name);
@@ -804,6 +816,7 @@ push_file_internal_gz(const char *wal_file_name, const char *pg_xlog_dir,
804816
}
805817

806818
/* copy content */
819+
/* TODO: move to separate function */
807820
for (;;)
808821
{
809822
size_t read_len = 0;
@@ -831,7 +844,15 @@ push_file_internal_gz(const char *wal_file_name, const char *pg_xlog_dir,
831844
/* close source file */
832845
fclose(in);
833846

834-
/* close temp file */
847+
/* Writing is asynchronous in case of push in remote mode, so check agent status */
848+
if (fio_check_error_fd_gz(out, &errmsg))
849+
{
850+
fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST);
851+
elog(ERROR, "Cannot write to the remote compressed file \"%s\": %s",
852+
to_fullpath_gz_part, errmsg);
853+
}
854+
855+
/* close temp file, TODO: make it synchronous */
835856
if (fio_gzclose(out) != 0)
836857
{
837858
fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST);

src/data.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -1084,21 +1084,22 @@ restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_vers
10841084
cur_pos_out = write_pos;
10851085
}
10861086

1087-
/* If page is compressed and restore is in remote mode, send compressed
1088-
* page to the remote side.
1087+
/*
1088+
* If page is compressed and restore is in remote mode,
1089+
* send compressed page to the remote side.
10891090
*/
10901091
if (is_compressed)
10911092
{
10921093
ssize_t rc;
1093-
rc = fio_fwrite_compressed(out, page.data, compressed_size, file->compress_alg);
1094+
rc = fio_fwrite_async_compressed(out, page.data, compressed_size, file->compress_alg);
10941095

10951096
if (!fio_is_remote_file(out) && rc != BLCKSZ)
10961097
elog(ERROR, "Cannot write block %u of \"%s\": %s, size: %u",
10971098
blknum, to_fullpath, strerror(errno), compressed_size);
10981099
}
10991100
else
11001101
{
1101-
if (fio_fwrite(out, page.data, BLCKSZ) != BLCKSZ)
1102+
if (fio_fwrite_async(out, page.data, BLCKSZ) != BLCKSZ)
11021103
elog(ERROR, "Cannot write block %u of \"%s\": %s",
11031104
blknum, to_fullpath, strerror(errno));
11041105
}

src/pg_probackup.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1185,7 +1185,7 @@ extern datapagemap_t *fio_get_lsn_map(const char *fullpath, uint32 checksum_vers
11851185
fio_location location);
11861186
extern pid_t fio_check_postmaster(const char *pgdata, fio_location location);
11871187

1188-
extern int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg);
1188+
extern int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg, char **errormsg);
11891189

11901190
/* return codes for fio_send_pages() and fio_send_file() */
11911191
#define SEND_OK (0)

src/restore.c

+5
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,7 @@ restore_files(void *arg)
11091109
bool already_exists = false;
11101110
PageState *checksum_map = NULL; /* it should take ~1.5MB at most */
11111111
datapagemap_t *lsn_map = NULL; /* it should take 16kB at most */
1112+
char *errmsg = NULL; /* remote agent error message */
11121113
pgFile *dest_file = (pgFile *) parray_get(arguments->dest_files, i);
11131114

11141115
/* Directories were created before */
@@ -1262,6 +1263,10 @@ restore_files(void *arg)
12621263
}
12631264

12641265
done:
1266+
/* Writing is asynchronous in case of restore in remote mode, so check the agent status */
1267+
if (fio_check_error_file(out, &errmsg))
1268+
elog(ERROR, "Cannot write to the remote file \"%s\": %s", to_fullpath, errmsg);
1269+
12651270
/* close file */
12661271
if (fio_fclose(out) != 0)
12671272
elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath,

0 commit comments

Comments
 (0)