3535 * worker and dealt with just by having the worker exit normally. The launcher
3636 * will launch a new worker again later, per schedule.
3737 *
38- * When the worker is done vacuuming it sends SIGUSR1 to the launcher. The
38+ * When the worker is done vacuuming it sends SIGUSR2 to the launcher. The
3939 * launcher then wakes up and is able to launch another worker, if the schedule
4040 * is so tight that a new worker is needed immediately. At this time the
4141 * launcher can also balance the settings for the various remaining workers'
5555 *
5656 *
5757 * IDENTIFICATION
58- * $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.103 2009/08/27 17:18:44 alvherre Exp $
58+ * $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.104 2009/08/31 19:40:59 tgl Exp $
5959 *
6060 *-------------------------------------------------------------------------
6161 */
6767#include <time.h>
6868#include <unistd.h>
6969
70- #include "access/genam.h"
7170#include "access/heapam.h"
7271#include "access/reloptions.h"
7372#include "access/transam.h"
7473#include "access/xact.h"
7574#include "catalog/dependency.h"
76- #include "catalog/indexing.h"
7775#include "catalog/namespace.h"
7876#include "catalog/pg_database.h"
7977#include "commands/dbcommands.h"
8078#include "commands/vacuum.h"
81- #include "libpq/hba.h"
8279#include "libpq/pqsignal.h"
8380#include "miscadmin.h"
8481#include "pgstat.h"
8582#include "postmaster/autovacuum.h"
8683#include "postmaster/fork_process.h"
8784#include "postmaster/postmaster.h"
8885#include "storage/bufmgr.h"
89- #include "storage/fd.h"
9086#include "storage/ipc.h"
9187#include "storage/pmsignal.h"
9288#include "storage/proc.h"
93- #include "storage/procarray.h"
9489#include "storage/procsignal.h"
9590#include "storage/sinvaladt.h"
9691#include "tcop/tcopprot.h"
97- #include "utils/dynahash.h"
98- #include "utils/flatfiles.h"
9992#include "utils/fmgroids.h"
10093#include "utils/lsyscache.h"
10194#include "utils/memutils.h"
10295#include "utils/ps_status.h"
96+ #include "utils/snapmgr.h"
10397#include "utils/syscache.h"
10498#include "utils/tqual.h"
10599
@@ -133,7 +127,7 @@ static bool am_autovacuum_worker = false;
133127
134128/* Flags set by signal handlers */
135129static volatile sig_atomic_t got_SIGHUP = false;
136- static volatile sig_atomic_t got_SIGUSR1 = false;
130+ static volatile sig_atomic_t got_SIGUSR2 = false;
137131static volatile sig_atomic_t got_SIGTERM = false;
138132
139133/* Comparison point for determining whether freeze_max_age is exceeded */
@@ -303,9 +297,8 @@ static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
303297 PgStat_StatDBEntry * dbentry );
304298static void autovac_report_activity (autovac_table * tab );
305299static void avl_sighup_handler (SIGNAL_ARGS );
306- static void avl_sigusr1_handler (SIGNAL_ARGS );
300+ static void avl_sigusr2_handler (SIGNAL_ARGS );
307301static void avl_sigterm_handler (SIGNAL_ARGS );
308- static void avl_quickdie (SIGNAL_ARGS );
309302static void autovac_refresh_stats (void );
310303
311304
@@ -407,6 +400,9 @@ AutoVacLauncherMain(int argc, char *argv[])
407400 /* Identify myself via ps */
408401 init_ps_display ("autovacuum launcher process" , "" , "" , "" );
409402
403+ ereport (LOG ,
404+ (errmsg ("autovacuum launcher started" )));
405+
410406 if (PostAuthDelay )
411407 pg_usleep (PostAuthDelay * 1000000L );
412408
@@ -424,20 +420,20 @@ AutoVacLauncherMain(int argc, char *argv[])
424420#endif
425421
426422 /*
427- * Set up signal handlers. Since this is an auxiliary process, it has
428- * particular signal requirements -- no deadlock checker or sinval
429- * catchup, for example.
423+ * Set up signal handlers. We operate on databases much like a regular
424+ * backend, so we use the same signal handling. See equivalent code in
425+ * tcop/postgres.c.
430426 */
431427 pqsignal (SIGHUP , avl_sighup_handler );
432-
433- pqsignal (SIGINT , SIG_IGN );
428+ pqsignal (SIGINT , StatementCancelHandler );
434429 pqsignal (SIGTERM , avl_sigterm_handler );
435- pqsignal (SIGQUIT , avl_quickdie );
436- pqsignal (SIGALRM , SIG_IGN );
430+
431+ pqsignal (SIGQUIT , quickdie );
432+ pqsignal (SIGALRM , handle_sig_alarm );
437433
438434 pqsignal (SIGPIPE , SIG_IGN );
439- pqsignal (SIGUSR1 , avl_sigusr1_handler );
440- pqsignal (SIGUSR2 , SIG_IGN );
435+ pqsignal (SIGUSR1 , procsignal_sigusr1_handler );
436+ pqsignal (SIGUSR2 , avl_sigusr2_handler );
441437 pqsignal (SIGFPE , FloatExceptionHandler );
442438 pqsignal (SIGCHLD , SIG_DFL );
443439
@@ -451,9 +447,13 @@ AutoVacLauncherMain(int argc, char *argv[])
451447 * had to do some stuff with LWLocks).
452448 */
453449#ifndef EXEC_BACKEND
454- InitAuxiliaryProcess ();
450+ InitProcess ();
455451#endif
456452
453+ InitPostgres (NULL , InvalidOid , NULL , NULL );
454+
455+ SetProcessingMode (NormalProcessing );
456+
457457 /*
458458 * Create a memory context that we will do all our work in. We do this so
459459 * that we can reset the context during error recovery and thereby avoid
@@ -466,11 +466,10 @@ AutoVacLauncherMain(int argc, char *argv[])
466466 ALLOCSET_DEFAULT_MAXSIZE );
467467 MemoryContextSwitchTo (AutovacMemCxt );
468468
469-
470469 /*
471470 * If an exception is encountered, processing resumes here.
472471 *
473- * This code is heavily based on bgwriter.c, q.v.
472+ * This code is a stripped down version of PostgresMain error recovery.
474473 */
475474 if (sigsetjmp (local_sigjmp_buf , 1 ) != 0 )
476475 {
@@ -480,17 +479,16 @@ AutoVacLauncherMain(int argc, char *argv[])
480479 /* Prevents interrupts while cleaning up */
481480 HOLD_INTERRUPTS ();
482481
482+ /* Forget any pending QueryCancel request */
483+ QueryCancelPending = false;
484+ disable_sig_alarm (true);
485+ QueryCancelPending = false; /* again in case timeout occurred */
486+
483487 /* Report the error to the server log */
484488 EmitErrorReport ();
485489
486- /*
487- * These operations are really just a minimal subset of
488- * AbortTransaction(). We don't have very many resources to worry
489- * about, but we do have LWLocks.
490- */
491- LWLockReleaseAll ();
492- AtEOXact_Files ();
493- AtEOXact_HashTables (false);
490+ /* Abort the current transaction in order to recover */
491+ AbortCurrentTransaction ();
494492
495493 /*
496494 * Now return to normal top-level context and clear ErrorContext for
@@ -525,9 +523,6 @@ AutoVacLauncherMain(int argc, char *argv[])
525523 /* We can now handle ereport(ERROR) */
526524 PG_exception_stack = & local_sigjmp_buf ;
527525
528- ereport (LOG ,
529- (errmsg ("autovacuum launcher started" )));
530-
531526 /* must unblock signals before calling rebuild_database_list */
532527 PG_SETMASK (& UnBlockSig );
533528
@@ -561,11 +556,14 @@ AutoVacLauncherMain(int argc, char *argv[])
561556 * necessity for manual cleanup of all postmaster children.
562557 */
563558 if (!PostmasterIsAlive (true))
564- exit (1 );
559+ proc_exit (1 );
565560
566561 launcher_determine_sleep ((AutoVacuumShmem -> av_freeWorkers != NULL ),
567562 false, & nap );
568563
564+ /* Allow sinval catchup interrupts while sleeping */
565+ EnableCatchupInterrupt ();
566+
569567 /*
570568 * Sleep for a while according to schedule.
571569 *
@@ -595,12 +593,14 @@ AutoVacLauncherMain(int argc, char *argv[])
595593 * necessity for manual cleanup of all postmaster children.
596594 */
597595 if (!PostmasterIsAlive (true))
598- exit (1 );
596+ proc_exit (1 );
599597
600- if (got_SIGTERM || got_SIGHUP || got_SIGUSR1 )
598+ if (got_SIGTERM || got_SIGHUP || got_SIGUSR2 )
601599 break ;
602600 }
603601
602+ DisableCatchupInterrupt ();
603+
604604 /* the normal shutdown case */
605605 if (got_SIGTERM )
606606 break ;
@@ -610,7 +610,7 @@ AutoVacLauncherMain(int argc, char *argv[])
610610 got_SIGHUP = false;
611611 ProcessConfigFile (PGC_SIGHUP );
612612
613- /* shutdown requested in config file */
613+ /* shutdown requested in config file? */
614614 if (!AutoVacuumingActive ())
615615 break ;
616616
@@ -627,9 +627,9 @@ AutoVacLauncherMain(int argc, char *argv[])
627627 * a worker finished, or postmaster signalled failure to start a
628628 * worker
629629 */
630- if (got_SIGUSR1 )
630+ if (got_SIGUSR2 )
631631 {
632- got_SIGUSR1 = false;
632+ got_SIGUSR2 = false;
633633
634634 /* rebalance cost limits, if needed */
635635 if (AutoVacuumShmem -> av_signal [AutoVacRebalance ])
@@ -1306,7 +1306,7 @@ launch_worker(TimestampTz now)
13061306
13071307/*
13081308 * Called from postmaster to signal a failure to fork a process to become
1309- * worker. The postmaster should kill(SIGUSR1) the launcher shortly
1309+ * worker. The postmaster should kill(SIGUSR2) the launcher shortly
13101310 * after calling this function.
13111311 */
13121312void
@@ -1322,11 +1322,11 @@ avl_sighup_handler(SIGNAL_ARGS)
13221322 got_SIGHUP = true;
13231323}
13241324
1325- /* SIGUSR1: a worker is up and running, or just finished, or failed to fork */
1325+ /* SIGUSR2: a worker is up and running, or just finished, or failed to fork */
13261326static void
1327- avl_sigusr1_handler (SIGNAL_ARGS )
1327+ avl_sigusr2_handler (SIGNAL_ARGS )
13281328{
1329- got_SIGUSR1 = true;
1329+ got_SIGUSR2 = true;
13301330}
13311331
13321332/* SIGTERM: time to die */
@@ -1336,38 +1336,6 @@ avl_sigterm_handler(SIGNAL_ARGS)
13361336 got_SIGTERM = true;
13371337}
13381338
1339- /*
1340- * avl_quickdie occurs when signalled SIGQUIT from postmaster.
1341- *
1342- * Some backend has bought the farm, so we need to stop what we're doing
1343- * and exit.
1344- */
1345- static void
1346- avl_quickdie (SIGNAL_ARGS )
1347- {
1348- PG_SETMASK (& BlockSig );
1349-
1350- /*
1351- * We DO NOT want to run proc_exit() callbacks -- we're here because
1352- * shared memory may be corrupted, so we don't want to try to clean up our
1353- * transaction. Just nail the windows shut and get out of town. Now that
1354- * there's an atexit callback to prevent third-party code from breaking
1355- * things by calling exit() directly, we have to reset the callbacks
1356- * explicitly to make this work as intended.
1357- */
1358- on_exit_reset ();
1359-
1360- /*
1361- * Note we do exit(2) not exit(0). This is to force the postmaster into a
1362- * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
1363- * backend. This is necessary precisely because we don't clean up our
1364- * shared memory state. (The "dead man switch" mechanism in pmsignal.c
1365- * should ensure the postmaster sees this as a crash, too, but no harm in
1366- * being doubly sure.)
1367- */
1368- exit (2 );
1369- }
1370-
13711339
13721340/********************************************************************
13731341 * AUTOVACUUM WORKER CODE
@@ -1590,7 +1558,7 @@ AutoVacWorkerMain(int argc, char *argv[])
15901558
15911559 /* wake up the launcher */
15921560 if (AutoVacuumShmem -> av_launcherpid != 0 )
1593- kill (AutoVacuumShmem -> av_launcherpid , SIGUSR1 );
1561+ kill (AutoVacuumShmem -> av_launcherpid , SIGUSR2 );
15941562 }
15951563 else
15961564 {
@@ -1784,46 +1752,57 @@ autovac_balance_cost(void)
17841752
17851753/*
17861754 * get_database_list
1755+ * Return a list of all databases found in pg_database.
17871756 *
1788- * Return a list of all databases. Note we cannot use pg_database,
1789- * because we aren't connected; we use the flat database file.
1757+ * Note: this is the only function in which the autovacuum launcher uses a
1758+ * transaction. Although we aren't attached to any particular database and
1759+ * therefore can't access most catalogs, we do have enough infrastructure
1760+ * to do a seqscan on pg_database.
17901761 */
17911762static List *
17921763get_database_list (void )
17931764{
1794- char * filename ;
17951765 List * dblist = NIL ;
1796- char thisname [NAMEDATALEN ];
1797- FILE * db_file ;
1798- Oid db_id ;
1799- Oid db_tablespace ;
1800- TransactionId db_frozenxid ;
1801-
1802- filename = database_getflatfilename ();
1803- db_file = AllocateFile (filename , "r" );
1804- if (db_file == NULL )
1805- ereport (FATAL ,
1806- (errcode_for_file_access (),
1807- errmsg ("could not open file \"%s\": %m" , filename )));
1766+ Relation rel ;
1767+ HeapScanDesc scan ;
1768+ HeapTuple tup ;
1769+
1770+ /*
1771+ * Start a transaction so we can access pg_database, and get a snapshot.
1772+ * We don't have a use for the snapshot itself, but we're interested in
1773+ * the secondary effect that it sets RecentGlobalXmin. (This is critical
1774+ * for anything that reads heap pages, because HOT may decide to prune
1775+ * them even if the process doesn't attempt to modify any tuples.)
1776+ */
1777+ StartTransactionCommand ();
1778+ (void ) GetTransactionSnapshot ();
1779+
1780+ /* Allocate our results in AutovacMemCxt, not transaction context */
1781+ MemoryContextSwitchTo (AutovacMemCxt );
18081782
1809- while (read_pg_database_line (db_file , thisname , & db_id ,
1810- & db_tablespace , & db_frozenxid ))
1783+ rel = heap_open (DatabaseRelationId , AccessShareLock );
1784+ scan = heap_beginscan (rel , SnapshotNow , 0 , NULL );
1785+
1786+ while (HeapTupleIsValid (tup = heap_getnext (scan , ForwardScanDirection )))
18111787 {
1812- avw_dbase * avdb ;
1788+ Form_pg_database pgdatabase = (Form_pg_database ) GETSTRUCT (tup );
1789+ avw_dbase * avdb ;
18131790
18141791 avdb = (avw_dbase * ) palloc (sizeof (avw_dbase ));
18151792
1816- avdb -> adw_datid = db_id ;
1817- avdb -> adw_name = pstrdup (thisname );
1818- avdb -> adw_frozenxid = db_frozenxid ;
1793+ avdb -> adw_datid = HeapTupleGetOid ( tup ) ;
1794+ avdb -> adw_name = pstrdup (NameStr ( pgdatabase -> datname ) );
1795+ avdb -> adw_frozenxid = pgdatabase -> datfrozenxid ;
18191796 /* this gets set later: */
18201797 avdb -> adw_entry = NULL ;
18211798
18221799 dblist = lappend (dblist , avdb );
18231800 }
18241801
1825- FreeFile (db_file );
1826- pfree (filename );
1802+ heap_endscan (scan );
1803+ heap_close (rel , AccessShareLock );
1804+
1805+ CommitTransactionCommand ();
18271806
18281807 return dblist ;
18291808}
0 commit comments