Skip to content

Commit 69b216e

Browse files
authored
Merge branch 'master' into master
2 parents 46b0bd6 + 27d5c99 commit 69b216e

File tree

21 files changed

+828
-242
lines changed

21 files changed

+828
-242
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,11 @@ flag. This removes all built-in metrics, and uses only metrics defined by querie
163163

164164
### Automatically discover databases
165165
To scrape metrics from all databases on a database server, the database DSN's can be dynamically discovered via the
166-
`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database` is run for all configured DSN's. From the
166+
`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false` is run for all configured DSN's. From the
167167
result a new set of DSN's is created for which the metrics are scraped.
168168

169+
In addition, the option `--exclude-databases` adds the possibility to filter the result of the auto discovery, discarding databases you do not need.
170+
169171
### Running as non-superuser
170172

171173
To be able to collect metrics from `pg_stat_activity` and `pg_stat_replication`
@@ -206,6 +208,7 @@ ALTER USER postgres_exporter SET SEARCH_PATH TO postgres_exporter,pg_catalog;
206208
-- GRANT postgres_exporter TO <MASTER_USER>;
207209
CREATE SCHEMA IF NOT EXISTS postgres_exporter;
208210
GRANT USAGE ON SCHEMA postgres_exporter TO postgres_exporter;
211+
GRANT CONNECT ON DATABASE postgres TO postgres_exporter;
209212

210213
CREATE OR REPLACE FUNCTION get_pg_stat_activity() RETURNS SETOF pg_stat_activity AS
211214
$$ SELECT * FROM pg_catalog.pg_stat_activity; $$

cmd/postgres_exporter/postgres_exporter.go

Lines changed: 96 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ var (
3939
queriesPath = kingpin.Flag("extend.query-path", "Path to custom queries to run.").Default("").Envar("PG_EXPORTER_EXTEND_QUERY_PATH").String()
4040
onlyDumpMaps = kingpin.Flag("dumpmaps", "Do not run, simply dump the maps.").Bool()
4141
constantLabelsList = kingpin.Flag("constantLabels", "A list of label=value separated by comma(,).").Default("").Envar("PG_EXPORTER_CONSTANT_LABELS").String()
42+
excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String()
4243
)
4344

4445
// Metric name parts.
@@ -128,6 +129,16 @@ type MetricMap struct {
128129
conversion func(interface{}) (float64, bool) // Conversion function to turn PG result into float64
129130
}
130131

132+
// ErrorConnectToServer is the error type returned when a connection to a
// PostgreSQL server cannot be opened. It is a distinct type so that callers
// can tell connection failures apart from other scrape errors with a type
// assertion.
133+
type ErrorConnectToServer struct {
134+
// Msg is the complete, already formatted error message.
Msg string
135+
}
136+
137+
// Error implements the error interface by returning Msg unchanged.
138+
func (e *ErrorConnectToServer) Error() string {
139+
return e.Msg
140+
}
141+
131142
// TODO: revisit this with the semver system
132143
func dumpMaps() {
133144
// TODO: make this function part of the exporter
@@ -231,6 +242,7 @@ var builtinMetricMaps = map[string]map[string]ColumnMapping{
231242
"restart_lsn": {DISCARD, "The address (LSN) of oldest WAL which still might be required by the consumer of this slot and thus won't be automatically removed during checkpoints", nil, nil},
232243
"pg_current_xlog_location": {DISCARD, "pg_current_xlog_location", nil, nil},
233244
"pg_current_wal_lsn": {DISCARD, "pg_current_xlog_location", nil, semver.MustParseRange(">=10.0.0")},
245+
"pg_current_wal_lsn_bytes": {GAUGE, "WAL position in bytes", nil, semver.MustParseRange(">=10.0.0")},
234246
"pg_xlog_location_diff": {GAUGE, "Lag in bytes between master and slave", nil, semver.MustParseRange(">=9.2.0 <10.0.0")},
235247
"pg_wal_lsn_diff": {GAUGE, "Lag in bytes between master and slave", nil, semver.MustParseRange(">=10.0.0")},
236248
"confirmed_flush_lsn": {DISCARD, "LSN position a consumer of a slot has confirmed flushing the data received", nil, nil},
@@ -288,6 +300,7 @@ var queryOverrides = map[string][]OverrideQuery{
288300
`
289301
SELECT *,
290302
(case pg_is_in_recovery() when 't' then null else pg_current_wal_lsn() end) AS pg_current_wal_lsn,
303+
(case pg_is_in_recovery() when 't' then null else pg_wal_lsn_diff(pg_current_wal_lsn(), pg_lsn('0/0'))::float end) AS pg_current_wal_lsn_bytes,
291304
(case pg_is_in_recovery() when 't' then null else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff
292305
FROM pg_stat_replication
293306
`,
@@ -812,21 +825,24 @@ func (s *Server) String() string {
812825
}
813826

814827
// Scrape loads metrics.
815-
func (s *Server) Scrape(ch chan<- prometheus.Metric, errGauge prometheus.Gauge, disableSettingsMetrics bool) {
828+
func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool) error {
816829
s.mappingMtx.RLock()
817830
defer s.mappingMtx.RUnlock()
818831

832+
var err error
833+
819834
if !disableSettingsMetrics {
820-
if err := querySettings(ch, s); err != nil {
821-
log.Errorf("Error retrieving settings: %s", err)
822-
errGauge.Inc()
835+
if err = querySettings(ch, s); err != nil {
836+
err = fmt.Errorf("error retrieving settings: %s", err)
823837
}
824838
}
825839

826840
errMap := queryNamespaceMappings(ch, s)
827841
if len(errMap) > 0 {
828-
errGauge.Inc()
842+
err = fmt.Errorf("queryNamespaceMappings returned %d errors", len(errMap))
829843
}
844+
845+
return err
830846
}
831847

832848
// Servers contains a collection of servers to Postgres.
@@ -849,17 +865,29 @@ func (s *Servers) GetServer(dsn string) (*Server, error) {
849865
s.m.Lock()
850866
defer s.m.Unlock()
851867
var err error
852-
server, ok := s.servers[dsn]
853-
if !ok {
854-
server, err = NewServer(dsn, s.opts...)
855-
if err != nil {
868+
var ok bool
869+
errCount := 0 // start at zero because we increment before doing work
870+
retries := 3
871+
var server *Server
872+
for {
873+
if errCount++; errCount > retries {
856874
return nil, err
857875
}
858-
s.servers[dsn] = server
859-
}
860-
if err = server.Ping(); err != nil {
861-
delete(s.servers, dsn)
862-
return nil, err
876+
server, ok = s.servers[dsn]
877+
if !ok {
878+
server, err = NewServer(dsn, s.opts...)
879+
if err != nil {
880+
time.Sleep(time.Duration(errCount) * time.Second)
881+
continue
882+
}
883+
s.servers[dsn] = server
884+
}
885+
if err = server.Ping(); err != nil {
886+
delete(s.servers, dsn)
887+
time.Sleep(time.Duration(errCount) * time.Second)
888+
continue
889+
}
890+
break
863891
}
864892
return server, nil
865893
}
@@ -883,6 +911,7 @@ type Exporter struct {
883911

884912
disableDefaultMetrics, disableSettingsMetrics, autoDiscoverDatabases bool
885913

914+
excludeDatabases []string
886915
dsn []string
887916
userQueriesPath string
888917
constantLabels prometheus.Labels
@@ -921,6 +950,13 @@ func AutoDiscoverDatabases(b bool) ExporterOpt {
921950
}
922951
}
923952

953+
// ExcludeDatabases returns an ExporterOpt that sets the database names to be
// filtered out of the auto-discovery result. s is split on "," without
// trimming whitespace, so each entry must match a database name exactly.
// NOTE(review): strings.Split("", ",") yields [""], so an unset flag stores a
// single empty name rather than an empty list; presumably harmless because no
// database has an empty name, but confirm.
954+
func ExcludeDatabases(s string) ExporterOpt {
955+
return func(e *Exporter) {
956+
e.excludeDatabases = strings.Split(s, ",")
957+
}
958+
}
959+
924960
// WithUserQueriesPath configures user's queries path.
925961
func WithUserQueriesPath(p string) ExporterOpt {
926962
return func(e *Exporter) {
@@ -1065,7 +1101,7 @@ func newDesc(subsystem, name, help string, labels prometheus.Labels) *prometheus
10651101
}
10661102

10671103
func queryDatabases(server *Server) ([]string, error) {
1068-
rows, err := server.db.Query("SELECT datname FROM pg_database") // nolint: safesql
1104+
rows, err := server.db.Query("SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false") // nolint: safesql
10691105
if err != nil {
10701106
return nil, fmt.Errorf("Error retrieving databases: %v", err)
10711107
}
@@ -1280,16 +1316,40 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
12801316
e.duration.Set(time.Since(begun).Seconds())
12811317
}(time.Now())
12821318

1283-
e.error.Set(0)
1284-
e.psqlUp.Set(0)
12851319
e.totalScrapes.Inc()
12861320

12871321
dsns := e.dsn
12881322
if e.autoDiscoverDatabases {
12891323
dsns = e.discoverDatabaseDSNs()
12901324
}
1325+
1326+
var errorsCount int
1327+
var connectionErrorsCount int
1328+
12911329
for _, dsn := range dsns {
1292-
e.scrapeDSN(ch, dsn)
1330+
if err := e.scrapeDSN(ch, dsn); err != nil {
1331+
errorsCount++
1332+
1333+
log.Errorf(err.Error())
1334+
1335+
if _, ok := err.(*ErrorConnectToServer); ok {
1336+
connectionErrorsCount++
1337+
}
1338+
}
1339+
}
1340+
1341+
switch {
1342+
case connectionErrorsCount >= len(dsns):
1343+
e.psqlUp.Set(0)
1344+
default:
1345+
e.psqlUp.Set(1) // Didn't fail, can mark connection as up for this scrape.
1346+
}
1347+
1348+
switch errorsCount {
1349+
case 0:
1350+
e.error.Set(0)
1351+
default:
1352+
e.error.Set(1)
12931353
}
12941354
}
12951355

@@ -1315,6 +1375,9 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
13151375
continue
13161376
}
13171377
for _, databaseName := range databaseNames {
1378+
if contains(e.excludeDatabases, databaseName) {
1379+
continue
1380+
}
13181381
parsedDSN.Path = databaseName
13191382
dsns[parsedDSN.String()] = struct{}{}
13201383
}
@@ -1330,24 +1393,18 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
13301393
return result
13311394
}
13321395

1333-
func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) {
1396+
func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error {
13341397
server, err := e.servers.GetServer(dsn)
13351398
if err != nil {
1336-
log.Errorf("Error opening connection to database (%s): %v", loggableDSN(dsn), err)
1337-
e.error.Inc()
1338-
return
1399+
return &ErrorConnectToServer{fmt.Sprintf("Error opening connection to database (%s): %s", loggableDSN(dsn), err)}
13391400
}
13401401

1341-
// Didn't fail, can mark connection as up for this scrape.
1342-
e.psqlUp.Inc()
1343-
13441402
// Check if map versions need to be updated
13451403
if err := e.checkMapVersions(ch, server); err != nil {
13461404
log.Warnln("Proceeding with outdated query maps, as the Postgres version could not be determined:", err)
1347-
e.error.Inc()
13481405
}
13491406

1350-
server.Scrape(ch, e.error, e.disableSettingsMetrics)
1407+
return server.Scrape(ch, e.disableSettingsMetrics)
13511408
}
13521409

13531410
// try to get the DataSource
@@ -1389,6 +1446,15 @@ func getDataSources() []string {
13891446
return strings.Split(dsn, ",")
13901447
}
13911448

1449+
// contains reports whether x is an element of a. The comparison is an exact,
// case-sensitive string match; a nil or empty slice always yields false.
func contains(a []string, x string) bool {
1450+
for _, n := range a {
1451+
if x == n {
1452+
return true
1453+
}
1454+
}
1455+
return false
1456+
}
1457+
13921458
func main() {
13931459
kingpin.Version(fmt.Sprintf("postgres_exporter %s (built with %s)\n", Version, runtime.Version()))
13941460
log.AddFlags(kingpin.CommandLine)
@@ -1421,6 +1487,7 @@ func main() {
14211487
AutoDiscoverDatabases(*autoDiscoverDatabases),
14221488
WithUserQueriesPath(*queriesPath),
14231489
WithConstantLabels(*constantLabelsList),
1490+
ExcludeDatabases(*excludeDatabases),
14241491
)
14251492
defer func() {
14261493
exporter.servers.Close()
@@ -1430,8 +1497,8 @@ func main() {
14301497

14311498
http.Handle(*metricPath, promhttp.Handler())
14321499
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
1433-
w.Header().Set("Content-Type", "Content-Type:text/plain; charset=UTF-8") // nolint: errcheck
1434-
w.Write(landingPage) // nolint: errcheck
1500+
w.Header().Set("Content-Type", "text/html; charset=UTF-8") // nolint: errcheck
1501+
w.Write(landingPage) // nolint: errcheck
14351502
})
14361503

14371504
log.Infof("Starting Server: %s", *listenAddress)

queries.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pg_postmaster:
1313
description: "Time at which postmaster started"
1414

1515
pg_stat_user_tables:
16-
query: "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, last_vacuum, last_autovacuum, last_analyze, last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
16+
# The last_* timestamps are COALESCEd to the Unix epoch so that tables which were never vacuumed/analyzed return a value instead of NULL. Each COALESCE is aliased back to its column name so it matches the metric keys.
query: "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
1717
metrics:
1818
- schemaname:
1919
usage: "LABEL"

tools/src

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
/home/will/src/go/src/github.com/wrouesnel/postgres_exporter/tools/vendor
1+
/Users/alex/go/src/github.com/AlexisSellier/postgres_exporter/tools/vendor

vendor/github.com/lib/pq/README.md

Lines changed: 2 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/lib/pq/TESTS.md

Lines changed: 33 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/lib/pq/buf.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)