diff --git a/.travis.yml b/.travis.yml
index 6a426e7..3777367 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,8 @@ sudo: false
addons:
postgresql: 9.4
go:
- - 1.4
+ - 1.8
+ - 1.9
- tip
cache:
directories:
@@ -13,6 +14,8 @@ install:
- go get github.com/lib/pq
- go get github.com/kennygrant/sanitize
- go get github.com/cheggaaa/pb
+ - go get github.com/JensRantil/go-csv
- ./download_samples.sh
script:
- go install && ./test.sh
+ - go test
diff --git a/README.md b/README.md
index c260ebc..841c71d 100644
--- a/README.md
+++ b/README.md
@@ -2,17 +2,18 @@
-Import CSV and JSON into PostgreSQL the easy way.
+Import CSV and line-delimited JSON into PostgreSQL the easy way.
This small tool abstracts all the hassles and swearing you normally
have to deal with when you just want to dump some data into the database.
Features:
-- Generated import tables (`pgfutter csv ` and your done)
+- Generated import tables (`pgfutter csv ` and you're done)
- Good performance using the `COPY` streaming protocol
- Easy deployment
- Dealing with import errors
- Import over the network
+- Only supports UTF-8 encoding
> Check out [pgclimb](https://github.com/lukasmartinelli/pgclimb) for exporting data from PostgreSQL into different data formats.
@@ -23,7 +24,7 @@ You can download a single binary for Linux, OSX or Windows.
**OSX**
```bash
-wget -O pgfutter https://github.com/lukasmartinelli/pgfutter/releases/download/v1.1/pgfutter_darwin_amd64
+wget -O pgfutter https://github.com/lukasmartinelli/pgfutter/releases/download/v1.2/pgfutter_darwin_amd64
chmod +x pgfutter
./pgfutter --help
@@ -32,7 +33,7 @@ chmod +x pgfutter
**Linux**
```bash
-wget -O pgfutter https://github.com/lukasmartinelli/pgfutter/releases/download/v1.1/pgfutter_linux_amd64
+wget -O pgfutter https://github.com/lukasmartinelli/pgfutter/releases/download/v1.2/pgfutter_linux_amd64
chmod +x pgfutter
./pgfutter --help
@@ -229,16 +230,7 @@ This works the same for invalid JSON objects.
`pgfutter` will take the sanitized filename as the table name. If you want a custom table name or to import into a predefined table schema, you can specify the table explicitly.
```bash
-pgfutter csv --table violations traffic_violations.csv
-```
-
-### Import single JSON object
-
-Instead of using JSON lines you can also [import a single JSON object](https://github.com/lukasmartinelli/pgfutter/issues/9)
-into the database. This will load the JSON document into memory first.
-
-```bash
-pgfutter jsonobj document.json
+pgfutter --table violations csv traffic_violations.csv
```
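+
+### Null values and Excel line endings
+
+Fields that match the null delimiter (default `\N`) are imported as SQL `NULL`.
+Excel 2008 and 2011 export CSV files with bare `\r` line endings; pass `--excel`
+to accept them. A minimal example using the new flags:
+
+```bash
+pgfutter csv --excel --null-delimiter "\N" traffic_violations.csv
+```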
## Alternatives
@@ -267,5 +259,6 @@ We use [gox](https://github.com/mitchellh/gox) to create distributable
binaries for Windows, OSX and Linux.
```bash
-docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.4.2-light
+docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.9
```
diff --git a/csv.go b/csv.go
index 57c6a48..1b05a51 100644
--- a/csv.go
+++ b/csv.go
@@ -1,7 +1,6 @@
package main
import (
- "encoding/csv"
"errors"
"fmt"
"io"
@@ -9,6 +8,7 @@ import (
"strings"
"unicode/utf8"
+ csv "github.com/JensRantil/go-csv"
"github.com/cheggaaa/pb"
)
@@ -18,6 +18,24 @@ func containsDelimiter(col string) bool {
strings.Contains(col, "^") || strings.Contains(col, "~")
}
+// Parse the delimiter for an escape sequence. This allows Windows users to
+// pass \t, since they cannot pass "`t" or "$Tab" to the program.
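+// For example, --delimiter "\t" arrives here as the two characters '\' and 't'
+// and is converted to a real tab, unless --skip-parse-delimiter is given.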
+func parseDelimiter(delim string, skip bool) string {
+ if !strings.HasPrefix(delim, "\\") || skip {
+ return delim
+ }
+	switch delim {
+	case "\\t":
+		return "\t"
+	default:
+		return delim
+	}
+}
+
// Parse columns from first header row or from flags
func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string, error) {
var err error
@@ -30,14 +48,17 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string,
}
} else {
columns, err = reader.Read()
+ fmt.Printf("%v columns\n%v\n", len(columns), columns)
if err != nil {
+ fmt.Printf("FOUND ERR\n")
return nil, err
}
}
for _, col := range columns {
if containsDelimiter(col) {
- return columns, errors.New("Please specify the correct delimiter with -d.\nHeader column contains a delimiter character: " + col)
+ return columns, errors.New("Please specify the correct delimiter with -d.\n" +
+ "Header column contains a delimiter character: " + col)
}
}
@@ -48,7 +69,8 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string,
return columns, nil
}
-func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter string, columns []string) (error, int, int) {
+func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool,
+ delimiter string, columns []string, nullDelimiter string) (error, int, int) {
success := 0
failed := 0
@@ -68,7 +90,7 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
os.Stderr.WriteString(string(line))
continue
} else {
- err = errors.New(fmt.Sprintf("%s: %s", err, line))
+ err = fmt.Errorf("%s: %s", err, line)
return err, success, failed
}
}
@@ -76,10 +98,12 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
//Loop ensures we don't insert too many values and that
//values are properly converted into empty interfaces
for i, col := range record {
- cols[i] = col
+			// Strip NUL bytes, which PostgreSQL cannot store in text columns
+			cols[i] = strings.Replace(col, "\x00", "", -1)
}
- err = i.AddRow(cols...)
+ err = i.AddRow(nullDelimiter, cols...)
if err != nil {
line := strings.Join(record, delimiter)
@@ -89,7 +113,7 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
os.Stderr.WriteString(string(line))
continue
} else {
- err = errors.New(fmt.Sprintf("%s: %s", err, line))
+ err = fmt.Errorf("%s: %s", err, line)
return err, success, failed
}
}
@@ -100,7 +124,8 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
return nil, success, failed
}
-func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string) error {
+func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool,
+ skipHeader bool, fields string, delimiter string, excel bool, nullDelimiter string) error {
db, err := connect(connStr, schema)
if err != nil {
@@ -108,8 +133,19 @@ func importCSV(filename string, connStr string, schema string, tableName string,
}
defer db.Close()
+ dialect := csv.Dialect{}
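+	// Use the first rune of the delimiter string; the discarded second value
+	// is the rune's width in bytes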
+ dialect.Delimiter, _ = utf8.DecodeRuneInString(delimiter)
+
+	// Excel 2008 and 2011 (and possibly other versions) use a carriage return \r
+	// rather than a line feed \n as the newline
+ if excel {
+ dialect.LineTerminator = "\r"
+ } else {
+ dialect.LineTerminator = "\n"
+ }
+
var reader *csv.Reader
- var bar *pb.ProgressBar
+ var bar *pb.ProgressBar
if filename != "" {
file, err := os.Open(filename)
if err != nil {
@@ -118,21 +154,16 @@ func importCSV(filename string, connStr string, schema string, tableName string,
defer file.Close()
bar = NewProgressBar(file)
- reader = csv.NewReader(io.TeeReader(file, bar))
+ reader = csv.NewDialectReader(io.TeeReader(file, bar), dialect)
} else {
- reader = csv.NewReader(os.Stdin)
+ reader = csv.NewDialectReader(os.Stdin, dialect)
}
- reader.Comma, _ = utf8.DecodeRuneInString(delimiter)
- reader.LazyQuotes = true
-
columns, err := parseColumns(reader, skipHeader, fields)
if err != nil {
return err
}
- reader.FieldsPerRecord = len(columns)
-
i, err := NewCSVImport(db, schema, tableName, columns)
if err != nil {
return err
@@ -141,10 +172,10 @@ func importCSV(filename string, connStr string, schema string, tableName string,
var success, failed int
if filename != "" {
bar.Start()
- err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns)
+ err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns, nullDelimiter)
bar.Finish()
} else {
- err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns)
+ err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns, nullDelimiter)
}
if err != nil {
@@ -152,14 +183,14 @@ func importCSV(filename string, connStr string, schema string, tableName string,
if !skipHeader {
lineNumber++
}
- return errors.New(fmt.Sprintf("line %d: %s", lineNumber, err))
- } else {
- fmt.Println(fmt.Sprintf("%d rows imported into %s.%s", success, schema, tableName))
+ return fmt.Errorf("line %d: %s", lineNumber, err)
+ }
- if ignoreErrors && failed > 0 {
- fmt.Println(fmt.Sprintf("%d rows could not be imported into %s.%s and have been written to stderr.", failed, schema, tableName))
- }
+ fmt.Println(fmt.Sprintf("%d rows imported into %s.%s", success, schema, tableName))
- return i.Commit()
+ if ignoreErrors && failed > 0 {
+ fmt.Println(fmt.Sprintf("%d rows could not be imported into %s.%s and have been written to stderr.", failed, schema, tableName))
}
+
+ return i.Commit()
}
diff --git a/download_samples.sh b/download_samples.sh
index efce026..cdc20d1 100755
--- a/download_samples.sh
+++ b/download_samples.sh
@@ -5,30 +5,28 @@ SAMPLES_DIR="$CWD/samples"
function download_json_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
- wget -nc http://data.githubarchive.org/2015-01-01-15.json.gz && gunzip -f 2015-01-01-15.json.gz
+    wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/json_sample_2015-01-01-15.json
cd $CWD
}
function download_csv_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
- wget -nc -O local_severe_wheather_warning_systems.csv https://data.mo.gov/api/views/n59h-ggai/rows.csv
- wget -nc -O montgomery_crime.csv https://data.montgomerycountymd.gov/api/views/icn6-v9z3/rows.csv
- wget -nc -O employee_salaries.csv https://data.montgomerycountymd.gov/api/views/54rh-89p8/rows.csv
- wget -nc -O residential_permits.csv https://data.montgomerycountymd.gov/api/views/m88u-pqki/rows.csv
- wget -nc -O customer_complaints.csv https://data.consumerfinance.gov/api/views/x94z-ydhh/rows.csv
- wget -nc -O traffic_violations.csv https://data.montgomerycountymd.gov/api/views/4mse-ku6q/rows.csv
- wget -nc -O distribution_of_wealth_switzerland.csv http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Distribution_of_wealth.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Wealth_groups.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Vermoegensklassen.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Steuertarife.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Tax_rates.csv
- wget -nc -O whitehouse_visits_2014.zip https://www.whitehouse.gov/sites/default/files/disclosures/whitehouse_waves-2014_12.csv_.zip && unzip -o whitehouse_visits_2014.zip && rm -f whitehouse_visits_2014.csv && mv whitehouse_waves-2014_12.csv.csv whitehouse_visits_2014.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bag/Spitalstatistikdateien/qip/2012/qip12_tabdaten.csv
- wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bar.bar-02/Metadatenbank-Vernehmlassungen-OGD-V1-3.csv
- wget -nc https://www.data.gov/app/uploads/2015/08/opendatasites.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_distribution_of_wealth_switzerland.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_employee_salaries.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_local_severe_wheather_warning_systems.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_montgomery_crime.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_qip12_tabdaten.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_residential_permits.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramentocrime_jan_2006.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramento_realestate_transactions.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sales_jan_2009.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_steuertarife.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_techcrunch_continental_usa.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_vermoegensklassen.csv
+ wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_metadatenbank.csv
cd $CWD
}
-download_csv_samples
download_json_samples
+download_csv_samples
diff --git a/import.go b/import.go
index 6a68fe2..ddde9d9 100644
--- a/import.go
+++ b/import.go
@@ -56,7 +56,15 @@ func newImport(db *sql.DB, schema string, tableName string, columns []string) (*
return &Import{txn, stmt}, nil
}
-func (i *Import) AddRow(columns ...interface{}) error {
+func (i *Import) AddRow(nullDelimiter string, columns ...interface{}) error {
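+	// Values that match the null delimiter (e.g. \N) are inserted as SQL NULL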
+ for index := range columns {
+ column := columns[index]
+
+ if column == nullDelimiter {
+ columns[index] = nil
+ }
+ }
+
_, err := i.stmt.Exec(columns...)
return err
}
diff --git a/json.go b/json.go
index b456bb8..e7bd5fc 100644
--- a/json.go
+++ b/json.go
@@ -3,10 +3,8 @@ package main
import (
"bufio"
"encoding/json"
- "errors"
"fmt"
"io"
- "io/ioutil"
"os"
)
@@ -33,7 +31,7 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
}
if err != nil {
- err = errors.New(fmt.Sprintf("%s: %s", err, line))
+ err = fmt.Errorf("%s: %s", err, line)
return err, success, failed
}
@@ -44,7 +42,7 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
os.Stderr.WriteString(string(line))
continue
} else {
- err = errors.New(fmt.Sprintf("%s: %s", err, line))
+ err = fmt.Errorf("%s: %s", err, line)
return err, success, failed
}
}
@@ -56,7 +54,7 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
os.Stderr.WriteString(string(line))
continue
} else {
- err = errors.New(fmt.Sprintf("%s: %s", err, line))
+ err = fmt.Errorf("%s: %s", err, line)
return err, success, failed
}
}
@@ -67,42 +65,6 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
return nil, success, failed
}
-func importJSONObject(filename string, connStr string, schema string, tableName string, dataType string) error {
- db, err := connect(connStr, schema)
- if err != nil {
- return err
- }
- defer db.Close()
-
- // The entire file is read into memory because we need to add
- // it into the PostgreSQL transaction, this will hit memory limits
- // for big JSON objects
- var bytes []byte
- if filename == "" {
- bytes, err = ioutil.ReadAll(os.Stdin)
- } else {
- bytes, err = ioutil.ReadFile(filename)
- }
- if err != nil {
- return err
- }
-
- i, err := NewJSONImport(db, schema, tableName, "data", dataType)
- if err != nil {
- return err
- }
-
- // The JSON file is not validated at client side
- // it is just copied into the database
- // If the JSON file is corrupt PostgreSQL will complain when querying
- err = i.AddRow(string(bytes))
- if err != nil {
- return err
- }
-
- return i.Commit()
-}
-
func importJSON(filename string, connStr string, schema string, tableName string, ignoreErrors bool, dataType string) error {
db, err := connect(connStr, schema)
@@ -136,14 +98,14 @@ func importJSON(filename string, connStr string, schema string, tableName string
if err != nil {
lineNumber := success + failed
- return errors.New(fmt.Sprintf("line %d: %s", lineNumber, err))
- } else {
- fmt.Println(fmt.Sprintf("%d rows imported into %s.%s", success, schema, tableName))
+ return fmt.Errorf("line %d: %s", lineNumber, err)
+ }
- if ignoreErrors && failed > 0 {
- fmt.Println(fmt.Sprintf("%d rows could not be imported into %s.%s and have been written to stderr.", failed, schema, tableName))
- }
+ fmt.Println(fmt.Sprintf("%d rows imported into %s.%s", success, schema, tableName))
- return i.Commit()
+ if ignoreErrors && failed > 0 {
+ fmt.Println(fmt.Sprintf("%d rows could not be imported into %s.%s and have been written to stderr.", failed, schema, tableName))
}
+
+ return i.Commit()
}
diff --git a/pgfutter.go b/pgfutter.go
index 6e9ccc4..5e1c918 100644
--- a/pgfutter.go
+++ b/pgfutter.go
@@ -43,7 +43,7 @@ func getDataType(c *cli.Context) string {
func main() {
app := cli.NewApp()
app.Name = "pgfutter"
- app.Version = "1.1"
+ app.Version = "1.2"
app.Usage = "Import JSON and CSV into PostgreSQL the easy way"
app.Flags = []cli.Flag{
cli.StringFlag{
@@ -105,7 +105,7 @@ func main() {
{
Name: "json",
Usage: "Import newline-delimited JSON objects into database",
- Action: func(c *cli.Context) {
+ Action: func(c *cli.Context) error {
cli.CommandHelpTemplate = strings.Replace(cli.CommandHelpTemplate, "[arguments...]", "", -1)
filename := c.Args().First()
@@ -117,30 +117,17 @@ func main() {
connStr := parseConnStr(c)
err := importJSON(filename, connStr, schema, tableName, ignoreErrors, dataType)
- exitOnError(err)
- },
- },
- {
- Name: "jsonobj",
- Usage: "Import single JSON object into database",
- Action: func(c *cli.Context) {
- cli.CommandHelpTemplate = strings.Replace(cli.CommandHelpTemplate, "[arguments...]", "", -1)
-
- filename := c.Args().First()
-
- schema := c.GlobalString("schema")
- tableName := parseTableName(c, filename)
- dataType := getDataType(c)
-
- connStr := parseConnStr(c)
- err := importJSONObject(filename, connStr, schema, tableName, dataType)
- exitOnError(err)
+ return err
},
},
{
Name: "csv",
Usage: "Import CSV into database",
Flags: []cli.Flag{
+ cli.BoolFlag{
+ Name: "excel",
+ Usage: "support problematic Excel 2008 and Excel 2011 csv line endings",
+ },
cli.BoolFlag{
Name: "skip-header",
Usage: "skip header row",
@@ -154,8 +141,17 @@ func main() {
Value: ",",
Usage: "field delimiter",
},
+ cli.StringFlag{
+ Name: "null-delimiter, nd",
+ Value: "\\N",
+ Usage: "null delimiter",
+ },
+ cli.BoolFlag{
+ Name: "skip-parse-delimiter",
+ Usage: "skip parsing escape sequences in the given delimiter",
+ },
},
- Action: func(c *cli.Context) {
+ Action: func(c *cli.Context) error {
cli.CommandHelpTemplate = strings.Replace(cli.CommandHelpTemplate, "[arguments...]", "", -1)
filename := c.Args().First()
@@ -166,11 +162,13 @@ func main() {
skipHeader := c.Bool("skip-header")
fields := c.String("fields")
- delimiter := c.String("delimiter")
-
+ nullDelimiter := c.String("null-delimiter")
+			skipParseDelimiter := c.Bool("skip-parse-delimiter")
+			excel := c.Bool("excel")
+			delimiter := parseDelimiter(c.String("delimiter"), skipParseDelimiter)
connStr := parseConnStr(c)
- err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter)
- exitOnError(err)
+ err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter, excel, nullDelimiter)
+ return err
},
},
}
diff --git a/postgres.go b/postgres.go
index 755255d..4b53b1b 100644
--- a/postgres.go
+++ b/postgres.go
@@ -3,7 +3,9 @@ package main
import (
"database/sql"
"fmt"
+ "log"
"math/rand"
+ "regexp"
"strconv"
"strings"
@@ -53,24 +55,17 @@ func postgresify(identifier string) string {
"-": "_",
",": "_",
"#": "_",
-
- "[": "",
- "]": "",
- "{": "",
- "}": "",
- "(": "",
- ")": "",
- "?": "",
- "!": "",
- "$": "",
- "%": "",
- "*": "",
- "\"": "",
}
for oldString, newString := range replacements {
str = strings.Replace(str, oldString, newString, -1)
}
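+	// Strip any remaining characters that are not valid in a PostgreSQL identifier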
+	reg := regexp.MustCompile("[^A-Za-z0-9_]+")
+	str = reg.ReplaceAllString(str, "")
+
if len(str) == 0 {
str = fmt.Sprintf("_col%d", rand.Intn(10000))
} else {
diff --git a/postgres_test.go b/postgres_test.go
new file mode 100644
index 0000000..cb3ca41
--- /dev/null
+++ b/postgres_test.go
@@ -0,0 +1,24 @@
+package main
+
+import "testing"
+
+type testpair struct {
+ columnName string
+ sanitizedName string
+}
+
+var tests = []testpair{
+ {"Starting Date & Time", "starting_date__time"},
+ {"[$MYCOLUMN]", "mycolumn"},
+ {"({colname?!})", "colname"},
+ {"m4 * 4 / 3", "m4__4___3"},
+}
+
+func TestPostgresify(t *testing.T) {
+ for _, pair := range tests {
+ str := postgresify(pair.columnName)
+ if str != pair.sanitizedName {
+ t.Error("Invalid PostgreSQL identifier expected ", pair.sanitizedName, "got ", str)
+ }
+ }
+}
diff --git a/test.sh b/test.sh
index 63bfbe1..a78a1c7 100755
--- a/test.sh
+++ b/test.sh
@@ -1,36 +1,31 @@
#!/bin/bash
readonly CWD=$(pwd)
readonly SAMPLES_DIR="$CWD/samples"
-readonly DB_USER=${DB_USER:-postgres}
+readonly DB_USER="${DB_USER:-postgres}"
readonly DB_NAME="integration_test"
readonly DB_SCHEMA="import" # Use public schema instead of import because of permissions
function recreate_db() {
- psql -U ${DB_USER} -c "drop database if exists ${DB_NAME};"
- psql -U ${DB_USER} -c "create database ${DB_NAME};"
+ psql -U "${DB_USER}" -c "drop database if exists ${DB_NAME};"
+ psql -U "${DB_USER}" -c "create database ${DB_NAME};"
}
function query_counts() {
- local table=$1
- local counts=$(psql -U ${DB_USER} -d ${DB_NAME} -t -c "select count(*) from ${DB_SCHEMA}.${table}")
+ local table="$1"
+ local counts=$(psql -U "${DB_USER}" -d "${DB_NAME}" -t -c "select count(*) from ${DB_SCHEMA}.${table}")
echo "$counts"
}
function query_field_type() {
- local table=$1
- local data_type=$(psql -U ${DB_USER} -d ${DB_NAME} -t -c "SELECT data_type FROM information_schema.columns WHERE table_schema='${DB_SCHEMA}' AND table_name='${table}'")
+ local table="$1"
+ local data_type=$(psql -U "${DB_USER}" -d "${DB_NAME}" -t -c "SELECT data_type FROM information_schema.columns WHERE table_schema='${DB_SCHEMA}' AND table_name='${table}'")
echo "$data_type"
}
-function test_readme_csv_sample() {
- # test whether readme docs still work
- echo "test"
-}
-
function import_csv_with_special_delimiter_and_trailing() {
- local table="qip12_tabdaten"
- local filename="$SAMPLES_DIR/qip12_tabdaten.csv"
- pgfutter --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER csv "$filename" --delimiter=";"
+ local table="csv_sample_qip12_tabdaten"
+ local filename="$SAMPLES_DIR/csv_sample_qip12_tabdaten.csv"
+ pgfutter --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" csv "$filename" --delimiter=";"
if [ $? -ne 0 ]; then
echo "pgfutter could not import $filename"
exit 300
@@ -41,93 +36,94 @@ function import_csv_with_special_delimiter_and_trailing() {
}
function import_csv_and_skip_header_row_with_custom_fields() {
- local table="qip12_tabdaten"
- local filename="$SAMPLES_DIR/qip12_tabdaten.csv"
- pgfutter --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER csv "$filename"
- if [ $? -eq 0 ]; then
- echo "pgfutter should not be able to import $filename"
- exit 300
- fi
+ local table="csv_sample_qip12_tabdaten"
+ local filename="$SAMPLES_DIR/csv_sample_qip12_tabdaten.csv"
+ pgfutter --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" csv --delimiter ";" "$filename"
}
-function csv_with_wrong_delimiter_should_fail() {
- local table="metadatenbank_vernehmlassungen_ogd_v1_3"
- local filename="$SAMPLES_DIR/Metadatenbank-Vernehmlassungen-OGD-V1-3.csv"
- pgfutter --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER csv "$filename" --delimiter ";" --skip-header --fields "nr;typ_vernehmlassungsgegenstandes;titel_vernehmlassungsverfahrens;federfuhrendes_departement;fundort;adressaten;archivunterlagen;dokumententypen"
- if [ $? -eq 0 ]; then
- echo "pgfutter should not be able to import $filename"
+function test_json() {
+ local table=$1
+ local filename=$2
+ pgfutter --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" json "$filename"
+ if [ $? -ne 0 ]; then
+ echo "pgfutter could not import $filename"
exit 300
+ else
+ local db_count=$(query_counts $table)
+ local data_type=$(query_field_type $table)
+ echo "Imported $(expr $db_count) records into $table as $data_type"
fi
}
-function import_and_test_json() {
+function test_json_as_jsonb() {
local table=$1
local filename=$2
- pgfutter --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER json "$filename"
+ pgfutter --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" --jsonb json "$filename"
if [ $? -ne 0 ]; then
echo "pgfutter could not import $filename"
exit 300
else
- local db_count=$(query_counts $table)
- local data_type=$(query_field_type $table)
+ local db_count=$(query_counts "$table")
+ local data_type=$(query_field_type "$table")
echo "Imported $(expr $db_count) records into $table as $data_type"
fi
}
-function import_and_test_json_as_jsonb() {
+function test_excel_csv() {
local table=$1
local filename=$2
- pgfutter --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER --jsonb json "$filename"
+ local delimiter=${3:-,}
+ local general_args=${4:-}
+
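+    # --excel tells pgfutter to accept \r (Excel 2008/2011) line endings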
+ pgfutter $general_args --table "$table" --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" csv "$filename" --delimiter "$delimiter" --excel
if [ $? -ne 0 ]; then
echo "pgfutter could not import $filename"
exit 300
else
local db_count=$(query_counts $table)
- local data_type=$(query_field_type $table)
- echo "Imported $(expr $db_count) records into $table as $data_type"
+ echo "Imported $(expr $db_count) records into $table from $filename"
fi
}
-function import_and_test_csv() {
+function test_csv() {
local table=$1
local filename=$2
local delimiter=${3:-,}
local general_args=${4:-}
- pgfutter $general_args --schema $DB_SCHEMA --db $DB_NAME --user $DB_USER csv "$filename" --delimiter "$delimiter"
+ pgfutter $general_args --table "$table" --schema "$DB_SCHEMA" --db "$DB_NAME" --user "$DB_USER" csv "$filename" --delimiter "$delimiter"
if [ $? -ne 0 ]; then
echo "pgfutter could not import $filename"
exit 300
else
local db_count=$(query_counts $table)
- echo "Imported $(expr $db_count) records into $table"
+ echo "Imported $(expr $db_count) records into $table from $filename"
fi
}
recreate_db
-csv_with_wrong_delimiter_should_fail
-import_csv_and_skip_header_row_with_custom_fields
-import_csv_with_special_delimiter_and_trailing
-
-import_and_test_json "_2015_01_01_15" "$SAMPLES_DIR/2015-01-01-15.json"
# We change the type of the data column for this test, so we have to recreate the database
recreate_db
-import_and_test_json_as_jsonb "_2015_01_01_15" "$SAMPLES_DIR/2015-01-01-15.json"
-
-# File can no longer be downloaded
-#import_and_test_csv "local_severe_wheather_warning_systems" "$SAMPLES_DIR/local_severe_wheather_warning_systems.csv"
-# CSV file broke and has now invalid number of columns
-# import_and_test_csv "montgomery_crime" "$SAMPLES_DIR/montgomery_crime.csv"
-#import_and_test_csv "employee_salaries" "$SAMPLES_DIR/employee_salaries.csv"
-import_and_test_csv "residential_permits" "$SAMPLES_DIR/residential_permits.csv"
-import_and_test_csv "steuertarife" "$SAMPLES_DIR/Steuertarife.csv"
-import_and_test_csv "vermoegensklassen" "$SAMPLES_DIR/Vermoegensklassen.csv"
-import_and_test_csv "distribution_of_wealth_switzerland" "$SAMPLES_DIR/distribution_of_wealth_switzerland.csv"
-# Customer complaints no longer available
-# import_and_test_csv "customer_complaints" "$SAMPLES_DIR/customer_complaints.csv"
-import_and_test_csv "whitehouse_visits_2014" "$SAMPLES_DIR/whitehouse_visits_2014.csv"
-import_and_test_csv "traffic_violations" "$SAMPLES_DIR/traffic_violations.csv"
+
+# TODO: does not work because of newlines inside quoted fields
+# test_csv "local_severe_wheather_warning_systems" "$SAMPLES_DIR/csv_sample_local_severe_wheather_warning_systems.csv"
+# TODO: does not work because of newlines inside quoted fields
+# test_csv "residential_permits" "$SAMPLES_DIR/csv_sample_residential_permits.csv"
+test_csv "distribution_of_wealth_switzerland" "$SAMPLES_DIR/csv_sample_distribution_of_wealth_switzerland.csv"
+test_excel_csv "techcrunch_continental_usa" "$SAMPLES_DIR/csv_sample_techcrunch_continental_usa.csv"
+test_csv "employee_salaries" "$SAMPLES_DIR/csv_sample_employee_salaries.csv"
+test_csv "montgomery_crime" "$SAMPLES_DIR/csv_sample_montgomery_crime.csv"
+test_excel_csv "sacramentocrime_jan_2006" "$SAMPLES_DIR/csv_sample_sacramentocrime_jan_2006.csv"
+test_excel_csv "sacramento_realestate_transactions" "$SAMPLES_DIR/csv_sample_sacramento_realestate_transactions.csv"
+test_excel_csv "sales_jan_2009" "$SAMPLES_DIR/csv_sample_sales_jan_2009.csv"
+test_csv "steuertarife" "$SAMPLES_DIR/csv_sample_steuertarife.csv"
+test_csv "vermoegensklassen" "$SAMPLES_DIR/csv_sample_vermoegensklassen.csv"
+
+import_csv_and_skip_header_row_with_custom_fields
+import_csv_with_special_delimiter_and_trailing
+
+test_json_as_jsonb "json_sample_2015_01_01_15" "$SAMPLES_DIR/json_sample_2015-01-01-15.json"
recreate_db