From c1331ff0e723648faa7076b028ac9f57d0383ce9 Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 14:30:56 +0100 Subject: [PATCH 1/9] feat: sqlx parser to support multiple pre/post operation blocks --- cmd/sqlx_parser.go | 189 +++++++++++++++++++++++++++++++++++++++++++++ cmd/utils.go | 67 ++++++++++------ 2 files changed, 234 insertions(+), 22 deletions(-) create mode 100644 cmd/sqlx_parser.go diff --git a/cmd/sqlx_parser.go b/cmd/sqlx_parser.go new file mode 100644 index 0000000..fe16744 --- /dev/null +++ b/cmd/sqlx_parser.go @@ -0,0 +1,189 @@ +package cmd + +import ( + "bufio" + "fmt" + "os" + "strings" +) + +type ConfigBlockMeta struct { + exsists bool + startOfConfigBlock int + endOfConfigBlock int + configBlockContent string +} + +type PreOpsBlockMeta struct { + exsists bool + startOfPreOperationsBlock int + endOfPreOperationsBlock int + preOpsBlockContent string +} + +type PostOpsBlockMeta struct { + exsists bool + startOfpostOperationsBlock int + endOfpostOperationsBlock int + postOpsBlockContent string +} + +type SqlBlockMeta struct { + exsists bool + startOfSqlBlock int + endOfSqlBlock int + sqlBlockContent string + formattedSqlBlockContent string +} + +type sqlxParserMeta struct { + filepath string + numLines int + configBlockMeta ConfigBlockMeta + preOpsBlocksMeta []PreOpsBlockMeta + postOpsBlocksMeta []PostOpsBlockMeta + sqlBlocksMeta SqlBlockMeta +} + +func sqlxParser(filepath string) (sqlxParserMeta, error) { + + var inMajorBlock = false + + var startOfConfigBlock = 0 + var endOfConfigBlock = 0 + var configBlockExsists = false + var configBlockContent = "" + + var preOpsBlocksMeta = []PreOpsBlockMeta{} + var startOfPreOperationsBlock = 0 + var endOfPreOperationsBlock = 0 + + var postOpsBlocksMeta = []PostOpsBlockMeta{} + var startOfpostOperationsBlock = 0 + var endOfpostOperationsBlock = 0 + + var startOfSqlBlock = 0 + var endOfSqlBlock = 0 + var sqlBlockExsists = false + var sqlBlockContent = "" + + var isInInnerMajorBlock = false + var innerMajorBlockCount = 0 + + var currentBlock = "" + var currentBlockContent = "" + + file, err := os.Open(filepath) + if err != nil { + fmt.Printf("Error: %v\n", err) + return sqlxParserMeta{}, err + } + + i := 0 + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + i++ + var lineContents = scanner.Text() + "\n" + + if strings.Contains(lineContents, "config {") { + inMajorBlock = true + currentBlock = "config" + startOfConfigBlock = i + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "post_operations {") && !inMajorBlock { + startOfpostOperationsBlock = i + inMajorBlock = true + currentBlock = "post_operations" + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "pre_operations {") && !inMajorBlock { + inMajorBlock = true + currentBlock = "pre_operations" + startOfPreOperationsBlock = i + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "{") && inMajorBlock { + if strings.Contains(lineContents, "}") { + continue + } + isInInnerMajorBlock = true + innerMajorBlockCount += 1 + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && inMajorBlock { + innerMajorBlockCount -= 1 + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "}") && innerMajorBlockCount == 0 && inMajorBlock { + if currentBlock == "config" { + currentBlockContent += lineContents + configBlockContent = currentBlockContent + endOfConfigBlock = i + configBlockExsists = true + currentBlock = "" + currentBlockContent = "" + } else if currentBlock == "pre_operations" { + endOfPreOperationsBlock = i + currentBlockContent += lineContents + preOpsBlockMeta := PreOpsBlockMeta{ + exsists: true, + startOfPreOperationsBlock: startOfPreOperationsBlock, + endOfPreOperationsBlock: endOfPreOperationsBlock, + preOpsBlockContent: currentBlockContent, + } + preOpsBlocksMeta = append(preOpsBlocksMeta, preOpsBlockMeta) + currentBlock = "" + currentBlockContent = "" + } else if currentBlock == "post_operations" { + endOfpostOperationsBlock = i + currentBlockContent += lineContents + postOpsBlockMeta := PostOpsBlockMeta{ + exsists: true, + startOfpostOperationsBlock: startOfpostOperationsBlock, + endOfpostOperationsBlock: endOfpostOperationsBlock, + postOpsBlockContent: currentBlockContent, + } + postOpsBlocksMeta = append(postOpsBlocksMeta, postOpsBlockMeta) + currentBlock = "" + currentBlockContent = "" + } + inMajorBlock = false + } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && !inMajorBlock { + innerMajorBlockCount -= 1 + currentBlockContent += lineContents + } else if lineContents != "" && !inMajorBlock { + if startOfSqlBlock == 0 { + startOfSqlBlock = i + sqlBlockExsists = true + sqlBlockContent += lineContents + } else { + sqlBlockContent += lineContents + endOfSqlBlock = i + } + } else if inMajorBlock { + currentBlockContent += lineContents + } + } + + // fmt.Println("configBlockContent: ", configBlockContent) + // fmt.Println("preOpsBlockContent: ", postOpsBlocksMeta[0].postOpsBlockContent) + // fmt.Println("postOpsBlockContent: ", postOpsBlocksMeta[0].postOpsBlockContent) + // fmt.Println("sqlBlockContent: ", sqlBlockContent) + + return sqlxParserMeta{ + filepath: filepath, + numLines: i, + configBlockMeta: ConfigBlockMeta{ + exsists: configBlockExsists, + startOfConfigBlock: startOfConfigBlock, + endOfConfigBlock: endOfConfigBlock, + configBlockContent: configBlockContent, + }, + preOpsBlocksMeta: preOpsBlocksMeta, + postOpsBlocksMeta: postOpsBlocksMeta, + sqlBlocksMeta: SqlBlockMeta{ + exsists: sqlBlockExsists, + startOfSqlBlock: startOfSqlBlock, + endOfSqlBlock: endOfSqlBlock, + sqlBlockContent: sqlBlockContent, + formattedSqlBlockContent: "", + }, + }, nil +} diff --git a/cmd/utils.go b/cmd/utils.go index 7208d78..9e445d9 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -48,8 +48,8 @@ func findSqlxFiles(dataformRootDirectory string) *[]string { return nil } -func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) error { - queryString := *&sqlxFileMetaData.queryString +func formatSqlCode(sqlxFileMetaData *sqlxParserMeta, pythonScriptPath string, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) error { + queryString := *&sqlxFileMetaData.sqlBlocksMeta.sqlBlockContent cmd := exec.Command(pythonExecutable, pythonScriptPath, string(sqlfluffConfigPath), string(queryString)) @@ -61,7 +61,7 @@ func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, err := cmd.Run() if err != nil { logger.Error(stderr.String(), slog.String("file", sqlxFileMetaData.filepath), "error", err.Error()) - sqlxFileMetaData.formattedQuery = string(queryString) // If there is an error, return the original query + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = string(queryString) return ErrorFormattingSqlxFile } output := stdout.String() @@ -69,11 +69,44 @@ func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, if sql_fluff_not_installed { log.Fatal(color.RedString("sqlfluff not installed. Please install sqlfluff using 'pip install sqlfluff'")) } - sqlxFileMetaData.formattedQuery = output + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = output return nil } -func writeContentsToFile(sqlxFileMetaData *sqlxFileMetaData, formattingError error) { +func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { + spaceBetweenBlocks := "\n\n" + spaceBetweenSameOps := "\n" + + formattedQuery := "" + + preOpsBlocks := sqlxFileMetaData.preOpsBlocksMeta + postOpsBlocks := sqlxFileMetaData.postOpsBlocksMeta + + preOpsBlockContent := "" + if len(preOpsBlocks) > 0 { + for _, preOpsBlock := range preOpsBlocks { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps + } + } + + postOpsBlockContent := "" + if len(postOpsBlocks) > 0 { + for _, postOpsBlock := range postOpsBlocks { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps + } + } + + formattedQuery = sqlxFileMetaData.configBlockMeta.configBlockContent + + spaceBetweenBlocks + + preOpsBlockContent + + spaceBetweenBlocks + + postOpsBlockContent + + spaceBetweenBlocks + + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent + return formattedQuery +} + +func writeContentsToFile(sqlxFileMetaData *sqlxParserMeta, formattingError error) { yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() @@ -87,14 +120,9 @@ func writeContentsToFile(sqlxFileMetaData *sqlxFileMetaData, formattingError err os.MkdirAll(dirToCreate, 0755) // TODO: make this configurable - completeQuery := "" - if sqlxFileMetaData.preOperationsString == "" { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.formattedQuery - } else { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.preOperationsString + "\n\n" + sqlxFileMetaData.formattedQuery - } + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) - err := os.WriteFile(formattedFilePath, []byte(completeQuery), 0664) + err := os.WriteFile(formattedFilePath, []byte(formattedQuery), 0664) if err != nil { fmt.Println("Error writing to file:", err) return @@ -108,18 +136,14 @@ func writeContentsToFile(sqlxFileMetaData *sqlxFileMetaData, formattingError err } } -func writeContentsToFileInPlace(sqlxFileMetaData *sqlxFileMetaData, formattingError error) { +func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingError error) { yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() - completeQuery := "" - if sqlxFileMetaData.preOperationsString == "" { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.formattedQuery - } else { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.preOperationsString + "\n\n" + sqlxFileMetaData.formattedQuery - } - err := os.WriteFile(sqlxFileMetaData.filepath, []byte(completeQuery), 0664) + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) + + err := os.WriteFile(sqlxFileMetaData.filepath, []byte(formattedQuery), 0664) if err != nil { fmt.Println("Error writing to file:", err) return @@ -134,8 +158,7 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxFileMetaData, formattingEr } func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { - sqlxFileMetaData, err := getSqlxFileMetaData(sqlxFilePath) - + sqlxFileMetaData, err := sqlxParser(sqlxFilePath) if err != nil { fmt.Println("Error finding config blocks:", err) } else { From 9b4bcfe9113ae7296fc608c31dc209f1cfb9990b Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 15:37:15 +0100 Subject: [PATCH 2/9] feat: add testing for the new parser --- cmd/sqlx_file_metadata.go | 164 ------------------ cmd/sqlx_file_metadata_test.go | 299 --------------------------------- cmd/sqlx_parser.go | 20 +-- 3 files changed, 10 insertions(+), 473 deletions(-) delete mode 100644 cmd/sqlx_file_metadata.go delete mode 100644 cmd/sqlx_file_metadata_test.go diff --git a/cmd/sqlx_file_metadata.go b/cmd/sqlx_file_metadata.go deleted file mode 100644 index b641a35..0000000 --- a/cmd/sqlx_file_metadata.go +++ /dev/null @@ -1,164 +0,0 @@ -package cmd - -import ( - "bufio" - "fmt" - "io" - "os" - "strings" -) - -type sqlxFileMetaData struct { - filepath string - numLines int - configStartLine int - configEndLine int - configString string - preOperationsStartLine int - preOperationsEndLine int - preOperationsString string - queryString string - formattedQuery string -} - -func getSqlxFileMetaData(filepath string) (sqlxFileMetaData, error) { - file, err := os.Open(filepath) - if err != nil { - fmt.Println("Error opening file:", err) - return sqlxFileMetaData{}, err - } - - numLines, err := countLinesInFile(filepath) - - if err != nil { - fmt.Println("Error opening file:", err) - return sqlxFileMetaData{}, err - } - - // variables to keep track of where we are in the file - var configStartLine = 0 - var configEndLine = 0 - var preOperationsStartLine = 0 - var preOperationsEndLine = 0 - var preOperationsString = "" - var currentLineNumber = 0 - var configString = "" - var queryString = "" - - // flags to keep track of where we are in the file - var isConfigBlock = false - var isConfigBlockEnd = false - var isInInnerConfigBlock = false - var openCurlyBraceCount = 0 - var closeCurlyBraceCount = 0 - var preOperationsBlockStarted = false - var isInInnerPreOperationsBlock = false - var isInPreOperationsBlock = false - var queryBlockStarted = false - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - currentLineNumber++ - var line = scanner.Text() - - //TODO: Check if this line is ever hit ? - if err == io.EOF { - break // End of file - } - - // While we are in the config block, keep adding the lines to the configString - if isConfigBlock == true && isConfigBlockEnd == false { // we are in the config block - configString += line + "\n" - } - - // If the line contains the word "config" and if we are not already in the config block, then we start the config block - if strings.Contains(line, "config") && isConfigBlock == false { - isConfigBlock = true - configStartLine = currentLineNumber - configString += line + "\n" - } - - // keep track of open and close curly braces while we are in the config block - if strings.Contains(line, "{") && isConfigBlock == true { - openCurlyBraceCount++ - if (openCurlyBraceCount != closeCurlyBraceCount) && (openCurlyBraceCount > 1) { - isInInnerConfigBlock = true - } - } - - if strings.Contains(line, "}") && isConfigBlock == true { // TODO: breaks when you have curly brace before the config block ends - - if configStartLine == 0 { - configEndLine = 0 - // TODO: maybe we should return an error here - fmt.Errorf("No config block found in file: %s", filepath) - } else if isInInnerConfigBlock == true { - closeCurlyBraceCount++ - isInInnerConfigBlock = false // NOTE: does this mean that we only go to 1 nesting level ? - } else { - configEndLine = currentLineNumber - isConfigBlockEnd = true - isConfigBlock = false - } - } - - if isConfigBlockEnd == true && currentLineNumber != configEndLine { // query block started but looking for first non empty string - if strings.Contains(line, "pre_operations") { - isInPreOperationsBlock = true - preOperationsBlockStarted = true - openCurlyBraceCount = 0 - closeCurlyBraceCount = 0 - preOperationsStartLine = currentLineNumber - } - } - - if preOperationsBlockStarted == true && currentLineNumber != configEndLine { - preOperationsString += line + "\n" - if strings.Contains(line, "{") { - openCurlyBraceCount++ - if (openCurlyBraceCount != closeCurlyBraceCount) && (openCurlyBraceCount > 1) { - isInInnerPreOperationsBlock = true - } - } - - if strings.Contains(line, "}") { - closeCurlyBraceCount++ - if isInInnerPreOperationsBlock == true { - if closeCurlyBraceCount == openCurlyBraceCount { - isInPreOperationsBlock = false - isInInnerPreOperationsBlock = false - preOperationsEndLine = currentLineNumber - preOperationsBlockStarted = false - } - } - } - } - - if isConfigBlockEnd == true && preOperationsBlockStarted == false && currentLineNumber != preOperationsEndLine && currentLineNumber != configEndLine { - if line != "" { - queryBlockStarted = true - } - } - - if queryBlockStarted && isInPreOperationsBlock == false && currentLineNumber != preOperationsEndLine { // in the query block - if currentLineNumber == numLines { - queryString += line - } else { - queryString += line + "\n" - } - } - - } - - return sqlxFileMetaData{ - filepath: filepath, - numLines: numLines, - configStartLine: configStartLine, - configEndLine: configEndLine, - configString: configString, - preOperationsStartLine: preOperationsStartLine, - preOperationsEndLine: preOperationsEndLine, - preOperationsString: preOperationsString, - queryString: queryString, - }, nil -} diff --git a/cmd/sqlx_file_metadata_test.go b/cmd/sqlx_file_metadata_test.go deleted file mode 100644 index 3d3c235..0000000 --- a/cmd/sqlx_file_metadata_test.go +++ /dev/null @@ -1,299 +0,0 @@ -package cmd - -import ( - "os" - "strings" - "testing" -) - -// TODO: -// 1. If user tries to format a dataform config file that is invalid that might cause unexpected behavior - -func TestGetSqlxFileMetaData(t *testing.T) { - tests := []struct { - name string - content string - expected sqlxFileMetaData - wantErr bool - }{ - { - name: "Nested config blocks and single line query", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -SELECT * FROM electric_cars WHERE model = $1;`, - expected: sqlxFileMetaData{ - numLines: 12, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - { - name: "Pre operations query after config block", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} - -SELECT * FROM electric_cars WHERE model = $1;`, - expected: sqlxFileMetaData{ - numLines: 20, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - preOperationsStartLine: 13, - preOperationsEndLine: 19, - preOperationsString: `pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - { - name: "Pre operations query at the end of the file", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} - -SELECT * FROM electric_cars WHERE model = $1; - -pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} - -`, - expected: sqlxFileMetaData{ - numLines: 23, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - preOperationsStartLine: 16, - preOperationsEndLine: 22, - preOperationsString: `pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - - { - name: "Minimal config and longer query and comment before config", - content: `-- some comment -config { - type: "table", - schema: "electric_cars" -} - - -WITH CTE1 AS ( - SELECT - MAKE - , COUNTY - , CITY - , STATE - , POSTAL_CODE - , MODEL - , MODEL_YEAR - , COUNT(VIN) AS CNT_VIN - FROM ${ref("ALL_EV_CARS_DATA")} - GROUP BY MAKE, COUNTY, CITY, STATE, POSTAL_CODE, MODEL, MODEL_YEAR - HAVING MAKE = ${constants.make} -) -SELECT * FROM CTE1 - `, - expected: sqlxFileMetaData{ - numLines: 22, - configStartLine: 2, - configEndLine: 5, - configString: `config { - type: "table", - schema: "electric_cars" -} -`, - queryString: ` -WITH CTE1 AS ( - SELECT - MAKE - , COUNTY - , CITY - , STATE - , POSTAL_CODE - , MODEL - , MODEL_YEAR - , COUNT(VIN) AS CNT_VIN - FROM ${ref("ALL_EV_CARS_DATA")} - GROUP BY MAKE, COUNTY, CITY, STATE, POSTAL_CODE, MODEL, MODEL_YEAR - HAVING MAKE = ${constants.make} -) -SELECT * FROM CTE1 - `, - }, - wantErr: false, - }, - //TODO: Need to handle case where file does not have a config block - // { - // name: "File without config", - // content: `-- name: GetElectricCars :many - // SELECT * FROM electric_cars WHERE model = $1;`, - // expected: sqlxFileMetaData{ - // numLines: 1, - // configStartLine: 0, - // configEndLine: 0, - // configString: "", - // queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - // }, - // wantErr: false, - // }, - { - name: "Empty file", - content: "", - expected: sqlxFileMetaData{}, - wantErr: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create a temporary file - tmpfile, err := os.CreateTemp("", "test*.sqlx") - if err != nil { - t.Fatalf("Failed to create temp file: %v", err) - } - defer os.Remove(tmpfile.Name()) - - // Write content to the file - if _, err := tmpfile.Write([]byte(tt.content)); err != nil { - t.Fatalf("Failed to write to temp file: %v", err) - } - if err := tmpfile.Close(); err != nil { - t.Fatalf("Failed to close temp file: %v", err) - } - - // Call the function - got, err := getSqlxFileMetaData(tmpfile.Name()) - - // Check for errors - if (err != nil) != tt.wantErr { - t.Errorf("getSqlxFileMetaData() error = %v, wantErr %v", err, tt.wantErr) - return - } - - // Set the filepath in the expected result - tt.expected.filepath = tmpfile.Name() - - // Compare each field separately - if got.filepath != tt.expected.filepath { - t.Errorf("[got]: filepath = %v, [want]: %v", got.filepath, tt.expected.filepath) - } - if got.numLines != tt.expected.numLines { - t.Errorf("[got]: numLines = %v, [want]: %v", got.numLines, tt.expected.numLines) - } - if got.configStartLine != tt.expected.configStartLine { - t.Errorf("[got]: configStartLine = %v, [want]: %v", got.configStartLine, tt.expected.configStartLine) - } - if got.configEndLine != tt.expected.configEndLine { - t.Errorf("[got]: configEndLine = %v, [want]: %v", got.configEndLine, tt.expected.configEndLine) - } - if strings.TrimSpace(got.configString) != strings.TrimSpace(tt.expected.configString) { - t.Errorf("[got]: configString = %v, [want]: %v", got.configString, tt.expected.configString) - } - if got.preOperationsStartLine != tt.expected.preOperationsStartLine { - t.Errorf("[got]: preOperationsStartLine = %v, [want]: %v", got.preOperationsStartLine, tt.expected.preOperationsStartLine) - } - if got.preOperationsEndLine != tt.expected.preOperationsEndLine { - t.Errorf("[got]: preOperationsEndLine = %v, [want]: %v", got.preOperationsEndLine, tt.expected.preOperationsEndLine) - } - if strings.TrimSpace(got.preOperationsString) != strings.TrimSpace(tt.expected.preOperationsString) { - t.Errorf("[got]: preOperationsString = %v, [want]: %v", got.preOperationsString, tt.expected.preOperationsString) - } - if strings.TrimSpace(got.queryString) != strings.TrimSpace(tt.expected.queryString) { - t.Errorf("[got]: queryString = %v, [want]: %v", got.queryString, tt.expected.queryString) - } - }) - } -} diff --git a/cmd/sqlx_parser.go b/cmd/sqlx_parser.go index fe16744..1681c8d 100644 --- a/cmd/sqlx_parser.go +++ b/cmd/sqlx_parser.go @@ -29,11 +29,11 @@ type PostOpsBlockMeta struct { } type SqlBlockMeta struct { - exsists bool - startOfSqlBlock int - endOfSqlBlock int - sqlBlockContent string - formattedSqlBlockContent string + exsists bool + startOfSqlBlock int + endOfSqlBlock int + sqlBlockContent string + formattedSqlBlockContent string } type sqlxParserMeta struct { @@ -179,11 +179,11 @@ func sqlxParser(filepath string) (sqlxParserMeta, error) { preOpsBlocksMeta: preOpsBlocksMeta, postOpsBlocksMeta: postOpsBlocksMeta, sqlBlocksMeta: SqlBlockMeta{ - exsists: sqlBlockExsists, - startOfSqlBlock: startOfSqlBlock, - endOfSqlBlock: endOfSqlBlock, - sqlBlockContent: sqlBlockContent, - formattedSqlBlockContent: "", + exsists: sqlBlockExsists, + startOfSqlBlock: startOfSqlBlock, + endOfSqlBlock: endOfSqlBlock, + sqlBlockContent: sqlBlockContent, + formattedSqlBlockContent: "", }, }, nil } From 2f2b619fddcd96bc96db22762850e8dc01a4cb3b Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 16:43:39 +0100 Subject: [PATCH 3/9] feat: beginning of testing for sqlx parser --- cmd/sqlx_parser.go | 8 +- cmd/sqlx_parser_test.go | 171 ++++++++++++++++++++++++++++++++++++++++ cmd/utils.go | 1 + 3 files changed, 174 insertions(+), 6 deletions(-) create mode 100644 cmd/sqlx_parser_test.go diff --git a/cmd/sqlx_parser.go b/cmd/sqlx_parser.go index 1681c8d..82a928c 100644 --- a/cmd/sqlx_parser.go +++ b/cmd/sqlx_parser.go @@ -148,11 +148,12 @@ func sqlxParser(filepath string) (sqlxParserMeta, error) { } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && !inMajorBlock { innerMajorBlockCount -= 1 currentBlockContent += lineContents - } else if lineContents != "" && !inMajorBlock { + } else if lineContents != "\n" && !inMajorBlock { if startOfSqlBlock == 0 { startOfSqlBlock = i sqlBlockExsists = true sqlBlockContent += lineContents + endOfSqlBlock = i } else { sqlBlockContent += lineContents endOfSqlBlock = i @@ -162,11 +163,6 @@ func sqlxParser(filepath string) (sqlxParserMeta, error) { } } - // fmt.Println("configBlockContent: ", configBlockContent) - // fmt.Println("preOpsBlockContent: ", postOpsBlocksMeta[0].postOpsBlockContent) - // fmt.Println("postOpsBlockContent: ", postOpsBlocksMeta[0].postOpsBlockContent) - // fmt.Println("sqlBlockContent: ", sqlBlockContent) - return sqlxParserMeta{ filepath: filepath, numLines: i, diff --git a/cmd/sqlx_parser_test.go b/cmd/sqlx_parser_test.go new file mode 100644 index 0000000..306b767 --- /dev/null +++ b/cmd/sqlx_parser_test.go @@ -0,0 +1,171 @@ +package cmd + +import ( + "os" + "strings" + "testing" +) + +func TestSqlxParser(t *testing.T) { + tests := []struct { + name string + content string + expected sqlxParserMeta + wantErr bool + }{ + { + name: "Nested config blocks and single line query", + content: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +} +SELECT * FROM electric_cars WHERE model = $1;`, + expected: sqlxParserMeta{ + numLines: 13, + configBlockMeta: ConfigBlockMeta { + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: `config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +} +`}, + sqlBlocksMeta: SqlBlockMeta { + exsists: true, + startOfSqlBlock: 13, + endOfSqlBlock: 13, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + formattedSqlBlockContent: "", + }, + }, + wantErr: false, + }, + { + name: "Pre operations query after config block", + content: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +} +pre_operations { + ${when(incremental(), ` + "`" + `DELETE + FROM + ${self()} + WHERE + DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} +} + +SELECT * FROM electric_cars WHERE model = $1;`, +expected: sqlxParserMeta{ + numLines: 21, + configBlockMeta: ConfigBlockMeta { + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +}`, + }, + sqlBlocksMeta: SqlBlockMeta { + exsists: true, + startOfSqlBlock: 21, + endOfSqlBlock: 21, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + formattedSqlBlockContent: "", + }, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a temporary file + tmpfile, err := os.CreateTemp("", "test*.sqlx") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpfile.Name()) + + // Write content to the file + if _, err := tmpfile.Write([]byte(tt.content)); err != nil { + t.Fatalf("Failed to write to temp file: %v", err) + } + if err := tmpfile.Close(); err != nil { + t.Fatalf("Failed to close temp file: %v", err) + } + + // Call the function + got, err := sqlxParser(tmpfile.Name()) + + // Check for errors + if (err != nil) != tt.wantErr { + t.Errorf("getSqlxFileMetaData() error = %v, wantErr %v", err, tt.wantErr) + return + } + + // Set the filepath in the expected result + tt.expected.filepath = tmpfile.Name() + + // Compare each field separately + if got.filepath != tt.expected.filepath { + t.Errorf("[got]: filepath = %v, [want]: %v", got.filepath, tt.expected.filepath) + } + if got.numLines != tt.expected.numLines { + t.Errorf("[got]: numLines = %v, [want]: %v", got.numLines, tt.expected.numLines) + } + if got.configBlockMeta.startOfConfigBlock != tt.expected.configBlockMeta.startOfConfigBlock { + t.Errorf("[got]: configStartLine = %v, [want]: %v", got.configBlockMeta.startOfConfigBlock, tt.expected.configBlockMeta.startOfConfigBlock) + } + if got.configBlockMeta.endOfConfigBlock != tt.expected.configBlockMeta.endOfConfigBlock { + t.Errorf("[got]: configEndLine = %v, [want]: %v", got.configBlockMeta.endOfConfigBlock, tt.expected.configBlockMeta.endOfConfigBlock) + } + if strings.TrimSpace(got.configBlockMeta.configBlockContent) != strings.TrimSpace(tt.expected.configBlockMeta.configBlockContent) { + t.Errorf("[got]: configString = %v, [want]: %v", got.configBlockMeta.configBlockContent, tt.expected.configBlockMeta.configBlockContent) + } + if strings.TrimSpace(got.sqlBlocksMeta.sqlBlockContent) != strings.TrimSpace(tt.expected.sqlBlocksMeta.sqlBlockContent) { + t.Errorf("[got]: sqlBlockContent = %v, [want]: %v", got.sqlBlocksMeta.sqlBlockContent, tt.expected.sqlBlocksMeta.sqlBlockContent) + } + + if (got.sqlBlocksMeta.startOfSqlBlock != tt.expected.sqlBlocksMeta.startOfSqlBlock) { + t.Errorf("[got]: startOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.startOfSqlBlock, tt.expected.sqlBlocksMeta.startOfSqlBlock) + } + if (got.sqlBlocksMeta.endOfSqlBlock != tt.expected.sqlBlocksMeta.endOfSqlBlock) { + t.Errorf("[got]: endOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.endOfSqlBlock, tt.expected.sqlBlocksMeta.endOfSqlBlock) + } + + }) + } +} diff --git a/cmd/utils.go b/cmd/utils.go index 9e445d9..7448981 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -159,6 +159,7 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingErro func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { sqlxFileMetaData, err := sqlxParser(sqlxFilePath) + fmt.Printf("%+v\n", sqlxFileMetaData) if err != nil { fmt.Println("Error finding config blocks:", err) } else { From 8a599b72051f5c4d108976838d36603c3a5b2cd0 Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 17:31:00 +0100 Subject: [PATCH 4/9] chore: remove function no longer used --- cmd/utils.go | 30 +----------------------------- cmd/utils_test.go | 34 ---------------------------------- 2 files changed, 1 insertion(+), 63 deletions(-) delete mode 100644 cmd/utils_test.go diff --git a/cmd/utils.go b/cmd/utils.go index 7448981..b8902d3 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -159,7 +159,7 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingErro func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { sqlxFileMetaData, err := sqlxParser(sqlxFilePath) - fmt.Printf("%+v\n", sqlxFileMetaData) + // fmt.Printf("%+v\n", sqlxFileMetaData) if err != nil { fmt.Println("Error finding config blocks:", err) } else { @@ -184,34 +184,6 @@ func getIoReader(filepath string) (io.Reader, error) { return file, nil } -// Gives number of lines by reading the file in chunks, supposed to be faster than lineCounterV1 (https://stackoverflow.com/questions/24562942/golang-how-do-i-determine-the-number-of-lines-in-a-file-efficiently) - -func lineCounterV3(reader io.Reader) (int, error) { - buf := make([]byte, 32*1024) - count := 0 - lineSep := []byte{'\n'} - - for { - c, err := reader.Read(buf) - count += bytes.Count(buf[:c], lineSep) - - switch { - case err == io.EOF: - return count, nil - case err != nil: - return count, err - } - } -} - -func countLinesInFile(filepath string) (int, error) { - reader, err := getIoReader(filepath) - if err != nil { - return 0, err - } - return lineCounterV3(reader) -} - func createFileFromText(text string, filepath string) error { f, err := os.Create(filepath) diff --git a/cmd/utils_test.go b/cmd/utils_test.go deleted file mode 100644 index 627b062..0000000 --- a/cmd/utils_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package cmd - -import ( - "strings" - "testing" -) - -func TestLineCounterV3(t *testing.T) { - tests := []struct { - name string - input string - want int - wantErr bool - }{ - {"Normal file 3 lines", "Line 1\nLine 2\nLine 3\n", 3, false}, - {"Empty file", "", 0, false}, - {"No newline at end", "Line 1\nLine 2\nLine 3", 2, false}, - {"Single line", "Single line", 0, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - reader := strings.NewReader(tt.input) - got, err := lineCounterV3(reader) - if (err != nil) != tt.wantErr { - t.Errorf("lineCounterV3() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("lineCounterV3() = %v, want %v", got, tt.want) - } - }) - } -} From 68468b4891582f71f9e5451589559ff984171c4e Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 17:44:15 +0100 Subject: [PATCH 5/9] feat: fix spacing between the blocks --- cmd/utils.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cmd/utils.go b/cmd/utils.go index b8902d3..9b4aa2a 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -78,6 +78,7 @@ func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { spaceBetweenSameOps := "\n" formattedQuery := "" + prePostOpBlock := "" preOpsBlocks := sqlxFileMetaData.preOpsBlocksMeta postOpsBlocks := sqlxFileMetaData.postOpsBlocksMeta @@ -96,11 +97,14 @@ func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { } } + if preOpsBlockContent == "" && postOpsBlockContent == "" { + prePostOpBlock = "" + } else { + prePostOpBlock = spaceBetweenBlocks + preOpsBlockContent + spaceBetweenBlocks + postOpsBlockContent + } + formattedQuery = sqlxFileMetaData.configBlockMeta.configBlockContent + - spaceBetweenBlocks + - preOpsBlockContent + - spaceBetweenBlocks + - postOpsBlockContent + + prePostOpBlock + spaceBetweenBlocks + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent return formattedQuery From 62d18043ca8b0eb52654bd7c5c10023fe7f65fda Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 18:16:53 +0100 Subject: [PATCH 6/9] feat: more tests --- cmd/sqlx_parser_test.go | 108 ++++++++++++++++++++++++---------------- cmd/utils.go | 1 + 2 files changed, 67 insertions(+), 42 deletions(-) diff --git a/cmd/sqlx_parser_test.go b/cmd/sqlx_parser_test.go index 306b767..f4add4f 100644 --- a/cmd/sqlx_parser_test.go +++ b/cmd/sqlx_parser_test.go @@ -29,12 +29,12 @@ config { } SELECT * FROM electric_cars WHERE model = $1;`, expected: sqlxParserMeta{ - numLines: 13, - configBlockMeta: ConfigBlockMeta { - exsists: true, - startOfConfigBlock: 2, - endOfConfigBlock: 12, - configBlockContent: `config { + numLines: 13, + configBlockMeta: ConfigBlockMeta{ + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: `config { type: "table", schema: "electric_cars", dependencies: 'ALL_EV_CARS_DATA', @@ -46,17 +46,17 @@ SELECT * FROM electric_cars WHERE model = $1;`, tags: ["TAG_1"] } `}, - sqlBlocksMeta: SqlBlockMeta { - exsists: true, - startOfSqlBlock: 13, - endOfSqlBlock: 13, - sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, - formattedSqlBlockContent: "", - }, + sqlBlocksMeta: SqlBlockMeta{ + exsists: true, + startOfSqlBlock: 13, + endOfSqlBlock: 13, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + formattedSqlBlockContent: "", + }, }, - wantErr: false, + wantErr: false, }, - { + { name: "Pre operations query after config block", content: ` config { @@ -79,13 +79,13 @@ pre_operations { } SELECT * FROM electric_cars WHERE model = $1;`, -expected: sqlxParserMeta{ - numLines: 21, - configBlockMeta: ConfigBlockMeta { - exsists: true, - startOfConfigBlock: 2, - endOfConfigBlock: 12, - configBlockContent: ` + expected: sqlxParserMeta{ + numLines: 21, + configBlockMeta: ConfigBlockMeta{ + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: ` config { type: "table", schema: "electric_cars", @@ -97,18 +97,32 @@ config { }, tags: ["TAG_1"] }`, - }, - sqlBlocksMeta: SqlBlockMeta { - exsists: true, - startOfSqlBlock: 21, - endOfSqlBlock: 21, - sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, - formattedSqlBlockContent: "", - }, + }, + sqlBlocksMeta: SqlBlockMeta{ + exsists: true, + startOfSqlBlock: 21, + endOfSqlBlock: 21, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + formattedSqlBlockContent: "", + }, + preOpsBlocksMeta: []PreOpsBlockMeta{ + { + exsists: true, + startOfPreOperationsBlock: 13, + endOfPreOperationsBlock: 19, + preOpsBlockContent: `pre_operations { + ${when(incremental(), ` + "`" + `DELETE + FROM + ${self()} + WHERE + DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} +}`, + }, + }, }, - wantErr: false, - }, - } + wantErr: false, + }, + } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -152,18 +166,28 @@ config { if got.configBlockMeta.endOfConfigBlock != tt.expected.configBlockMeta.endOfConfigBlock { t.Errorf("[got]: configEndLine = %v, [want]: %v", got.configBlockMeta.endOfConfigBlock, tt.expected.configBlockMeta.endOfConfigBlock) } - if strings.TrimSpace(got.configBlockMeta.configBlockContent) != strings.TrimSpace(tt.expected.configBlockMeta.configBlockContent) { + if strings.TrimSpace(got.configBlockMeta.configBlockContent) != strings.TrimSpace(tt.expected.configBlockMeta.configBlockContent) { t.Errorf("[got]: configString = %v, [want]: %v", got.configBlockMeta.configBlockContent, tt.expected.configBlockMeta.configBlockContent) } - if strings.TrimSpace(got.sqlBlocksMeta.sqlBlockContent) != strings.TrimSpace(tt.expected.sqlBlocksMeta.sqlBlockContent) { - t.Errorf("[got]: sqlBlockContent = %v, [want]: %v", got.sqlBlocksMeta.sqlBlockContent, tt.expected.sqlBlocksMeta.sqlBlockContent) - } + if strings.TrimSpace(got.sqlBlocksMeta.sqlBlockContent) != strings.TrimSpace(tt.expected.sqlBlocksMeta.sqlBlockContent) { + t.Errorf("[got]: sqlBlockContent = %v, [want]: %v", got.sqlBlocksMeta.sqlBlockContent, tt.expected.sqlBlocksMeta.sqlBlockContent) + } - if (got.sqlBlocksMeta.startOfSqlBlock != tt.expected.sqlBlocksMeta.startOfSqlBlock) { - t.Errorf("[got]: startOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.startOfSqlBlock, tt.expected.sqlBlocksMeta.startOfSqlBlock) - } - if (got.sqlBlocksMeta.endOfSqlBlock != tt.expected.sqlBlocksMeta.endOfSqlBlock) { - t.Errorf("[got]: endOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.endOfSqlBlock, tt.expected.sqlBlocksMeta.endOfSqlBlock) + if got.sqlBlocksMeta.startOfSqlBlock != tt.expected.sqlBlocksMeta.startOfSqlBlock { + t.Errorf("[got]: startOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.startOfSqlBlock, tt.expected.sqlBlocksMeta.startOfSqlBlock) + } + if got.sqlBlocksMeta.endOfSqlBlock != tt.expected.sqlBlocksMeta.endOfSqlBlock { + t.Errorf("[got]: endOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.endOfSqlBlock, tt.expected.sqlBlocksMeta.endOfSqlBlock) + } + + if len(tt.expected.preOpsBlocksMeta) > 0 { + if got.preOpsBlocksMeta[0].startOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock { + t.Errorf("[got]: startOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].startOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock) + } + + if got.preOpsBlocksMeta[0].endOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock { + t.Errorf("[got]: endOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].endOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock) + } } }) diff --git a/cmd/utils.go b/cmd/utils.go index 9b4aa2a..58a7bf1 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -164,6 +164,7 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingErro func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { sqlxFileMetaData, err := sqlxParser(sqlxFilePath) // fmt.Printf("%+v\n", sqlxFileMetaData) + if err != nil { fmt.Println("Error finding config blocks:", err) } else { From 546af1893eed9e8297bff96492b37227a9f26e85 Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 20:16:56 +0100 Subject: [PATCH 7/9] chore: some more testing --- cmd/sqlx_parser.go | 8 ++-- cmd/sqlx_parser_test.go | 102 ++++++++++++++++++++++++++++------------ 2 files changed, 76 insertions(+), 34 deletions(-) diff --git a/cmd/sqlx_parser.go b/cmd/sqlx_parser.go index 82a928c..b43d8a0 100644 --- a/cmd/sqlx_parser.go +++ b/cmd/sqlx_parser.go @@ -103,10 +103,12 @@ func sqlxParser(filepath string) (sqlxParserMeta, error) { currentBlockContent += lineContents } else if strings.Contains(lineContents, "{") && inMajorBlock { if strings.Contains(lineContents, "}") { + currentBlockContent += lineContents continue - } - isInInnerMajorBlock = true - innerMajorBlockCount += 1 + } else { + isInInnerMajorBlock = true + innerMajorBlockCount += 1 + } currentBlockContent += lineContents } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && inMajorBlock { innerMajorBlockCount -= 1 diff --git a/cmd/sqlx_parser_test.go b/cmd/sqlx_parser_test.go index f4add4f..eb908b6 100644 --- a/cmd/sqlx_parser_test.go +++ b/cmd/sqlx_parser_test.go @@ -6,6 +6,29 @@ import ( "testing" ) +var simplePostOpsBlock = ` +post_operations { + select 1 + union all + select 2 +} +` + +var complexPreOpsBlock = ` +pre_operations { + ${when(incremental(),` + + "`" + ` + DELETE + FROM + ${self()} + WHERE + DATE(SNAPSHOT_DATE) = CURRENT_DATE()` + + "`" + ` + ) + } +} +` + func TestSqlxParser(t *testing.T) { tests := []struct { name string @@ -69,18 +92,12 @@ config { clusterBy: ["CITY", "STATE"] }, tags: ["TAG_1"] -} -pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} - -SELECT * FROM electric_cars WHERE model = $1;`, +}` + complexPreOpsBlock + simplePostOpsBlock + ` +SELECT * FROM electric_cars WHERE model = $1 +limit 100 + `, expected: sqlxParserMeta{ - numLines: 21, + numLines: 32, configBlockMeta: ConfigBlockMeta{ exsists: true, startOfConfigBlock: 2, @@ -99,24 +116,28 @@ config { }`, }, sqlBlocksMeta: SqlBlockMeta{ - exsists: true, - startOfSqlBlock: 21, - endOfSqlBlock: 21, - sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + exsists: true, + startOfSqlBlock: 30, + endOfSqlBlock: 32, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1 +limit 100 + `, formattedSqlBlockContent: "", }, preOpsBlocksMeta: []PreOpsBlockMeta{ { exsists: true, startOfPreOperationsBlock: 13, - endOfPreOperationsBlock: 19, - preOpsBlockContent: `pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -}`, + endOfPreOperationsBlock: 22, + preOpsBlockContent: strings.TrimPrefix(complexPreOpsBlock, "\n"), + }, + }, + postOpsBlocksMeta: []PostOpsBlockMeta{ + { + exsists: true, + startOfpostOperationsBlock: 24, + endOfpostOperationsBlock: 28, + postOpsBlockContent: strings.TrimPrefix(simplePostOpsBlock, "\n"), }, }, }, @@ -180,15 +201,34 @@ config { t.Errorf("[got]: endOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.endOfSqlBlock, tt.expected.sqlBlocksMeta.endOfSqlBlock) } - if len(tt.expected.preOpsBlocksMeta) > 0 { - if got.preOpsBlocksMeta[0].startOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock { - t.Errorf("[got]: startOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].startOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock) - } + if len(tt.expected.preOpsBlocksMeta) > 0 || len(tt.expected.preOpsBlocksMeta) > 0 { - if got.preOpsBlocksMeta[0].endOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock { - t.Errorf("[got]: endOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].endOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock) - } - } + if got.preOpsBlocksMeta[0].startOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock { + t.Errorf("[got]: startOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].startOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock) + } + + if got.preOpsBlocksMeta[0].endOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock { + t.Errorf("[got]: endOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].endOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock) + } + + if got.preOpsBlocksMeta[0].preOpsBlockContent != tt.expected.preOpsBlocksMeta[0].preOpsBlockContent { + t.Errorf("[got]: preOpsBlockContent = %q, [want]: %q", got.preOpsBlocksMeta[0].preOpsBlockContent, tt.expected.preOpsBlocksMeta[0].preOpsBlockContent) + } + + } + + if len(tt.expected.postOpsBlocksMeta) > 0 || len(tt.expected.postOpsBlocksMeta) > 0 { + if got.postOpsBlocksMeta[0].startOfpostOperationsBlock != tt.expected.postOpsBlocksMeta[0].startOfpostOperationsBlock { + t.Errorf("[got]: startOfpostOperationsBlock = %v, [want]: %v", got.postOpsBlocksMeta[0].startOfpostOperationsBlock, tt.expected.postOpsBlocksMeta[0].startOfpostOperationsBlock) + } + + if got.postOpsBlocksMeta[0].endOfpostOperationsBlock != tt.expected.postOpsBlocksMeta[0].endOfpostOperationsBlock { + t.Errorf("[got]: endOfpostOperationsBlock = %v, [want]: %v", got.postOpsBlocksMeta[0].endOfpostOperationsBlock, tt.expected.postOpsBlocksMeta[0].endOfpostOperationsBlock) + } + if got.postOpsBlocksMeta[0].postOpsBlockContent != tt.expected.postOpsBlocksMeta[0].postOpsBlockContent { + t.Errorf("[got]: postOpsBlockContent = %q, [want]: %q", got.postOpsBlocksMeta[0].postOpsBlockContent, tt.expected.postOpsBlocksMeta[0].postOpsBlockContent) + } + } }) } From 787a45a2abf3f5791f13d40d1de9f8a4c701291d Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 20:48:58 +0100 Subject: [PATCH 8/9] chore: formatting --- cmd/utils.go | 91 ++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/cmd/utils.go b/cmd/utils.go index 58a7bf1..4bfafe7 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -61,7 +61,7 @@ func formatSqlCode(sqlxFileMetaData *sqlxParserMeta, pythonScriptPath string, sq err := cmd.Run() if err != nil { logger.Error(stderr.String(), slog.String("file", sqlxFileMetaData.filepath), "error", err.Error()) - sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = string(queryString) + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = string(queryString) return ErrorFormattingSqlxFile } output := stdout.String() @@ -69,45 +69,46 @@ func formatSqlCode(sqlxFileMetaData *sqlxParserMeta, pythonScriptPath string, sq if sql_fluff_not_installed { log.Fatal(color.RedString("sqlfluff not installed. Please install sqlfluff using 'pip install sqlfluff'")) } - sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = output + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = output return nil } func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { - spaceBetweenBlocks := "\n\n" - spaceBetweenSameOps := "\n" - - formattedQuery := "" - prePostOpBlock := "" - - preOpsBlocks := sqlxFileMetaData.preOpsBlocksMeta - postOpsBlocks := sqlxFileMetaData.postOpsBlocksMeta - - preOpsBlockContent := "" - if len(preOpsBlocks) > 0 { - for _, preOpsBlock := range preOpsBlocks { - preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps - } - } - - postOpsBlockContent := "" - if len(postOpsBlocks) > 0 { - for _, postOpsBlock := range postOpsBlocks { - postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps - } - } - - if preOpsBlockContent == "" && postOpsBlockContent == "" { - prePostOpBlock = "" - } else { - prePostOpBlock = spaceBetweenBlocks + preOpsBlockContent + spaceBetweenBlocks + postOpsBlockContent - } - - formattedQuery = sqlxFileMetaData.configBlockMeta.configBlockContent + - prePostOpBlock + - spaceBetweenBlocks + - sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent - return formattedQuery + spaceBetweenBlocks := "\n\n" + // NOTE: Dataform at the time of writing this does not really have multiple preOpsBlocks or postOpsBlocks in its compiled json although it does not throw a compilation error if you put one + spaceBetweenSameOps := "\n" + + formattedQuery := "" + prePostOpBlock := "" + + preOpsBlocks := sqlxFileMetaData.preOpsBlocksMeta + postOpsBlocks := sqlxFileMetaData.postOpsBlocksMeta + + preOpsBlockContent := "" + if len(preOpsBlocks) > 0 { + for _, preOpsBlock := range preOpsBlocks { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps + } + } + + postOpsBlockContent := "" + if len(postOpsBlocks) > 0 { + for _, postOpsBlock := range postOpsBlocks { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps + } + } + + if preOpsBlockContent == "" && postOpsBlockContent == "" { + prePostOpBlock = "" + } else { + prePostOpBlock = spaceBetweenBlocks + preOpsBlockContent + spaceBetweenBlocks + postOpsBlockContent + } + + formattedQuery = sqlxFileMetaData.configBlockMeta.configBlockContent + + prePostOpBlock + + spaceBetweenBlocks + + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent + return formattedQuery } func writeContentsToFile(sqlxFileMetaData *sqlxParserMeta, formattingError error) { @@ -115,16 +116,16 @@ func writeContentsToFile(sqlxFileMetaData *sqlxParserMeta, formattingError error yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() - filPathSeparator := string(os.PathSeparator) - _definitions := "definitions" + filPathSeparator + filPathSeparator := string(os.PathSeparator) + _definitions := "definitions" + filPathSeparator baseFilepath := strings.Split(sqlxFileMetaData.filepath, _definitions) - formattedFilePath := filepath.Join("formatted", "definitions", baseFilepath[1]) + formattedFilePath := filepath.Join("formatted", "definitions", baseFilepath[1]) dirToCreate := formattedFilePath[:strings.LastIndex(formattedFilePath, filPathSeparator)] os.MkdirAll(dirToCreate, 0755) // TODO: make this configurable - formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) err := os.WriteFile(formattedFilePath, []byte(formattedQuery), 0664) if err != nil { @@ -145,7 +146,7 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingErro yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() - formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) err := os.WriteFile(sqlxFileMetaData.filepath, []byte(formattedQuery), 0664) if err != nil { @@ -162,13 +163,13 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingErro } func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { - sqlxFileMetaData, err := sqlxParser(sqlxFilePath) - // fmt.Printf("%+v\n", sqlxFileMetaData) + sqlxFileMetaData, err := sqlxParser(sqlxFilePath) + // fmt.Printf("%+v\n", sqlxFileMetaData) if err != nil { fmt.Println("Error finding config blocks:", err) } else { - pythonScriptPath := filepath.Join(".formatdataform", "sqlfluff_formatter.py") + pythonScriptPath := filepath.Join(".formatdataform", "sqlfluff_formatter.py") formattingError := formatSqlCode(&sqlxFileMetaData, pythonScriptPath, sqlfluffConfigPath, pythonExecutable, logger) if inplace { writeContentsToFileInPlace(&sqlxFileMetaData, formattingError) @@ -197,7 +198,7 @@ func createFileFromText(text string, filepath string) error { return err } else { f.WriteString(text) - fmt.Printf("file created at: `%s` \n", filepath) + fmt.Printf("file created at: `%s` \n", filepath) f.Close() } return nil From 91dacfe2b88698ebf6fd1d4621826e6fd06addb5 Mon Sep 17 00:00:00 2001 From: "ashish.alex10@gmail.com" Date: Tue, 20 Aug 2024 20:52:00 +0100 Subject: [PATCH 9/9] chore: handle spaces --- cmd/utils.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cmd/utils.go b/cmd/utils.go index 4bfafe7..4fd85e5 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -86,15 +86,24 @@ func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { preOpsBlockContent := "" if len(preOpsBlocks) > 0 { - for _, preOpsBlock := range preOpsBlocks { - preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps + for idx, preOpsBlock := range preOpsBlocks { + if idx == len(preOpsBlocks)-1 { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + } else { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps + } + } } postOpsBlockContent := "" if len(postOpsBlocks) > 0 { - for _, postOpsBlock := range postOpsBlocks { - postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps + for idx, postOpsBlock := range postOpsBlocks { + if idx == len(postOpsBlocks)-1 { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + } else { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps + } } }