diff --git a/cmd/sqlx_file_metadata.go b/cmd/sqlx_file_metadata.go deleted file mode 100644 index b641a35..0000000 --- a/cmd/sqlx_file_metadata.go +++ /dev/null @@ -1,164 +0,0 @@ -package cmd - -import ( - "bufio" - "fmt" - "io" - "os" - "strings" -) - -type sqlxFileMetaData struct { - filepath string - numLines int - configStartLine int - configEndLine int - configString string - preOperationsStartLine int - preOperationsEndLine int - preOperationsString string - queryString string - formattedQuery string -} - -func getSqlxFileMetaData(filepath string) (sqlxFileMetaData, error) { - file, err := os.Open(filepath) - if err != nil { - fmt.Println("Error opening file:", err) - return sqlxFileMetaData{}, err - } - - numLines, err := countLinesInFile(filepath) - - if err != nil { - fmt.Println("Error opening file:", err) - return sqlxFileMetaData{}, err - } - - // variables to keep track of where we are in the file - var configStartLine = 0 - var configEndLine = 0 - var preOperationsStartLine = 0 - var preOperationsEndLine = 0 - var preOperationsString = "" - var currentLineNumber = 0 - var configString = "" - var queryString = "" - - // flags to keep track of where we are in the file - var isConfigBlock = false - var isConfigBlockEnd = false - var isInInnerConfigBlock = false - var openCurlyBraceCount = 0 - var closeCurlyBraceCount = 0 - var preOperationsBlockStarted = false - var isInInnerPreOperationsBlock = false - var isInPreOperationsBlock = false - var queryBlockStarted = false - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - currentLineNumber++ - var line = scanner.Text() - - //TODO: Check if this line is ever hit ? - if err == io.EOF { - break // End of file - } - - // While we are in the config block, keep adding the lines to the configString - if isConfigBlock == true && isConfigBlockEnd == false { // we are in the config block - configString += line + "\n" - } - - // If the line contains the word "config" and if we are not already in the config block, then we start the config block - if strings.Contains(line, "config") && isConfigBlock == false { - isConfigBlock = true - configStartLine = currentLineNumber - configString += line + "\n" - } - - // keep track of open and close curly braces while we are in the config block - if strings.Contains(line, "{") && isConfigBlock == true { - openCurlyBraceCount++ - if (openCurlyBraceCount != closeCurlyBraceCount) && (openCurlyBraceCount > 1) { - isInInnerConfigBlock = true - } - } - - if strings.Contains(line, "}") && isConfigBlock == true { // TODO: breaks when you have curly brace before the config block ends - - if configStartLine == 0 { - configEndLine = 0 - // TODO: maybe we should return an error here - fmt.Errorf("No config block found in file: %s", filepath) - } else if isInInnerConfigBlock == true { - closeCurlyBraceCount++ - isInInnerConfigBlock = false // NOTE: does this mean that we only go to 1 nesting level ? - } else { - configEndLine = currentLineNumber - isConfigBlockEnd = true - isConfigBlock = false - } - } - - if isConfigBlockEnd == true && currentLineNumber != configEndLine { // query block started but looking for first non empty string - if strings.Contains(line, "pre_operations") { - isInPreOperationsBlock = true - preOperationsBlockStarted = true - openCurlyBraceCount = 0 - closeCurlyBraceCount = 0 - preOperationsStartLine = currentLineNumber - } - } - - if preOperationsBlockStarted == true && currentLineNumber != configEndLine { - preOperationsString += line + "\n" - if strings.Contains(line, "{") { - openCurlyBraceCount++ - if (openCurlyBraceCount != closeCurlyBraceCount) && (openCurlyBraceCount > 1) { - isInInnerPreOperationsBlock = true - } - } - - if strings.Contains(line, "}") { - closeCurlyBraceCount++ - if isInInnerPreOperationsBlock == true { - if closeCurlyBraceCount == openCurlyBraceCount { - isInPreOperationsBlock = false - isInInnerPreOperationsBlock = false - preOperationsEndLine = currentLineNumber - preOperationsBlockStarted = false - } - } - } - } - - if isConfigBlockEnd == true && preOperationsBlockStarted == false && currentLineNumber != preOperationsEndLine && currentLineNumber != configEndLine { - if line != "" { - queryBlockStarted = true - } - } - - if queryBlockStarted && isInPreOperationsBlock == false && currentLineNumber != preOperationsEndLine { // in the query block - if currentLineNumber == numLines { - queryString += line - } else { - queryString += line + "\n" - } - } - - } - - return sqlxFileMetaData{ - filepath: filepath, - numLines: numLines, - configStartLine: configStartLine, - configEndLine: configEndLine, - configString: configString, - preOperationsStartLine: preOperationsStartLine, - preOperationsEndLine: preOperationsEndLine, - preOperationsString: preOperationsString, - queryString: queryString, - }, nil -} diff --git a/cmd/sqlx_file_metadata_test.go b/cmd/sqlx_file_metadata_test.go deleted file mode 100644 index 3d3c235..0000000 --- a/cmd/sqlx_file_metadata_test.go +++ /dev/null @@ -1,299 +0,0 @@ -package cmd - -import ( - "os" - "strings" - "testing" -) - -// TODO: -// 1. If user tries to format a dataform config file that is invalid that might cause unexpected behavior - -func TestGetSqlxFileMetaData(t *testing.T) { - tests := []struct { - name string - content string - expected sqlxFileMetaData - wantErr bool - }{ - { - name: "Nested config blocks and single line query", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -SELECT * FROM electric_cars WHERE model = $1;`, - expected: sqlxFileMetaData{ - numLines: 12, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - { - name: "Pre operations query after config block", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} - -SELECT * FROM electric_cars WHERE model = $1;`, - expected: sqlxFileMetaData{ - numLines: 20, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - preOperationsStartLine: 13, - preOperationsEndLine: 19, - preOperationsString: `pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - { - name: "Pre operations query at the end of the file", - content: ` -config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} - -SELECT * FROM electric_cars WHERE model = $1; - -pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} - -`, - expected: sqlxFileMetaData{ - numLines: 23, - configStartLine: 2, - configEndLine: 12, - configString: `config { - type: "table", - schema: "electric_cars", - dependencies: 'ALL_EV_CARS_DATA', - bigquery: { - partitionBy: "MODEL", - requirePartitionFilter : true, - clusterBy: ["CITY", "STATE"] - }, - tags: ["TAG_1"] -} -`, - preOperationsStartLine: 16, - preOperationsEndLine: 22, - preOperationsString: `pre_operations { - ${when(incremental(), ` + "`" + `DELETE - FROM - ${self()} - WHERE - DATE(PIPELINE_RUN_DATETIME) = CURRENT_DATE()` + "`" + `)} -} -`, - queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - }, - wantErr: false, - }, - - { - name: "Minimal config and longer query and comment before config", - content: `-- some comment -config { - type: "table", - schema: "electric_cars" -} - - -WITH CTE1 AS ( - SELECT - MAKE - , COUNTY - , CITY - , STATE - , POSTAL_CODE - , MODEL - , MODEL_YEAR - , COUNT(VIN) AS CNT_VIN - FROM ${ref("ALL_EV_CARS_DATA")} - GROUP BY MAKE, COUNTY, CITY, STATE, POSTAL_CODE, MODEL, MODEL_YEAR - HAVING MAKE = ${constants.make} -) -SELECT * FROM CTE1 - `, - expected: sqlxFileMetaData{ - numLines: 22, - configStartLine: 2, - configEndLine: 5, - configString: `config { - type: "table", - schema: "electric_cars" -} -`, - queryString: ` -WITH CTE1 AS ( - SELECT - MAKE - , COUNTY - , CITY - , STATE - , POSTAL_CODE - , MODEL - , MODEL_YEAR - , COUNT(VIN) AS CNT_VIN - FROM ${ref("ALL_EV_CARS_DATA")} - GROUP BY MAKE, COUNTY, CITY, STATE, POSTAL_CODE, MODEL, MODEL_YEAR - HAVING MAKE = ${constants.make} -) -SELECT * FROM CTE1 - `, - }, - wantErr: false, - }, - //TODO: Need to handle case where file does not have a config block - // { - // name: "File without config", - // content: `-- name: GetElectricCars :many - // SELECT * FROM electric_cars WHERE model = $1;`, - // expected: sqlxFileMetaData{ - // numLines: 1, - // configStartLine: 0, - // configEndLine: 0, - // configString: "", - // queryString: `SELECT * FROM electric_cars WHERE model = $1;`, - // }, - // wantErr: false, - // }, - { - name: "Empty file", - content: "", - expected: sqlxFileMetaData{}, - wantErr: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create a temporary file - tmpfile, err := os.CreateTemp("", "test*.sqlx") - if err != nil { - t.Fatalf("Failed to create temp file: %v", err) - } - defer os.Remove(tmpfile.Name()) - - // Write content to the file - if _, err := tmpfile.Write([]byte(tt.content)); err != nil { - t.Fatalf("Failed to write to temp file: %v", err) - } - if err := tmpfile.Close(); err != nil { - t.Fatalf("Failed to close temp file: %v", err) - } - - // Call the function - got, err := getSqlxFileMetaData(tmpfile.Name()) - - // Check for errors - if (err != nil) != tt.wantErr { - t.Errorf("getSqlxFileMetaData() error = %v, wantErr %v", err, tt.wantErr) - return - } - - // Set the filepath in the expected result - tt.expected.filepath = tmpfile.Name() - - // Compare each field separately - if got.filepath != tt.expected.filepath { - t.Errorf("[got]: filepath = %v, [want]: %v", got.filepath, tt.expected.filepath) - } - if got.numLines != tt.expected.numLines { - t.Errorf("[got]: numLines = %v, [want]: %v", got.numLines, tt.expected.numLines) - } - if got.configStartLine != tt.expected.configStartLine { - t.Errorf("[got]: configStartLine = %v, [want]: %v", got.configStartLine, tt.expected.configStartLine) - } - if got.configEndLine != tt.expected.configEndLine { - t.Errorf("[got]: configEndLine = %v, [want]: %v", got.configEndLine, tt.expected.configEndLine) - } - if strings.TrimSpace(got.configString) != strings.TrimSpace(tt.expected.configString) { - t.Errorf("[got]: configString = %v, [want]: %v", got.configString, tt.expected.configString) - } - if got.preOperationsStartLine != tt.expected.preOperationsStartLine { - t.Errorf("[got]: preOperationsStartLine = %v, [want]: %v", got.preOperationsStartLine, tt.expected.preOperationsStartLine) - } - if got.preOperationsEndLine != tt.expected.preOperationsEndLine { - t.Errorf("[got]: preOperationsEndLine = %v, [want]: %v", got.preOperationsEndLine, tt.expected.preOperationsEndLine) - } - if strings.TrimSpace(got.preOperationsString) != strings.TrimSpace(tt.expected.preOperationsString) { - t.Errorf("[got]: preOperationsString = %v, [want]: %v", got.preOperationsString, tt.expected.preOperationsString) - } - if strings.TrimSpace(got.queryString) != strings.TrimSpace(tt.expected.queryString) { - t.Errorf("[got]: queryString = %v, [want]: %v", got.queryString, tt.expected.queryString) - } - }) - } -} diff --git a/cmd/sqlx_parser.go b/cmd/sqlx_parser.go new file mode 100644 index 0000000..b43d8a0 --- /dev/null +++ b/cmd/sqlx_parser.go @@ -0,0 +1,187 @@ +package cmd + +import ( + "bufio" + "fmt" + "os" + "strings" +) + +type ConfigBlockMeta struct { + exsists bool + startOfConfigBlock int + endOfConfigBlock int + configBlockContent string +} + +type PreOpsBlockMeta struct { + exsists bool + startOfPreOperationsBlock int + endOfPreOperationsBlock int + preOpsBlockContent string +} + +type PostOpsBlockMeta struct { + exsists bool + startOfpostOperationsBlock int + endOfpostOperationsBlock int + postOpsBlockContent string +} + +type SqlBlockMeta struct { + exsists bool + startOfSqlBlock int + endOfSqlBlock int + sqlBlockContent string + formattedSqlBlockContent string +} + +type sqlxParserMeta struct { + filepath string + numLines int + configBlockMeta ConfigBlockMeta + preOpsBlocksMeta []PreOpsBlockMeta + postOpsBlocksMeta []PostOpsBlockMeta + sqlBlocksMeta SqlBlockMeta +} + +func sqlxParser(filepath string) (sqlxParserMeta, error) { + + var inMajorBlock = false + + var startOfConfigBlock = 0 + var endOfConfigBlock = 0 + var configBlockExsists = false + var configBlockContent = "" + + var preOpsBlocksMeta = []PreOpsBlockMeta{} + var startOfPreOperationsBlock = 0 + var endOfPreOperationsBlock = 0 + + var postOpsBlocksMeta = []PostOpsBlockMeta{} + var startOfpostOperationsBlock = 0 + var endOfpostOperationsBlock = 0 + + var startOfSqlBlock = 0 + var endOfSqlBlock = 0 + var sqlBlockExsists = false + var sqlBlockContent = "" + + var isInInnerMajorBlock = false + var innerMajorBlockCount = 0 + + var currentBlock = "" + var currentBlockContent = "" + + file, err := os.Open(filepath) + if err != nil { + fmt.Printf("Error: %v\n", err) + return sqlxParserMeta{}, err + } + + i := 0 + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + i++ + var lineContents = scanner.Text() + "\n" + + if strings.Contains(lineContents, "config {") { + inMajorBlock = true + currentBlock = "config" + startOfConfigBlock = i + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "post_operations {") && !inMajorBlock { + startOfpostOperationsBlock = i + inMajorBlock = true + currentBlock = "post_operations" + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "pre_operations {") && !inMajorBlock { + inMajorBlock = true + currentBlock = "pre_operations" + startOfPreOperationsBlock = i + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "{") && inMajorBlock { + if strings.Contains(lineContents, "}") { + currentBlockContent += lineContents + continue + } else { + isInInnerMajorBlock = true + innerMajorBlockCount += 1 + } + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && inMajorBlock { + innerMajorBlockCount -= 1 + currentBlockContent += lineContents + } else if strings.Contains(lineContents, "}") && innerMajorBlockCount == 0 && inMajorBlock { + if currentBlock == "config" { + currentBlockContent += lineContents + configBlockContent = currentBlockContent + endOfConfigBlock = i + configBlockExsists = true + currentBlock = "" + currentBlockContent = "" + } else if currentBlock == "pre_operations" { + endOfPreOperationsBlock = i + currentBlockContent += lineContents + preOpsBlockMeta := PreOpsBlockMeta{ + exsists: true, + startOfPreOperationsBlock: startOfPreOperationsBlock, + endOfPreOperationsBlock: endOfPreOperationsBlock, + preOpsBlockContent: currentBlockContent, + } + preOpsBlocksMeta = append(preOpsBlocksMeta, preOpsBlockMeta) + currentBlock = "" + currentBlockContent = "" + } else if currentBlock == "post_operations" { + endOfpostOperationsBlock = i + currentBlockContent += lineContents + postOpsBlockMeta := PostOpsBlockMeta{ + exsists: true, + startOfpostOperationsBlock: startOfpostOperationsBlock, + endOfpostOperationsBlock: endOfpostOperationsBlock, + postOpsBlockContent: currentBlockContent, + } + postOpsBlocksMeta = append(postOpsBlocksMeta, postOpsBlockMeta) + currentBlock = "" + currentBlockContent = "" + } + inMajorBlock = false + } else if strings.Contains(lineContents, "}") && isInInnerMajorBlock && innerMajorBlockCount >= 1 && !inMajorBlock { + innerMajorBlockCount -= 1 + currentBlockContent += lineContents + } else if lineContents != "\n" && !inMajorBlock { + if startOfSqlBlock == 0 { + startOfSqlBlock = i + sqlBlockExsists = true + sqlBlockContent += lineContents + endOfSqlBlock = i + } else { + sqlBlockContent += lineContents + endOfSqlBlock = i + } + } else if inMajorBlock { + currentBlockContent += lineContents + } + } + + return sqlxParserMeta{ + filepath: filepath, + numLines: i, + configBlockMeta: ConfigBlockMeta{ + exsists: configBlockExsists, + startOfConfigBlock: startOfConfigBlock, + endOfConfigBlock: endOfConfigBlock, + configBlockContent: configBlockContent, + }, + preOpsBlocksMeta: preOpsBlocksMeta, + postOpsBlocksMeta: postOpsBlocksMeta, + sqlBlocksMeta: SqlBlockMeta{ + exsists: sqlBlockExsists, + startOfSqlBlock: startOfSqlBlock, + endOfSqlBlock: endOfSqlBlock, + sqlBlockContent: sqlBlockContent, + formattedSqlBlockContent: "", + }, + }, nil +} diff --git a/cmd/sqlx_parser_test.go b/cmd/sqlx_parser_test.go new file mode 100644 index 0000000..eb908b6 --- /dev/null +++ b/cmd/sqlx_parser_test.go @@ -0,0 +1,235 @@ +package cmd + +import ( + "os" + "strings" + "testing" +) + +var simplePostOpsBlock = ` +post_operations { + select 1 + union all + select 2 +} +` + +var complexPreOpsBlock = ` +pre_operations { + ${when(incremental(),` + + "`" + ` + DELETE + FROM + ${self()} + WHERE + DATE(SNAPSHOT_DATE) = CURRENT_DATE()` + + "`" + ` + ) + } +} +` + +func TestSqlxParser(t *testing.T) { + tests := []struct { + name string + content string + expected sqlxParserMeta + wantErr bool + }{ + { + name: "Nested config blocks and single line query", + content: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +} +SELECT * FROM electric_cars WHERE model = $1;`, + expected: sqlxParserMeta{ + numLines: 13, + configBlockMeta: ConfigBlockMeta{ + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: `config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +} +`}, + sqlBlocksMeta: SqlBlockMeta{ + exsists: true, + startOfSqlBlock: 13, + endOfSqlBlock: 13, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1;`, + formattedSqlBlockContent: "", + }, + }, + wantErr: false, + }, + { + name: "Pre operations query after config block", + content: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +}` + complexPreOpsBlock + simplePostOpsBlock + ` +SELECT * FROM electric_cars WHERE model = $1 +limit 100 + `, + expected: sqlxParserMeta{ + numLines: 32, + configBlockMeta: ConfigBlockMeta{ + exsists: true, + startOfConfigBlock: 2, + endOfConfigBlock: 12, + configBlockContent: ` +config { + type: "table", + schema: "electric_cars", + dependencies: 'ALL_EV_CARS_DATA', + bigquery: { + partitionBy: "MODEL", + requirePartitionFilter : true, + clusterBy: ["CITY", "STATE"] + }, + tags: ["TAG_1"] +}`, + }, + sqlBlocksMeta: SqlBlockMeta{ + exsists: true, + startOfSqlBlock: 30, + endOfSqlBlock: 32, + sqlBlockContent: `SELECT * FROM electric_cars WHERE model = $1 +limit 100 + `, + formattedSqlBlockContent: "", + }, + preOpsBlocksMeta: []PreOpsBlockMeta{ + { + exsists: true, + startOfPreOperationsBlock: 13, + endOfPreOperationsBlock: 22, + preOpsBlockContent: strings.TrimPrefix(complexPreOpsBlock, "\n"), + }, + }, + postOpsBlocksMeta: []PostOpsBlockMeta{ + { + exsists: true, + startOfpostOperationsBlock: 24, + endOfpostOperationsBlock: 28, + postOpsBlockContent: strings.TrimPrefix(simplePostOpsBlock, "\n"), + }, + }, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a temporary file + tmpfile, err := os.CreateTemp("", "test*.sqlx") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpfile.Name()) + + // Write content to the file + if _, err := tmpfile.Write([]byte(tt.content)); err != nil { + t.Fatalf("Failed to write to temp file: %v", err) + } + if err := tmpfile.Close(); err != nil { + t.Fatalf("Failed to close temp file: %v", err) + } + + // Call the function + got, err := sqlxParser(tmpfile.Name()) + + // Check for errors + if (err != nil) != tt.wantErr { + t.Errorf("getSqlxFileMetaData() error = %v, wantErr %v", err, tt.wantErr) + return + } + + // Set the filepath in the expected result + tt.expected.filepath = tmpfile.Name() + + // Compare each field separately + if got.filepath != tt.expected.filepath { + t.Errorf("[got]: filepath = %v, [want]: %v", got.filepath, tt.expected.filepath) + } + if got.numLines != tt.expected.numLines { + t.Errorf("[got]: numLines = %v, [want]: %v", got.numLines, tt.expected.numLines) + } + if got.configBlockMeta.startOfConfigBlock != tt.expected.configBlockMeta.startOfConfigBlock { + t.Errorf("[got]: configStartLine = %v, [want]: %v", got.configBlockMeta.startOfConfigBlock, tt.expected.configBlockMeta.startOfConfigBlock) + } + if got.configBlockMeta.endOfConfigBlock != tt.expected.configBlockMeta.endOfConfigBlock { + t.Errorf("[got]: configEndLine = %v, [want]: %v", got.configBlockMeta.endOfConfigBlock, tt.expected.configBlockMeta.endOfConfigBlock) + } + if strings.TrimSpace(got.configBlockMeta.configBlockContent) != strings.TrimSpace(tt.expected.configBlockMeta.configBlockContent) { + t.Errorf("[got]: configString = %v, [want]: %v", got.configBlockMeta.configBlockContent, tt.expected.configBlockMeta.configBlockContent) + } + if strings.TrimSpace(got.sqlBlocksMeta.sqlBlockContent) != strings.TrimSpace(tt.expected.sqlBlocksMeta.sqlBlockContent) { + t.Errorf("[got]: sqlBlockContent = %v, [want]: %v", got.sqlBlocksMeta.sqlBlockContent, tt.expected.sqlBlocksMeta.sqlBlockContent) + } + + if got.sqlBlocksMeta.startOfSqlBlock != tt.expected.sqlBlocksMeta.startOfSqlBlock { + t.Errorf("[got]: startOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.startOfSqlBlock, tt.expected.sqlBlocksMeta.startOfSqlBlock) + } + if got.sqlBlocksMeta.endOfSqlBlock != tt.expected.sqlBlocksMeta.endOfSqlBlock { + t.Errorf("[got]: endOfSqlBlock = %v, [want]: %v", got.sqlBlocksMeta.endOfSqlBlock, tt.expected.sqlBlocksMeta.endOfSqlBlock) + } + + if len(tt.expected.preOpsBlocksMeta) > 0 || len(tt.expected.preOpsBlocksMeta) > 0 { + + if got.preOpsBlocksMeta[0].startOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock { + t.Errorf("[got]: startOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].startOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].startOfPreOperationsBlock) + } + + if got.preOpsBlocksMeta[0].endOfPreOperationsBlock != tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock { + t.Errorf("[got]: endOfPreOperationsBlock = %v, [want]: %v", got.preOpsBlocksMeta[0].endOfPreOperationsBlock, tt.expected.preOpsBlocksMeta[0].endOfPreOperationsBlock) + } + + if got.preOpsBlocksMeta[0].preOpsBlockContent != tt.expected.preOpsBlocksMeta[0].preOpsBlockContent { + t.Errorf("[got]: preOpsBlockContent = %q, [want]: %q", got.preOpsBlocksMeta[0].preOpsBlockContent, tt.expected.preOpsBlocksMeta[0].preOpsBlockContent) + } + + } + + if len(tt.expected.postOpsBlocksMeta) > 0 || len(tt.expected.postOpsBlocksMeta) > 0 { + if got.postOpsBlocksMeta[0].startOfpostOperationsBlock != tt.expected.postOpsBlocksMeta[0].startOfpostOperationsBlock { + t.Errorf("[got]: startOfpostOperationsBlock = %v, [want]: %v", got.postOpsBlocksMeta[0].startOfpostOperationsBlock, tt.expected.postOpsBlocksMeta[0].startOfpostOperationsBlock) + } + + if got.postOpsBlocksMeta[0].endOfpostOperationsBlock != tt.expected.postOpsBlocksMeta[0].endOfpostOperationsBlock { + t.Errorf("[got]: endOfpostOperationsBlock = %v, [want]: %v", got.postOpsBlocksMeta[0].endOfpostOperationsBlock, tt.expected.postOpsBlocksMeta[0].endOfpostOperationsBlock) + } + if got.postOpsBlocksMeta[0].postOpsBlockContent != tt.expected.postOpsBlocksMeta[0].postOpsBlockContent { + t.Errorf("[got]: postOpsBlockContent = %q, [want]: %q", got.postOpsBlocksMeta[0].postOpsBlockContent, tt.expected.postOpsBlocksMeta[0].postOpsBlockContent) + } + } + + }) + } +} diff --git a/cmd/utils.go b/cmd/utils.go index 7208d78..4fd85e5 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -48,8 +48,8 @@ func findSqlxFiles(dataformRootDirectory string) *[]string { return nil } -func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) error { - queryString := *&sqlxFileMetaData.queryString +func formatSqlCode(sqlxFileMetaData *sqlxParserMeta, pythonScriptPath string, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) error { + queryString := *&sqlxFileMetaData.sqlBlocksMeta.sqlBlockContent cmd := exec.Command(pythonExecutable, pythonScriptPath, string(sqlfluffConfigPath), string(queryString)) @@ -61,7 +61,7 @@ func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, err := cmd.Run() if err != nil { logger.Error(stderr.String(), slog.String("file", sqlxFileMetaData.filepath), "error", err.Error()) - sqlxFileMetaData.formattedQuery = string(queryString) // If there is an error, return the original query + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = string(queryString) return ErrorFormattingSqlxFile } output := stdout.String() @@ -69,32 +69,74 @@ func formatSqlCode(sqlxFileMetaData *sqlxFileMetaData, pythonScriptPath string, if sql_fluff_not_installed { log.Fatal(color.RedString("sqlfluff not installed. Please install sqlfluff using 'pip install sqlfluff'")) } - sqlxFileMetaData.formattedQuery = output + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent = output return nil } -func writeContentsToFile(sqlxFileMetaData *sqlxFileMetaData, formattingError error) { +func finalFormmatedSqlxFileContents(sqlxFileMetaData *sqlxParserMeta) string { + spaceBetweenBlocks := "\n\n" + // NOTE: Dataform at the time of writing this does not really have multiple preOpsBlocks or postOpsBlocks in its compiled json although it does not throw a compilation error if you put one + spaceBetweenSameOps := "\n" + + formattedQuery := "" + prePostOpBlock := "" + + preOpsBlocks := sqlxFileMetaData.preOpsBlocksMeta + postOpsBlocks := sqlxFileMetaData.postOpsBlocksMeta + + preOpsBlockContent := "" + if len(preOpsBlocks) > 0 { + for idx, preOpsBlock := range preOpsBlocks { + if idx == len(preOpsBlocks)-1 { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + } else { + preOpsBlockContent += preOpsBlock.preOpsBlockContent + spaceBetweenSameOps + } + + } + } + + postOpsBlockContent := "" + if len(postOpsBlocks) > 0 { + for idx, postOpsBlock := range postOpsBlocks { + if idx == len(postOpsBlocks)-1 { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + } else { + postOpsBlockContent += postOpsBlock.postOpsBlockContent + spaceBetweenSameOps + } + } + } + + if preOpsBlockContent == "" && postOpsBlockContent == "" { + prePostOpBlock = "" + } else { + prePostOpBlock = spaceBetweenBlocks + preOpsBlockContent + spaceBetweenBlocks + postOpsBlockContent + } + + formattedQuery = sqlxFileMetaData.configBlockMeta.configBlockContent + + prePostOpBlock + + spaceBetweenBlocks + + sqlxFileMetaData.sqlBlocksMeta.formattedSqlBlockContent + return formattedQuery +} + +func writeContentsToFile(sqlxFileMetaData *sqlxParserMeta, formattingError error) { yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() - filPathSeparator := string(os.PathSeparator) - _definitions := "definitions" + filPathSeparator + filPathSeparator := string(os.PathSeparator) + _definitions := "definitions" + filPathSeparator baseFilepath := strings.Split(sqlxFileMetaData.filepath, _definitions) - formattedFilePath := filepath.Join("formatted", "definitions", baseFilepath[1]) + formattedFilePath := filepath.Join("formatted", "definitions", baseFilepath[1]) dirToCreate := formattedFilePath[:strings.LastIndex(formattedFilePath, filPathSeparator)] os.MkdirAll(dirToCreate, 0755) // TODO: make this configurable - completeQuery := "" - if sqlxFileMetaData.preOperationsString == "" { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.formattedQuery - } else { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.preOperationsString + "\n\n" + sqlxFileMetaData.formattedQuery - } + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) - err := os.WriteFile(formattedFilePath, []byte(completeQuery), 0664) + err := os.WriteFile(formattedFilePath, []byte(formattedQuery), 0664) if err != nil { fmt.Println("Error writing to file:", err) return @@ -108,18 +150,14 @@ func writeContentsToFile(sqlxFileMetaData *sqlxFileMetaData, formattingError err } } -func writeContentsToFileInPlace(sqlxFileMetaData *sqlxFileMetaData, formattingError error) { +func writeContentsToFileInPlace(sqlxFileMetaData *sqlxParserMeta, formattingError error) { yellow := color.New(color.FgYellow).SprintFunc() red := color.New(color.FgRed).SprintFunc() - completeQuery := "" - if sqlxFileMetaData.preOperationsString == "" { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.formattedQuery - } else { - completeQuery = sqlxFileMetaData.configString + "\n\n" + sqlxFileMetaData.preOperationsString + "\n\n" + sqlxFileMetaData.formattedQuery - } - err := os.WriteFile(sqlxFileMetaData.filepath, []byte(completeQuery), 0664) + formattedQuery := finalFormmatedSqlxFileContents(sqlxFileMetaData) + + err := os.WriteFile(sqlxFileMetaData.filepath, []byte(formattedQuery), 0664) if err != nil { fmt.Println("Error writing to file:", err) return @@ -134,12 +172,13 @@ func writeContentsToFileInPlace(sqlxFileMetaData *sqlxFileMetaData, formattingEr } func formatSqlxFile(sqlxFilePath string, inplace bool, sqlfluffConfigPath string, pythonExecutable string, logger *slog.Logger) { - sqlxFileMetaData, err := getSqlxFileMetaData(sqlxFilePath) + sqlxFileMetaData, err := sqlxParser(sqlxFilePath) + // fmt.Printf("%+v\n", sqlxFileMetaData) if err != nil { fmt.Println("Error finding config blocks:", err) } else { - pythonScriptPath := filepath.Join(".formatdataform", "sqlfluff_formatter.py") + pythonScriptPath := filepath.Join(".formatdataform", "sqlfluff_formatter.py") formattingError := formatSqlCode(&sqlxFileMetaData, pythonScriptPath, sqlfluffConfigPath, pythonExecutable, logger) if inplace { writeContentsToFileInPlace(&sqlxFileMetaData, formattingError) @@ -160,34 +199,6 @@ func getIoReader(filepath string) (io.Reader, error) { return file, nil } -// Gives number of lines by reading the file in chunks, supposed to be faster than lineCounterV1 (https://stackoverflow.com/questions/24562942/golang-how-do-i-determine-the-number-of-lines-in-a-file-efficiently) - -func lineCounterV3(reader io.Reader) (int, error) { - buf := make([]byte, 32*1024) - count := 0 - lineSep := []byte{'\n'} - - for { - c, err := reader.Read(buf) - count += bytes.Count(buf[:c], lineSep) - - switch { - case err == io.EOF: - return count, nil - case err != nil: - return count, err - } - } -} - -func countLinesInFile(filepath string) (int, error) { - reader, err := getIoReader(filepath) - if err != nil { - return 0, err - } - return lineCounterV3(reader) -} - func createFileFromText(text string, filepath string) error { f, err := os.Create(filepath) @@ -196,7 +207,7 @@ func createFileFromText(text string, filepath string) error { return err } else { f.WriteString(text) - fmt.Printf("file created at: `%s` \n", filepath) + fmt.Printf("file created at: `%s` \n", filepath) f.Close() } return nil diff --git a/cmd/utils_test.go b/cmd/utils_test.go deleted file mode 100644 index 627b062..0000000 --- a/cmd/utils_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package cmd - -import ( - "strings" - "testing" -) - -func TestLineCounterV3(t *testing.T) { - tests := []struct { - name string - input string - want int - wantErr bool - }{ - {"Normal file 3 lines", "Line 1\nLine 2\nLine 3\n", 3, false}, - {"Empty file", "", 0, false}, - {"No newline at end", "Line 1\nLine 2\nLine 3", 2, false}, - {"Single line", "Single line", 0, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - reader := strings.NewReader(tt.input) - got, err := lineCounterV3(reader) - if (err != nil) != tt.wantErr { - t.Errorf("lineCounterV3() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("lineCounterV3() = %v, want %v", got, tt.want) - } - }) - } -}