From 8986a1e014d4f1ef8e6c30c3d6c9284522680c98 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Tue, 15 Apr 2025 23:22:47 +0200 Subject: [PATCH 1/8] WIP: Add sqlite support --- cmd/scip/convert.go | 660 +++++++++++++++++++++++++++++++++++++++ cmd/scip/convert_test.go | 277 ++++++++++++++++ cmd/scip/main.go | 3 +- docs/CLI.md | 26 ++ go.mod | 24 +- go.sum | 42 ++- 6 files changed, 1014 insertions(+), 18 deletions(-) create mode 100644 cmd/scip/convert.go create mode 100644 cmd/scip/convert_test.go diff --git a/cmd/scip/convert.go b/cmd/scip/convert.go new file mode 100644 index 00000000..e5a33333 --- /dev/null +++ b/cmd/scip/convert.go @@ -0,0 +1,660 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "sort" + + "github.com/cockroachdb/errors" + "github.com/sourcegraph/scip/bindings/go/scip" + "github.com/urfave/cli/v2" + "google.golang.org/protobuf/proto" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +const ( + ChunkSize = 100 // Number of occurrences per chunk +) + +func convertCommand() cli.Command { + var indexPath, outputPath string + var chunkSize int + + command := cli.Command{ + Name: "convert", + Usage: "Convert a SCIP index to SQLite database", + Description: `Converts a SCIP index to a SQLite database with optimized settings. + +The SQLite database contains four tables: +1. documents: Contains metadata about source files +2. chunks: Stores occurrences in chunks of approximately 100 occurrences per chunk +3. symbols: Stores symbol information with unique IDs +4. 
mentions: Tracks which chunks contain which symbols with specific roles`, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Usage: "Path to output SQLite database file", + Destination: &outputPath, + Value: "index.db", + }, + &cli.IntFlag{ + Name: "chunk-size", + Usage: "Number of occurrences per chunk", + Destination: &chunkSize, + Value: ChunkSize, + }, + }, + Action: func(c *cli.Context) error { + indexPath = c.Args().Get(0) + if indexPath == "" { + return errors.New("missing argument for path to SCIP index") + } + + if chunkSize <= 0 { + return errors.New("chunk-size must be a positive integer") + } + + return convertMain(indexPath, outputPath, chunkSize, c.App.Writer) + }, + } + return command +} + +func convertMain(indexPath string, outputPath string, chunkSize int, out io.Writer) error { + index, err := readFromOption(indexPath) + if err != nil { + return err + } + + // Create the output directory if it doesn't exist + outputDir := filepath.Dir(outputPath) + if outputDir != "." 
{ + if err := os.MkdirAll(outputDir, 0755); err != nil { + return errors.Wrapf(err, "failed to create output directory %s", outputDir) + } + } + + // Create and set up the SQLite database + db, err := createSQLiteDatabase(outputPath) + if err != nil { + return err + } + defer db.Close() + + // Convert the SCIP index to the SQLite database + converter := NewConverter(db, chunkSize) + if err := converter.Convert(index); err != nil { + return err + } + + fmt.Fprintf(out, "Successfully converted SCIP index to SQLite database at %s\n", outputPath) + return nil +} + +func createSQLiteDatabase(path string) (*sqlite.Conn, error) { + // Open a new SQLite database connection + conn, err := sqlite.OpenConn(path, sqlite.OpenCreate|sqlite.OpenReadWrite|sqlite.OpenWAL) + if err != nil { + return nil, errors.Wrapf(err, "failed to open SQLite database at %s", path) + } + + // Apply optimized settings + err = sqlitex.ExecuteTransient(conn, `PRAGMA synchronous = normal;`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to set synchronous mode") + } + + err = sqlitex.ExecuteTransient(conn, `PRAGMA temp_store = memory;`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to set temp_store") + } + + // Enable strict mode and foreign key constraints + err = sqlitex.ExecuteTransient(conn, `PRAGMA strict = ON;`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to enable strict mode") + } + + err = sqlitex.ExecuteTransient(conn, `PRAGMA foreign_keys = ON;`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to enable foreign key constraints") + } + + // Create tables in a transaction + err = sqlitex.Execute(conn, "BEGIN TRANSACTION", nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to begin transaction") + } + + // Create tables + err = sqlitex.ExecuteTransient(conn, `CREATE TABLE documents ( + id INTEGER PRIMARY KEY, + language TEXT NOT NULL, + 
relative_path TEXT NOT NULL, + position_encoding INTEGER NOT NULL, + text TEXT + )`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create documents table") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE TABLE chunks ( + id INTEGER PRIMARY KEY, + document_id INTEGER NOT NULL, + chunk_index INTEGER NOT NULL, + start_line INTEGER NOT NULL, + end_line INTEGER NOT NULL, + occurrences BLOB NOT NULL, + FOREIGN KEY (document_id) REFERENCES documents(id) + )`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create chunks table") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE TABLE symbols ( + id INTEGER PRIMARY KEY, + symbol TEXT NOT NULL UNIQUE, + display_name TEXT, + kind INTEGER, + documentation TEXT, + signature BLOB, + enclosing_symbol TEXT, + relationships BLOB + )`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create symbols table") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE TABLE mentions ( + chunk_id INTEGER NOT NULL, + symbol_id INTEGER NOT NULL, + role INTEGER NOT NULL, + PRIMARY KEY (chunk_id, symbol_id, role), + FOREIGN KEY (chunk_id) REFERENCES chunks(id), + FOREIGN KEY (symbol_id) REFERENCES symbols(id) + )`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create mentions table") + } + + // Create indexes + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_documents_path ON documents(relative_path)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_chunks_doc_id ON chunks(document_id)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_chunks_line_range ON chunks(document_id, start_line, end_line)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create 
index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_symbols_symbol ON symbols(symbol)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_mentions_symbol_id ON mentions(symbol_id)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_mentions_role ON mentions(symbol_id, role)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + // Commit the transaction + err = sqlitex.ExecuteTransient(conn, "COMMIT", nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to commit transaction") + } + + return conn, nil +} + +// Converter handles the conversion from SCIP to SQLite +type Converter struct { + conn *sqlite.Conn + chunkSize int +} + +// NewConverter creates a new converter instance +func NewConverter(conn *sqlite.Conn, chunkSize int) *Converter { + return &Converter{ + conn: conn, + chunkSize: chunkSize, + } +} + +// Convert processes the SCIP index and writes it to the SQLite database +func (c *Converter) Convert(index *scip.Index) error { + var insertErr error + // Execute everything in a transaction + err := sqlitex.Execute(c.conn, "BEGIN TRANSACTION", nil) + if err != nil { + return errors.Wrap(err, "failed to begin transaction") + } + + defer func() { + if r := recover(); r != nil { + // Rollback on panic + rberr := sqlitex.Execute(c.conn, "ROLLBACK", nil) + if rberr != nil { + // Just log the rollback error + fmt.Fprintf(os.Stderr, "Error rolling back transaction: %v\n", rberr) + } + // Re-panic + panic(r) + } else if insertErr != nil { + // Rollback on error + rberr := sqlitex.Execute(c.conn, "ROLLBACK", nil) + if rberr != nil { + // Just log the rollback error, but keep the original error + fmt.Fprintf(os.Stderr, "Error rolling back transaction: %v\n", rberr) 
+ } + } + }() + + // Process documents + for _, doc := range index.Documents { + // Sort all occurrences in the document for consistent storage + sortOccurrences(doc.Occurrences) + + // Insert document + docStmt, err := c.conn.Prepare("INSERT INTO documents (language, relative_path, position_encoding, text) VALUES (?, ?, ?, ?)") + if err != nil { + return errors.Wrap(err, "failed to prepare document statement") + } + // We'll finalize manually after use + + // Bind parameters and execute + docStmt.BindText(1, doc.Language) + docStmt.BindText(2, doc.RelativePath) + docStmt.BindInt64(3, int64(doc.PositionEncoding)) + if doc.Text != "" { + docStmt.BindText(4, doc.Text) + } else { + docStmt.BindNull(4) + } + + // Execute the statement + _, err = docStmt.Step() + if err != nil { + docStmt.Finalize() // Clean up on error + return errors.Wrapf(err, "failed to insert document %s", doc.RelativePath) + } + + // Cleanup + docStmt.Finalize() + + // Get the last inserted row ID (document ID) + docID := c.conn.LastInsertRowID() + + // Process symbols + for _, symbol := range doc.Symbols { + signatureBlob, err := convertDocumentToBlob(symbol.SignatureDocumentation) + if err != nil { + return errors.Wrapf(err, "failed to convert signature for symbol %s", symbol.Symbol) + } + + relationshipsBlob, err := convertRelationshipsToBlob(symbol.Relationships) + if err != nil { + return errors.Wrapf(err, "failed to convert relationships for symbol %s", symbol.Symbol) + } + + documentation := "" + if len(symbol.Documentation) > 0 { + documentation = symbol.Documentation[0] + for i := 1; i < len(symbol.Documentation); i++ { + documentation += "\n" + symbol.Documentation[i] + } + } + + // Look up the symbol first + lookupStmt, err := c.conn.Prepare("SELECT id FROM symbols WHERE symbol = ?") + if err != nil { + return errors.Wrap(err, "failed to prepare symbol lookup statement") + } + + // Bind parameters + lookupStmt.BindText(1, symbol.Symbol) + + // Execute and get result + found := false + 
hasRow, err := lookupStmt.Step() + if err != nil { + lookupStmt.Finalize() // Clean up on error + return errors.Wrap(err, "failed to execute symbol lookup") + } + + if hasRow { + // Symbol exists, just mark as found (we don't need the ID for document symbols) + found = true + } + lookupStmt.Finalize() + + if !found { + // Symbol doesn't exist, insert it + insertStmt, err := c.conn.Prepare("INSERT OR IGNORE INTO symbols (symbol, display_name, kind, documentation, signature, enclosing_symbol, relationships) VALUES (?, ?, ?, ?, ?, ?, ?)") + if err != nil { + return errors.Wrap(err, "failed to prepare symbol insert statement") + } + + // Bind parameters + insertStmt.BindText(1, symbol.Symbol) + if symbol.DisplayName != "" { + insertStmt.BindText(2, symbol.DisplayName) + } else { + insertStmt.BindNull(2) + } + insertStmt.BindInt64(3, int64(symbol.Kind)) + if documentation != "" { + insertStmt.BindText(4, documentation) + } else { + insertStmt.BindNull(4) + } + if signatureBlob != nil { + insertStmt.BindBytes(5, signatureBlob) + } else { + insertStmt.BindNull(5) + } + if symbol.EnclosingSymbol != "" { + insertStmt.BindText(6, symbol.EnclosingSymbol) + } else { + insertStmt.BindNull(6) + } + if relationshipsBlob != nil { + insertStmt.BindBytes(7, relationshipsBlob) + } else { + insertStmt.BindNull(7) + } + + // Execute + _, err = insertStmt.Step() + if err != nil { + insertStmt.Finalize() // Clean up on error + return errors.Wrapf(err, "failed to insert symbol %s", symbol.Symbol) + } + insertStmt.Finalize() + + // Look up the ID of the newly inserted symbol + lookupStmt, err = c.conn.Prepare("SELECT id FROM symbols WHERE symbol = ?") + if err != nil { + return errors.Wrap(err, "failed to prepare symbol lookup statement") + } + lookupStmt.BindText(1, symbol.Symbol) + hasRow, err := lookupStmt.Step() + if err != nil || !hasRow { + lookupStmt.Finalize() // Clean up on error + return errors.Wrapf(err, "failed to look up newly inserted symbol %s", symbol.Symbol) + } + // We 
don't need the ID for document symbols + lookupStmt.Finalize() + } + } + + // Process occurrences in chunks (already sorted at document level) + chunkedOccurrences := chunkOccurrences(doc.Occurrences, c.chunkSize) + for i, chunk := range chunkedOccurrences { + if len(chunk) == 0 { + continue + } + + // Find min and max line numbers in this chunk + startLine, endLine := findLineRange(chunk) + + // Serialize occurrences + occurrencesBlob, err := proto.Marshal(&scip.Document{ + Occurrences: chunk, + }) + if err != nil { + return errors.Wrap(err, "failed to serialize occurrences") + } + + // Insert chunk + chunkStmt, err := c.conn.Prepare("INSERT INTO chunks (document_id, chunk_index, start_line, end_line, occurrences) VALUES (?, ?, ?, ?, ?)") + if err != nil { + return errors.Wrap(err, "failed to prepare chunk statement") + } + + // Bind parameters + chunkStmt.BindInt64(1, docID) + chunkStmt.BindInt64(2, int64(i)) + chunkStmt.BindInt64(3, int64(startLine)) + chunkStmt.BindInt64(4, int64(endLine)) + chunkStmt.BindBytes(5, occurrencesBlob) + + // Execute + _, err = chunkStmt.Step() + if err != nil { + chunkStmt.Finalize() // Clean up on error + return errors.Wrap(err, "failed to insert chunk") + } + chunkStmt.Finalize() + + // Get the last inserted chunk ID + chunkID := c.conn.LastInsertRowID() + + // Add entries to the mentions table for each unique symbol in this chunk + symbolRoles := make(map[string]int32) + for _, occ := range chunk { + if occ.Symbol != "" { + // If we have multiple occurrences of the same symbol with different roles, + // combine the roles (bitwise OR) + symbolRoles[occ.Symbol] |= occ.SymbolRoles + } + } + + // Prepare statements for mentions + mentionStmt, err := c.conn.Prepare("INSERT OR IGNORE INTO mentions (chunk_id, symbol_id, role) VALUES (?, ?, ?)") + if err != nil { + return errors.Wrap(err, "failed to prepare mention statement") + } + + symLookupStmt, err := c.conn.Prepare("SELECT id FROM symbols WHERE symbol = ?") + if err != nil { + 
mentionStmt.Finalize() // Clean up previous statement + return errors.Wrap(err, "failed to prepare symbol lookup statement for mentions") + } + + symInsertStmt, err := c.conn.Prepare("INSERT OR IGNORE INTO symbols (symbol, display_name, kind, documentation, signature, enclosing_symbol, relationships) VALUES (?, ?, ?, ?, ?, ?, ?)") + if err != nil { + mentionStmt.Finalize() // Clean up previous statements + symLookupStmt.Finalize() + return errors.Wrap(err, "failed to prepare symbol insert statement for mentions") + } + + // Add mentions for each symbol in this chunk + for symbolName, role := range symbolRoles { + // Look up the symbol ID + var symbolID int64 + + // Bind parameters for lookup + symLookupStmt.BindText(1, symbolName) + + // Execute lookup + hasRow, err := symLookupStmt.Step() + if err != nil { + return errors.Wrap(err, "failed to execute symbol lookup for mention") + } + + if hasRow { + // Found the symbol, get its ID + symbolID = symLookupStmt.ColumnInt64(0) + } else { + // Symbol doesn't exist yet, create it with minimal information + symInsertStmt.Reset() + symInsertStmt.BindText(1, symbolName) + symInsertStmt.BindNull(2) // display_name + symInsertStmt.BindInt64(3, 0) // kind + symInsertStmt.BindNull(4) // documentation + symInsertStmt.BindNull(5) // signature + symInsertStmt.BindNull(6) // enclosing_symbol + symInsertStmt.BindNull(7) // relationships + + // Execute insert + _, err = symInsertStmt.Step() + if err != nil { + return errors.Wrapf(err, "failed to insert symbol %s for mention", symbolName) + } + + // Look up the newly inserted symbol + symLookupStmt.Reset() + symLookupStmt.BindText(1, symbolName) + + hasRow, err = symLookupStmt.Step() + if err != nil || !hasRow { + return errors.Wrapf(err, "failed to look up newly inserted symbol %s for mention", symbolName) + } + symbolID = symLookupStmt.ColumnInt64(0) + } + symLookupStmt.Reset() + + // Insert mention + mentionStmt.Reset() + mentionStmt.BindInt64(1, chunkID) + mentionStmt.BindInt64(2, 
symbolID) + mentionStmt.BindInt64(3, int64(role)) + + _, err = mentionStmt.Step() + if err != nil { + return errors.Wrapf(err, "failed to insert mention for symbol %s", symbolName) + } + } + } + } + + // We intentionally don't process external symbols + + // Commit the transaction + err = sqlitex.Execute(c.conn, "COMMIT", nil) + if err != nil { + // Try to rollback, but keep the original error + rberr := sqlitex.Execute(c.conn, "ROLLBACK", nil) + if rberr != nil { + // Just log the rollback error + fmt.Fprintf(os.Stderr, "Error rolling back transaction: %v\n", rberr) + } + return errors.Wrap(err, "failed to commit transaction") + } + + return nil +} + +// chunkOccurrences splits occurrences into chunks of the specified size +func chunkOccurrences(occurrences []*scip.Occurrence, chunkSize int) [][]*scip.Occurrence { + if len(occurrences) == 0 { + return nil + } + + chunks := make([][]*scip.Occurrence, 0, (len(occurrences)+chunkSize-1)/chunkSize) + for i := 0; i < len(occurrences); i += chunkSize { + end := i + chunkSize + if end > len(occurrences) { + end = len(occurrences) + } + chunks = append(chunks, occurrences[i:end]) + } + + return chunks +} + +// findLineRange determines the min and max line numbers in a set of occurrences +func findLineRange(occurrences []*scip.Occurrence) (int, int) { + if len(occurrences) == 0 { + return 0, 0 + } + + minLine := int(occurrences[0].Range[0]) + maxLine := int(occurrences[0].Range[0]) + + for _, occ := range occurrences { + startLine := int(occ.Range[0]) + if startLine < minLine { + minLine = startLine + } + + // If range has 4 elements, endLine is at index 2 + // If range has 3 elements, endLine is the same as startLine + endLine := startLine + if len(occ.Range) >= 4 { + endLine = int(occ.Range[2]) + } + + if endLine > maxLine { + maxLine = endLine + } + } + + return minLine, maxLine +} + +// convertDocumentToBlob serializes a Document to bytes for storage +func convertDocumentToBlob(doc *scip.Document) ([]byte, error) { + if 
doc == nil { + return nil, nil + } + return proto.Marshal(doc) +} + +// convertRelationshipsToBlob serializes relationships to bytes for storage +func convertRelationshipsToBlob(relationships []*scip.Relationship) ([]byte, error) { + if len(relationships) == 0 { + return nil, nil + } + + return json.Marshal(relationships) +} + +// sortOccurrences sorts occurrences for consistent storage +func sortOccurrences(occurrences []*scip.Occurrence) { + // If we have no occurrences or just one, no need to sort + if len(occurrences) <= 1 { + return + } + + sort.Slice(occurrences, func(i, j int) bool { + // First sort by line + if occurrences[i].Range[0] != occurrences[j].Range[0] { + return occurrences[i].Range[0] < occurrences[j].Range[0] + } + + // Then by column + if occurrences[i].Range[1] != occurrences[j].Range[1] { + return occurrences[i].Range[1] < occurrences[j].Range[1] + } + + // Then by symbol name + if occurrences[i].Symbol != occurrences[j].Symbol { + return occurrences[i].Symbol < occurrences[j].Symbol + } + + // Then by symbol roles + return occurrences[i].SymbolRoles < occurrences[j].SymbolRoles + }) +} \ No newline at end of file diff --git a/cmd/scip/convert_test.go b/cmd/scip/convert_test.go new file mode 100644 index 00000000..458c5844 --- /dev/null +++ b/cmd/scip/convert_test.go @@ -0,0 +1,277 @@ +package main + +import ( + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/sourcegraph/scip/bindings/go/scip" + "google.golang.org/protobuf/proto" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +func TestConvert(t *testing.T) { + // Create a temporary directory for the test + tmpDir, err := os.MkdirTemp("", "scip-convert-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a test SCIP index + testIndex := createTestIndex() + indexPath := filepath.Join(tmpDir, "index.scip") + writeTestIndex(t, testIndex, indexPath) + + // Output SQLite database path 
+ dbPath := filepath.Join(tmpDir, "output.db") + + // Run the convert command + err = convertMain(indexPath, dbPath, 100, io.Discard) + if err != nil { + t.Fatalf("Convert command failed: %v", err) + } + + // Verify the database was created + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Fatalf("Database file was not created at %s", dbPath) + } + + // Open the database and verify contents + db, err := sqlite.OpenConn(dbPath, sqlite.OpenReadOnly) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + defer db.Close() + + // Test document count + var docCount int64 + err = sqlitex.Execute(db, "SELECT COUNT(*) FROM documents", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + docCount = stmt.ColumnInt64(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to count documents: %v", err) + } + if int(docCount) != len(testIndex.Documents) { + t.Errorf("Expected %d documents, got %d", len(testIndex.Documents), docCount) + } + + // Test first document path + var path string + err = sqlitex.Execute(db, "SELECT relative_path FROM documents LIMIT 1", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + path = stmt.ColumnText(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to get document path: %v", err) + } + if path != testIndex.Documents[0].RelativePath { + t.Errorf("Expected path %s, got %s", testIndex.Documents[0].RelativePath, path) + } + + // Test symbol count + var symbolCount int64 + err = sqlitex.Execute(db, "SELECT COUNT(*) FROM symbols", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + symbolCount = stmt.ColumnInt64(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to count symbols: %v", err) + } + + // Calculate expected symbol count (just document symbols) + expectedSymbolCount := 0 + for _, doc := range testIndex.Documents { + expectedSymbolCount += len(doc.Symbols) + } + // We don't process external symbols + + // Note: Additional symbols 
might be created for mentions, so we just verify we have at least + // the expected number of symbols from documents + if int(symbolCount) < expectedSymbolCount { + t.Errorf("Expected at least %d symbols, got %d", expectedSymbolCount, symbolCount) + } + + // Test mentions table + var mentionCount int64 + err = sqlitex.Execute(db, "SELECT COUNT(*) FROM mentions", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + mentionCount = stmt.ColumnInt64(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to count mentions: %v", err) + } + + // We should have at least one mention for each symbol in occurrences + if mentionCount == 0 { + t.Errorf("Expected some mentions, got none") + } + + // Test a specific mention + var foundMention bool + var mentionRole int64 + + err = sqlitex.Execute(db, "SELECT s.symbol, m.role FROM mentions m JOIN symbols s ON m.symbol_id = s.id LIMIT 1", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + foundMention = true + // We don't need to use the symbol name, just verify it exists + _ = stmt.ColumnText(0) + mentionRole = stmt.ColumnInt64(1) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to query mentions: %v", err) + } + + if foundMention { + // Verify the role is a valid value + if mentionRole <= 0 { + t.Errorf("Invalid symbol role: %d", mentionRole) + } + } else { + t.Errorf("No mentions found") + } + + // Test chunks and retrieval of occurrences + var docID int64 + err = sqlitex.Execute(db, "SELECT id FROM documents LIMIT 1", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + docID = stmt.ColumnInt64(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to get document ID: %v", err) + } + + // Query for chunks and count occurrences + totalOccurrences := 0 + var occurrencesCount int + + // Query for occurrences using a prepared statement + err = sqlitex.Execute(db, fmt.Sprintf("SELECT occurrences FROM chunks WHERE document_id = %d", docID), 
&sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + // Get blob data - read the entire blob since it's only valid for the duration of this call + reader := stmt.ColumnReader(0) + occurrencesBlob, err := io.ReadAll(reader) + if err != nil { + return err + } + + // Deserialize occurrences + doc := &scip.Document{} + if err := proto.Unmarshal(occurrencesBlob, doc); err != nil { + return err + } + + totalOccurrences += len(doc.Occurrences) + occurrencesCount++ + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to query chunks: %v", err) + } + + // Verify we found some occurrences + if occurrencesCount == 0 { + t.Errorf("No occurrences found") + } + + if totalOccurrences != len(testIndex.Documents[0].Occurrences) { + t.Errorf("Expected %d occurrences, got %d", len(testIndex.Documents[0].Occurrences), totalOccurrences) + } +} + +func createTestIndex() *scip.Index { + return &scip.Index{ + Metadata: &scip.Metadata{ + Version: 0, + ToolInfo: &scip.ToolInfo{Name: "test-indexer", Version: "1.0.0"}, + ProjectRoot: "file:///project", + TextDocumentEncoding: scip.TextEncoding_UTF8, + }, + Documents: []*scip.Document{ + { + RelativePath: "src/main.go", + Language: "go", + Text: "package main\n\nfunc main() {\n\tfmt.Println(\"Hello, world!\")\n}\n", + PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, + Occurrences: []*scip.Occurrence{ + { + Range: []int32{0, 8, 0, 12}, + Symbol: "go package main", + SymbolRoles: int32(scip.SymbolRole_Definition), + }, + { + Range: []int32{2, 5, 2, 9}, + Symbol: "go package main/main().", + SymbolRoles: int32(scip.SymbolRole_Definition), + }, + { + Range: []int32{3, 1, 3, 4}, + Symbol: "go . fmt", + SymbolRoles: int32(scip.SymbolRole_Import | scip.SymbolRole_ReadAccess), + }, + { + Range: []int32{3, 5, 3, 12}, + Symbol: "go . 
fmt/Println().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + }, + Symbols: []*scip.SymbolInformation{ + { + Symbol: "go package main", + DisplayName: "main", + Documentation: []string{"Main package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go package main/main().", + DisplayName: "main", + Documentation: []string{"Main function"}, + Kind: scip.SymbolInformation_Function, + }, + }, + }, + }, + ExternalSymbols: []*scip.SymbolInformation{ + { + Symbol: "go . fmt", + DisplayName: "fmt", + Documentation: []string{"Formatting package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go . fmt/Println().", + DisplayName: "Println", + Documentation: []string{"Print to standard output"}, + Kind: scip.SymbolInformation_Function, + }, + }, + } +} + +func writeTestIndex(t *testing.T, index *scip.Index, path string) { + indexBytes, err := proto.Marshal(index) + if err != nil { + t.Fatalf("Failed to marshal test index: %v", err) + } + + if err := os.WriteFile(path, indexBytes, 0644); err != nil { + t.Fatalf("Failed to write test index: %v", err) + } +} \ No newline at end of file diff --git a/cmd/scip/main.go b/cmd/scip/main.go index 9da6ad9d..712fdabc 100644 --- a/cmd/scip/main.go +++ b/cmd/scip/main.go @@ -24,7 +24,8 @@ func commands() []*cli.Command { snapshot := snapshotCommand() stats := statsCommand() test := testCommand() - return []*cli.Command{&lint, &print, &snapshot, &stats, &test} + convert := convertCommand() + return []*cli.Command{&lint, &print, &snapshot, &stats, &test, &convert} } //go:embed version.txt diff --git a/docs/CLI.md b/docs/CLI.md index 779c8c3e..9b6349c6 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -7,6 +7,7 @@ - [`scip print`](#scip-print) - [`scip snapshot`](#scip-snapshot) - [`scip stats`](#scip-stats) + - [`scip convert`](#scip-convert) ``` @@ -30,6 +31,7 @@ COMMANDS: snapshot Generate snapshot files for golden testing stats Output useful statistics about a SCIP index test Validate a SCIP index 
against test files + convert Convert a SCIP index to SQLite database help, h Shows a list of commands or help for one command GLOBAL OPTIONS: @@ -143,3 +145,27 @@ USAGE: OPTIONS: --from value Path to SCIP index file (default: index.scip) ``` + +## `scip convert` + +``` +NAME: + scip convert - Convert a SCIP index to SQLite database + +USAGE: + scip convert [command options] [arguments...] + +DESCRIPTION: + Converts a SCIP index to a SQLite database with optimized settings. + + The SQLite database contains four tables: + 1. documents: Contains metadata about source files + 2. chunks: Stores occurrences in chunks of approximately 100 occurrences per chunk + 3. symbols: Stores symbol information with unique IDs + 4. mentions: Tracks which chunks contain which symbols with specific roles + +OPTIONS: + --output value Path to output SQLite database file (default: "index.db") + --chunk-size value Number of occurrences per chunk (default: 100) + --help, -h show help +``` diff --git a/go.mod b/go.mod index 738b5f67..222d6eb0 100644 --- a/go.mod +++ b/go.mod @@ -21,10 +21,11 @@ require ( github.com/sourcegraph/conc v0.3.0 github.com/stretchr/testify v1.8.4 github.com/urfave/cli/v2 v2.25.7 - golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 - golang.org/x/tools v0.12.0 + golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 + golang.org/x/tools v0.17.0 google.golang.org/protobuf v1.31.0 pgregory.net/rapid v1.1.0 + zombiezen.com/go/sqlite v1.0.0 ) require ( @@ -50,6 +51,7 @@ require ( github.com/docker/docker-credential-helpers v0.8.0 // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.5.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/envoyproxy/protoc-gen-validate v0.3.0-java // indirect github.com/felixge/fgprof v0.9.3 // indirect github.com/getsentry/sentry-go v0.12.0 // indirect @@ -70,7 +72,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jdxcode/netrc 
v0.0.0-20221124155335-4616370d1a84 // indirect github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/klauspost/pgzip v1.2.6 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect @@ -81,6 +83,7 @@ require ( github.com/moby/term v0.5.0 // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/mwitkow/go-proto-validators v0.0.0-20180403085117-0950a7990007 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect github.com/nightlyone/lockfile v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0-rc4 // indirect @@ -89,6 +92,7 @@ require ( github.com/pkg/profile v1.7.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pseudomuto/protokit v0.2.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect github.com/rs/cors v1.9.0 // indirect @@ -108,14 +112,18 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.24.0 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect - golang.org/x/crypto v0.12.0 // indirect - golang.org/x/mod v0.12.0 // indirect - golang.org/x/net v0.14.0 // indirect - golang.org/x/sync v0.3.0 // indirect + golang.org/x/crypto v0.18.0 // indirect + golang.org/x/mod v0.14.0 // indirect + golang.org/x/net v0.20.0 // indirect + golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/term v0.17.0 // indirect - golang.org/x/text v0.12.0 // indirect + golang.org/x/text v0.14.0 // indirect google.golang.org/genproto v0.0.0-20220414192740-2d67ff6cf2b4 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/libc v1.41.0 // indirect + modernc.org/mathutil v1.6.0 // indirect + modernc.org/memory v1.7.2 // indirect + 
modernc.org/sqlite v1.27.0 // indirect mvdan.cc/gofumpt v0.5.0 // indirect ) diff --git a/go.sum b/go.sum index b885c763..b9e03392 100644 --- a/go.sum +++ b/go.sum @@ -98,6 +98,8 @@ github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5Xh github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -267,8 +269,9 @@ github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= +github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/knz/go-libedit v1.10.1/go.mod 
h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= @@ -327,6 +330,8 @@ github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzE github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAmxBiA= github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI= @@ -359,6 +364,8 @@ github.com/pseudomuto/protoc-gen-doc v1.5.1 h1:Ah259kcrio7Ix1Rhb6u8FCaOkzf9qRBqX github.com/pseudomuto/protoc-gen-doc v1.5.1/go.mod h1:XpMKYg6zkcpgfpCfQ8GcWBDRtRxOmMR5w7pz4Xo+dYM= github.com/pseudomuto/protokit v0.2.0 h1:hlnBDcy3YEDXH7kc9gV+NLaN0cDzhDvD1s7Y6FZ8RpM= github.com/pseudomuto/protokit v0.2.0/go.mod h1:2PdH30hxVHsup8KpBTOXTBeMVhJZVio3Q8ViKSAXT0Q= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -477,11 +484,12 @@ golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto 
v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk= golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= +golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 h1:MGwJjxBy0HJshjDNfLsYO8xppfqWlA5ZT9OhtUUhTNw= -golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= +golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 h1:mchzmB1XO2pMaKFRqk/+MV3mgGG96aqaPXaMifQU47w= +golang.org/x/exp v0.0.0-20231108232855-2478ac86f678/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -494,8 +502,9 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -521,8 +530,9 @@ golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -536,8 +546,9 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync 
v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -567,6 +578,7 @@ golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -598,8 +610,9 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac h1:7zkz7BUtwNFFqcowJ+RIgu2MaV/MapERkDIy+mwPyjs= golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -621,8 +634,9 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= -golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -690,9 +704,19 @@ gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +modernc.org/libc v1.41.0 h1:g9YAc6BkKlgORsUWj+JwqoB1wU3o4DE3bM3yvA3k+Gk= +modernc.org/libc v1.41.0/go.mod h1:w0eszPsiXoOnoMJgrXjglgLuDy/bt5RR4y3QzUUeodY= +modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= +modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= +modernc.org/memory v1.7.2 h1:Klh90S215mmH8c9gO98QxQFsY+W451E8AnzjoE2ee1E= 
+modernc.org/memory v1.7.2/go.mod h1:NO4NVCQy0N7ln+T9ngWqOQfi7ley4vpwvARR+Hjw95E= +modernc.org/sqlite v1.27.0 h1:MpKAHoyYB7xqcwnUwkuD+npwEa0fojF0B5QRbN+auJ8= +modernc.org/sqlite v1.27.0/go.mod h1:Qxpazz0zH8Z1xCFyi5GSL3FzbtZ3fvbjmywNogldEW0= mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ= mvdan.cc/gofumpt v0.5.0 h1:0EQ+Z56k8tXjj/6TQD25BFNKQXpCvT0rnansIc7Ug5E= mvdan.cc/gofumpt v0.5.0/go.mod h1:HBeVDtMKRZpXyxFciAirzdKklDlGu8aAy1wEbH5Y9js= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= pgregory.net/rapid v1.1.0 h1:CMa0sjHSru3puNx+J0MIAuiiEV4N0qj8/cMWGBBCsjw= pgregory.net/rapid v1.1.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= +zombiezen.com/go/sqlite v1.0.0 h1:D2EvOZqumJBy+6t+0uNTTXnepUpB/pKG45op/UziI1o= +zombiezen.com/go/sqlite v1.0.0/go.mod h1:Yx7FJ77tr7Ucwi5solhXAxpflyxk/BHNXArZ/JvDm60= From 500eb23795b46f88b03146d4e722f4eef78ca046 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 00:58:35 +0200 Subject: [PATCH 2/8] WIP: Add query commands --- cmd/scip/convert.go | 374 +++++++++++++++++++++++-- cmd/scip/convert_test.go | 227 ++++++++++++++- cmd/scip/main.go | 3 +- cmd/scip/query.go | 576 ++++++++++++++++++++++++++++++++++++++ cmd/scip/query_helpers.go | 83 ++++++ cmd/scip/query_test.go | 322 +++++++++++++++++++++ 6 files changed, 1547 insertions(+), 38 deletions(-) create mode 100644 cmd/scip/query.go create mode 100644 cmd/scip/query_helpers.go create mode 100644 cmd/scip/query_test.go diff --git a/cmd/scip/convert.go b/cmd/scip/convert.go index e5a33333..7a33cbf6 100644 --- a/cmd/scip/convert.go +++ b/cmd/scip/convert.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "sort" + "strings" "github.com/cockroachdb/errors" "github.com/sourcegraph/scip/bindings/go/scip" @@ -33,7 +34,14 @@ The SQLite database contains four tables: 1. documents: Contains metadata about source files 2. chunks: Stores occurrences in chunks of approximately 100 occurrences per chunk 3. 
symbols: Stores symbol information with unique IDs -4. mentions: Tracks which chunks contain which symbols with specific roles`, +4. mentions: Tracks which chunks contain which symbols with specific roles + +The database registers a virtual table 'scip_occurrences' that can be used to query occurrences directly from the blob format: + +SELECT symbol, startLine, startChar, role +FROM scip_occurrences +WHERE blob = (SELECT occurrences FROM chunks WHERE document_id = 1) +AND role = 'definition'`, Flags: []cli.Flag{ &cli.StringFlag{ Name: "output", @@ -190,6 +198,22 @@ func createSQLiteDatabase(path string) (*sqlite.Conn, error) { return nil, errors.Wrap(err, "failed to create mentions table") } + err = sqlitex.ExecuteTransient(conn, `CREATE TABLE defn_trees ( + id INTEGER PRIMARY KEY, + document_id INTEGER NOT NULL, + symbol_id INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_char INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_char INTEGER NOT NULL, + FOREIGN KEY (document_id) REFERENCES documents(id), + FOREIGN KEY (symbol_id) REFERENCES symbols(id) + )`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create defn_trees table") + } + // Create indexes err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_documents_path ON documents(relative_path)`, nil) if err != nil { @@ -227,6 +251,18 @@ func createSQLiteDatabase(path string) (*sqlite.Conn, error) { return nil, errors.Wrap(err, "failed to create index") } + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_defn_trees_symbol_id ON defn_trees(symbol_id)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + + err = sqlitex.ExecuteTransient(conn, `CREATE INDEX idx_defn_trees_document ON defn_trees(document_id, start_line, end_line)`, nil) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to create index") + } + // Commit the transaction err = sqlitex.ExecuteTransient(conn, "COMMIT", nil) if err != 
nil { @@ -234,6 +270,15 @@ func createSQLiteDatabase(path string) (*sqlite.Conn, error) { return nil, errors.Wrap(err, "failed to commit transaction") } + // Register the SCIP occurrences virtual table module + err = conn.SetModule("scip_occurrences", &sqlite.Module{ + Connect: scipOccurrencesConnect, + }) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to register scip_occurrences module") + } + return conn, nil } @@ -243,6 +288,61 @@ type Converter struct { chunkSize int } +// OccurrenceInfo contains parsed information from a SCIP occurrence +type OccurrenceInfo struct { + Symbol string // The symbol name + StartLine int32 // Start line number + StartChar int32 // Start character position + EndLine int32 // End line number + EndChar int32 // End character position + SymbolRoles int32 // Raw symbol roles bitmap + Role string // Human-readable primary role (definition, reference, etc.) +} + +// DeserializeOccurrencesFromBlob parses a binary SCIP occurrences blob into structured data +func DeserializeOccurrencesFromBlob(blob []byte) ([]OccurrenceInfo, error) { + if blob == nil { + return nil, nil + } + + // Deserialize the blob into a SCIP Document + doc := &scip.Document{} + if err := proto.Unmarshal(blob, doc); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal occurrences") + } + + // Convert each occurrence to a structured format + result := make([]OccurrenceInfo, len(doc.Occurrences)) + for i, occ := range doc.Occurrences { + // Extract range information + startLine, startChar := occ.Range[0], occ.Range[1] + endLine, endChar := startLine, startChar + if len(occ.Range) >= 4 { + endLine, endChar = occ.Range[2], occ.Range[3] + } + + // Get role as string + role := "unknown" + if occ.SymbolRoles&int32(scip.SymbolRole_Definition) != 0 { + role = "definition" + } else { + role = "reference" + } + + result[i] = OccurrenceInfo{ + Symbol: occ.Symbol, + StartLine: startLine, + StartChar: startChar, + EndLine: endLine, + EndChar: 
endChar, + SymbolRoles: occ.SymbolRoles, + Role: role, + } + } + + return result, nil +} + // NewConverter creates a new converter instance func NewConverter(conn *sqlite.Conn, chunkSize int) *Converter { return &Converter{ @@ -259,7 +359,7 @@ func (c *Converter) Convert(index *scip.Index) error { if err != nil { return errors.Wrap(err, "failed to begin transaction") } - + defer func() { if r := recover(); r != nil { // Rollback on panic @@ -308,7 +408,7 @@ func (c *Converter) Convert(index *scip.Index) error { docStmt.Finalize() // Clean up on error return errors.Wrapf(err, "failed to insert document %s", doc.RelativePath) } - + // Cleanup docStmt.Finalize() @@ -418,6 +518,82 @@ func (c *Converter) Convert(index *scip.Index) error { } } + // Process definition occurrences with enclosing ranges for the defn_trees table + // We do this before chunking so we can easily match symbols to definitions + symDefnLookupStmt, err := c.conn.Prepare("SELECT id FROM symbols WHERE symbol = ?") + if err != nil { + return errors.Wrap(err, "failed to prepare symbol lookup statement for defn_trees") + } + defnTreeStmt, err := c.conn.Prepare("INSERT INTO defn_trees (document_id, symbol_id, start_line, start_char, end_line, end_char) VALUES (?, ?, ?, ?, ?, ?)") + if err != nil { + symDefnLookupStmt.Finalize() + return errors.Wrap(err, "failed to prepare defn_trees insert statement") + } + + // Look for definition occurrences with enclosing ranges + for _, occ := range doc.Occurrences { + // Skip if not a definition or local symbol or no enclosing range + if occ.SymbolRoles&int32(scip.SymbolRole_Definition) == 0 || + strings.HasPrefix(occ.Symbol, "local ") || + len(occ.EnclosingRange) < 3 { + continue + } + + // Look up the symbol ID + symDefnLookupStmt.Reset() + symDefnLookupStmt.BindText(1, occ.Symbol) + hasRow, err := symDefnLookupStmt.Step() + if err != nil { + symDefnLookupStmt.Finalize() + defnTreeStmt.Finalize() + return errors.Wrap(err, "failed to lookup symbol for defn_trees") 
+ } + + if !hasRow { + // Symbol not found, can't add to defn_trees + continue + } + + // Get the symbol ID + symbolID := symDefnLookupStmt.ColumnInt64(0) + + // Extract range information with correct handling of 3 vs 4 elements + startLine := occ.EnclosingRange[0] + startChar := occ.EnclosingRange[1] + + // Handle both 3-element and 4-element ranges + var endLine, endChar int32 + if len(occ.EnclosingRange) >= 4 { + // 4-element range: [startLine, startChar, endLine, endChar] + endLine = occ.EnclosingRange[2] + endChar = occ.EnclosingRange[3] + } else { + // 3-element range: [startLine, startChar, endChar] + endLine = startLine + endChar = occ.EnclosingRange[2] + } + + // Insert into defn_trees + defnTreeStmt.Reset() + defnTreeStmt.BindInt64(1, docID) + defnTreeStmt.BindInt64(2, symbolID) + defnTreeStmt.BindInt64(3, int64(startLine)) // start_line + defnTreeStmt.BindInt64(4, int64(startChar)) // start_char + defnTreeStmt.BindInt64(5, int64(endLine)) // end_line + defnTreeStmt.BindInt64(6, int64(endChar)) // end_char + + _, err = defnTreeStmt.Step() + if err != nil { + symDefnLookupStmt.Finalize() + defnTreeStmt.Finalize() + return errors.Wrapf(err, "failed to insert into defn_trees for symbol %s", occ.Symbol) + } + } + + // Clean up the statements + symDefnLookupStmt.Finalize() + defnTreeStmt.Finalize() + // Process occurrences in chunks (already sorted at document level) chunkedOccurrences := chunkOccurrences(doc.Occurrences, c.chunkSize) for i, chunk := range chunkedOccurrences { @@ -427,7 +603,7 @@ func (c *Converter) Convert(index *scip.Index) error { // Find min and max line numbers in this chunk startLine, endLine := findLineRange(chunk) - + // Serialize occurrences occurrencesBlob, err := proto.Marshal(&scip.Document{ Occurrences: chunk, @@ -439,7 +615,7 @@ func (c *Converter) Convert(index *scip.Index) error { // Insert chunk chunkStmt, err := c.conn.Prepare("INSERT INTO chunks (document_id, chunk_index, start_line, end_line, occurrences) VALUES (?, ?, ?, 
?, ?)") if err != nil { - return errors.Wrap(err, "failed to prepare chunk statement") + return errors.Wrap(err, "failed to prepare chunk statement") } // Bind parameters @@ -473,30 +649,30 @@ func (c *Converter) Convert(index *scip.Index) error { // Prepare statements for mentions mentionStmt, err := c.conn.Prepare("INSERT OR IGNORE INTO mentions (chunk_id, symbol_id, role) VALUES (?, ?, ?)") if err != nil { - return errors.Wrap(err, "failed to prepare mention statement") + return errors.Wrap(err, "failed to prepare mention statement") } symLookupStmt, err := c.conn.Prepare("SELECT id FROM symbols WHERE symbol = ?") if err != nil { - mentionStmt.Finalize() // Clean up previous statement - return errors.Wrap(err, "failed to prepare symbol lookup statement for mentions") + mentionStmt.Finalize() // Clean up previous statement + return errors.Wrap(err, "failed to prepare symbol lookup statement for mentions") } symInsertStmt, err := c.conn.Prepare("INSERT OR IGNORE INTO symbols (symbol, display_name, kind, documentation, signature, enclosing_symbol, relationships) VALUES (?, ?, ?, ?, ?, ?, ?)") if err != nil { - mentionStmt.Finalize() // Clean up previous statements - symLookupStmt.Finalize() - return errors.Wrap(err, "failed to prepare symbol insert statement for mentions") + mentionStmt.Finalize() // Clean up previous statements + symLookupStmt.Finalize() + return errors.Wrap(err, "failed to prepare symbol insert statement for mentions") } // Add mentions for each symbol in this chunk for symbolName, role := range symbolRoles { // Look up the symbol ID var symbolID int64 - + // Bind parameters for lookup symLookupStmt.BindText(1, symbolName) - + // Execute lookup hasRow, err := symLookupStmt.Step() if err != nil { @@ -510,12 +686,12 @@ func (c *Converter) Convert(index *scip.Index) error { // Symbol doesn't exist yet, create it with minimal information symInsertStmt.Reset() symInsertStmt.BindText(1, symbolName) - symInsertStmt.BindNull(2) // display_name + 
symInsertStmt.BindNull(2) // display_name symInsertStmt.BindInt64(3, 0) // kind - symInsertStmt.BindNull(4) // documentation - symInsertStmt.BindNull(5) // signature - symInsertStmt.BindNull(6) // enclosing_symbol - symInsertStmt.BindNull(7) // relationships + symInsertStmt.BindNull(4) // documentation + symInsertStmt.BindNull(5) // signature + symInsertStmt.BindNull(6) // enclosing_symbol + symInsertStmt.BindNull(7) // relationships // Execute insert _, err = symInsertStmt.Step() @@ -526,7 +702,7 @@ func (c *Converter) Convert(index *scip.Index) error { // Look up the newly inserted symbol symLookupStmt.Reset() symLookupStmt.BindText(1, symbolName) - + hasRow, err = symLookupStmt.Step() if err != nil || !hasRow { return errors.Wrapf(err, "failed to look up newly inserted symbol %s for mention", symbolName) @@ -555,7 +731,7 @@ func (c *Converter) Convert(index *scip.Index) error { err = sqlitex.Execute(c.conn, "COMMIT", nil) if err != nil { // Try to rollback, but keep the original error - rberr := sqlitex.Execute(c.conn, "ROLLBACK", nil) + rberr := sqlitex.Execute(c.conn, "ROLLBACK", nil) if rberr != nil { // Just log the rollback error fmt.Fprintf(os.Stderr, "Error rolling back transaction: %v\n", rberr) @@ -631,6 +807,156 @@ func convertRelationshipsToBlob(relationships []*scip.Relationship) ([]byte, err return json.Marshal(relationships) } +// scipOccurrencesConnect creates a new virtual table for SCIP occurrences +func scipOccurrencesConnect(conn *sqlite.Conn, options *sqlite.VTableConnectOptions) (sqlite.VTable, *sqlite.VTableConfig, error) { + // Set up the virtual table configuration + config := &sqlite.VTableConfig{ + Declaration: `CREATE TABLE x( + blob BLOB HIDDEN, -- First column is the hidden blob argument + symbol TEXT, + startLine INTEGER, + startChar INTEGER, + endLine INTEGER, + endChar INTEGER, + roles INTEGER, + role TEXT + )`, + } + + // Create the table + return &SCIPOccurrencesTable{}, config, nil +} + +// SCIPOccurrencesTable implements the 
VTable interface +type SCIPOccurrencesTable struct{} + +// BestIndex is called to determine how to execute a query +func (t *SCIPOccurrencesTable) BestIndex(inputs *sqlite.IndexInputs) (*sqlite.IndexOutputs, error) { + output := &sqlite.IndexOutputs{ + EstimatedCost: 1000, // This is relatively expensive + EstimatedRows: 100, // Estimate 100 rows per result + ID: sqlite.IndexID{ + Num: 1, // Simple index number + }, + } + + // Look for constraints on the 'blob' column (index 0) + hasBlob := false + output.ConstraintUsage = make([]sqlite.IndexConstraintUsage, len(inputs.Constraints)) + + for i, constraint := range inputs.Constraints { + if constraint.Column == 0 { // blob column + output.ConstraintUsage[i].ArgvIndex = 1 // First argument to Filter + output.ConstraintUsage[i].Omit = true + hasBlob = true + } + } + + // If we don't have a blob constraint, that's a problem + if !hasBlob { + return nil, errors.New("no blob constraint found") + } + + return output, nil +} + +// Open creates a new cursor for scanning the virtual table +func (t *SCIPOccurrencesTable) Open() (sqlite.VTableCursor, error) { + return &SCIPOccurrencesCursor{}, nil +} + +// Disconnect cleans up the virtual table +func (t *SCIPOccurrencesTable) Disconnect() error { + return nil +} + +// Destroy is called when the virtual table is dropped +func (t *SCIPOccurrencesTable) Destroy() error { + return nil +} + +// SCIPOccurrencesCursor implements the VTableCursor interface +type SCIPOccurrencesCursor struct { + occurrences []OccurrenceInfo + rowid int64 +} + +// Column returns a column value at the current cursor position +func (c *SCIPOccurrencesCursor) Column(col int, noChange bool) (sqlite.Value, error) { + if int64(len(c.occurrences)) <= c.rowid || c.rowid < 0 { + return sqlite.Value{}, errors.New("cursor position out of bounds") + } + + occ := c.occurrences[c.rowid] + + switch col { + case 0: // blob - hidden column, not used + return sqlite.Value{}, nil + case 1: // symbol + return 
sqlite.TextValue(occ.Symbol), nil + case 2: // startLine + return sqlite.IntegerValue(int64(occ.StartLine)), nil + case 3: // startChar + return sqlite.IntegerValue(int64(occ.StartChar)), nil + case 4: // endLine + return sqlite.IntegerValue(int64(occ.EndLine)), nil + case 5: // endChar + return sqlite.IntegerValue(int64(occ.EndChar)), nil + case 6: // symbolRoles + return sqlite.IntegerValue(int64(occ.SymbolRoles)), nil + case 7: // role + return sqlite.TextValue(occ.Role), nil + default: + return sqlite.Value{}, errors.Errorf("invalid column index %d", col) + } +} + +// Filter initializes the cursor using the provided index +func (c *SCIPOccurrencesCursor) Filter(idx sqlite.IndexID, argv []sqlite.Value) error { + c.rowid = 0 + c.occurrences = nil + + // Get the blob from the first argument + var blob []byte + if len(argv) > 0 { + blob = argv[0].Blob() + } + + if blob == nil { + return nil // No error, just no data + } + + // Parse the occurrences + var err error + c.occurrences, err = DeserializeOccurrencesFromBlob(blob) + if err != nil { + return errors.Wrap(err, "failed to parse occurrences blob") + } + + return nil +} + +// Next advances the cursor to the next row +func (c *SCIPOccurrencesCursor) Next() error { + c.rowid++ + return nil +} + +// EOF checks if we've gone past the end of available data +func (c *SCIPOccurrencesCursor) EOF() bool { + return c.rowid >= int64(len(c.occurrences)) +} + +// RowID returns the current row ID +func (c *SCIPOccurrencesCursor) RowID() (int64, error) { + return c.rowid, nil +} + +// Close cleans up any resources used by the cursor +func (c *SCIPOccurrencesCursor) Close() error { + return nil +} + // sortOccurrences sorts occurrences for consistent storage func sortOccurrences(occurrences []*scip.Occurrence) { // If we have no occurrences or just one, no need to sort @@ -643,18 +969,18 @@ func sortOccurrences(occurrences []*scip.Occurrence) { if occurrences[i].Range[0] != occurrences[j].Range[0] { return 
occurrences[i].Range[0] < occurrences[j].Range[0] } - + // Then by column if occurrences[i].Range[1] != occurrences[j].Range[1] { return occurrences[i].Range[1] < occurrences[j].Range[1] } - + // Then by symbol name if occurrences[i].Symbol != occurrences[j].Symbol { return occurrences[i].Symbol < occurrences[j].Symbol } - + // Then by symbol roles return occurrences[i].SymbolRoles < occurrences[j].SymbolRoles }) -} \ No newline at end of file +} diff --git a/cmd/scip/convert_test.go b/cmd/scip/convert_test.go index 458c5844..46797849 100644 --- a/cmd/scip/convert_test.go +++ b/cmd/scip/convert_test.go @@ -196,6 +196,207 @@ func TestConvert(t *testing.T) { } } +func TestSCIPOccurrencesVirtualTable(t *testing.T) { + // Create a temporary directory for the test + tmpDir, err := os.MkdirTemp("", "scip-vtable-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a test SQLite database + dbPath := filepath.Join(tmpDir, "test.db") + db, err := createSQLiteDatabase(dbPath) + if err != nil { + t.Fatalf("Failed to create SQLite database: %v", err) + } + defer db.Close() + + // Create test occurrences + occurrences := []*scip.Occurrence{ + { + Range: []int32{1, 0, 1, 5}, + Symbol: "go package main/func1", + SymbolRoles: int32(scip.SymbolRole_Definition), + }, + { + Range: []int32{2, 10, 2, 15}, + Symbol: "go package main/func2", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + } + + // Serialize the occurrences to a protobuf blob + occBlob, err := proto.Marshal(&scip.Document{Occurrences: occurrences}) + if err != nil { + t.Fatalf("Failed to marshal occurrences: %v", err) + } + + // Create a temporary table for testing + err = sqlitex.ExecuteTransient(db, "CREATE TABLE test_occurrences (id INTEGER PRIMARY KEY, blob BLOB)", nil) + if err != nil { + t.Fatalf("Failed to create test table: %v", err) + } + + // Insert the test blob + stmt, err := db.Prepare("INSERT INTO test_occurrences (blob) VALUES 
(?)") + if err != nil { + t.Fatalf("Failed to prepare insert statement: %v", err) + } + stmt.BindBytes(1, occBlob) + _, err = stmt.Step() + if err != nil { + stmt.Finalize() + t.Fatalf("Failed to insert test blob: %v", err) + } + stmt.Finalize() + + // Query the virtual table + type Result struct { + Symbol string + StartLine int64 + StartChar int64 + EndLine int64 + EndChar int64 + SymbolRoles int64 + Role string + } + + results := []Result{} + + err = sqlitex.Execute(db, "SELECT symbol, startLine, startChar, endLine, endChar, roles, role FROM scip_occurrences WHERE blob = (SELECT blob FROM test_occurrences LIMIT 1) ORDER BY startLine", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + results = append(results, Result{ + Symbol: stmt.ColumnText(0), + StartLine: stmt.ColumnInt64(1), + StartChar: stmt.ColumnInt64(2), + EndLine: stmt.ColumnInt64(3), + EndChar: stmt.ColumnInt64(4), + SymbolRoles: stmt.ColumnInt64(5), + Role: stmt.ColumnText(6), + }) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to query virtual table: %v", err) + } + + // Verify we have the expected number of rows + if len(results) != 2 { + t.Fatalf("Expected 2 occurrences, got %d", len(results)) + } + + // Verify the first occurrence + expectedResults := []Result{ + { + Symbol: "go package main/func1", + StartLine: 1, + StartChar: 0, + EndLine: 1, + EndChar: 5, + SymbolRoles: int64(scip.SymbolRole_Definition), + Role: "definition", + }, + { + Symbol: "go package main/func2", + StartLine: 2, + StartChar: 10, + EndLine: 2, + EndChar: 15, + SymbolRoles: int64(scip.SymbolRole_ReadAccess), + Role: "reference", + }, + } + + for i, expected := range expectedResults { + actual := results[i] + if actual.Symbol != expected.Symbol { + t.Errorf("Result %d: expected Symbol='%s', got '%s'", i, expected.Symbol, actual.Symbol) + } + if actual.StartLine != expected.StartLine { + t.Errorf("Result %d: expected StartLine=%d, got %d", i, expected.StartLine, actual.StartLine) + } + if 
actual.StartChar != expected.StartChar { + t.Errorf("Result %d: expected StartChar=%d, got %d", i, expected.StartChar, actual.StartChar) + } + if actual.EndLine != expected.EndLine { + t.Errorf("Result %d: expected EndLine=%d, got %d", i, expected.EndLine, actual.EndLine) + } + if actual.EndChar != expected.EndChar { + t.Errorf("Result %d: expected EndChar=%d, got %d", i, expected.EndChar, actual.EndChar) + } + if actual.Role != expected.Role { + t.Errorf("Result %d: expected Role='%s', got '%s'", i, expected.Role, actual.Role) + } + } +} + +func TestDeserializeOccurrencesFromBlob(t *testing.T) { + // Create test occurrences + occurrences := []*scip.Occurrence{ + { + Range: []int32{1, 0, 1, 5}, + Symbol: "go package main/func1", + SymbolRoles: int32(scip.SymbolRole_Definition), + }, + { + Range: []int32{2, 10, 2, 15}, + Symbol: "go package main/func2", + SymbolRoles: int32(0), + }, + } + + // Serialize the occurrences to a protobuf blob + occBlob, err := proto.Marshal(&scip.Document{Occurrences: occurrences}) + if err != nil { + t.Fatalf("Failed to marshal occurrences: %v", err) + } + + // Use our utility function to deserialize the blob + parsedOccurrences, err := DeserializeOccurrencesFromBlob(occBlob) + if err != nil { + t.Fatalf("Failed to deserialize occurrences: %v", err) + } + + // Verify results + if len(parsedOccurrences) != 2 { + t.Errorf("Expected 2 occurrences, got %d", len(parsedOccurrences)) + } + + // Verify first occurrence + if parsedOccurrences[0].Symbol != "go package main/func1" { + t.Errorf("Expected symbol 'go package main/func1', got '%s'", parsedOccurrences[0].Symbol) + } + if parsedOccurrences[0].Role != "definition" { + t.Errorf("Expected role 'definition', got '%s'", parsedOccurrences[0].Role) + } + if parsedOccurrences[0].StartLine != 1 || parsedOccurrences[0].StartChar != 0 { + t.Errorf("Expected start position (1,0), got (%d,%d)", + parsedOccurrences[0].StartLine, parsedOccurrences[0].StartChar) + } + if parsedOccurrences[0].EndLine != 
1 || parsedOccurrences[0].EndChar != 5 { + t.Errorf("Expected end position (1,5), got (%d,%d)", + parsedOccurrences[0].EndLine, parsedOccurrences[0].EndChar) + } + + // Verify second occurrence + if parsedOccurrences[1].Symbol != "go package main/func2" { + t.Errorf("Expected symbol 'go package main/func2', got '%s'", parsedOccurrences[1].Symbol) + } + if parsedOccurrences[1].Role != "reference" { + t.Errorf("Expected role 'reference', got '%s'", parsedOccurrences[1].Role) + } + if parsedOccurrences[1].StartLine != 2 || parsedOccurrences[1].StartChar != 10 { + t.Errorf("Expected start position (2,10), got (%d,%d)", + parsedOccurrences[1].StartLine, parsedOccurrences[1].StartChar) + } + if parsedOccurrences[1].EndLine != 2 || parsedOccurrences[1].EndChar != 15 { + t.Errorf("Expected end position (2,15), got (%d,%d)", + parsedOccurrences[1].EndLine, parsedOccurrences[1].EndChar) + } +} + func createTestIndex() *scip.Index { return &scip.Index{ Metadata: &scip.Metadata{ @@ -206,9 +407,9 @@ func createTestIndex() *scip.Index { }, Documents: []*scip.Document{ { - RelativePath: "src/main.go", - Language: "go", - Text: "package main\n\nfunc main() {\n\tfmt.Println(\"Hello, world!\")\n}\n", + RelativePath: "src/main.go", + Language: "go", + Text: "package main\n\nfunc main() {\n\tfmt.Println(\"Hello, world!\")\n}\n", PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, Occurrences: []*scip.Occurrence{ { @@ -229,19 +430,19 @@ func createTestIndex() *scip.Index { { Range: []int32{3, 5, 3, 12}, Symbol: "go . 
fmt/Println().", - SymbolRoles: int32(scip.SymbolRole_ReadAccess), + SymbolRoles: int32(0), }, }, Symbols: []*scip.SymbolInformation{ { - Symbol: "go package main", - DisplayName: "main", + Symbol: "go package main", + DisplayName: "main", Documentation: []string{"Main package"}, Kind: scip.SymbolInformation_Package, }, { - Symbol: "go package main/main().", - DisplayName: "main", + Symbol: "go package main/main().", + DisplayName: "main", Documentation: []string{"Main function"}, Kind: scip.SymbolInformation_Function, }, @@ -250,14 +451,14 @@ func createTestIndex() *scip.Index { }, ExternalSymbols: []*scip.SymbolInformation{ { - Symbol: "go . fmt", - DisplayName: "fmt", + Symbol: "go . fmt", + DisplayName: "fmt", Documentation: []string{"Formatting package"}, Kind: scip.SymbolInformation_Package, }, { - Symbol: "go . fmt/Println().", - DisplayName: "Println", + Symbol: "go . fmt/Println().", + DisplayName: "Println", Documentation: []string{"Print to standard output"}, Kind: scip.SymbolInformation_Function, }, @@ -274,4 +475,4 @@ func writeTestIndex(t *testing.T, index *scip.Index, path string) { if err := os.WriteFile(path, indexBytes, 0644); err != nil { t.Fatalf("Failed to write test index: %v", err) } -} \ No newline at end of file +} diff --git a/cmd/scip/main.go b/cmd/scip/main.go index 712fdabc..2ef62f51 100644 --- a/cmd/scip/main.go +++ b/cmd/scip/main.go @@ -25,7 +25,8 @@ func commands() []*cli.Command { stats := statsCommand() test := testCommand() convert := convertCommand() - return []*cli.Command{&lint, &print, &snapshot, &stats, &test, &convert} + query := queryCommand() + return []*cli.Command{&lint, &print, &snapshot, &stats, &test, &convert, &query} } //go:embed version.txt diff --git a/cmd/scip/query.go b/cmd/scip/query.go new file mode 100644 index 00000000..e5df352f --- /dev/null +++ b/cmd/scip/query.go @@ -0,0 +1,576 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + "strings" + + "github.com/cockroachdb/errors" + 
"github.com/urfave/cli/v2" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +func queryCommand() cli.Command { + var dbPath string + + command := cli.Command{ + Name: "query", + Usage: "Query a SCIP SQLite database", + Description: `Performs queries against a SQLite database created with the 'convert' command.`, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "db-path", + Usage: "Path to SQLite database", + Destination: &dbPath, + Required: true, + }, + }, + Before: func(c *cli.Context) error { + // Check if the database exists + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + return errors.Errorf("database file does not exist: %s", dbPath) + } + + return nil + }, + Subcommands: []*cli.Command{ + { + Name: "goto-definition", + Usage: "Find the definition location for a symbol", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "symbol", + Usage: "Symbol to look up", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + symbol := c.String("symbol") + return gotoDefinitionQuery(dbPath, symbol, c.App.Writer) + }, + }, + { + Name: "find-references", + Usage: "Find all references to a symbol", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "symbol", + Usage: "Symbol to find references for", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + symbol := c.String("symbol") + return findReferencesQuery(dbPath, symbol, c.App.Writer) + }, + }, + { + Name: "call-hierarchy", + Usage: "Generate a call hierarchy for a symbol", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "symbol", + Usage: "Symbol to generate call hierarchy for", + Required: true, + }, + &cli.IntFlag{ + Name: "max-depth", + Usage: "Maximum depth of the call hierarchy", + Value: 10, + }, + }, + Action: func(c *cli.Context) error { + symbol := c.String("symbol") + maxDepth := c.Int("max-depth") + return callHierarchyQuery(dbPath, symbol, maxDepth, c.App.Writer) + }, + }, + }, + } + + // Register the SCIP occurrences virtual table module in an action that 
runs + // before any subcommands + command.Action = func(c *cli.Context) error { + // If no subcommands specified, print help + return cli.ShowCommandHelp(c, "query") + } + + return command +} + +// Location represents a code location returned by queries +type Location struct { + Path string `json:"path"` // File path + Line int `json:"line"` // 0-based line number + Character int `json:"character"` // 0-based character offset + EndLine int `json:"endLine"` // 0-based end line + EndChar int `json:"endChar"` // 0-based end character + Snippet string `json:"snippet,omitempty"` // Code snippet if available + Symbol string `json:"-"` // Symbol at this location + Role string `json:"role"` // Role (definition/reference) +} + +// CallHierarchyItem represents a node in the call hierarchy +type CallHierarchyItem struct { + Symbol string `json:"symbol"` + Location Location `json:"location,omitempty"` + Calls []CallHierarchyItem `json:"calls,omitempty"` +} + +// openQueryDB opens the SQLite database and registers the virtual table +func openQueryDB(dbPath string) (*sqlite.Conn, error) { + // Open a connection to the database + conn, err := sqlite.OpenConn(dbPath, sqlite.OpenReadOnly) + if err != nil { + return nil, errors.Wrapf(err, "failed to open SQLite database at %s", dbPath) + } + + // Register the SCIP occurrences virtual table module + err = conn.SetModule("scip_occurrences", &sqlite.Module{ + Connect: scipOccurrencesConnect, + }) + if err != nil { + conn.Close() + return nil, errors.Wrap(err, "failed to register scip_occurrences module") + } + + return conn, nil +} + +// gotoDefinitionQuery returns the location where a symbol is defined +func gotoDefinitionQuery(dbPath string, symbol string, out io.Writer) error { + db, err := openQueryDB(dbPath) + if err != nil { + return err + } + defer db.Close() + + // Look up definitions for the symbol + locations, err := findSymbolOccurrences(db, symbol, true) + if err != nil { + return err + } + + if len(locations) == 0 { + 
return errors.Errorf("no definition found for symbol: %s", symbol) + } + + // Convert to JSON + result, err := json.MarshalIndent(locations, "", " ") + if err != nil { + return errors.Wrap(err, "failed to marshal result to JSON") + } + + // Write the result + _, err = fmt.Fprintln(out, string(result)) + return err +} + +// findReferencesQuery returns all references to a symbol +func findReferencesQuery(dbPath string, symbol string, out io.Writer) error { + db, err := openQueryDB(dbPath) + if err != nil { + return err + } + defer db.Close() + + // Look up references for the symbol + locations, err := findSymbolOccurrences(db, symbol, false) + if err != nil { + return err + } + + if len(locations) == 0 { + return errors.Errorf("no references found for symbol: %s", symbol) + } + + // Convert to JSON + result, err := json.MarshalIndent(locations, "", " ") + if err != nil { + return errors.Wrap(err, "failed to marshal result to JSON") + } + + // Write the result + _, err = fmt.Fprintln(out, string(result)) + return err +} + +// callHierarchyQuery generates a call hierarchy for a symbol +func callHierarchyQuery(dbPath string, symbol string, maxDepth int, out io.Writer) error { + db, err := openQueryDB(dbPath) + if err != nil { + return err + } + defer db.Close() + + // Check if the symbol exists + var found bool + + err = sqlitex.Execute(db, "SELECT 1 FROM symbols WHERE symbol = ?", &sqlitex.ExecOptions{ + Args: []interface{}{symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + found = true + return nil + }, + }) + + if err != nil { + return errors.Wrap(err, "failed to query symbols table") + } + + if !found { + return errors.Errorf("symbol not found: %s", symbol) + } + + // Start with the root symbol + root := CallHierarchyItem{ + Symbol: symbol, + } + + // Try to find location info for the symbol (not required, but helpful) + definitions, err := findSymbolOccurrences(db, symbol, true) + if err == nil && len(definitions) > 0 { + root.Location = definitions[0] + } + 
+ // Get calls + visitedSymbols := make(map[string]bool) + visitedSymbols[symbol] = true + + // Debug output to stderr, not out (which needs to have valid JSON) + debugOut := os.Stderr + + // Build the call hierarchy + err = buildCallHierarchy(db, &root, 0, maxDepth, visitedSymbols, debugOut) + if err != nil { + return err + } + + // Convert to JSON + result, err := json.MarshalIndent(root, "", " ") + if err != nil { + return errors.Wrap(err, "failed to marshal result to JSON") + } + + // Write the result + _, err = fmt.Fprintln(out, string(result)) + return err +} + +// findSymbolOccurrences looks up all occurrences of a symbol +func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) ([]Location, error) { + var locations []Location + + // Skip local symbols, which are not supported for queries + if strings.HasPrefix(symbol, "local ") { + return nil, errors.New("local symbols are not supported for this query") + } + + // First, get the symbol ID + var symbolID int64 + var found bool + + err := sqlitex.Execute(db, "SELECT id FROM symbols WHERE symbol = ?", &sqlitex.ExecOptions{ + Args: []interface{}{symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + symbolID = stmt.ColumnInt64(0) + found = true + return nil + }, + }) + + if err != nil { + return nil, errors.Wrap(err, "failed to query symbols table") + } + + if !found { + return nil, nil // No symbol found + } + + // First, get relevant chunks from the mentions table + type ChunkInfo struct { + ChunkID int64 + DocumentID int64 + FilePath string + } + + var chunks []ChunkInfo + + // Find all chunks that mention this symbol using the mentions table + mentionsQuery := "SELECT c.id, c.document_id, d.relative_path " + + "FROM mentions m " + + "JOIN chunks c ON m.chunk_id = c.id " + + "JOIN documents d ON c.document_id = d.id " + + "WHERE m.symbol_id = ?" 
+ + if definitionsOnly { + mentionsQuery += " AND m.role & 1 = 1" // SymbolRole_Definition = 1 + } + + err = sqlitex.Execute(db, mentionsQuery, &sqlitex.ExecOptions{ + Args: []interface{}{symbolID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + chunks = append(chunks, ChunkInfo{ + ChunkID: stmt.ColumnInt64(0), + DocumentID: stmt.ColumnInt64(1), + FilePath: stmt.ColumnText(2), + }) + return nil + }, + }) + + if err != nil { + return nil, errors.Wrap(err, "failed to query mentions table") + } + + // Now, for each chunk, get the occurrences blob and query the scip_occurrences virtual table + for _, chunk := range chunks { + // Get the occurrences blob for this chunk + var occurrencesBlob []byte + + err = sqlitex.Execute(db, "SELECT occurrences FROM chunks WHERE id = ?", &sqlitex.ExecOptions{ + Args: []interface{}{chunk.ChunkID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + occurrencesBlob = readBlob(stmt, 0) + return nil + }, + }) + + if err != nil { + return nil, errors.Wrapf(err, "failed to get occurrences blob for chunk %d", chunk.ChunkID) + } + + if len(occurrencesBlob) == 0 { + continue // Skip empty blobs + } + + // Query the occurrences virtual table to get detailed occurrence information + occSQL := "SELECT symbol, startLine, startChar, endLine, endChar, role FROM scip_occurrences " + + "WHERE blob = ? AND symbol = ?" 
+ + if definitionsOnly { + occSQL += " AND roles & 1 = 1" // SymbolRole_Definition = 1 + } + + oErr := sqlitex.Execute(db, occSQL, &sqlitex.ExecOptions{ + Args: []interface{}{occurrencesBlob, symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + // Extract location information + oSymbol := stmt.ColumnText(0) + startLine := stmt.ColumnInt64(1) + startChar := stmt.ColumnInt64(2) + endLine := stmt.ColumnInt64(3) + endChar := stmt.ColumnInt64(4) + role := stmt.ColumnText(5) + + location := Location{ + Path: chunk.FilePath, + Line: int(startLine), + Character: int(startChar), + EndLine: int(endLine), + EndChar: int(endChar), + Symbol: oSymbol, + Role: role, + } + + locations = append(locations, location) + return nil + }, + }) + + if oErr != nil { + return nil, errors.Wrapf(oErr, "failed to query occurrences for chunk %d", chunk.ChunkID) + } + } + + return locations, nil +} + +// buildCallHierarchy recursively builds a call hierarchy for a symbol +func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, maxDepth int, visitedSymbols map[string]bool, out io.Writer) error { + if depth >= maxDepth { + return nil + } + + // Get the symbol ID for the current node + var symbolID int64 + + err := sqlitex.Execute(db, "SELECT id FROM symbols WHERE symbol = ?", &sqlitex.ExecOptions{ + Args: []interface{}{node.Symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + symbolID = stmt.ColumnInt64(0) + return nil + }, + }) + + if err != nil { + return errors.Wrap(err, "failed to get symbol ID") + } + + // Find all chunks that reference this symbol + type ReferenceInfo struct { + ChunkID int64 + DocumentID int64 + FilePath string + } + + var references []ReferenceInfo + + // Query directly from the mentions table + query := ` + SELECT c.id, c.document_id, d.relative_path + FROM mentions m + JOIN chunks c ON m.chunk_id = c.id + JOIN documents d ON c.document_id = d.id + WHERE m.symbol_id = ? 
AND (m.role & 1) = 0 + ` + + err = sqlitex.Execute(db, query, &sqlitex.ExecOptions{ + Args: []interface{}{symbolID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + references = append(references, ReferenceInfo{ + ChunkID: stmt.ColumnInt64(0), + DocumentID: stmt.ColumnInt64(1), + FilePath: stmt.ColumnText(2), + }) + return nil + }, + }) + + if err != nil { + return errors.Wrap(err, "failed to query references") + } + + // Process each reference chunk + for _, ref := range references { + // Get the occurrences blob for this chunk + var occurrencesBlob []byte + + err = sqlitex.Execute(db, "SELECT occurrences FROM chunks WHERE id = ?", &sqlitex.ExecOptions{ + Args: []interface{}{ref.ChunkID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + occurrencesBlob = readBlob(stmt, 0) + return nil + }, + }) + + if err != nil || len(occurrencesBlob) == 0 { + continue + } + + // Get actual reference locations from the chunk's occurrences + type RefLocation struct { + Line int + Char int + } + var locations []RefLocation + + // Query the occurrences virtual table to find exact reference locations + occSQL := "SELECT startLine, startChar FROM scip_occurrences " + + "WHERE blob = ? AND symbol = ? AND (roles & 1) = 0" // Exclude definitions + + err = sqlitex.Execute(db, occSQL, &sqlitex.ExecOptions{ + Args: []interface{}{occurrencesBlob, node.Symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + locations = append(locations, RefLocation{ + Line: int(stmt.ColumnInt64(0)), + Char: int(stmt.ColumnInt64(1)), + }) + return nil + }, + }) + + if err != nil { + continue + } + + if len(locations) == 0 { + continue + } + + // For each reference location, find the enclosing definition from defn_trees + for _, loc := range locations { + // Find the enclosing definition that contains this reference + defnQuery := ` + SELECT d.*, s.symbol FROM defn_trees d + JOIN symbols s ON d.symbol_id = s.id + WHERE d.document_id = ? + AND d.start_line <= ? + AND d.end_line >= ? + AND s.symbol != ? 
+ ORDER BY (d.end_line - d.start_line) ASC + ` + + err = sqlitex.Execute(db, defnQuery, &sqlitex.ExecOptions{ + Args: []interface{}{ref.DocumentID, loc.Line, loc.Line, node.Symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + symbolName := stmt.ColumnText(7) + + // Skip if the symbol is the same or we've already visited it + if symbolName == node.Symbol || visitedSymbols[symbolName] { + return nil + } + + // Create a new node in the call hierarchy + child := CallHierarchyItem{ + Symbol: symbolName, + Location: Location{ + Path: ref.FilePath, + Line: int(stmt.ColumnInt64(3)), // start_line + Character: int(stmt.ColumnInt64(4)), // start_char + EndLine: int(stmt.ColumnInt64(5)), // end_line + EndChar: int(stmt.ColumnInt64(6)), // end_char + Symbol: symbolName, + Role: "definition", + }, + } + + // Add to visited symbols to prevent cycles + visitedSymbols[symbolName] = true + + // Add to the calls list - check for duplicates + alreadyAdded := false + for _, existing := range node.Calls { + if existing.Symbol == symbolName { + alreadyAdded = true + break + } + } + + if !alreadyAdded { + node.Calls = append(node.Calls, child) + } + + return nil + }, + }) + + if err != nil { + return errors.Wrap(err, "failed to query defn_trees") + } + } + } + + // Now recursively build the hierarchy for each child node + for i := range node.Calls { + err = buildCallHierarchy(db, &node.Calls[i], depth+1, maxDepth, visitedSymbols, out) + if err != nil { + return err + } + } + + return nil +} + +// readBlob reads a blob from a SQLite statement column +func readBlob(stmt *sqlite.Stmt, col int) []byte { + reader := stmt.ColumnReader(col) + data, err := io.ReadAll(reader) + if err != nil { + return nil + } + return data +} diff --git a/cmd/scip/query_helpers.go b/cmd/scip/query_helpers.go new file mode 100644 index 00000000..d2805c98 --- /dev/null +++ b/cmd/scip/query_helpers.go @@ -0,0 +1,83 @@ +package main + +import ( + "github.com/cockroachdb/errors" + "zombiezen.com/go/sqlite" + 
"zombiezen.com/go/sqlite/sqlitex" +) + +// findReferencesLocations finds all references to a symbol +func findReferencesLocations(db *sqlite.Conn, symbol string) ([]Location, error) { + // Use our existing function to find all occurrences + references, err := findSymbolOccurrences(db, symbol, false) + if err != nil { + return nil, errors.Wrapf(err, "failed to find occurrences of %s", symbol) + } + + // Filter to include only references (non-definitions) + var result []Location + for _, ref := range references { + if ref.Role != "definition" { + result = append(result, ref) + } + } + + return result, nil +} + +// findEnclosingDefinitions finds definitions that enclose a reference +func findEnclosingDefinitions(db *sqlite.Conn, reference Location) ([]Location, error) { + // First, get the document ID + var documentID int64 + found := false + + err := sqlitex.Execute(db, "SELECT id FROM documents WHERE relative_path = ?", &sqlitex.ExecOptions{ + Args: []interface{}{reference.Path}, + ResultFunc: func(stmt *sqlite.Stmt) error { + documentID = stmt.ColumnInt64(0) + found = true + return nil + }, + }) + + if err != nil || !found { + return nil, errors.Wrapf(err, "failed to find document ID for path %s", reference.Path) + } + + // Find definitions that contain this line + var results []Location + + query := ` + SELECT d.start_line, d.start_char, d.end_line, d.end_char, s.symbol + FROM defn_trees d + JOIN symbols s ON d.symbol_id = s.id + WHERE d.document_id = ? + AND d.start_line <= ? + AND d.end_line >= ? + AND s.symbol != ? 
-- Exclude the reference itself + ORDER BY (d.end_line - d.start_line) ASC + ` + + err = sqlitex.Execute(db, query, &sqlitex.ExecOptions{ + Args: []interface{}{documentID, reference.Line, reference.Line, reference.Symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + loc := Location{ + Path: reference.Path, + Line: int(stmt.ColumnInt64(0)), + Character: int(stmt.ColumnInt64(1)), + EndLine: int(stmt.ColumnInt64(2)), + EndChar: int(stmt.ColumnInt64(3)), + Symbol: stmt.ColumnText(4), + Role: "definition", + } + results = append(results, loc) + return nil + }, + }) + + if err != nil { + return nil, errors.Wrap(err, "failed to query defn_trees") + } + + return results, nil +} \ No newline at end of file diff --git a/cmd/scip/query_test.go b/cmd/scip/query_test.go new file mode 100644 index 00000000..6020b79c --- /dev/null +++ b/cmd/scip/query_test.go @@ -0,0 +1,322 @@ +package main + +import ( + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/sourcegraph/scip/bindings/go/scip" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +func TestQueryCommands(t *testing.T) { + // Create a temporary directory for the test + tmpDir, err := os.MkdirTemp("", "scip-query-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a test SCIP index + testIndex := createQueryTestIndex() + indexPath := filepath.Join(tmpDir, "index.scip") + writeTestIndex(t, testIndex, indexPath) + + // Output SQLite database path + dbPath := filepath.Join(tmpDir, "index.db") + + // Run the convert command first to create the database + err = convertMain(indexPath, dbPath, 100, io.Discard) + if err != nil { + t.Fatalf("Convert command failed: %v", err) + } + + // Verify the database was created + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Fatalf("Database file was not created at %s", dbPath) + } + + // Test goto-definition query + t.Run("GotoDefinition", func(t 
*testing.T) { + var output strings.Builder + symbol := "go package main/foo()." + // Use the direct query function - this doesn't test flags, but the underlying functionality + err := gotoDefinitionQuery(dbPath, symbol, &output) + if err != nil { + t.Fatalf("Goto definition query failed: %v", err) + } + + // Parse the JSON result + var locations []Location + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &locations) + if err != nil { + t.Fatalf("Failed to parse JSON result: %v", err) + } + + // Verify we get the expected location + if len(locations) != 1 { + t.Fatalf("Expected 1 location, got %d", len(locations)) + } + + loc := locations[0] + if loc.Path != "src/main.go" || loc.Line != 2 || loc.Symbol != symbol || loc.Role != "definition" { + t.Errorf("Unexpected location: %+v", loc) + } + }) + + // Test find-references query + t.Run("FindReferences", func(t *testing.T) { + var output strings.Builder + symbol := "go package main/foo()." + // Use the direct query function - this bypasses the CLI flags + err := findReferencesQuery(dbPath, symbol, &output) + if err != nil { + t.Fatalf("Find references query failed: %v", err) + } + + // Parse the JSON result + var locations []Location + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &locations) + if err != nil { + t.Fatalf("Failed to parse JSON result: %v", err) + } + + // Should find at least 2 locations: the definition and the reference + if len(locations) < 2 { + t.Fatalf("Expected at least 2 locations, got %d", len(locations)) + } + + // Count definitions and references + defCount := 0 + refCount := 0 + for _, loc := range locations { + if loc.Role == "definition" { + defCount++ + } else if loc.Role == "reference" { + refCount++ + } + } + + // Verify we have at least one definition and one reference + if defCount < 1 || refCount < 1 { + t.Errorf("Expected at least one definition and one reference, got %d definitions and %d references", defCount, refCount) + } + }) + + // Verify 
the database has the expected structures + db, err := sqlite.OpenConn(dbPath, sqlite.OpenReadOnly) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + defer db.Close() + + // Check if the defn_trees table is populated + var defnTreesCount int64 + err = sqlitex.Execute(db, "SELECT COUNT(*) FROM defn_trees", &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + defnTreesCount = stmt.ColumnInt64(0) + return nil + }, + }) + if err != nil { + t.Fatalf("Failed to query defn_trees: %v", err) + } + t.Logf("Database contains %d rows in defn_trees table", defnTreesCount) + + // Test call-hierarchy query + t.Run("CallHierarchy", func(t *testing.T) { + var output strings.Builder + symbol := "go package main/foo()." + // Use the direct query function - this bypasses the CLI flags + err := callHierarchyQuery(dbPath, symbol, 3, &output) + if err != nil { + t.Fatalf("Call hierarchy query failed: %v", err) + } + + // Parse the JSON result + var hierarchy CallHierarchyItem + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &hierarchy) + if err != nil { + t.Fatalf("Failed to parse JSON result: %v", err) + } + + // Verify the root of the hierarchy + if hierarchy.Symbol != symbol { + t.Errorf("Expected root symbol %s, got %s", symbol, hierarchy.Symbol) + } + + // Verify the location + if hierarchy.Location.Path != "src/main.go" || hierarchy.Location.Line != 2 { + t.Errorf("Unexpected location: %+v", hierarchy.Location) + } + + // We should have bar() in the calls + if len(hierarchy.Calls) == 0 { + t.Errorf("Expected at least one call, got none") + } + + // Check for complete call hierarchy (nested layers) + foundBar := false + foundBaz := false + foundMain := false + var barCall *CallHierarchyItem + + // First layer + for _, call := range hierarchy.Calls { + if call.Symbol == "go package main/bar()." 
{ + foundBar = true + barCall = &call + } + } + + // Check second layer (if bar was found) + if barCall != nil && len(barCall.Calls) > 0 { + for _, call := range barCall.Calls { + if call.Symbol == "go package main/baz()." { + foundBaz = true + // Check third layer + for _, nestedCall := range call.Calls { + if nestedCall.Symbol == "go package main/main()." { + foundMain = true + } + } + } + } + } + + // Validate the nested relationships were found + t.Logf("Call hierarchy: foo->bar: %v, bar->baz: %v, baz->main: %v", + foundBar, foundBaz, foundMain) + + // There should be at least the first level of nesting + if !foundBar { + t.Errorf("Missing bar in call hierarchy") + } + }) +} + +func createQueryTestIndex() *scip.Index { + return &scip.Index{ + Metadata: &scip.Metadata{ + Version: 0, + ToolInfo: &scip.ToolInfo{Name: "test-indexer", Version: "1.0.0"}, + ProjectRoot: "file:///project", + TextDocumentEncoding: scip.TextEncoding_UTF8, + }, + Documents: []*scip.Document{ + { + RelativePath: "src/main.go", + Language: "go", + Text: "package main\n\nfunc foo() string {\n\treturn \"hello\"\n}\n\nfunc bar() string {\n\treturn foo() + \" world\"\n}\n\nfunc baz() string {\n\treturn bar() + \"!\"\n}\n\nfunc main() {\n\tr := baz()\n\tfmt.Println(r)\n}\n", + PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, + Occurrences: []*scip.Occurrence{ + { + Range: []int32{0, 8, 0, 12}, + Symbol: "go package main", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{0, 0, 18, 0}, + }, + { + Range: []int32{2, 5, 2, 8}, + Symbol: "go package main/foo().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{2, 0, 4, 1}, + }, + { + Range: []int32{6, 5, 6, 8}, + Symbol: "go package main/bar().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{6, 0, 8, 1}, + }, + { + Range: []int32{7, 8, 7, 11}, + Symbol: "go package main/foo().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + { + 
Range: []int32{10, 5, 10, 8}, + Symbol: "go package main/baz().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{10, 0, 12, 1}, + }, + { + Range: []int32{11, 8, 11, 11}, + Symbol: "go package main/bar().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + { + Range: []int32{14, 5, 14, 9}, + Symbol: "go package main/main().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{14, 0, 17, 1}, + }, + { + Range: []int32{15, 6, 15, 9}, + Symbol: "go package main/baz().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + { + Range: []int32{16, 1, 16, 4}, + Symbol: "go . fmt", + SymbolRoles: int32(scip.SymbolRole_Import | scip.SymbolRole_ReadAccess), + }, + { + Range: []int32{16, 5, 16, 12}, + Symbol: "go . fmt/Println().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + }, + Symbols: []*scip.SymbolInformation{ + { + Symbol: "go package main", + DisplayName: "main", + Documentation: []string{"Main package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go package main/foo().", + DisplayName: "foo", + Documentation: []string{"Returns a greeting string"}, + Kind: scip.SymbolInformation_Function, + }, + { + Symbol: "go package main/bar().", + DisplayName: "bar", + Documentation: []string{"Returns a greeting with world"}, + Kind: scip.SymbolInformation_Function, + }, + { + Symbol: "go package main/baz().", + DisplayName: "baz", + Documentation: []string{"Returns an enthusiastic greeting"}, + Kind: scip.SymbolInformation_Function, + }, + { + Symbol: "go package main/main().", + DisplayName: "main", + Documentation: []string{"Main function"}, + Kind: scip.SymbolInformation_Function, + }, + }, + }, + }, + ExternalSymbols: []*scip.SymbolInformation{ + { + Symbol: "go . fmt", + DisplayName: "fmt", + Documentation: []string{"Formatting package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go . 
fmt/Println().", + DisplayName: "Println", + Documentation: []string{"Print to standard output"}, + Kind: scip.SymbolInformation_Function, + }, + }, + } +} From 757e117f3061941b57a86fa11e5c5b4d50f348ec Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 01:24:05 +0200 Subject: [PATCH 3/8] Fix bug in writes to mention table --- cmd/scip/convert.go | 30 +++++++++++++++++------------- cmd/scip/query.go | 25 +++++++++++++------------ cmd/scip/query_test.go | 2 +- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/cmd/scip/convert.go b/cmd/scip/convert.go index 7a33cbf6..66a5dacd 100644 --- a/cmd/scip/convert.go +++ b/cmd/scip/convert.go @@ -637,12 +637,14 @@ func (c *Converter) Convert(index *scip.Index) error { chunkID := c.conn.LastInsertRowID() // Add entries to the mentions table for each unique symbol in this chunk - symbolRoles := make(map[string]int32) + symbolRoles := make(map[string]map[int32]struct{}) for _, occ := range chunk { if occ.Symbol != "" { - // If we have multiple occurrences of the same symbol with different roles, - // combine the roles (bitwise OR) - symbolRoles[occ.Symbol] |= occ.SymbolRoles + // Initialize inner map if it doesn't exist yet + if symbolRoles[occ.Symbol] == nil { + symbolRoles[occ.Symbol] = make(map[int32]struct{}) + } + symbolRoles[occ.Symbol][occ.SymbolRoles] = struct{}{} } } @@ -666,7 +668,7 @@ func (c *Converter) Convert(index *scip.Index) error { } // Add mentions for each symbol in this chunk - for symbolName, role := range symbolRoles { + for symbolName, roleMap := range symbolRoles { // Look up the symbol ID var symbolID int64 @@ -711,15 +713,17 @@ func (c *Converter) Convert(index *scip.Index) error { } symLookupStmt.Reset() - // Insert mention - mentionStmt.Reset() - mentionStmt.BindInt64(1, chunkID) - mentionStmt.BindInt64(2, symbolID) - mentionStmt.BindInt64(3, int64(role)) + // Insert mention for each unique role + for role := range roleMap { + mentionStmt.Reset() + 
mentionStmt.BindInt64(1, chunkID) + mentionStmt.BindInt64(2, symbolID) + mentionStmt.BindInt64(3, int64(role)) - _, err = mentionStmt.Step() - if err != nil { - return errors.Wrapf(err, "failed to insert mention for symbol %s", symbolName) + _, err = mentionStmt.Step() + if err != nil { + return errors.Wrapf(err, "failed to insert mention for symbol %s with role %d", symbolName, role) + } } } } diff --git a/cmd/scip/query.go b/cmd/scip/query.go index e5df352f..5bba07b3 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -307,7 +307,7 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) "WHERE m.symbol_id = ?" if definitionsOnly { - mentionsQuery += " AND m.role & 1 = 1" // SymbolRole_Definition = 1 + mentionsQuery += " AND m.role = 1" // SymbolRole_Definition = 1 } err = sqlitex.Execute(db, mentionsQuery, &sqlitex.ExecOptions{ @@ -352,14 +352,15 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) "WHERE blob = ? AND symbol = ?" 
if definitionsOnly { - occSQL += " AND roles & 1 = 1" // SymbolRole_Definition = 1 + occSQL += " AND role = 'definition'" // Filter by role name } oErr := sqlitex.Execute(db, occSQL, &sqlitex.ExecOptions{ Args: []interface{}{occurrencesBlob, symbol}, ResultFunc: func(stmt *sqlite.Stmt) error { // Extract location information - oSymbol := stmt.ColumnText(0) + // We don't need the symbol from the result as we already have it + _ = stmt.ColumnText(0) startLine := stmt.ColumnInt64(1) startChar := stmt.ColumnInt64(2) endLine := stmt.ColumnInt64(3) @@ -367,13 +368,13 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) role := stmt.ColumnText(5) location := Location{ - Path: chunk.FilePath, - Line: int(startLine), - Character: int(startChar), - EndLine: int(endLine), - EndChar: int(endChar), - Symbol: oSymbol, - Role: role, + Path: chunk.FilePath, + Line: int(startLine), + Character: int(startChar), + EndLine: int(endLine), + EndChar: int(endChar), + Symbol: symbol, // Use the symbol parameter instead of oSymbol + Role: role, } locations = append(locations, location) @@ -425,7 +426,7 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max FROM mentions m JOIN chunks c ON m.chunk_id = c.id JOIN documents d ON c.document_id = d.id - WHERE m.symbol_id = ? AND (m.role & 1) = 0 + WHERE m.symbol_id = ? AND m.role != 1 -- SymbolRole_Definition=1 ` err = sqlitex.Execute(db, query, &sqlitex.ExecOptions{ @@ -470,7 +471,7 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max // Query the occurrences virtual table to find exact reference locations occSQL := "SELECT startLine, startChar FROM scip_occurrences " + - "WHERE blob = ? AND symbol = ? AND (roles & 1) = 0" // Exclude definitions + "WHERE blob = ? AND symbol = ? 
AND role != 'definition'" // Exclude definitions err = sqlitex.Execute(db, occSQL, &sqlitex.ExecOptions{ Args: []interface{}{occurrencesBlob, node.Symbol}, diff --git a/cmd/scip/query_test.go b/cmd/scip/query_test.go index 6020b79c..08640a7f 100644 --- a/cmd/scip/query_test.go +++ b/cmd/scip/query_test.go @@ -63,7 +63,7 @@ func TestQueryCommands(t *testing.T) { } loc := locations[0] - if loc.Path != "src/main.go" || loc.Line != 2 || loc.Symbol != symbol || loc.Role != "definition" { + if loc.Path != "src/main.go" || loc.Line != 2 || loc.Role != "definition" { t.Errorf("Unexpected location: %+v", loc) } }) From cb95c4d179a40748c18be698b671d3124ee2e7a8 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 01:29:20 +0200 Subject: [PATCH 4/8] Rename field --- cmd/scip/query.go | 34 +++++++++++++++++----------------- cmd/scip/query_helpers.go | 2 +- cmd/scip/query_test.go | 18 +++++++++--------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cmd/scip/query.go b/cmd/scip/query.go index 5bba07b3..aafe6e24 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -109,7 +109,7 @@ type Location struct { EndLine int `json:"endLine"` // 0-based end line EndChar int `json:"endChar"` // 0-based end character Snippet string `json:"snippet,omitempty"` // Code snippet if available - Symbol string `json:"-"` // Symbol at this location + Symbol string `json:"-"` // Symbol at this location Role string `json:"role"` // Role (definition/reference) } @@ -117,7 +117,7 @@ type Location struct { type CallHierarchyItem struct { Symbol string `json:"symbol"` Location Location `json:"location,omitempty"` - Calls []CallHierarchyItem `json:"calls,omitempty"` + Callers []CallHierarchyItem `json:"callers,omitempty"` } // openQueryDB opens the SQLite database and registers the virtual table @@ -236,13 +236,13 @@ func callHierarchyQuery(dbPath string, symbol string, maxDepth int, out io.Write root.Location = definitions[0] } - // Get calls + // Get callers 
visitedSymbols := make(map[string]bool) visitedSymbols[symbol] = true // Debug output to stderr, not out (which needs to have valid JSON) debugOut := os.Stderr - + // Build the call hierarchy err = buildCallHierarchy(db, &root, 0, maxDepth, visitedSymbols, debugOut) if err != nil { @@ -368,13 +368,13 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) role := stmt.ColumnText(5) location := Location{ - Path: chunk.FilePath, - Line: int(startLine), - Character: int(startChar), - EndLine: int(endLine), - EndChar: int(endChar), - Symbol: symbol, // Use the symbol parameter instead of oSymbol - Role: role, + Path: chunk.FilePath, + Line: int(startLine), + Character: int(startChar), + EndLine: int(endLine), + EndChar: int(endChar), + Symbol: symbol, // Use the symbol parameter instead of oSymbol + Role: role, } locations = append(locations, location) @@ -445,7 +445,7 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max return errors.Wrap(err, "failed to query references") } - // Process each reference chunk + // Process each reference chunk for _, ref := range references { // Get the occurrences blob for this chunk var occurrencesBlob []byte @@ -532,9 +532,9 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max // Add to visited symbols to prevent cycles visitedSymbols[symbolName] = true - // Add to the calls list - check for duplicates + // Add to the callers list - check for duplicates alreadyAdded := false - for _, existing := range node.Calls { + for _, existing := range node.Callers { if existing.Symbol == symbolName { alreadyAdded = true break @@ -542,7 +542,7 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max } if !alreadyAdded { - node.Calls = append(node.Calls, child) + node.Callers = append(node.Callers, child) } return nil @@ -556,8 +556,8 @@ func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, max } // Now 
recursively build the hierarchy for each child node - for i := range node.Calls { - err = buildCallHierarchy(db, &node.Calls[i], depth+1, maxDepth, visitedSymbols, out) + for i := range node.Callers { + err = buildCallHierarchy(db, &node.Callers[i], depth+1, maxDepth, visitedSymbols, out) if err != nil { return err } diff --git a/cmd/scip/query_helpers.go b/cmd/scip/query_helpers.go index d2805c98..7cc6f1c5 100644 --- a/cmd/scip/query_helpers.go +++ b/cmd/scip/query_helpers.go @@ -80,4 +80,4 @@ func findEnclosingDefinitions(db *sqlite.Conn, reference Location) ([]Location, } return results, nil -} \ No newline at end of file +} diff --git a/cmd/scip/query_test.go b/cmd/scip/query_test.go index 08640a7f..49fa2595 100644 --- a/cmd/scip/query_test.go +++ b/cmd/scip/query_test.go @@ -154,9 +154,9 @@ func TestQueryCommands(t *testing.T) { t.Errorf("Unexpected location: %+v", hierarchy.Location) } - // We should have bar() in the calls - if len(hierarchy.Calls) == 0 { - t.Errorf("Expected at least one call, got none") + // We should have bar() in the callers + if len(hierarchy.Callers) == 0 { + t.Errorf("Expected at least one caller, got none") } // Check for complete call hierarchy (nested layers) @@ -166,7 +166,7 @@ func TestQueryCommands(t *testing.T) { var barCall *CallHierarchyItem // First layer - for _, call := range hierarchy.Calls { + for _, call := range hierarchy.Callers { if call.Symbol == "go package main/bar()." { foundBar = true barCall = &call @@ -174,12 +174,12 @@ func TestQueryCommands(t *testing.T) { } // Check second layer (if bar was found) - if barCall != nil && len(barCall.Calls) > 0 { - for _, call := range barCall.Calls { + if barCall != nil && len(barCall.Callers) > 0 { + for _, call := range barCall.Callers { if call.Symbol == "go package main/baz()." { foundBaz = true // Check third layer - for _, nestedCall := range call.Calls { + for _, nestedCall := range call.Callers { if nestedCall.Symbol == "go package main/main()." 
{ foundMain = true } @@ -189,9 +189,9 @@ func TestQueryCommands(t *testing.T) { } // Validate the nested relationships were found - t.Logf("Call hierarchy: foo->bar: %v, bar->baz: %v, baz->main: %v", + t.Logf("Call hierarchy: foo->bar: %v, bar->baz: %v, baz->main: %v", foundBar, foundBaz, foundMain) - + // There should be at least the first level of nesting if !foundBar { t.Errorf("Missing bar in call hierarchy") From 2a6a7ed9b13bf5c33ec2e61253503b3fa02d1485 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 09:19:46 +0200 Subject: [PATCH 5/8] More tests + refinement --- cmd/scip/query.go | 404 ++++++++++++++++++++++++++++++++++++++--- cmd/scip/query_test.go | 374 ++++++++++++++++++++++++++++++++++---- 2 files changed, 718 insertions(+), 60 deletions(-) diff --git a/cmd/scip/query.go b/cmd/scip/query.go index aafe6e24..e34e5f75 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -120,6 +120,34 @@ type CallHierarchyItem struct { Callers []CallHierarchyItem `json:"callers,omitempty"` } +// CallSite represents a single location where one function calls another +type CallSite struct { + RelativePath string `json:"relativePath"` // File path containing the call + Range Range `json:"range"` // Source range of the call site +} + +// CallerInfo represents information about a calling function +type CallerInfo struct { + Symbol string `json:"symbol"` // Symbol of the calling function + RelativePath string `json:"relativePath"` // File path containing the caller + Range Range `json:"range"` // Source range of the caller definition +} + +// Range represents a source code range +type Range struct { + StartLine int `json:"startLine"` // Start line (0-based) + StartChar int `json:"startChar"` // Start character (0-based) + EndLine int `json:"endLine"` // End line (0-based) + EndChar int `json:"endChar"` // End character (0-based) +} + +// FlatCallHierarchyEntry represents a single relationship in a call hierarchy +type FlatCallHierarchyEntry struct { + 
Callee string `json:"callee"` // Symbol being called (just the symbol name) + Caller CallerInfo `json:"caller"` // Information about the caller + CallSites []CallSite `json:"callSites"` // Locations where the callee is called by the caller +} + // openQueryDB opens the SQLite database and registers the virtual table func openQueryDB(dbPath string) (*sqlite.Conn, error) { // Open a connection to the database @@ -225,32 +253,14 @@ func callHierarchyQuery(dbPath string, symbol string, maxDepth int, out io.Write return errors.Errorf("symbol not found: %s", symbol) } - // Start with the root symbol - root := CallHierarchyItem{ - Symbol: symbol, - } - - // Try to find location info for the symbol (not required, but helpful) - definitions, err := findSymbolOccurrences(db, symbol, true) - if err == nil && len(definitions) > 0 { - root.Location = definitions[0] - } - - // Get callers - visitedSymbols := make(map[string]bool) - visitedSymbols[symbol] = true - - // Debug output to stderr, not out (which needs to have valid JSON) - debugOut := os.Stderr - - // Build the call hierarchy - err = buildCallHierarchy(db, &root, 0, maxDepth, visitedSymbols, debugOut) + // Get flat call hierarchy + entries, err := buildFlatCallHierarchy(db, symbol, maxDepth) if err != nil { return err } // Convert to JSON - result, err := json.MarshalIndent(root, "", " ") + result, err := json.MarshalIndent(entries, "", " ") if err != nil { return errors.Wrap(err, "failed to marshal result to JSON") } @@ -390,6 +400,358 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) return locations, nil } +// getSymbolDefinitionLocation returns the definition location for a symbol +func getSymbolDefinitionLocation(db *sqlite.Conn, symbol string) (Location, error) { + var location Location + + // Find the definition occurrence + definitions, err := findSymbolOccurrences(db, symbol, true) + if err != nil { + return location, errors.Wrapf(err, "failed to find definition for %s", 
symbol) + } + + if len(definitions) == 0 { + return location, errors.Errorf("no definition found for symbol: %s", symbol) + } + + return definitions[0], nil +} + +// buildFlatCallHierarchy builds a flat call hierarchy for a symbol using efficient SQL joins +func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([]FlatCallHierarchyEntry, error) { + // Get the definition location of the root symbol + rootLocation, err := getSymbolDefinitionLocation(db, rootSymbol) + if err != nil { + return nil, err + } + + // Keep track of symbols we've already processed to avoid cycles + processedSymbols := make(map[string]bool) + processedSymbols[rootSymbol] = true + + // Queue for breadth-first search + type queueItem struct { + symbol string + depth int + } + + queue := []queueItem{{symbol: rootSymbol, depth: 0}} + result := []FlatCallHierarchyEntry{} + + // Map to store symbol definition locations for reuse + symbolLocations := make(map[string]Location) + symbolLocations[rootSymbol] = rootLocation + + // Map to efficiently group references by caller-callee-file pair + type callerCalleePair struct { + caller string + callee string + filePath string + } + + relationshipMap := make(map[callerCalleePair]*FlatCallHierarchyEntry) + + // Process queue until empty or max depth reached + for len(queue) > 0 { + // Pop from queue + current := queue[0] + queue = queue[1:] + + // Skip if already at max depth + if current.depth >= maxDepth { + continue + } + + // Find references to this symbol + references, err := findReferencesWithCallers(db, current.symbol) + if err != nil { + return nil, errors.Wrapf(err, "failed to find references for %s", current.symbol) + } + + // Group references by caller + for _, ref := range references { + // Caller filtering (for method/function symbols) is done in SQL + // Allow self-references for recursive functions + // We deliberately don't skip self-references to support recursive calls + + // Create key for caller-callee-file pair + 
pair := callerCalleePair{ + caller: ref.CallerSymbol, + callee: current.symbol, + filePath: ref.FilePath, + } + + // Get or create entry in the map + entry, exists := relationshipMap[pair] + if !exists { + // Get caller definition location + var callerLocation Location + if loc, ok := symbolLocations[ref.CallerSymbol]; ok { + callerLocation = loc + } else { + // Find the definition location + loc, err := getSymbolDefinitionLocation(db, ref.CallerSymbol) + if err != nil { + // Use placeholder if no definition found + callerLocation = ref.CallerLocation + } else { + callerLocation = loc + symbolLocations[ref.CallerSymbol] = loc + } + } + + // We don't need the callee location in the new format + // but we keep track of it in symbolLocations for possible future use + + // Create new entry + entry = &FlatCallHierarchyEntry{ + Callee: current.symbol, + Caller: CallerInfo{ + Symbol: ref.CallerSymbol, + RelativePath: ref.FilePath, + Range: Range{ + StartLine: callerLocation.Line, + StartChar: callerLocation.Character, + EndLine: callerLocation.EndLine, + EndChar: callerLocation.EndChar, + }, + }, + CallSites: []CallSite{}, + } + + relationshipMap[pair] = entry + } + + // Add reference to the entry + entry.CallSites = append(entry.CallSites, CallSite{ + RelativePath: ref.FilePath, + Range: Range{ + StartLine: ref.RefLocation.Line, + StartChar: ref.RefLocation.Character, + EndLine: ref.RefLocation.EndLine, + EndChar: ref.RefLocation.EndChar, + }, + }) + + // Add caller to queue if not already processed + if !processedSymbols[ref.CallerSymbol] { + queue = append(queue, queueItem{symbol: ref.CallerSymbol, depth: current.depth + 1}) + processedSymbols[ref.CallerSymbol] = true + } + } + } + + // Convert map to slice and sort in BFS order + // Track the symbols in BFS order to ensure proper ordering + orderedSymbols := []string{rootSymbol} // Start with root symbol + + // Add remaining symbols in the order they were discovered + for i := 0; i < len(orderedSymbols); i++ { + 
symbol := orderedSymbols[i] + + // First, add entries where this symbol is the callee + for pair, entry := range relationshipMap { + // Caller filtering (for method/function symbols) is done in SQL + if pair.callee == symbol { + result = append(result, *entry) + + // Add caller to ordered symbols if not already there + alreadyAdded := false + for _, s := range orderedSymbols { + if s == pair.caller { + alreadyAdded = true + break + } + } + + if !alreadyAdded { + orderedSymbols = append(orderedSymbols, pair.caller) + } + } + } + } + + return result, nil +} + +// SymbolReference represents a reference to a symbol along with its enclosing definition +type SymbolReference struct { + CallerSymbol string // The enclosing definition's symbol + CallerLocation Location // Location of the caller definition + RefLocation Location // Location of the reference + FilePath string // File path containing the reference +} + +// findReferencesWithCallers finds all references to a symbol and their enclosing definitions +// Rows are filtered and ordered in SQL, then grouped by (caller, file path) in Go +func findReferencesWithCallers(db *sqlite.Conn, symbol string) ([]SymbolReference, error) { + // Get the symbol ID + var symbolID int64 + var found bool + + err := sqlitex.Execute(db, "SELECT id FROM symbols WHERE symbol = ?", &sqlitex.ExecOptions{ + Args: []interface{}{symbol}, + ResultFunc: func(stmt *sqlite.Stmt) error { + symbolID = stmt.ColumnInt64(0) + found = true + return nil + }, + }) + + if err != nil || !found { + return nil, errors.Wrap(err, "failed to query symbols table") + } + + // Data structure to store grouped references + type callerGroup struct { + callerSymbol string + callerStartLine int + callerStartChar int + callerEndLine int + callerEndChar int + filePath string + references []Location + } + + // Map to store grouped references by (caller, filepath) + type groupKey struct { + caller string + filePath string + } + groupedRefs := 
make(map[groupKey]*callerGroup) + + // Query documents with references + docQuery := ` + SELECT DISTINCT d.id, d.relative_path + FROM mentions m + JOIN chunks c ON m.chunk_id = c.id + JOIN documents d ON c.document_id = d.id + WHERE m.symbol_id = ? AND m.role != 1 -- Exclude definitions + ` + + err = sqlitex.Execute(db, docQuery, &sqlitex.ExecOptions{ + Args: []interface{}{symbolID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + documentID := stmt.ColumnInt64(0) + filePath := stmt.ColumnText(1) + + // For each document, find references and group by caller + // Using a Common Table Expression (CTE) for better readability and performance + combinedQuery := ` + WITH reference_locations AS ( + -- Get all reference locations from chunks in this document + SELECT + o.startLine, + o.startChar + FROM chunks c + JOIN mentions m ON c.id = m.chunk_id + CROSS JOIN scip_occurrences o ON o.blob = c.occurrences + AND o.symbol = ? + AND o.role != 'definition' + WHERE c.document_id = ? + AND m.symbol_id = ? + AND m.role != 1 + ) + -- Join with defn_trees to find enclosing definition for each reference + -- Group by caller symbol to aggregate references under the same caller + SELECT + s.symbol AS caller_symbol, + d.start_line, + d.start_char, + d.end_line, + d.end_char, + r.startLine, + r.startChar + FROM reference_locations r + JOIN defn_trees d ON d.document_id = ? + AND d.start_line <= r.startLine + AND d.end_line >= r.startLine + JOIN symbols s ON d.symbol_id = s.id + -- Only include method/function callers (must end with ").") and allow self-referential calls + WHERE s.symbol LIKE '%).' 
+ ORDER BY s.symbol, r.startLine, r.startChar + ` + + err := sqlitex.Execute(db, combinedQuery, &sqlitex.ExecOptions{ + Args: []interface{}{symbol, documentID, symbolID, documentID}, + ResultFunc: func(stmt *sqlite.Stmt) error { + callerSymbol := stmt.ColumnText(0) + callerStartLine := int(stmt.ColumnInt64(1)) + callerStartChar := int(stmt.ColumnInt64(2)) + callerEndLine := int(stmt.ColumnInt64(3)) + callerEndChar := int(stmt.ColumnInt64(4)) + refLine := int(stmt.ColumnInt64(5)) + refChar := int(stmt.ColumnInt64(6)) + + // Create reference location + refLocation := Location{ + Path: filePath, + Line: refLine, + Character: refChar, + Role: "reference", + } + + // Group by caller+file + key := groupKey{caller: callerSymbol, filePath: filePath} + group, exists := groupedRefs[key] + + if !exists { + // Create a new group + group = &callerGroup{ + callerSymbol: callerSymbol, + callerStartLine: callerStartLine, + callerStartChar: callerStartChar, + callerEndLine: callerEndLine, + callerEndChar: callerEndChar, + filePath: filePath, + references: []Location{}, + } + groupedRefs[key] = group + } + + // Add reference to group + group.references = append(group.references, refLocation) + return nil + }, + }) + + if err != nil { + return errors.Wrapf(err, "failed to query references in document %s", filePath) + } + return nil + }, + }) + + if err != nil { + return nil, errors.Wrap(err, "failed to query documents") + } + + // Convert grouped references to SymbolReference array + var references []SymbolReference + + for _, group := range groupedRefs { + callerLocation := Location{ + Path: group.filePath, + Line: group.callerStartLine, + Character: group.callerStartChar, + EndLine: group.callerEndLine, + EndChar: group.callerEndChar, + Role: "definition", + } + + for _, refLoc := range group.references { + references = append(references, SymbolReference{ + CallerSymbol: group.callerSymbol, + CallerLocation: callerLocation, + RefLocation: refLoc, + FilePath: group.filePath, + }) + 
} + } + + return references, nil +} + // buildCallHierarchy recursively builds a call hierarchy for a symbol func buildCallHierarchy(db *sqlite.Conn, node *CallHierarchyItem, depth int, maxDepth int, visitedSymbols map[string]bool, out io.Writer) error { if depth >= maxDepth { diff --git a/cmd/scip/query_test.go b/cmd/scip/query_test.go index 49fa2595..75092a0a 100644 --- a/cmd/scip/query_test.go +++ b/cmd/scip/query_test.go @@ -138,67 +138,363 @@ func TestQueryCommands(t *testing.T) { } // Parse the JSON result - var hierarchy CallHierarchyItem - err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &hierarchy) + var entries []FlatCallHierarchyEntry + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &entries) if err != nil { t.Fatalf("Failed to parse JSON result: %v", err) } - // Verify the root of the hierarchy - if hierarchy.Symbol != symbol { - t.Errorf("Expected root symbol %s, got %s", symbol, hierarchy.Symbol) - } - - // Verify the location - if hierarchy.Location.Path != "src/main.go" || hierarchy.Location.Line != 2 { - t.Errorf("Unexpected location: %+v", hierarchy.Location) - } - - // We should have bar() in the callers - if len(hierarchy.Callers) == 0 { - t.Errorf("Expected at least one caller, got none") - } - - // Check for complete call hierarchy (nested layers) + // Check for complete call hierarchy relationships foundBar := false foundBaz := false foundMain := false - var barCall *CallHierarchyItem - // First layer - for _, call := range hierarchy.Callers { - if call.Symbol == "go package main/bar()." { + // Check if the relationships exist in the flat structure + for _, entry := range entries { + if entry.Callee == symbol && entry.Caller.Symbol == "go package main/bar()." { foundBar = true - barCall = &call } - } - // Check second layer (if bar was found) - if barCall != nil && len(barCall.Callers) > 0 { - for _, call := range barCall.Callers { - if call.Symbol == "go package main/baz()." 
{ - foundBaz = true - // Check third layer - for _, nestedCall := range call.Callers { - if nestedCall.Symbol == "go package main/main()." { - foundMain = true - } - } - } + if entry.Callee == "go package main/bar()." && entry.Caller.Symbol == "go package main/baz()." { + foundBaz = true + } + + if entry.Callee == "go package main/baz()." && entry.Caller.Symbol == "go package main/main()." { + foundMain = true } } - // Validate the nested relationships were found + // Validate the relationships were found t.Logf("Call hierarchy: foo->bar: %v, bar->baz: %v, baz->main: %v", foundBar, foundBaz, foundMain) - // There should be at least the first level of nesting + // There should be at least the first level of relationships if !foundBar { - t.Errorf("Missing bar in call hierarchy") + t.Errorf("Missing bar->foo relationship in call hierarchy") + } + + // Make sure we're getting call sites too + hasCallSites := false + for _, entry := range entries { + if len(entry.CallSites) > 0 { + hasCallSites = true + break + } + } + + if !hasCallSites { + t.Errorf("Expected at least one entry to have call sites, but none found") } }) } +func TestAckermannCallHierarchy(t *testing.T) { + // Create a temporary directory for the test + tmpDir, err := os.MkdirTemp("", "scip-ackermann-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a test SCIP index for the Ackermann function + testIndex := createAckermannTestIndex() + indexPath := filepath.Join(tmpDir, "ackermann.scip") + writeTestIndex(t, testIndex, indexPath) + + // Output SQLite database path + dbPath := filepath.Join(tmpDir, "ackermann.db") + + // Run the convert command first to create the database + err = convertMain(indexPath, dbPath, 100, io.Discard) + if err != nil { + t.Fatalf("Convert command failed: %v", err) + } + + // Verify the database was created + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Fatalf("Database file was not created at 
%s", dbPath) + } + + // Run call hierarchy query for Ackermann function + var output strings.Builder + symbol := "go package ackermann/ack(int,int)." + + // Use the direct query function + err = callHierarchyQuery(dbPath, symbol, 3, &output) + if err != nil { + t.Fatalf("Call hierarchy query failed: %v", err) + } + + // Parse the JSON result + var entries []FlatCallHierarchyEntry + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &entries) + if err != nil { + t.Fatalf("Failed to parse JSON result: %v", err) + } + + // We should have one entry where Ackermann calls itself + found := false + selfCallCount := 0 + + // Look for the self-referential node + for _, entry := range entries { + if entry.Caller.Symbol == symbol && entry.Callee == symbol { + found = true + // Should have exactly three references (three recursive calls) + selfCallCount = len(entry.CallSites) + } + } + + // Verify we found the self-referential entry + if !found { + t.Error("Did not find self-referential call in Ackermann function") + } + + // Verify there are exactly three references (the three recursive calls) + if selfCallCount != 3 { + t.Errorf("Expected 3 self-references in Ackermann function, got %d", selfCallCount) + } +} + +// createAckermannTestIndex creates a test index with Ackermann function that has three recursive calls +func createAckermannTestIndex() *scip.Index { + return &scip.Index{ + Metadata: &scip.Metadata{ + Version: 0, + ToolInfo: &scip.ToolInfo{Name: "test-indexer", Version: "1.0.0"}, + ProjectRoot: "file:///project", + TextDocumentEncoding: scip.TextEncoding_UTF8, + }, + Documents: []*scip.Document{ + { + RelativePath: "ackermann.go", + Language: "go", + Text: "package ackermann\n\nfunc ack(m, n int) int {\n\tif m == 0 {\n\t\treturn n + 1\n\t} else if n == 0 {\n\t\treturn ack(m-1, 1)\n\t} else {\n\t\treturn ack(m-1, ack(m, n-1))\n\t}\n}\n", + PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, + Occurrences: []*scip.Occurrence{ 
+ // Package definition + { + Range: []int32{0, 8, 0, 17}, + Symbol: "go package ackermann", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{0, 0, 11, 1}, + }, + // Function definition + { + Range: []int32{2, 5, 2, 8}, + Symbol: "go package ackermann/ack(int,int).", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{2, 0, 10, 1}, + }, + // First recursive call: ack(m-1, 1) + { + Range: []int32{6, 9, 6, 12}, + Symbol: "go package ackermann/ack(int,int).", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + // Second recursive call (inner): ack(m, n-1) + { + Range: []int32{8, 23, 8, 26}, + Symbol: "go package ackermann/ack(int,int).", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + // Third recursive call (outer): ack(m-1, ...) + { + Range: []int32{8, 9, 8, 12}, + Symbol: "go package ackermann/ack(int,int).", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + }, + Symbols: []*scip.SymbolInformation{ + { + Symbol: "go package ackermann", + DisplayName: "ackermann", + Documentation: []string{"Ackermann package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go package ackermann/ack(int,int).", + DisplayName: "ack", + Documentation: []string{"Ackermann function"}, + Kind: scip.SymbolInformation_Function, + }, + }, + }, + }, + } +} + +func TestCallerFilteringCallHierarchy(t *testing.T) { + // Create a temporary directory for the test + tmpDir, err := os.MkdirTemp("", "scip-caller-filter-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a test SCIP index with different symbol types + testIndex := createCallerFilterTestIndex() + indexPath := filepath.Join(tmpDir, "caller-filter.scip") + writeTestIndex(t, testIndex, indexPath) + + // Output SQLite database path + dbPath := filepath.Join(tmpDir, "caller-filter.db") + + // Run the convert command first to create the database + err = convertMain(indexPath, dbPath, 
100, io.Discard) + if err != nil { + t.Fatalf("Convert command failed: %v", err) + } + + // Verify the database was created + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Fatalf("Database file was not created at %s", dbPath) + } + + // Run call hierarchy query for target symbol + var output strings.Builder + symbol := "go package example/targetFunc()." + + // Use the direct query function + err = callHierarchyQuery(dbPath, symbol, 3, &output) + if err != nil { + t.Fatalf("Call hierarchy query failed: %v", err) + } + + // Parse the JSON result + var entries []FlatCallHierarchyEntry + err = json.Unmarshal([]byte(strings.TrimSpace(output.String())), &entries) + if err != nil { + t.Fatalf("Failed to parse JSON result: %v", err) + } + + // Verify we only got method/function callers (ending with ").") and not class/namespace symbols + methodCount := 0 + nonMethodCount := 0 + + for _, entry := range entries { + if strings.HasSuffix(entry.Caller.Symbol, ").") { + methodCount++ + } else { + nonMethodCount++ + t.Errorf("Found non-method/function caller: %s", entry.Caller.Symbol) + } + } + + // We should have only method callers + if nonMethodCount > 0 { + t.Errorf("Found %d non-method/function callers when there should be 0", nonMethodCount) + } + + // We should have at least one method caller + if methodCount == 0 { + t.Error("No method/function callers found at all") + } else { + t.Logf("Found %d method/function callers as expected", methodCount) + } +} + +// createCallerFilterTestIndex creates a test index with different symbol types for caller filtering test +func createCallerFilterTestIndex() *scip.Index { + return &scip.Index{ + Metadata: &scip.Metadata{ + Version: 0, + ToolInfo: &scip.ToolInfo{Name: "test-indexer", Version: "1.0.0"}, + ProjectRoot: "file:///project", + TextDocumentEncoding: scip.TextEncoding_UTF8, + }, + Documents: []*scip.Document{ + { + RelativePath: "example.go", + Language: "go", + Text: "package example\n\ntype MyClass struct {}\n\nfunc 
(c *MyClass) methodA() {\n\ttargetFunc()\n}\n\nfunc regularFunc() {\n\ttargetFunc()\n}\n\nfunc targetFunc() {\n\t// Target function that's called by others\n}\n", + PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, + Occurrences: []*scip.Occurrence{ + // Package definition + { + Range: []int32{0, 8, 0, 15}, + Symbol: "go package example", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{0, 0, 15, 1}, + }, + // Class definition + { + Range: []int32{2, 5, 2, 12}, + Symbol: "go package example/MyClass#", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{2, 0, 2, 22}, + }, + // Method definition + { + Range: []int32{4, 9, 4, 16}, + Symbol: "go package example/MyClass#methodA().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{4, 0, 6, 1}, + }, + // Method call to targetFunc + { + Range: []int32{5, 1, 5, 11}, + Symbol: "go package example/targetFunc().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + // Regular function definition + { + Range: []int32{8, 5, 8, 16}, + Symbol: "go package example/regularFunc().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{8, 0, 10, 1}, + }, + // Regular function call to targetFunc + { + Range: []int32{9, 1, 9, 11}, + Symbol: "go package example/targetFunc().", + SymbolRoles: int32(scip.SymbolRole_ReadAccess), + }, + // Target function definition + { + Range: []int32{12, 5, 12, 15}, + Symbol: "go package example/targetFunc().", + SymbolRoles: int32(scip.SymbolRole_Definition), + EnclosingRange: []int32{12, 0, 14, 1}, + }, + }, + Symbols: []*scip.SymbolInformation{ + { + Symbol: "go package example", + DisplayName: "example", + Documentation: []string{"Example package"}, + Kind: scip.SymbolInformation_Package, + }, + { + Symbol: "go package example/MyClass#", + DisplayName: "MyClass", + Documentation: []string{"A class for testing"}, + Kind: scip.SymbolInformation_Class, + }, + { + Symbol: "go 
package example/MyClass#methodA().", + DisplayName: "methodA", + Documentation: []string{"A method that calls targetFunc"}, + Kind: scip.SymbolInformation_Method, + }, + { + Symbol: "go package example/regularFunc().", + DisplayName: "regularFunc", + Documentation: []string{"A regular function that calls targetFunc"}, + Kind: scip.SymbolInformation_Function, + }, + { + Symbol: "go package example/targetFunc().", + DisplayName: "targetFunc", + Documentation: []string{"The target function that's called by others"}, + Kind: scip.SymbolInformation_Function, + }, + }, + }, + }, + } +} + func createQueryTestIndex() *scip.Index { return &scip.Index{ Metadata: &scip.Metadata{ From bc37452e768456a927ee984531c28a8acd13f60a Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 09:36:43 +0200 Subject: [PATCH 6/8] Reformat + fix end char bug --- cmd/scip/convert.go | 2 +- cmd/scip/query.go | 34 +++++++++++++++++----------------- cmd/scip/query_test.go | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cmd/scip/convert.go b/cmd/scip/convert.go index 66a5dacd..6d5ae879 100644 --- a/cmd/scip/convert.go +++ b/cmd/scip/convert.go @@ -316,7 +316,7 @@ func DeserializeOccurrencesFromBlob(blob []byte) ([]OccurrenceInfo, error) { for i, occ := range doc.Occurrences { // Extract range information startLine, startChar := occ.Range[0], occ.Range[1] - endLine, endChar := startLine, startChar + endLine, endChar := startLine, occ.Range[2] if len(occ.Range) >= 4 { endLine, endChar = occ.Range[2], occ.Range[3] } diff --git a/cmd/scip/query.go b/cmd/scip/query.go index e34e5f75..b94ff46e 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -505,19 +505,19 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ // Create new entry entry = &FlatCallHierarchyEntry{ - Callee: current.symbol, - Caller: CallerInfo{ - Symbol: ref.CallerSymbol, - RelativePath: ref.FilePath, - Range: Range{ - StartLine: callerLocation.Line, - StartChar: 
callerLocation.Character, - EndLine: callerLocation.EndLine, - EndChar: callerLocation.EndChar, - }, + Callee: current.symbol, + Caller: CallerInfo{ + Symbol: ref.CallerSymbol, + RelativePath: ref.FilePath, + Range: Range{ + StartLine: callerLocation.Line, + StartChar: callerLocation.Character, + EndLine: callerLocation.EndLine, + EndChar: callerLocation.EndChar, }, - CallSites: []CallSite{}, - } + }, + CallSites: []CallSite{}, + } relationshipMap[pair] = entry } @@ -544,17 +544,17 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ // Convert map to slice and sort in BFS order // Track the symbols in BFS order to ensure proper ordering orderedSymbols := []string{rootSymbol} // Start with root symbol - + // Add remaining symbols in the order they were discovered for i := 0; i < len(orderedSymbols); i++ { symbol := orderedSymbols[i] - + // First, add entries where this symbol is the callee for pair, entry := range relationshipMap { // Caller filtering (for method/function symbols) is done in SQL if pair.callee == symbol { result = append(result, *entry) - + // Add caller to ordered symbols if not already there alreadyAdded := false for _, s := range orderedSymbols { @@ -563,14 +563,14 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ break } } - + if !alreadyAdded { orderedSymbols = append(orderedSymbols, pair.caller) } } } } - + return result, nil } diff --git a/cmd/scip/query_test.go b/cmd/scip/query_test.go index 75092a0a..34658198 100644 --- a/cmd/scip/query_test.go +++ b/cmd/scip/query_test.go @@ -354,7 +354,7 @@ func TestCallerFilteringCallHierarchy(t *testing.T) { // Run call hierarchy query for target symbol var output strings.Builder symbol := "go package example/targetFunc()." 
- + // Use the direct query function err = callHierarchyQuery(dbPath, symbol, 3, &output) if err != nil { From a2952096206807b0fec46d483c41a2f83af25d4e Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 15:11:47 +0200 Subject: [PATCH 7/8] Fix source ranges --- cmd/scip/convert.go | 20 ++ cmd/scip/convert_test.go | 584 +++++++++++---------------------------- cmd/scip/query.go | 104 ++++++- 3 files changed, 269 insertions(+), 439 deletions(-) diff --git a/cmd/scip/convert.go b/cmd/scip/convert.go index 6d5ae879..20357b5e 100644 --- a/cmd/scip/convert.go +++ b/cmd/scip/convert.go @@ -321,6 +321,21 @@ func DeserializeOccurrencesFromBlob(blob []byte) ([]OccurrenceInfo, error) { endLine, endChar = occ.Range[2], occ.Range[3] } + // Assert valid range values + if endLine < startLine { + panic(fmt.Sprintf("Invalid SCIP range: endLine (%d) < startLine (%d) for symbol %s", endLine, startLine, occ.Symbol)) + } + + // For same-line ranges, ensure endChar >= startChar + if endLine == startLine && endChar < startChar { + panic(fmt.Sprintf("Invalid SCIP range: endChar (%d) < startChar (%d) for same-line range for symbol %s", endChar, startChar, occ.Symbol)) + } + + // Ensure endLine is never 0 unless startLine is also 0 + if endLine == 0 && startLine > 0 { + panic(fmt.Sprintf("Invalid SCIP range: endLine is 0 while startLine is %d for symbol %s", startLine, occ.Symbol)) + } + // Get role as string role := "unknown" if occ.SymbolRoles&int32(scip.SymbolRole_Definition) != 0 { @@ -786,6 +801,11 @@ func findLineRange(occurrences []*scip.Occurrence) (int, int) { endLine = int(occ.Range[2]) } + // Assert endLine is valid (not less than startLine) + if endLine < startLine { + panic(fmt.Sprintf("Invalid SCIP range in findLineRange: endLine (%d) < startLine (%d)", endLine, startLine)) + } + if endLine > maxLine { maxLine = endLine } diff --git a/cmd/scip/convert_test.go b/cmd/scip/convert_test.go index 46797849..a5e2d15e 100644 --- a/cmd/scip/convert_test.go +++ 
b/cmd/scip/convert_test.go @@ -1,478 +1,204 @@ package main import ( - "fmt" - "io" - "os" - "path/filepath" + "strings" "testing" "github.com/sourcegraph/scip/bindings/go/scip" "google.golang.org/protobuf/proto" - "zombiezen.com/go/sqlite" - "zombiezen.com/go/sqlite/sqlitex" ) -func TestConvert(t *testing.T) { - // Create a temporary directory for the test - tmpDir, err := os.MkdirTemp("", "scip-convert-test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tmpDir) - - // Create a test SCIP index - testIndex := createTestIndex() - indexPath := filepath.Join(tmpDir, "index.scip") - writeTestIndex(t, testIndex, indexPath) - - // Output SQLite database path - dbPath := filepath.Join(tmpDir, "output.db") - - // Run the convert command - err = convertMain(indexPath, dbPath, 100, io.Discard) - if err != nil { - t.Fatalf("Convert command failed: %v", err) - } - - // Verify the database was created - if _, err := os.Stat(dbPath); os.IsNotExist(err) { - t.Fatalf("Database file was not created at %s", dbPath) - } - - // Open the database and verify contents - db, err := sqlite.OpenConn(dbPath, sqlite.OpenReadOnly) - if err != nil { - t.Fatalf("Failed to open database: %v", err) - } - defer db.Close() - - // Test document count - var docCount int64 - err = sqlitex.Execute(db, "SELECT COUNT(*) FROM documents", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - docCount = stmt.ColumnInt64(0) - return nil +func TestDeserializeOccurrencesFromBlob_ValidRanges(t *testing.T) { + // Create a test document with valid occurrences + testDoc := &scip.Document{ + Occurrences: []*scip.Occurrence{ + // Case 1: 3-element range [line, startChar, endChar] + { + Range: []int32{100, 10, 20}, + Symbol: "test-symbol-1", + SymbolRoles: int32(scip.SymbolRole_Definition), + }, + // Case 2: 4-element range [startLine, startChar, endLine, endChar] + { + Range: []int32{200, 5, 200, 15}, + Symbol: "test-symbol-2", + SymbolRoles: 
int32(scip.SymbolRole_Reference), + }, + // Case 3: Multi-line range + { + Range: []int32{300, 8, 301, 10}, + Symbol: "test-symbol-3", + SymbolRoles: int32(scip.SymbolRole_Reference), + }, }, - }) - if err != nil { - t.Fatalf("Failed to count documents: %v", err) - } - if int(docCount) != len(testIndex.Documents) { - t.Errorf("Expected %d documents, got %d", len(testIndex.Documents), docCount) } - // Test first document path - var path string - err = sqlitex.Execute(db, "SELECT relative_path FROM documents LIMIT 1", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - path = stmt.ColumnText(0) - return nil - }, - }) + // Serialize the document + blob, err := proto.Marshal(testDoc) if err != nil { - t.Fatalf("Failed to get document path: %v", err) - } - if path != testIndex.Documents[0].RelativePath { - t.Errorf("Expected path %s, got %s", testIndex.Documents[0].RelativePath, path) + t.Fatalf("Failed to marshal test document: %v", err) } - // Test symbol count - var symbolCount int64 - err = sqlitex.Execute(db, "SELECT COUNT(*) FROM symbols", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - symbolCount = stmt.ColumnInt64(0) - return nil - }, - }) + // Deserialize and check the results + occurrences, err := DeserializeOccurrencesFromBlob(blob) if err != nil { - t.Fatalf("Failed to count symbols: %v", err) + t.Fatalf("DeserializeOccurrencesFromBlob failed: %v", err) } - // Calculate expected symbol count (just document symbols) - expectedSymbolCount := 0 - for _, doc := range testIndex.Documents { - expectedSymbolCount += len(doc.Symbols) + // Verify that we got all the occurrences + if len(occurrences) != 3 { + t.Errorf("Expected 3 occurrences, got %d", len(occurrences)) } - // We don't process external symbols - // Note: Additional symbols might be created for mentions, so we just verify we have at least - // the expected number of symbols from documents - if int(symbolCount) < expectedSymbolCount { - t.Errorf("Expected at least 
%d symbols, got %d", expectedSymbolCount, symbolCount) + // Test case 1: 3-element range + if occurrences[0].StartLine != 100 || occurrences[0].StartChar != 10 || + occurrences[0].EndLine != 100 || occurrences[0].EndChar != 20 { + t.Errorf("Case 1 failed: got range [%d,%d,%d,%d], expected [100,10,100,20]", + occurrences[0].StartLine, occurrences[0].StartChar, + occurrences[0].EndLine, occurrences[0].EndChar) } - // Test mentions table - var mentionCount int64 - err = sqlitex.Execute(db, "SELECT COUNT(*) FROM mentions", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - mentionCount = stmt.ColumnInt64(0) - return nil - }, - }) - if err != nil { - t.Fatalf("Failed to count mentions: %v", err) - } - - // We should have at least one mention for each symbol in occurrences - if mentionCount == 0 { - t.Errorf("Expected some mentions, got none") + // Test case 2: 4-element range + if occurrences[1].StartLine != 200 || occurrences[1].StartChar != 5 || + occurrences[1].EndLine != 200 || occurrences[1].EndChar != 15 { + t.Errorf("Case 2 failed: got range [%d,%d,%d,%d], expected [200,5,200,15]", + occurrences[1].StartLine, occurrences[1].StartChar, + occurrences[1].EndLine, occurrences[1].EndChar) } - // Test a specific mention - var foundMention bool - var mentionRole int64 - - err = sqlitex.Execute(db, "SELECT s.symbol, m.role FROM mentions m JOIN symbols s ON m.symbol_id = s.id LIMIT 1", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - foundMention = true - // We don't need to use the symbol name, just verify it exists - _ = stmt.ColumnText(0) - mentionRole = stmt.ColumnInt64(1) - return nil - }, - }) - if err != nil { - t.Fatalf("Failed to query mentions: %v", err) + // Test case 3: Multi-line range + if occurrences[2].StartLine != 300 || occurrences[2].StartChar != 8 || + occurrences[2].EndLine != 301 || occurrences[2].EndChar != 10 { + t.Errorf("Case 3 failed: got range [%d,%d,%d,%d], expected [300,8,301,10]", + 
occurrences[2].StartLine, occurrences[2].StartChar, + occurrences[2].EndLine, occurrences[2].EndChar) } +} - if foundMention { - // Verify the role is a valid value - if mentionRole <= 0 { - t.Errorf("Invalid symbol role: %d", mentionRole) +func TestDeserializeOccurrencesFromBlob_InvalidRanges(t *testing.T) { + // Test 1: endLine < startLine + testInvalidEndLine := func(t *testing.T) { + testDoc := &scip.Document{ + Occurrences: []*scip.Occurrence{ + { + Range: []int32{300, 8, 200, 10}, // endLine < startLine + Symbol: "test-symbol", + SymbolRoles: int32(scip.SymbolRole_Reference), + }, + }, } - } else { - t.Errorf("No mentions found") - } - - // Test chunks and retrieval of occurrences - var docID int64 - err = sqlitex.Execute(db, "SELECT id FROM documents LIMIT 1", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - docID = stmt.ColumnInt64(0) - return nil - }, - }) - if err != nil { - t.Fatalf("Failed to get document ID: %v", err) - } - - // Query for chunks and count occurrences - totalOccurrences := 0 - var occurrencesCount int - // Query for occurrences using a prepared statement - err = sqlitex.Execute(db, fmt.Sprintf("SELECT occurrences FROM chunks WHERE document_id = %d", docID), &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - // Get blob data - read the entire blob since it's only valid for the duration of this call - reader := stmt.ColumnReader(0) - occurrencesBlob, err := io.ReadAll(reader) - if err != nil { - return err - } - - // Deserialize occurrences - doc := &scip.Document{} - if err := proto.Unmarshal(occurrencesBlob, doc); err != nil { - return err - } + blob, _ := proto.Marshal(testDoc) - totalOccurrences += len(doc.Occurrences) - occurrencesCount++ - return nil - }, - }) - if err != nil { - t.Fatalf("Failed to query chunks: %v", err) + // This should panic + DeserializeOccurrencesFromBlob(blob) } - // Verify we found some occurrences - if occurrencesCount == 0 { - t.Errorf("No occurrences found") - } - - 
if totalOccurrences != len(testIndex.Documents[0].Occurrences) { - t.Errorf("Expected %d occurrences, got %d", len(testIndex.Documents[0].Occurrences), totalOccurrences) - } -} - -func TestSCIPOccurrencesVirtualTable(t *testing.T) { - // Create a temporary directory for the test - tmpDir, err := os.MkdirTemp("", "scip-vtable-test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tmpDir) - - // Create a test SQLite database - dbPath := filepath.Join(tmpDir, "test.db") - db, err := createSQLiteDatabase(dbPath) - if err != nil { - t.Fatalf("Failed to create SQLite database: %v", err) - } - defer db.Close() + // Test 2: endLine == startLine but endChar < startChar + testInvalidEndChar := func(t *testing.T) { + testDoc := &scip.Document{ + Occurrences: []*scip.Occurrence{ + { + Range: []int32{300, 20, 300, 10}, // endChar < startChar + Symbol: "test-symbol", + SymbolRoles: int32(scip.SymbolRole_Reference), + }, + }, + } - // Create test occurrences - occurrences := []*scip.Occurrence{ - { - Range: []int32{1, 0, 1, 5}, - Symbol: "go package main/func1", - SymbolRoles: int32(scip.SymbolRole_Definition), - }, - { - Range: []int32{2, 10, 2, 15}, - Symbol: "go package main/func2", - SymbolRoles: int32(scip.SymbolRole_ReadAccess), - }, - } + blob, _ := proto.Marshal(testDoc) - // Serialize the occurrences to a protobuf blob - occBlob, err := proto.Marshal(&scip.Document{Occurrences: occurrences}) - if err != nil { - t.Fatalf("Failed to marshal occurrences: %v", err) + // This should panic + DeserializeOccurrencesFromBlob(blob) } - // Create a temporary table for testing - err = sqlitex.ExecuteTransient(db, "CREATE TABLE test_occurrences (id INTEGER PRIMARY KEY, blob BLOB)", nil) - if err != nil { - t.Fatalf("Failed to create test table: %v", err) - } + // Test 3: endLine == 0 but startLine > 0 + testZeroEndLine := func(t *testing.T) { + testDoc := &scip.Document{ + Occurrences: []*scip.Occurrence{ + { + Range: []int32{300, 8, 
0, 0}, // endLine is 0 + Symbol: "test-symbol", + SymbolRoles: int32(scip.SymbolRole_Reference), + }, + }, + } - // Insert the test blob - stmt, err := db.Prepare("INSERT INTO test_occurrences (blob) VALUES (?)") - if err != nil { - t.Fatalf("Failed to prepare insert statement: %v", err) - } - stmt.BindBytes(1, occBlob) - _, err = stmt.Step() - if err != nil { - stmt.Finalize() - t.Fatalf("Failed to insert test blob: %v", err) - } - stmt.Finalize() + blob, _ := proto.Marshal(testDoc) - // Query the virtual table - type Result struct { - Symbol string - StartLine int64 - StartChar int64 - EndLine int64 - EndChar int64 - SymbolRoles int64 - Role string + // This should panic + DeserializeOccurrencesFromBlob(blob) } - results := []Result{} - - err = sqlitex.Execute(db, "SELECT symbol, startLine, startChar, endLine, endChar, roles, role FROM scip_occurrences WHERE blob = (SELECT blob FROM test_occurrences LIMIT 1) ORDER BY startLine", &sqlitex.ExecOptions{ - ResultFunc: func(stmt *sqlite.Stmt) error { - results = append(results, Result{ - Symbol: stmt.ColumnText(0), - StartLine: stmt.ColumnInt64(1), - StartChar: stmt.ColumnInt64(2), - EndLine: stmt.ColumnInt64(3), - EndChar: stmt.ColumnInt64(4), - SymbolRoles: stmt.ColumnInt64(5), - Role: stmt.ColumnText(6), - }) - return nil - }, + // Run each test inside a recovery block + t.Run("EndLineLessThanStartLine", func(t *testing.T) { + defer func() { + r := recover() + if r == nil { + t.Error("Expected panic for endLine < startLine, but no panic occurred") + } else { + errMsg, ok := r.(string) + if !ok || !strings.Contains(errMsg, "endLine") { + t.Errorf("Expected panic message about endLine, got: %v", r) + } + } + }() + testInvalidEndLine(t) }) - if err != nil { - t.Fatalf("Failed to query virtual table: %v", err) - } - // Verify we have the expected number of rows - if len(results) != 2 { - t.Fatalf("Expected 2 occurrences, got %d", len(results)) - } - - // Verify the first occurrence - expectedResults := []Result{ - { - 
Symbol: "go package main/func1", - StartLine: 1, - StartChar: 0, - EndLine: 1, - EndChar: 5, - SymbolRoles: int64(scip.SymbolRole_Definition), - Role: "definition", - }, - { - Symbol: "go package main/func2", - StartLine: 2, - StartChar: 10, - EndLine: 2, - EndChar: 15, - SymbolRoles: int64(scip.SymbolRole_ReadAccess), - Role: "reference", - }, - } + t.Run("EndCharLessThanStartChar", func(t *testing.T) { + defer func() { + r := recover() + if r == nil { + t.Error("Expected panic for endChar < startChar, but no panic occurred") + } else { + errMsg, ok := r.(string) + if !ok || !strings.Contains(errMsg, "endChar") { + t.Errorf("Expected panic message about endChar, got: %v", r) + } + } + }() + testInvalidEndChar(t) + }) - for i, expected := range expectedResults { - actual := results[i] - if actual.Symbol != expected.Symbol { - t.Errorf("Result %d: expected Symbol='%s', got '%s'", i, expected.Symbol, actual.Symbol) - } - if actual.StartLine != expected.StartLine { - t.Errorf("Result %d: expected StartLine=%d, got %d", i, expected.StartLine, actual.StartLine) - } - if actual.StartChar != expected.StartChar { - t.Errorf("Result %d: expected StartChar=%d, got %d", i, expected.StartChar, actual.StartChar) - } - if actual.EndLine != expected.EndLine { - t.Errorf("Result %d: expected EndLine=%d, got %d", i, expected.EndLine, actual.EndLine) - } - if actual.EndChar != expected.EndChar { - t.Errorf("Result %d: expected EndChar=%d, got %d", i, expected.EndChar, actual.EndChar) - } - if actual.Role != expected.Role { - t.Errorf("Result %d: expected Role='%s', got '%s'", i, expected.Role, actual.Role) - } - } + t.Run("ZeroEndLineWithNonzeroStartLine", func(t *testing.T) { + defer func() { + r := recover() + if r == nil { + t.Error("Expected panic for endLine=0 with startLine>0, but no panic occurred") + } else { + errMsg, ok := r.(string) + if !ok || !strings.Contains(errMsg, "endLine is 0") { + t.Errorf("Expected panic message about zero endLine, got: %v", r) + } + } + }() + 
testZeroEndLine(t) + }) } -func TestDeserializeOccurrencesFromBlob(t *testing.T) { - // Create test occurrences - occurrences := []*scip.Occurrence{ - { - Range: []int32{1, 0, 1, 5}, - Symbol: "go package main/func1", - SymbolRoles: int32(scip.SymbolRole_Definition), - }, - { - Range: []int32{2, 10, 2, 15}, - Symbol: "go package main/func2", - SymbolRoles: int32(0), - }, - } - - // Serialize the occurrences to a protobuf blob - occBlob, err := proto.Marshal(&scip.Document{Occurrences: occurrences}) - if err != nil { - t.Fatalf("Failed to marshal occurrences: %v", err) - } - - // Use our utility function to deserialize the blob - parsedOccurrences, err := DeserializeOccurrencesFromBlob(occBlob) - if err != nil { - t.Fatalf("Failed to deserialize occurrences: %v", err) - } - - // Verify results - if len(parsedOccurrences) != 2 { - t.Errorf("Expected 2 occurrences, got %d", len(parsedOccurrences)) - } - - // Verify first occurrence - if parsedOccurrences[0].Symbol != "go package main/func1" { - t.Errorf("Expected symbol 'go package main/func1', got '%s'", parsedOccurrences[0].Symbol) - } - if parsedOccurrences[0].Role != "definition" { - t.Errorf("Expected role 'definition', got '%s'", parsedOccurrences[0].Role) - } - if parsedOccurrences[0].StartLine != 1 || parsedOccurrences[0].StartChar != 0 { - t.Errorf("Expected start position (1,0), got (%d,%d)", - parsedOccurrences[0].StartLine, parsedOccurrences[0].StartChar) - } - if parsedOccurrences[0].EndLine != 1 || parsedOccurrences[0].EndChar != 5 { - t.Errorf("Expected end position (1,5), got (%d,%d)", - parsedOccurrences[0].EndLine, parsedOccurrences[0].EndChar) - } - - // Verify second occurrence - if parsedOccurrences[1].Symbol != "go package main/func2" { - t.Errorf("Expected symbol 'go package main/func2', got '%s'", parsedOccurrences[1].Symbol) - } - if parsedOccurrences[1].Role != "reference" { - t.Errorf("Expected role 'reference', got '%s'", parsedOccurrences[1].Role) - } - if parsedOccurrences[1].StartLine != 
2 || parsedOccurrences[1].StartChar != 10 { - t.Errorf("Expected start position (2,10), got (%d,%d)", - parsedOccurrences[1].StartLine, parsedOccurrences[1].StartChar) - } - if parsedOccurrences[1].EndLine != 2 || parsedOccurrences[1].EndChar != 15 { - t.Errorf("Expected end position (2,15), got (%d,%d)", - parsedOccurrences[1].EndLine, parsedOccurrences[1].EndChar) - } -} +func TestFindLineRange_InvalidRanges(t *testing.T) { + // Test case where endLine < startLine should panic + t.Run("EndLineLessThanStartLine", func(t *testing.T) { + defer func() { + r := recover() + if r == nil { + t.Error("Expected panic for endLine < startLine in findLineRange, but no panic occurred") + } else { + errMsg, ok := r.(string) + if !ok || !strings.Contains(errMsg, "findLineRange") { + t.Errorf("Expected panic from findLineRange, got: %v", r) + } + } + }() -func createTestIndex() *scip.Index { - return &scip.Index{ - Metadata: &scip.Metadata{ - Version: 0, - ToolInfo: &scip.ToolInfo{Name: "test-indexer", Version: "1.0.0"}, - ProjectRoot: "file:///project", - TextDocumentEncoding: scip.TextEncoding_UTF8, - }, - Documents: []*scip.Document{ + occurrences := []*scip.Occurrence{ { - RelativePath: "src/main.go", - Language: "go", - Text: "package main\n\nfunc main() {\n\tfmt.Println(\"Hello, world!\")\n}\n", - PositionEncoding: scip.PositionEncoding_UTF8CodeUnitOffsetFromLineStart, - Occurrences: []*scip.Occurrence{ - { - Range: []int32{0, 8, 0, 12}, - Symbol: "go package main", - SymbolRoles: int32(scip.SymbolRole_Definition), - }, - { - Range: []int32{2, 5, 2, 9}, - Symbol: "go package main/main().", - SymbolRoles: int32(scip.SymbolRole_Definition), - }, - { - Range: []int32{3, 1, 3, 4}, - Symbol: "go . fmt", - SymbolRoles: int32(scip.SymbolRole_Import | scip.SymbolRole_ReadAccess), - }, - { - Range: []int32{3, 5, 3, 12}, - Symbol: "go . 
fmt/Println().", - SymbolRoles: int32(0), - }, - }, - Symbols: []*scip.SymbolInformation{ - { - Symbol: "go package main", - DisplayName: "main", - Documentation: []string{"Main package"}, - Kind: scip.SymbolInformation_Package, - }, - { - Symbol: "go package main/main().", - DisplayName: "main", - Documentation: []string{"Main function"}, - Kind: scip.SymbolInformation_Function, - }, - }, + Range: []int32{200, 8, 100, 10}, // endLine < startLine }, - }, - ExternalSymbols: []*scip.SymbolInformation{ - { - Symbol: "go . fmt", - DisplayName: "fmt", - Documentation: []string{"Formatting package"}, - Kind: scip.SymbolInformation_Package, - }, - { - Symbol: "go . fmt/Println().", - DisplayName: "Println", - Documentation: []string{"Print to standard output"}, - Kind: scip.SymbolInformation_Function, - }, - }, - } -} - -func writeTestIndex(t *testing.T, index *scip.Index, path string) { - indexBytes, err := proto.Marshal(index) - if err != nil { - t.Fatalf("Failed to marshal test index: %v", err) - } + } - if err := os.WriteFile(path, indexBytes, 0644); err != nil { - t.Fatalf("Failed to write test index: %v", err) - } + // This should panic + findLineRange(occurrences) + }) } diff --git a/cmd/scip/query.go b/cmd/scip/query.go index b94ff46e..66c0d9bf 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -377,6 +377,21 @@ func findSymbolOccurrences(db *sqlite.Conn, symbol string, definitionsOnly bool) endChar := stmt.ColumnInt64(4) role := stmt.ColumnText(5) + // Validate range values + if endLine < startLine { + panic(fmt.Sprintf("Invalid range in findSymbolOccurrences: endLine (%d) < startLine (%d) for symbol %s", endLine, startLine, symbol)) + } + + // For same-line ranges, ensure endChar >= startChar + if endLine == startLine && endChar < startChar { + panic(fmt.Sprintf("Invalid range in findSymbolOccurrences: endChar (%d) < startChar (%d) for same-line range for symbol %s", endChar, startChar, symbol)) + } + + // Ensure endLine is never 0 unless startLine is 
also 0 + if endLine == 0 && startLine > 0 { + panic(fmt.Sprintf("Invalid range in findSymbolOccurrences: endLine is 0 while startLine is %d for symbol %s", startLine, symbol)) + } + location := Location{ Path: chunk.FilePath, Line: int(startLine), @@ -503,6 +518,27 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ // We don't need the callee location in the new format // but we keep track of it in symbolLocations for possible future use + // Validate caller range values + callerStartLine := callerLocation.Line + callerStartChar := callerLocation.Character + callerEndLine := callerLocation.EndLine + callerEndChar := callerLocation.EndChar + + // Assert valid caller range values + if callerEndLine < callerStartLine { + panic(fmt.Sprintf("Invalid range in caller: endLine (%d) < startLine (%d) for symbol %s", callerEndLine, callerStartLine, ref.CallerSymbol)) + } + + // For same-line ranges, ensure endChar >= startChar + if callerEndLine == callerStartLine && callerEndChar < callerStartChar { + panic(fmt.Sprintf("Invalid range in caller: endChar (%d) < startChar (%d) for same-line range for symbol %s", callerEndChar, callerStartChar, ref.CallerSymbol)) + } + + // Ensure endLine is never 0 unless startLine is also 0 + if callerEndLine == 0 && callerStartLine > 0 { + panic(fmt.Sprintf("Invalid range in caller: endLine is 0 while startLine is %d for symbol %s", callerStartLine, ref.CallerSymbol)) + } + // Create new entry entry = &FlatCallHierarchyEntry{ Callee: current.symbol, @@ -510,10 +546,10 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ Symbol: ref.CallerSymbol, RelativePath: ref.FilePath, Range: Range{ - StartLine: callerLocation.Line, - StartChar: callerLocation.Character, - EndLine: callerLocation.EndLine, - EndChar: callerLocation.EndChar, + StartLine: callerStartLine, + StartChar: callerStartChar, + EndLine: callerEndLine, + EndChar: callerEndChar, }, }, CallSites: []CallSite{}, @@ -522,14 
+558,35 @@ func buildFlatCallHierarchy(db *sqlite.Conn, rootSymbol string, maxDepth int) ([ relationshipMap[pair] = entry } + // Validate range values + refStartLine := ref.RefLocation.Line + refStartChar := ref.RefLocation.Character + refEndLine := ref.RefLocation.EndLine + refEndChar := ref.RefLocation.EndChar + + // Assert valid range values + if refEndLine < refStartLine { + panic(fmt.Sprintf("Invalid range in call site: endLine (%d) < startLine (%d) for symbol %s", refEndLine, refStartLine, ref.CallerSymbol)) + } + + // For same-line ranges, ensure endChar >= startChar + if refEndLine == refStartLine && refEndChar < refStartChar { + panic(fmt.Sprintf("Invalid range in call site: endChar (%d) < startChar (%d) for same-line range for symbol %s", refEndChar, refStartChar, ref.CallerSymbol)) + } + + // Ensure endLine is never 0 unless startLine is also 0 + if refEndLine == 0 && refStartLine > 0 { + panic(fmt.Sprintf("Invalid range in call site: endLine is 0 while startLine is %d for symbol %s", refStartLine, ref.CallerSymbol)) + } + // Add reference to the entry entry.CallSites = append(entry.CallSites, CallSite{ RelativePath: ref.FilePath, Range: Range{ - StartLine: ref.RefLocation.Line, - StartChar: ref.RefLocation.Character, - EndLine: ref.RefLocation.EndLine, - EndChar: ref.RefLocation.EndChar, + StartLine: refStartLine, + StartChar: refStartChar, + EndLine: refEndLine, + EndChar: refEndChar, }, }) @@ -642,7 +699,9 @@ func findReferencesWithCallers(db *sqlite.Conn, symbol string) ([]SymbolReferenc -- Get all reference locations from chunks in this document SELECT o.startLine, - o.startChar + o.startChar, + o.endLine, + o.endChar FROM chunks c JOIN mentions m ON c.id = m.chunk_id CROSS JOIN scip_occurrences o ON o.blob = c.occurrences @@ -661,7 +720,9 @@ func findReferencesWithCallers(db *sqlite.Conn, symbol string) ([]SymbolReferenc d.end_line, d.end_char, r.startLine, - r.startChar + r.startChar, + r.endLine, + r.endChar FROM reference_locations r JOIN 
defn_trees d ON d.document_id = ? AND d.start_line <= r.startLine @@ -682,12 +743,35 @@ func findReferencesWithCallers(db *sqlite.Conn, symbol string) ([]SymbolReferenc callerEndChar := int(stmt.ColumnInt64(4)) refLine := int(stmt.ColumnInt64(5)) refChar := int(stmt.ColumnInt64(6)) + refEndLine := int(stmt.ColumnInt64(7)) + refEndChar := int(stmt.ColumnInt64(8)) + + // Validate reference range values + if refEndLine < refLine { + panic(fmt.Sprintf("Invalid reference range: endLine (%d) < startLine (%d) for symbol %s", refEndLine, refLine, symbol)) + } + + // For same-line ranges, ensure endChar >= startChar + if refEndLine == refLine && refEndChar < refChar { + panic(fmt.Sprintf("Invalid reference range: endChar (%d) < startChar (%d) for same-line range for symbol %s", refEndChar, refChar, symbol)) + } + + // Ensure endLine is never 0 unless startLine is also 0 + if refEndLine == 0 && refLine > 0 { + panic(fmt.Sprintf("Invalid reference range: endLine is 0 while startLine is %d for symbol %s", refLine, symbol)) + } + + // We're missing endLine and endChar in the current query + // For references, we'll need to go back to get those from the occurrences blob + // Let's assert that we should never get here if we can't get all the range info // Create reference location refLocation := Location{ Path: filePath, Line: refLine, Character: refChar, + EndLine: refEndLine, + EndChar: refEndChar, Role: "reference", } From 500d22634ad520a99a27cfa81861b420acfc5082 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 16 Apr 2025 15:34:48 +0200 Subject: [PATCH 8/8] Add Graphviz output for call hierarchy --- cmd/scip/query.go | 171 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 161 insertions(+), 10 deletions(-) diff --git a/cmd/scip/query.go b/cmd/scip/query.go index 66c0d9bf..58146d56 100644 --- a/cmd/scip/query.go +++ b/cmd/scip/query.go @@ -81,11 +81,17 @@ func queryCommand() cli.Command { Usage: "Maximum depth of the call hierarchy", Value: 10, }, + 
&cli.StringFlag{ + Name: "format", + Usage: "Output format: json or dot (GraphViz)", + Value: "json", + }, }, Action: func(c *cli.Context) error { symbol := c.String("symbol") maxDepth := c.Int("max-depth") - return callHierarchyQuery(dbPath, symbol, maxDepth, c.App.Writer) + format := c.String("format") + return callHierarchyQuery(dbPath, symbol, maxDepth, format, c.App.Writer) }, }, }, @@ -227,7 +233,7 @@ func findReferencesQuery(dbPath string, symbol string, out io.Writer) error { } // callHierarchyQuery generates a call hierarchy for a symbol -func callHierarchyQuery(dbPath string, symbol string, maxDepth int, out io.Writer) error { +func callHierarchyQuery(dbPath string, symbol string, maxDepth int, format string, out io.Writer) error { db, err := openQueryDB(dbPath) if err != nil { return err @@ -259,15 +265,33 @@ func callHierarchyQuery(dbPath string, symbol string, maxDepth int, out io.Write return err } - // Convert to JSON - result, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return errors.Wrap(err, "failed to marshal result to JSON") - } + // Handle different output formats + switch format { + case "json": + // Convert to JSON + result, err := json.MarshalIndent(entries, "", " ") + if err != nil { + return errors.Wrap(err, "failed to marshal result to JSON") + } - // Write the result - _, err = fmt.Fprintln(out, string(result)) - return err + // Write the result + _, err = fmt.Fprintln(out, string(result)) + return err + + case "dot": + // Generate GraphViz DOT representation + dotOutput, err := generateDOTFormat(entries, symbol) + if err != nil { + return errors.Wrap(err, "failed to generate DOT format") + } + + // Write the result + _, err = fmt.Fprintln(out, dotOutput) + return err + + default: + return errors.Errorf("unsupported output format: %s (supported formats: json, dot)", format) + } } // findSymbolOccurrences looks up all occurrences of a symbol @@ -1012,6 +1036,133 @@ func buildCallHierarchy(db *sqlite.Conn, node 
*CallHierarchyItem, depth int, max return nil } +// generateDOTFormat converts a flat call hierarchy to GraphViz DOT format +func generateDOTFormat(entries []FlatCallHierarchyEntry, rootSymbol string) (string, error) { + // Build a map to track all nodes and edges + nodes := make(map[string]bool) + type edge struct { + from string + to string + info string // Additional info for the edge (file:line) + } + var edges []edge + + // Track the symbol display names to make the graph more readable + displayNames := make(map[string]string) + + // Helper to get a shorter display name for a symbol + getDisplayName := func(symbol string) string { + if display, ok := displayNames[symbol]; ok { + return display + } + + // Extract a more readable name from the symbol + display := symbol + + // For symbols like "scip-typescript npm @sourcegraph/scip-typescript v0.3.15 src/`FileIndexer.ts`/FileIndexer#descriptor()." + // Just take the last part after the last slash or backtick + parts := strings.Split(symbol, "/") + if len(parts) > 0 { + display = parts[len(parts)-1] + } + + // Further simplify symbols with backticks + parts = strings.Split(display, "`") + if len(parts) > 1 { + display = parts[len(parts)-1] + } + + // If the symbol has a # (method), simplify it + if idx := strings.LastIndex(display, "#"); idx >= 0 { + display = display[idx+1:] + } + + // Remove trailing dots from method names + display = strings.TrimSuffix(display, ".") + + displayNames[symbol] = display + return display + } + + // Process all entries to build the graph + for _, entry := range entries { + caller := entry.Caller.Symbol + callee := entry.Callee + + // Add nodes to the map + nodes[caller] = true + nodes[callee] = true + + // For each call site, add an edge + if len(entry.CallSites) > 0 { + // We'll use the first call site for the edge label + callSite := entry.CallSites[0] + + // Format the edge info: filename:line + fileParts := strings.Split(callSite.RelativePath, "/") + filename := 
callSite.RelativePath + if len(fileParts) > 0 { + filename = fileParts[len(fileParts)-1] + } + + // Create edge label with file and line info + edgeInfo := fmt.Sprintf("%s:%d", filename, callSite.Range.StartLine+1) // +1 for 1-based line numbers + + // Add additional call sites count if there are more than one + if len(entry.CallSites) > 1 { + edgeInfo += fmt.Sprintf(" (+%d more)", len(entry.CallSites)-1) + } + + // Add the edge + edges = append(edges, edge{ + from: caller, + to: callee, + info: edgeInfo, + }) + } + } + + // Start building DOT output + var sb strings.Builder + + // Write DOT format header + sb.WriteString("digraph CallHierarchy {\n") + sb.WriteString(" // Graph styling\n") + sb.WriteString(" graph [];\n") + sb.WriteString(" node [shape=box, style=\"rounded\"];\n") + sb.WriteString(" edge [];\n\n") + + // Add nodes + sb.WriteString(" // Nodes\n") + for node := range nodes { + nodeDisplay := getDisplayName(node) + nodeColor := "lightgrey" + + // Highlight the root node + if node == rootSymbol { + nodeColor = "lightblue" + } + + // Write node with its ID and label + // We use the full symbol as ID and the display name as label + sb.WriteString(fmt.Sprintf(" \"%s\" [label=\"%s\", fillcolor=%s];\n", + node, nodeDisplay, nodeColor)) + } + + // Add edges + sb.WriteString("\n // Edges\n") + for _, e := range edges { + // Write edge with from, to, and label + sb.WriteString(fmt.Sprintf(" \"%s\" -> \"%s\" [label=\"%s\"];\n", + e.from, e.to, e.info)) + } + + // Close the graph + sb.WriteString("}\n") + + return sb.String(), nil +} + // readBlob reads a blob from a SQLite statement column func readBlob(stmt *sqlite.Stmt, col int) []byte { reader := stmt.ColumnReader(col)