diff --git a/redisgraph_bulk_loader/entity_file.py b/redisgraph_bulk_loader/entity_file.py
index 2771421..be392c8 100644
--- a/redisgraph_bulk_loader/entity_file.py
+++ b/redisgraph_bulk_loader/entity_file.py
@@ -65,6 +65,11 @@ def typed_prop_to_binary(prop_val, prop_type):
     # Remove leading and trailing whitespace
     prop_val = prop_val.strip()
 
+    if prop_val == "":
+        # An empty string indicates a NULL property.
+        # TODO This is not allowed in Cypher, consider how to handle it here rather than in-module.
+        return struct.pack(format_str, 0)
+
     # TODO allow ID type specification
     if prop_type == Type.LONG:
         try:
@@ -107,7 +112,7 @@ def typed_prop_to_binary(prop_val, prop_type):
         return array_prop_to_binary(format_str, prop_val)
 
     # If it hasn't returned by this point, it is trying to set it to a type that it can't adopt
-    raise Exception("unable to parse [" + prop_val + "] with type ["+repr(prop_type)+"]")
+    raise SchemaError("unable to parse [" + prop_val + "] with type ["+repr(prop_type)+"]")
 
 
 # Convert a single CSV property field with an inferred type into a binary stream.
@@ -227,14 +232,14 @@ def convert_header_with_schema(self, header):
             # Multiple colons found in column name, emit error.
             # TODO might need to check for backtick escapes
             if len(pair) > 2:
-                raise CSVError("Field '%s' had %d colons" % field, len(field))
+                raise CSVError("%s: Field '%s' had %d colons" % (self.infile.name, field, len(pair) - 1))
 
             # Convert the column type.
             col_type = convert_schema_type(pair[1].upper().strip())
 
             # If the column did not have a name but the type requires one, emit an error.
             if len(pair[0]) == 0 and col_type not in (Type.ID, Type.START_ID, Type.END_ID, Type.IGNORE):
-                raise SchemaError("Each property in the header should be a colon-separated pair")
+                raise SchemaError("%s: Each property in the header should be a colon-separated pair" % (self.infile.name,))
             else:
                 # We have a column name and a type.
                 # Only store the name if the column's values should be added as properties.
diff --git a/redisgraph_bulk_loader/label.py b/redisgraph_bulk_loader/label.py
index 0fea1a8..80013b6 100644
--- a/redisgraph_bulk_loader/label.py
+++ b/redisgraph_bulk_loader/label.py
@@ -63,7 +63,11 @@ def process_entities(self):
                         id_field = self.id_namespace + '.' + str(id_field)
                     self.update_node_dictionary(id_field)
 
-                row_binary = self.pack_props(row)
+                try:
+                    row_binary = self.pack_props(row)
+                except SchemaError as e:
+                    # TODO why is line_num off by one?
+                    raise SchemaError("%s:%d %s" % (self.infile.name, self.reader.line_num - 1, str(e)))
                 row_binary_len = len(row_binary)
                 # If the addition of this entity will make the binary token grow too large,
                 # send the buffer now.
diff --git a/redisgraph_bulk_loader/relation_type.py b/redisgraph_bulk_loader/relation_type.py
index cec0aa3..cdb0205 100644
--- a/redisgraph_bulk_loader/relation_type.py
+++ b/redisgraph_bulk_loader/relation_type.py
@@ -61,12 +61,17 @@ def process_entities(self):
                     src = self.query_buffer.nodes[start_id]
                     dest = self.query_buffer.nodes[end_id]
                 except KeyError as e:
-                    print("Relationship specified a non-existent identifier. src: %s; dest: %s" % (row[self.start_id], row[self.end_id]))
+                    print("%s:%d Relationship specified a non-existent identifier. src: %s; dest: %s" %
+                          (self.infile.name, self.reader.line_num - 1, row[self.start_id], row[self.end_id]))
                     if self.config.skip_invalid_edges is False:
                         raise e
                     continue
                 fmt = "=QQ" # 8-byte unsigned ints for src and dest
-                row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
+                try:
+                    row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
+                except SchemaError as e:
+                    # Use the same line_num - 1 offset as the KeyError message above so both report the same row.
+                    raise SchemaError("%s:%d %s" % (self.infile.name, self.reader.line_num - 1, str(e)))
                 row_binary_len = len(row_binary)
                 # If the addition of this entity will make the binary token grow too large,
                 # send the buffer now.
diff --git a/test/test_bulk_loader.py b/test/test_bulk_loader.py
index 79645c4..5d817af 100644
--- a/test/test_bulk_loader.py
+++ b/test/test_bulk_loader.py
@@ -660,7 +660,7 @@ def test16_error_on_schema_failure(self):
         except Exception as e:
             # Verify that the correct exception is raised.
             self.assertEqual(sys.exc_info()[0].__name__, 'SchemaError')
-            self.assertIn("Could not parse 'strval' as an array", e.args)
+            self.assertIn("Could not parse 'strval' as an array", str(e))
 
     def test17_ensure_index_is_created(self):
         graphname = "index_test"