From 6259f02160ff7d9a54588bf58a9d940298c96f1c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 7 Feb 2021 15:31:27 +1100 Subject: [PATCH 1/2] Update version --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ae54e1a..b7e6296 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -version = 1.0.2 +version = 1.0.3 description = Contextually-keyed word vectors url = https://github.com/explosion/sense2vec author = Explosion From c53b8d26333598eba44f3c5f4177ed9f2ee84c14 Mon Sep 17 00:00:00 2001 From: ericfeunekes Date: Tue, 2 Mar 2021 16:07:26 -0400 Subject: [PATCH 2/2] moved last with output file in scripts/01 parse.py (#132) Co-authored-by: Eric Feunekes --- scripts/01_parse.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/01_parse.py b/scripts/01_parse.py index 1f7e5bd..700abd0 100644 --- a/scripts/01_parse.py +++ b/scripts/01_parse.py @@ -12,9 +12,9 @@ out_dir=("Path to output directory", "positional", None, str), spacy_model=("Name of spaCy model to use", "positional", None, str), n_process=("Number of processes (multiprocessing)", "option", "n", int), - max_docs=("Maximum docs per batch", "option", "m", int), + max_docs=("Maximum docs per batch", "option", "m", int), ) -def main(in_file, out_dir, spacy_model="en_core_web_sm", n_process=1, max_docs=10**6): +def main(in_file, out_dir, spacy_model="en_core_web_sm", n_process=1, max_docs=10 ** 6): """ Step 1: Parse raw text with spaCy @@ -50,12 +50,13 @@ def main(in_file, out_dir, spacy_model="en_core_web_sm", n_process=1, max_docs=1 f.write(doc_bin_bytes) msg.good(f"Saved parsed docs to file", output_file.resolve()) doc_bin = DocBin(attrs=["POS", "TAG", "DEP", "ENT_TYPE", "ENT_IOB"]) + batch_num += 1 + output_file = output_path / f"{input_path.stem}-{batch_num}.spacy" + doc_bin_bytes = doc_bin.to_bytes() with output_file.open("wb") as f: - batch_num += 1 - output_file = output_path / f"{input_path.stem}-{batch_num}.spacy" - doc_bin_bytes = doc_bin.to_bytes() f.write(doc_bin_bytes) - msg.good(f"Complete. Saved final parsed docs to file", output_file.resolve()) + msg.good(f"Complete. Saved final parsed docs to file", output_file.resolve()) + if __name__ == "__main__": plac.call(main)