Skip to content

Commit 477acd8

Browse files
committed
Add lang as a field to the models so that we can put an abbreviation of the lang on the model cards
1 parent 879b33f commit 477acd8

File tree

1 file changed

+21
-20
lines changed

1 file changed

+21
-20
lines changed

hugging_corenlp.py

Lines changed: 21 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -16,52 +16,52 @@
1616

1717
from huggingface_hub import Repository, HfApi, HfFolder
1818

19-
def get_model_card(lang):
19+
def get_model_card(lang, model):
2020
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
2121
model_card = """---
2222
tags:
2323
- corenlp
2424
library_tag: corenlp
25-
language:
26-
- {lang}
25+
language: {lang}
2726
license: gpl-2.0
2827
---
29-
# Core NLP model for {lang}
28+
# Core NLP model for {model}
3029
CoreNLP is your one stop shop for natural language processing in Java! CoreNLP enables users to derive linguistic annotations for text, including token and sentence boundaries, parts of speech, named entities, numeric and time values, dependency and constituency parses, coreference, sentiment, quote attributions, and relations.
3130
Find more about it in [our website](https://stanfordnlp.github.io/CoreNLP) and our [GitHub repository](https://github.com/stanfordnlp/CoreNLP).
3231
3332
This card and repo were automatically prepared with `hugging_corenlp.py` in the `stanfordnlp/huggingface-models` repo
3433
3534
Last updated {now}
36-
""".format(lang=lang, now=now)
35+
""".format(lang=lang, model=model, now=now)
3736
return model_card
3837

38+
# lang is an abbrev to use in the model card
3939
# local_name is a potential alternate name for the file
4040
# remote_name is the name to use when pushing remotely
4141
# repo_name is the repo name if corenlp-model is not suitable for some reason
42-
Model = namedtuple("Model", 'model_name, local_name, remote_name, repo_name')
42+
Model = namedtuple("Model", 'model_name, lang, local_name, remote_name, repo_name')
4343

4444
MODELS = [
45-
Model("CoreNLP", "stanford-corenlp-latest.zip", "stanford-corenlp-latest.zip", "CoreNLP"),
46-
Model("arabic", "stanford-arabic-corenlp-models-current.jar", None, None),
47-
Model("chinese", "stanford-chinese-corenlp-models-current.jar", None, None),
48-
Model("english-default", "stanford-corenlp-models-current.jar", None, None),
49-
Model("english-extra", "stanford-english-corenlp-models-current.jar", None, None),
50-
Model("english-kbp", "stanford-english-kbp-corenlp-models-current.jar", None, None),
51-
Model("french", "stanford-french-corenlp-models-current.jar", None, None),
52-
Model("german", "stanford-german-corenlp-models-current.jar", None, None),
53-
Model("hungarian", "stanford-hungarian-corenlp-models-current.jar", None, None),
54-
Model("italian", "stanford-italian-corenlp-models-current.jar", None, None),
55-
Model("spanish", "stanford-spanish-corenlp-models-current.jar", None, None),
45+
Model("CoreNLP", "en", "stanford-corenlp-latest.zip", "stanford-corenlp-latest.zip", "CoreNLP"),
46+
Model("arabic", "ar", "stanford-arabic-corenlp-models-current.jar", None, None),
47+
Model("chinese", "zh", "stanford-chinese-corenlp-models-current.jar", None, None),
48+
Model("english-default", "en", "stanford-corenlp-models-current.jar", None, None),
49+
Model("english-extra", "en", "stanford-english-corenlp-models-current.jar", None, None),
50+
Model("english-kbp", "en", "stanford-english-kbp-corenlp-models-current.jar", None, None),
51+
Model("french", "fr", "stanford-french-corenlp-models-current.jar", None, None),
52+
Model("german", "de", "stanford-german-corenlp-models-current.jar", None, None),
53+
Model("hungarian", "hu", "stanford-hungarian-corenlp-models-current.jar", None, None),
54+
Model("italian", "it", "stanford-italian-corenlp-models-current.jar", None, None),
55+
Model("spanish", "es", "stanford-spanish-corenlp-models-current.jar", None, None),
5656
]
5757

58-
def write_model_card(repo_local_path, model):
58+
def write_model_card(repo_local_path, lang, model):
5959
"""
6060
Write a README for the current model to the given path
6161
"""
6262
readme_path = os.path.join(repo_local_path, "README.md")
6363
with open(readme_path, "w") as f:
64-
f.write(get_model_card(model))
64+
f.write(get_model_card(lang, model))
6565

6666
def parse_args():
6767
parser = argparse.ArgumentParser()
@@ -79,6 +79,7 @@ def push_to_hub():
7979

8080
for model in MODELS:
8181
# Create the repository
82+
lang = model.lang
8283
model_name = model.model_name
8384
repo_name = model.repo_name if model.repo_name else "corenlp-%s" % model_name
8485
repo_url = api.create_repo(
@@ -117,7 +118,7 @@ def push_to_hub():
117118
shutil.copy(src, dst)
118119

119120
# Create the model card
120-
write_model_card(repo_local_path, model_name)
121+
write_model_card(repo_local_path, lang, model_name)
121122

122123
# Push the model
123124
# note: the error of not having anything to push will hopefully

0 commit comments

Comments
 (0)