Add a `lang` field to the Model tuples so that each model card can list the language's abbreviation (e.g. `en`, `zh`) in its `language` metadata

AngledLuffa · AngledLuffa · commit 477acd88bfca · 2022-05-26T11:53:17.000-07:00
diff --git a/hugging_corenlp.py b/hugging_corenlp.py
@@ -16,52 +16,52 @@
 
 from huggingface_hub import  Repository, HfApi, HfFolder
 
-def get_model_card(lang):
+def get_model_card(lang, model):
     now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
     model_card = """---
 tags:
 - corenlp
 library_tag: corenlp
-language:
-- {lang}
+language: {lang}
 license: gpl-2.0
 ---
-# Core NLP model for {lang}
+# Core NLP model for {model}
 CoreNLP is your one stop shop for natural language processing in Java! CoreNLP enables users to derive linguistic annotations for text, including token and sentence boundaries, parts of speech, named entities, numeric and time values, dependency and constituency parses, coreference, sentiment, quote attributions, and relations.
 Find more about it in [our website](https://stanfordnlp.github.io/CoreNLP) and our [GitHub repository](https://github.com/stanfordnlp/CoreNLP).
 
 This card and repo were automatically prepared with `hugging_corenlp.py` in the `stanfordnlp/huggingface-models` repo
 
 Last updated {now}
-""".format(lang=lang, now=now)
+""".format(lang=lang, model=model, now=now)
     return model_card
 
+# lang is an abbrev to use in the model card
 # local_name is a potential alternate name for the file
 # remote_name is the name to use when pushing remotely
 # repo_name is the repo name if corenlp-model is not suitable for some reason
-Model = namedtuple("Model", 'model_name, local_name, remote_name, repo_name')
+Model = namedtuple("Model", 'model_name, lang, local_name, remote_name, repo_name')
 
 MODELS = [
-    Model("CoreNLP",          "stanford-corenlp-latest.zip",                     "stanford-corenlp-latest.zip", "CoreNLP"),
-    Model("arabic",           "stanford-arabic-corenlp-models-current.jar",      None,                          None),
-    Model("chinese",          "stanford-chinese-corenlp-models-current.jar",     None,                          None),
-    Model("english-default",  "stanford-corenlp-models-current.jar",             None,                          None),
-    Model("english-extra",    "stanford-english-corenlp-models-current.jar",     None,                          None),
-    Model("english-kbp",      "stanford-english-kbp-corenlp-models-current.jar", None,                          None),
-    Model("french",           "stanford-french-corenlp-models-current.jar",      None,                          None),
-    Model("german",           "stanford-german-corenlp-models-current.jar",      None,                          None),
-    Model("hungarian",        "stanford-hungarian-corenlp-models-current.jar",   None,                          None),
-    Model("italian",          "stanford-italian-corenlp-models-current.jar",     None,                          None),
-    Model("spanish",          "stanford-spanish-corenlp-models-current.jar",     None,                          None),
+    Model("CoreNLP",          "en",   "stanford-corenlp-latest.zip",                     "stanford-corenlp-latest.zip", "CoreNLP"),
+    Model("arabic",           "ar",   "stanford-arabic-corenlp-models-current.jar",      None,                          None),
+    Model("chinese",          "zh",   "stanford-chinese-corenlp-models-current.jar",     None,                          None),
+    Model("english-default",  "en",   "stanford-corenlp-models-current.jar",             None,                          None),
+    Model("english-extra",    "en",   "stanford-english-corenlp-models-current.jar",     None,                          None),
+    Model("english-kbp",      "en",   "stanford-english-kbp-corenlp-models-current.jar", None,                          None),
+    Model("french",           "fr",   "stanford-french-corenlp-models-current.jar",      None,                          None),
+    Model("german",           "de",   "stanford-german-corenlp-models-current.jar",      None,                          None),
+    Model("hungarian",        "hu",   "stanford-hungarian-corenlp-models-current.jar",   None,                          None),
+    Model("italian",          "it",   "stanford-italian-corenlp-models-current.jar",     None,                          None),
+    Model("spanish",          "es",   "stanford-spanish-corenlp-models-current.jar",     None,                          None),
 ]
 
-def write_model_card(repo_local_path, model):
+def write_model_card(repo_local_path, lang, model):
     """
     Write a README for the current model to the given path
     """
     readme_path = os.path.join(repo_local_path, "README.md")
     with open(readme_path, "w") as f:
-        f.write(get_model_card(model))
+        f.write(get_model_card(lang, model))
 
 def parse_args():
     parser = argparse.ArgumentParser()
@@ -79,6 +79,7 @@ def push_to_hub():
 
     for model in MODELS:
         # Create the repository
+        lang = model.lang
         model_name = model.model_name
         repo_name = model.repo_name if model.repo_name else "corenlp-%s" % model_name
         repo_url = api.create_repo(
@@ -117,7 +118,7 @@ def push_to_hub():
         shutil.copy(src, dst)
 
         # Create the model card
-        write_model_card(repo_local_path, model_name)
+        write_model_card(repo_local_path, lang, model_name)
 
         # Push the model
         # note: the error of not having anything to push will hopefully