Commit 707db08c authored by Marko Kollo 😄
Browse files

Initial more exact download criteria.

parent ee057584
Pipeline #5382 passed with stage in 12 minutes and 25 seconds
......@@ -122,13 +122,18 @@ class MLP:
Downloads Stanza resources if not present in resources directory.
By default all is downloaded into data directory under package directory.
model_types = ["depparse", "lemma", "pos", "tokenize"]
stanza_resource_path = pathlib.Path(resource_dir) / "stanza"
if logger:"Downloading Stanza models into the directory: {str(stanza_resource_path)}")
if logger:"Downloading Stanza models into the directory: {str(stanza_resource_path)}")
stanza_resource_path.mkdir(parents=True, exist_ok=True) # Create the directories with default permissions including parents.
for language_code in supported_langs:
# rglob is for recursive filename pattern matching, if it matches nothing
# then the necessary files do not exist and we should download them.
if not list(stanza_resource_path.rglob("{}*".format(language_code))):
lang_dir_exists = True if list(stanza_resource_path.rglob("{}*".format(language_code))) else False
model_folders_exists = all([(stanza_resource_path / language_code / model_type).exists() for model_type in model_types])
if not (lang_dir_exists and model_folders_exists):, str(stanza_resource_path))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment