texta / texta-mlp-python · Commit aa11932f

Authored Aug 23, 2021 by Marko Kollo

Tests for on-demand resource loading.

Parent: f54be4bc
Changes: 2
tests/test_mlp.py
import json

import pytest
import regex as re

from texta_mlp.entity_mapper import EntityMapper
from texta_mlp.mlp import MLP
...
...
@@ -236,3 +238,22 @@ def test_parsing_empty_list_in_dictionary(mlp: MLP):
     result = mlp.process_docs([{"empty_list_field": []}], doc_paths=["empty_list_field"])
     for key in result:
         assert "mlp" not in key
+
+
+def test_that_models_are_loaded_on_demand():
+    pipeline = MLP(language_codes=["et", "en"], logging_level="info", use_gpu=False)
+    stanza_pipelines = pipeline._stanza_pipelines
+    assert len(stanza_pipelines.keys()) == 0
+    result = pipeline.process(raw_text="Tere, minu nimi on Joonas, kas saaksite öelda, mis kell praegu on?", analyzers=["lemmas"], lang="et")
+    assert "et" in pipeline._stanza_pipelines
+    assert len(pipeline._stanza_pipelines.keys()) == 1
+    result = pipeline.process(raw_text="Hello there, my name is Joonas, how do you do!?", analyzers=["lemmas"])
+    assert "en" in pipeline._stanza_pipelines
+    assert len(pipeline._stanza_pipelines) == 2
+
+
+def test_that_entity_mapper_is_loaded_on_demand():
+    pipeline = MLP(language_codes=["et"], logging_level="info", use_gpu=False)
+    assert pipeline._entity_mapper is None
+    pipeline.process(raw_text="Tere, minu nimi on Joonas, kas saaksite öelda, mis kell praegu on?", analyzers=["entities"], lang="et")
+    assert isinstance(pipeline._entity_mapper, EntityMapper)
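The new tests read pipeline._stanza_pipelines and pipeline._entity_mapper directly, which is presumably why this commit also renames those attributes in texta_mlp/mlp.py from double to single leading underscores: a double underscore triggers Python's name mangling, so from outside the class the attribute is only reachable under its mangled name. A minimal illustration of that difference (the Lazy class below is a sketch for this note, not part of texta_mlp):

class Lazy:
    def __init__(self):
        self.__cache = {}  # stored as _Lazy__cache because of name mangling
        self._store = {}   # single underscore: private by convention only


obj = Lazy()
print(obj._store)               # {} -- readable under its own name, as the tests do
print(obj._Lazy__cache)         # {} -- the mangled name is the only way in from outside
print(hasattr(obj, "__cache"))  # False -- no attribute under the unmangled name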
texta_mlp/mlp.py
...
...
@@ -81,7 +81,7 @@ class MLP:
         self.use_default_lang = use_default_language_code
         self.resource_dir = resource_dir
-        self.__stanza_pipelines = {}
+        self._stanza_pipelines = {}
         self.logging_level = logging_level
         self.use_gpu = use_gpu
         self.stanza_resource_path = pathlib.Path(self.resource_dir) / "stanza"
...
...
@@ -93,7 +93,7 @@ class MLP:
         self.prepare_resources(refresh_data)
-        self.__entity_mapper = None
+        self._entity_mapper = None
         self.loaded_entity_files = []
         self.not_entities = self._load_not_entities()
...
...
@@ -235,15 +235,14 @@ class MLP:
     def get_entity_mapper(self):
-        if self.__entity_mapper is None:
-            self.__entity_mapper = self._load_entity_mapper()
-        return self.__entity_mapper
+        if self._entity_mapper is None:
+            self._entity_mapper = self._load_entity_mapper()
+        return self._entity_mapper


     def get_stanza_pipeline(self, lang: str):
-        if lang not in self.__stanza_pipelines:
+        if lang not in self._stanza_pipelines:
             try:
-                self.__stanza_pipelines[lang] = stanza.Pipeline(
+                self._stanza_pipelines[lang] = stanza.Pipeline(
                     lang=lang,
                     dir=str(self.stanza_resource_path),
                     processors=self._get_stanza_processors(lang),
...
...
@@ -253,7 +252,7 @@ class MLP:
             # This is for CUDA OOM exceptions. Fall back to CPU if needed.
             except RuntimeError:
-                self.__stanza_pipelines[lang] = stanza.Pipeline(
+                self._stanza_pipelines[lang] = stanza.Pipeline(
                     lang=lang,
                     dir=str(self.stanza_resource_path),
                     processors=self._get_stanza_processors(lang),
...
...
@@ -261,7 +260,7 @@ class MLP:
                     logging_level=self.logging_level,
                 )
-        return self.__stanza_pipelines[lang]
+        return self._stanza_pipelines[lang]


     def _get_stanza_tokens(self, lang: str, raw_text: str):
...
...
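Taken together, the mlp.py changes keep the existing lazy-initialisation pattern, just under the renamed attributes: a dict keyed by language code is checked first, a Stanza pipeline is only constructed the first time a language is requested, and a RuntimeError (CUDA out-of-memory) falls back to a CPU pipeline. A rough, self-contained sketch of that pattern, where _load() is a stand-in for the real stanza.Pipeline(...) call rather than the library API:

class LazyPipelines:
    """Illustrative sketch of the caching used by get_stanza_pipeline()."""

    def __init__(self, use_gpu: bool = False):
        self.use_gpu = use_gpu
        self._pipelines = {}  # language code -> loaded pipeline

    def get(self, lang: str):
        if lang not in self._pipelines:
            try:
                # First request for this language: build and cache the pipeline.
                self._pipelines[lang] = self._load(lang, use_gpu=self.use_gpu)
            except RuntimeError:
                # Mirrors the CUDA OOM fallback in mlp.py: retry on CPU.
                self._pipelines[lang] = self._load(lang, use_gpu=False)
        return self._pipelines[lang]

    def _load(self, lang: str, use_gpu: bool):
        # Placeholder for stanza.Pipeline(lang=lang, dir=..., processors=..., use_gpu=use_gpu).
        return f"<{lang} pipeline, gpu={use_gpu}>"


pipelines = LazyPipelines()
assert len(pipelines._pipelines) == 0  # nothing loaded up front
pipelines.get("et")                    # loads and caches "et" on first use
assert "et" in pipelines._pipelines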