Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
texta
texta-mlp-python
Commits
7c142123
Commit
7c142123
authored
Jun 04, 2021
by
Wael Ramadan
Browse files
Merge branch 'improve_error_handling' into 'master'
Improve error handling See merge request
!12
parents
2f8dba52
e230874c
Pipeline
#5715
passed with stage
in 21 minutes and 42 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
texta_mlp/document.py
View file @
7c142123
...
...
@@ -51,6 +51,7 @@ class Document:
doc_path
:
str
=
"text"
,
json_doc
:
dict
=
None
,
analyzers
:
list
=
[],
error
:
str
=
""
,
):
self
.
original_text
=
original_text
...
...
@@ -58,6 +59,7 @@ class Document:
self
.
analyzers
=
analyzers
self
.
dominant_language_code
=
dominant_language_code
self
.
analysis_lang
=
analysis_lang
self
.
error
=
error
self
.
json_doc
=
json_doc
self
.
entity_mapper
=
entity_mapper
...
...
@@ -166,6 +168,8 @@ class Document:
"detected"
:
self
.
dominant_language_code
,
"analysis"
:
self
.
analysis_lang
}
if
self
.
error
:
container
[
"error"
]
=
self
.
error
if
"lemmas"
in
self
.
analyzers
:
container
[
"lemmas"
]
=
self
.
get_lemma
()
...
...
texta_mlp/exceptions.py
View file @
7c142123
...
...
@@ -5,3 +5,7 @@ class LanguageNotSupported(Exception):
class BoundedListEmpty(Exception):
    """Raised when Concatenator.concatenate() is called before its BOUNDS have been loaded."""
class StanzaPipelineFail(Exception):
    """Raised when the Stanza pipelines fail to load."""
texta_mlp/mlp.py
View file @
7c142123
...
...
@@ -202,10 +202,10 @@ class MLP:
'''
if
lang
not
in
self
.
supported_langs
:
analysis_lang
=
self
.
default_lang
sentences
,
entities
=
self
.
_get_stanza_tokens
(
analysis_lang
,
processed_text
)
if
processed_text
else
([],
[])
sentences
,
entities
,
e
=
self
.
_get_stanza_tokens
(
analysis_lang
,
processed_text
)
if
processed_text
else
([],
[]
,
""
)
else
:
analysis_lang
=
lang
sentences
,
entities
=
self
.
_get_stanza_tokens
(
analysis_lang
,
processed_text
)
if
processed_text
else
([],
[])
sentences
,
entities
,
e
=
self
.
_get_stanza_tokens
(
analysis_lang
,
processed_text
)
if
processed_text
else
([],
[]
,
""
)
document
=
Document
(
original_text
=
processed_text
,
...
...
@@ -217,7 +217,8 @@ class MLP:
json_doc
=
json_object
,
doc_path
=
doc_paths
,
entity_mapper
=
self
.
entity_mapper
,
concat_resources
=
self
.
concat_resources
concat_resources
=
self
.
concat_resources
,
error
=
e
)
return
document
...
...
@@ -230,19 +231,26 @@ class MLP:
def _get_stanza_tokens(self, lang: str, raw_text: str):
    """Run the pipeline registered for ``lang`` on ``raw_text`` and collect its output.

    The callable stored under ``self.stanza_pipelines[lang]`` is invoked with
    ``raw_text``; the result's ``sentences`` are walked, gathering each
    sentence's ``words`` into its own list and all ``entities`` into one flat list.

    Any exception raised while looking up, running, or walking the pipeline
    is logged through ``self.logger.exception`` and reported to the caller
    instead of being propagated.

    :param lang: Key into ``self.stanza_pipelines`` selecting the pipeline.
    :param raw_text: Text handed to the selected pipeline.
    :return: ``(sentences, entities, error)`` where ``sentences`` is a list of
        per-sentence word lists, ``entities`` is a flat list of entities, and
        ``error`` is ``""`` on success or ``repr()`` of the caught exception.
        On failure the lists hold whatever was collected before the error.
    """
    sentences = []
    entities = []
    # Kept separate from the name bound by ``except ... as`` below: Python
    # unbinds that name when the handler exits, so sharing one name is fragile.
    error = ""
    try:
        analysed = self.stanza_pipelines[lang](raw_text)
        for sentence in analysed.sentences:
            sentences.append(list(sentence.words))
            entities.extend(sentence.entities)
    except Exception as exc:
        # Deliberately broad: a pipeline failure is surfaced via the returned
        # error string rather than raised.
        self.logger.exception(exc)
        error = repr(exc)
    return sentences, entities, error
def
_get_stanza_ner
(
self
,
lang
:
str
,
raw_text
:
str
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment