Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
texta
texta-mlp-python
Commits
9d5a381c
Commit
9d5a381c
authored
Oct 18, 2021
by
Raul Sirel
Browse files
merge fixes
parent
9a58af57
Pipeline
#6816
canceled with stage
in 2 minutes and 10 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
VERSION
View file @
9d5a381c
1.1
4.6
1.1
5.0
tests/test_mlp.py
View file @
9d5a381c
...
...
@@ -46,6 +46,8 @@ def test_mlp_process(mlp: MLP):
assert
"lemmas"
in
mlp_text
assert
isinstance
(
mlp_text
[
"lemmas"
],
str
)
is
True
assert
"word_features"
in
mlp_text
assert
"language"
in
mlp_text
assert
isinstance
(
mlp_text
[
"language"
],
dict
)
is
True
...
...
texta_mlp/document.py
View file @
9d5a381c
...
...
@@ -45,8 +45,6 @@ class Document:
original_text
:
str
,
dominant_language_code
:
str
,
analysis_lang
:
str
,
stanza_sentences
:
[
list
],
stanza_entities
,
stanza_document
:
stanza
.
Document
=
None
,
entity_mapper
:
Optional
[
EntityMapper
]
=
None
,
doc_path
:
str
=
"text_mlp"
,
...
...
@@ -304,8 +302,7 @@ class Document:
def
pos_tags
(
self
):
if
"sentences"
in
self
.
analyzers
:
for
i
,
sent
in
enumerate
(
self
.
stanza_sentences
):
#print(sent)
tags_in_sent
=
[
word
.
upos
if
word
and
word
.
upos
and
word
.
upos
!=
"_"
else
"X"
if
word
.
upos
==
"_"
else
"X"
for
word
in
sent
]
tags_in_sent
=
[
word
.
upos
if
word
and
word
.
upos
and
word
.
upos
!=
"_"
else
"X"
if
word
.
upos
==
"_"
else
"X"
for
word
in
sent
.
words
]
for
tag
in
tags_in_sent
:
self
.
__pos_tags
.
append
(
tag
)
# if not last item
...
...
@@ -322,7 +319,7 @@ class Document:
def
word_features
(
self
):
if
"sentences"
in
self
.
analyzers
:
for
i
,
sent
in
enumerate
(
self
.
stanza_sentences
):
tags_in_sent
=
[
word
.
feats
if
word
and
word
.
feats
and
word
.
feats
!=
"_"
else
"X"
if
word
.
feats
==
"_"
else
"X"
for
word
in
sent
]
tags_in_sent
=
[
word
.
feats
if
word
and
word
.
feats
and
word
.
feats
!=
"_"
else
"X"
if
word
.
feats
==
"_"
else
"X"
for
word
in
sent
.
words
]
for
tag
in
tags_in_sent
:
self
.
__word_features
.
append
(
tag
)
# if not last item
...
...
texta_mlp/mlp.py
View file @
9d5a381c
...
...
@@ -63,7 +63,7 @@ SUPPORTED_ANALYZERS = (
DEFAULT_ANALYZERS
=
[
"lemmas"
,
"pos_tags"
,
#
"word_features",
"word_features"
,
"transliteration"
,
"ner"
,
"addresses"
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment