Commit 9d5a381c authored by Raul Sirel
Browse files

merge fixes

parent 9a58af57
Pipeline #6816 canceled with stage
in 2 minutes and 10 seconds
......@@ -46,6 +46,8 @@ def test_mlp_process(mlp: MLP):
assert "lemmas" in mlp_text
assert isinstance(mlp_text["lemmas"], str) is True
assert "word_features" in mlp_text
assert "language" in mlp_text
assert isinstance(mlp_text["language"], dict) is True
......
......@@ -45,8 +45,6 @@ class Document:
original_text: str,
dominant_language_code: str,
analysis_lang: str,
stanza_sentences: [list],
stanza_entities,
stanza_document: stanza.Document = None,
entity_mapper: Optional[EntityMapper] = None,
doc_path: str = "text_mlp",
......@@ -304,8 +302,7 @@ class Document:
def pos_tags(self):
if "sentences" in self.analyzers:
for i,sent in enumerate(self.stanza_sentences):
#print(sent)
tags_in_sent = [word.upos if word and word.upos and word.upos != "_" else "X" if word.upos == "_" else "X" for word in sent]
tags_in_sent = [word.upos if word and word.upos and word.upos != "_" else "X" if word.upos == "_" else "X" for word in sent.words]
for tag in tags_in_sent:
self.__pos_tags.append(tag)
# if not last item
......@@ -322,7 +319,7 @@ class Document:
def word_features(self):
if "sentences" in self.analyzers:
for i,sent in enumerate(self.stanza_sentences):
tags_in_sent = [word.feats if word and word.feats and word.feats != "_" else "X" if word.feats == "_" else "X" for word in sent]
tags_in_sent = [word.feats if word and word.feats and word.feats != "_" else "X" if word.feats == "_" else "X" for word in sent.words]
for tag in tags_in_sent:
self.__word_features.append(tag)
# if not last item
......
......@@ -63,7 +63,7 @@ SUPPORTED_ANALYZERS = (
DEFAULT_ANALYZERS = [
"lemmas",
"pos_tags",
#"word_features",
"word_features",
"transliteration",
"ner",
"addresses",
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment