Commit 6b4ddb78 authored by Wael Ramadan
Browse files

return and save stanza error in es

parent 0def44e5
Pipeline #5696 failed in 18 minutes and 45 seconds
......@@ -44,6 +44,7 @@ class Document:
original_text: str,
dominant_language_code: str,
analysis_lang: str,
error: str,
stanza_sentences: [list],
stanza_entities,
concat_resources: dict,
......@@ -58,6 +59,7 @@ class Document:
self.analyzers = analyzers
self.dominant_language_code = dominant_language_code
self.analysis_lang = analysis_lang
self.error = error
self.json_doc = json_doc
self.entity_mapper = entity_mapper
......@@ -166,6 +168,7 @@ class Document:
"detected": self.dominant_language_code,
"analysis": self.analysis_lang
}
container["error"] = self.error
if "lemmas" in self.analyzers:
container["lemmas"] = self.get_lemma()
......
......@@ -202,10 +202,10 @@ class MLP:
'''
if lang not in self.supported_langs:
analysis_lang = self.default_lang
sentences, entities = self._get_stanza_tokens(analysis_lang, processed_text) if processed_text else ([], [])
sentences, entities, e = self._get_stanza_tokens(analysis_lang, processed_text) if processed_text else ([], [], "")
else:
analysis_lang = lang
sentences, entities = self._get_stanza_tokens(analysis_lang, processed_text) if processed_text else ([], [])
sentences, entities, e = self._get_stanza_tokens(analysis_lang, processed_text) if processed_text else ([], [], "")
document = Document(
original_text=processed_text,
......@@ -217,7 +217,8 @@ class MLP:
json_doc=json_object,
doc_path=doc_paths,
entity_mapper=self.entity_mapper,
concat_resources=self.concat_resources
concat_resources=self.concat_resources,
error=e
)
return document
......@@ -232,22 +233,24 @@ class MLP:
def _get_stanza_tokens(self, lang: str, raw_text: str):
sentences = []
entities = []
e = ""
try:
pipeline = self.stanza_pipelines[lang](raw_text)
pip_pat = re.compile(r"(?<=\d)_(?=\d)")
for sentence in pipeline.sentences:
words = []
for word in sentence.words:
words.append(word)
sentences.append(words)
for entity in sentence.entities:
entities.append(entity)
except Exception as e:
self.logger.exception(e)
return sentences, entities
pip_pat = re.compile(r"(?<=\d)_(?=\d)")
for sentence in pipeline.sentences:
words = []
for word in sentence.words:
words.append(word)
sentences.append(words)
for entity in sentence.entities:
entities.append(entity)
return sentences, entities
return sentences, entities, repr(e)
return sentences, entities, e
def _get_stanza_ner(self, lang: str, raw_text: str):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment