Improve Error Handling
TTK Log:
[2021-05-24 15:51:32,035: ERROR/ForkPoolWorker-1] Task apply_mlp_on_index[177ed543-581d-4193-832b-e5f36b760db2] raised unexpected: RuntimeError('stack expects a non-empty TensorList')
Traceback (most recent call last):
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/celery/app/trace.py", line 412, in trace_task
R = retval = fun(*args, **kwargs)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/celery/app/trace.py", line 704, in __protected_call__
return self.run(*args, **kwargs)
File "/var/texta-rest/toolkit/mlp/tasks.py", line 99, in apply_mlp_on_index
raise e
File "/var/texta-rest/toolkit/mlp/tasks.py", line 92, in apply_mlp_on_index
elastic_response = ed.bulk_update(actions=actions)
File "/var/texta-rest/toolkit/elastic/decorators.py", line 18, in func_wrapper
return func(*args, **kwargs)
File "/var/texta-rest/toolkit/elastic/tools/document.py", line 143, in bulk_update
return bulk(client=self.core.es, actions=actions, refresh=refresh, request_timeout=30, chunk_size=chunk_size)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/elasticsearch/helpers/actions.py", line 396, in bulk
for ok, item in streaming_bulk(client, actions, *args, **kwargs):
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/elasticsearch/helpers/actions.py", line 308, in streaming_bulk
actions, chunk_size, max_chunk_bytes, client.transport.serializer
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/elasticsearch/helpers/actions.py", line 155, in _chunk_actions
for action, data in actions:
File "/var/texta-rest/toolkit/elastic/tools/document.py", line 161, in add_type_to_docs
for action in actions:
File "/var/texta-rest/toolkit/mlp/helpers.py", line 24, in process_mlp_actions
mlp_processed = mlp_class.process_docs(document_sources, analyzers=analyzers, doc_paths=field_data)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/texta_mlp/mlp.py", line 357, in process_docs
doc = self.generate_document(raw_text, analyzers, document, doc_paths=doc_path)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/texta_mlp/mlp.py", line 208, in generate_document
sentences, entities = self._get_stanza_tokens(analysis_lang, processed_text) if processed_text else ([], [])
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/texta_mlp/mlp.py", line 233, in _get_stanza_tokens
pipeline = self.stanza_pipelines[lang](raw_text)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/pipeline/core.py", line 210, in __call__
doc = self.process(doc)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/pipeline/core.py", line 204, in process
doc = process(doc)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/pipeline/tokenize_processor.py", line 92, in process
no_ssplit=self.config.get('no_ssplit', False))
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/models/tokenization/utils.py", line 153, in output_predictions
pred1 = np.argmax(trainer.predict(batch1), axis=2)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/models/tokenization/trainer.py", line 67, in predict
pred = self.model(units, features)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/stanza/models/tokenization/model.py", line 49, in forward
inp, _ = self.rnn(emb)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/opt/conda/envs/texta-rest/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 570, in forward
self.dropout, self.training, self.bidirectional, self.batch_first)
RuntimeError: stack expects a non-empty TensorList
I think we need a generic MLP error that can be caught and logged on the TTK side,
so that a failure like this doesn't kill the worker.
It could be a custom exception defined here: https://git.texta.ee/texta/texta-mlp-python/-/blob/master/texta_mlp/exceptions.py
Since we depend on Stanza, I guess anything could go wrong on their side, but we can't let that kill our pipelines.