Commit 9a26a2eb authored by Raul Sirel
Browse files

add spans param to process

parent 9265e5b6
......@@ -37,7 +37,7 @@ test_texts = [
def test_mlp_process(mlp: MLP):
for test_text in test_texts:
# process text
result = mlp.process(test_text)
result = mlp.process(test_text, spans="sentence")
print("\nMLP process output:", result)
# test result form
......
......@@ -297,7 +297,7 @@ class MLP:
return "tokenize,pos,lemma"
def process(self, raw_text: str, analyzers: list = DEFAULT_ANALYZERS, lang=None):
def process(self, raw_text: str, analyzers: list = DEFAULT_ANALYZERS, lang=None, spans="text"):
"""
Processes raw text.
:param: raw_text str: Text to be processed.
......@@ -313,7 +313,7 @@ class MLP:
# document class.
self.__apply_analyzer(document, analyzer)
if "sentences" in analyzers:
if "sentences" in analyzers and spans == "sentence":
document.fact_spans_to_sent()
return document.to_json()
......@@ -360,7 +360,7 @@ class MLP:
self.logger.exception(e)
def process_docs(self, docs: List[dict], doc_paths: List[str], analyzers=DEFAULT_ANALYZERS):
def process_docs(self, docs: List[dict], doc_paths: List[str], analyzers=DEFAULT_ANALYZERS, spans="text"):
"""
:param docs: Contains tuples with two dicts inside them, the first being the document to be analyzed and the second is the meta information that corresponds to the document for transport purposes later on.
:param doc_paths: Dot separated paths for how to traverse the dict for the text value you want to analyze.
......@@ -383,7 +383,7 @@ class MLP:
# document class.
self.__apply_analyzer(doc, analyzer)
if "sentences" in analyzers:
if "sentences" in analyzers and spans == "sentence":
doc.fact_spans_to_sent()
result = doc.document_to_json(use_default_doc_path=False)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment