Commit ee057584 authored by Marko Kollo 😄
Browse files

Add function to edit nested dictionaries.

parent c728e137
Pipeline #5178 passed with stages
in 18 minutes and 7 seconds
......@@ -44,7 +44,7 @@ class Document:
original_text: str,
dominant_language_code: str,
analysis_lang: str,
stanza_sentences: [list],
stanza_sentences: [list],
stanza_entities,
concat_resources: dict,
entity_mapper: Optional[EntityMapper] = None,
......@@ -118,6 +118,20 @@ class Document:
self.__texta_facts.append(fact)
@staticmethod
def edit_doc(doc: dict, doc_path: str, new_value) -> dict:
    """
    Set a value inside a nested dictionary, addressed by a dot separated path.

    :param doc: Dictionary that gets wrapped into a PelicanJson object for editing.
    :param doc_path: Dot separated keys leading to the nested location, e.g. "a.b.c".
    :param new_value: Value to store at the location described by doc_path.
    :return: The edited content converted back from the PelicanJson wrapper.
    """
    editable = PelicanJson(doc)
    # The dotted string is handed over as a list of individual path segments.
    # NOTE(review): force=True presumably creates missing intermediate keys - confirm against the PelicanJson docs.
    editable.set_nested_value(doc_path.split("."), new_value, force=True)
    return editable.convert()
def document_to_json(self, use_default_doc_path=True) -> dict:
"""
:param use_default_doc_path: Normal string values will be given the default path for facts but for dictionary input you already have them.
......@@ -137,13 +151,18 @@ class Document:
container = dict()
container["text"] = self.get_words(ssplit="sentences" in self.analyzers)
texta_facts = self.facts_to_json()
container["language"] = {"detected": self.dominant_language_code,
"analysis": self.analysis_lang}
if "lemmas" in self.analyzers: container["lemmas"] = self.get_lemma()
if "pos_tags" in self.analyzers: container["pos_tags"] = self.get_pos_tags()
container["language"] = {
"detected": self.dominant_language_code,
"analysis": self.analysis_lang
}
if "lemmas" in self.analyzers:
container["lemmas"] = self.get_lemma()
if "pos_tags" in self.analyzers:
container["pos_tags"] = self.get_pos_tags()
# if "sentiment" in self.analyzers: container["sentiment"] = self.get_sentiment()
if "transliteration" in self.analyzers and self.__transliteration: container[
"transliteration"] = self.get_transliteration()
if "transliteration" in self.analyzers and self.__transliteration:
container["transliteration"] = self.get_transliteration()
if use_default_doc_path:
for fact in texta_facts["texta_facts"]:
fact["doc_path"] = "text.text"
......@@ -170,11 +189,12 @@ class Document:
for sent in self.stanza_sentences:
self.__words.append([word.text for word in sent])
def sentences(self):
    """Placeholder without an implementation yet; does nothing and returns None."""
def get_words(self, ssplit = False) -> str:
def get_words(self, ssplit=False) -> str:
if ssplit:
return "\n".join([" ".join(sent_words) for sent_words in self.__words])
else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment