Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
texta
texta-mlp-python
Commits
ee057584
Commit
ee057584
authored
Apr 01, 2021
by
Marko Kollo
😄
Browse files
Add function to edit nested dictionaries.
parent
c728e137
Pipeline
#5178
passed with stages
in 18 minutes and 7 seconds
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
VERSION
View file @
ee057584
1.10.
3
1.10.
4
texta_mlp/document.py
View file @
ee057584
...
...
@@ -44,7 +44,7 @@ class Document:
original_text
:
str
,
dominant_language_code
:
str
,
analysis_lang
:
str
,
stanza_sentences
:
[
list
],
stanza_sentences
:
[
list
],
stanza_entities
,
concat_resources
:
dict
,
entity_mapper
:
Optional
[
EntityMapper
]
=
None
,
...
...
@@ -118,6 +118,20 @@ class Document:
self
.
__texta_facts
.
append
(
fact
)
@staticmethod
def edit_doc(doc: dict, doc_path: str, new_value) -> dict:
    """
    Write a value into a nested dictionary at a dot-separated path.

    :param doc: Dictionary that receives the new content.
    :param doc_path: Dot-separated keys leading to the nested location, e.g. "a.b.c".
    :param new_value: Value to store at that location.
    :return: Dictionary form of the document with the value placed at the given path.
    """
    pelican = PelicanJson(doc)
    keys = doc_path.split(".")
    # NOTE(review): force=True presumably lets PelicanJson create missing
    # intermediate levels instead of raising - confirm against the pelecanus docs.
    pelican.set_nested_value(keys, new_value, force=True)
    return pelican.convert()
def
document_to_json
(
self
,
use_default_doc_path
=
True
)
->
dict
:
"""
:param use_default_doc_path: Normal string values will be given the default path for facts but for dictionary input you already have them.
...
...
@@ -137,13 +151,18 @@ class Document:
container
=
dict
()
container
[
"text"
]
=
self
.
get_words
(
ssplit
=
"sentences"
in
self
.
analyzers
)
texta_facts
=
self
.
facts_to_json
()
container
[
"language"
]
=
{
"detected"
:
self
.
dominant_language_code
,
"analysis"
:
self
.
analysis_lang
}
if
"lemmas"
in
self
.
analyzers
:
container
[
"lemmas"
]
=
self
.
get_lemma
()
if
"pos_tags"
in
self
.
analyzers
:
container
[
"pos_tags"
]
=
self
.
get_pos_tags
()
container
[
"language"
]
=
{
"detected"
:
self
.
dominant_language_code
,
"analysis"
:
self
.
analysis_lang
}
if
"lemmas"
in
self
.
analyzers
:
container
[
"lemmas"
]
=
self
.
get_lemma
()
if
"pos_tags"
in
self
.
analyzers
:
container
[
"pos_tags"
]
=
self
.
get_pos_tags
()
# if "sentiment" in self.analyzers: container["sentiment"] = self.get_sentiment()
if
"transliteration"
in
self
.
analyzers
and
self
.
__transliteration
:
container
[
"transliteration"
]
=
self
.
get_transliteration
()
if
"transliteration"
in
self
.
analyzers
and
self
.
__transliteration
:
container
[
"transliteration"
]
=
self
.
get_transliteration
()
if
use_default_doc_path
:
for
fact
in
texta_facts
[
"texta_facts"
]:
fact
[
"doc_path"
]
=
"text.text"
...
...
@@ -170,11 +189,12 @@ class Document:
for
sent
in
self
.
stanza_sentences
:
self
.
__words
.
append
([
word
.
text
for
word
in
sent
])
def sentences(self):
    """Placeholder method: performs no work and returns None."""
    return None
def
get_words
(
self
,
ssplit
=
False
)
->
str
:
def
get_words
(
self
,
ssplit
=
False
)
->
str
:
if
ssplit
:
return
"
\n
"
.
join
([
" "
.
join
(
sent_words
)
for
sent_words
in
self
.
__words
])
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment