Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
texta
texta-mlp-python
Commits
0e94c4eb
Commit
0e94c4eb
authored
May 07, 2021
by
Marko Kollo
😄
Browse files
Temporarily removed memory optimization bc of issues with MWT dependency.
parent
2aa62f7d
Pipeline
#5455
passed with stages
in 21 minutes and 36 seconds
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
VERSION
View file @
0e94c4eb
1.10.
4
1.10.
5
texta_mlp/mlp.py
View file @
0e94c4eb
import
logging
import
os
import
pathlib
import
regex
as
re
import
shutil
from
typing
import
List
,
Optional
from
urllib.parse
import
urlparse
from
urllib.request
import
urlopen
import
regex
as
re
import
stanza
from
bs4
import
BeautifulSoup
from
langdetect
import
detect
from
pelecanus
import
PelicanJson
from
typing
import
List
,
Optional
from
urllib.parse
import
urlparse
from
urllib.request
import
urlopen
from
texta_mlp.document
import
Document
from
texta_mlp.entity_mapper
import
EntityMapper
from
texta_mlp.exceptions
import
LanguageNotSupported
from
texta_mlp.utils
import
parse_bool_env
...
...
@@ -269,7 +269,6 @@ class MLP:
stanza_resource_path
=
pathlib
.
Path
(
self
.
resource_dir
)
/
"stanza"
stanza_pipelines
[
lang
]
=
stanza
.
Pipeline
(
lang
=
lang
,
processors
=
self
.
_get_stanza_processors
(
lang
),
dir
=
str
(
stanza_resource_path
),
use_gpu
=
self
.
use_gpu
,
logging_level
=
logging_level
...
...
@@ -277,18 +276,6 @@ class MLP:
return
stanza_pipelines
@staticmethod
def _get_stanza_processors(lang):
    """
    Returns processor options based on language and NER support in Stanza.

    :param lang: Language code; checked for membership in STANZA_NER_SUPPORT.
    :return: "tokenize,pos,lemma,ner" when lang is listed in STANZA_NER_SUPPORT,
             otherwise "tokenize,pos,lemma" (no NER processor).
    """
    if lang in STANZA_NER_SUPPORT:
        return "tokenize,pos,lemma,ner"
    return "tokenize,pos,lemma"
def
process
(
self
,
raw_text
:
str
,
analyzers
:
list
=
[
"all"
],
lang
=
None
):
"""
Processes raw text.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment