Mapping NER entity names
Read more: https://github.com/stanfordnlp/stanza/issues/904
The names and the number of different entities extracted with NER models may vary depending on the language and the NER model. It would be more convenient if entities conveying the same information were mapped together and added under the same fact name (e.g. "PER", "PERS", "PERSON" -> "PER"). Furthermore, it would be nice to add a selection menu to let the user decide which entities to extract (although this might be difficult to implement, as the language of the documents is often unknown beforehand and thus we wouldn't know the available options).
{
"fr": [
"LOC",
"MISC",
"ORG",
"PER"
],
"en": [
"CARDINAL",
"DATE",
"EVENT",
"FAC",
"GPE",
"LANGUAGE",
"LAW",
"LOC",
"MONEY",
"NORP",
"ORDINAL",
"ORG",
"PERCENT",
"PERSON",
"PRODUCT",
"QUANTITY",
"TIME",
"WORK_OF_ART"
],
"zh-hans": [
"CARDINAL",
"DATE",
"EVENT",
"FAC",
"GPE",
"LANGUAGE",
"LAW",
"LOC",
"MONEY",
"NORP",
"ORDINAL",
"ORG",
"PERCENT",
"PERSON",
"PRODUCT",
"QUANTITY",
"TIME",
"WORK_OF_ART"
],
"ru": [
"LOC",
"MISC",
"ORG",
"PER"
],
"uk": [
"LOC",
"MISC",
"ORG",
"PERS"
],
"ar": [
"LOC",
"MISC",
"ORG",
"PER"
],
"hu": [
"LOC",
"MISC",
"ORG",
"PER"
],
"af": [
"LOC",
"MISC",
"ORG",
"PERS"
],
"bg": [
"EVT",
"LOC",
"ORG",
"PER",
"PRO"
],
"fi": [
"DATE",
"EVENT",
"LOC",
"ORG",
"PER",
"PRO"
],
"my": [
"LOC",
"NE",
"NUM",
"ORG",
"PNAME",
"RACE",
"TIME"
],
"it": [
"LOC",
"ORG",
"PER"
],
"de": [
"LOC",
"MISC",
"ORG",
"PER"
],
"nl": [
"LOC",
"MISC",
"ORG",
"PER"
],
"vi": [
"LOCATION",
"MISCELLANEOUS",
"ORGANIZATION",
"PERSON"
],
"es": [
"LOC",
"MISC",
"ORG",
"PER"
]
}