File size: 5,231 Bytes
0e55bc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
from numpy import string_
import re
# Arabic display text for the coarse BIO tags (LOC / ORG / PERS / MISC).
# The B- (begin) and I- (inside) variants of a type share the same text.
en_to_ar_camel = {
    'B-LOC': 'مكان',
    'B-ORG': 'مؤسسة',
    'B-PERS': 'شخص',
    'B-MISC': 'معنى بموضوعات متنوعة',
    'I-LOC': 'مكان',
    'I-ORG': 'مؤسسة',
    # Was misspelled as 'شحص'; it must match the 'B-PERS' text.
    'I-PERS': 'شخص',
    'I-MISC': 'معنى بموضوعات متنوعة',
}
# Arabic display text for the fine-grained BIO tags.
# Keys are "B-<type>" / "I-<type>"; both variants of a type carry the same text.
# NOTE(review): the GPE-Cluster entries have an empty translation and "Sharp"
# has no "I-" entry -- confirm whether either is intentional.
en_to_ar = {
    "B-Artist": "فنان",
    "I-Artist": "فنان",
    "B-Sound": "صوت",
    "I-Sound": "صوت",
    "B-Educational": "تعليمي",
    "I-Educational": "تعليمي",
    "B-Building-Grounds": "أراضي البناء",
    "I-Building-Grounds": "أراضي البناء",
    "B-Population-Center": "مركز سكني",
    "B-Nation": "شعب(أمة)",
    "B-State-or-Province": "ولاية أو مقاطعة",
    "I-State-or-Province": "ولاية أو مقاطعة",
    "B-Water-Body": "مسطح مائي",
    "I-Water-Body": "مسطح مائي",
    "B-Land-Region-Natural": "أرض طبيعية",
    "I-Land-Region-Natural": "أرض طبيعية",
    "B-Software": "سوفتوير(برمجيات)",
    "I-Software": "سوفتوير(برمجيات)",
    "B-Scientist": "عالم",
    "B-Book": "كتاب",
    "I-Book": "كتاب",
    "I-Scientist": "عالم",
    "B-Group": "مجموعة",
    "B-Celestial": "سماوي",
    "B-Police": "شرطة",
    "I-Police": "شرطة",
    "I-Population-Center": "مركز سكني",
    "I-Celestial": "سماوي",
    "B-Engineer": "مهندس",
    "I-Engineer": "مهندس",
    "B-Projectile": "قذيفة",
    "B-Government": "حكومة",
    "I-Government": "حكومة",
    "B-Commercial": "تجاري",
    "I-Commercial": "تجاري",
    "B-Continent": "قارة",
    "B-Air": "هواء",
    "I-Air": "هواء",
    "B-Other_PER": "شخص",
    "I-Other_PER": "شخص",
    "I-Group": "مجموعة",
    "B-Politician": "سياسي",
    "I-Politician": "سياسي",
    "B-Athlete": "رياضي",
    "I-Athlete": "رياضي",
    "B-Religious_ORG": "مؤسسة دينية",
    "I-Religious_ORG": "مؤسسة دينية",
    "B-Path": "طريق",
    "I-Path": "طريق",
    "B-Media": "إعلام",
    "I-Media": "إعلام",
    "B-Non-Governmental": "غير حكومي",
    "I-Non-Governmental": "غير حكومي",
    "B-County-or-District": "مدينة أو ضاحية",
    "I-County-or-District": "مدينة أو ضاحية",
    "B-Businessperson": "رجل أعمال",
    "B-Lawyer": "محامي",
    "I-Lawyer": "محامي",
    "B-GPE-Cluster": "",
    "I-GPE-Cluster": "",
    "I-Nation": "شعب(أمة)",
    "B-Religious_PER": "شخص ديني",
    "I-Religious_PER": "شخص ديني",
    "I-Businessperson": "رجل أعمال",
    "B-Medical-Science": "علوم طبية",
    "I-Medical-Science": "علوم طبية",
    "B-Movie": "فيلم",
    "I-Movie": "فيلم",
    "B-Water": "ماء",
    "I-Water": "ماء",
    "B-Drug": "دواء",
    "B-Hardware": "عتاد",
    "I-Hardware": "عتاد",
    "B-Subarea-Facility": "منشأة منطقة فرعية",
    "I-Subarea-Facility": "منشأة منطقة فرعية",
    "B-Blunt": "فظ",
    "B-Airport": "مطار",
    "I-Blunt": "فظ",
    "I-Drug": "دواء",
    "B-Sports": "رياضة",
    "I-Sports": "رياضة",
    "B-Shooting": "رماية",
    "I-Shooting": "رماية",
    "B-Food": "طعام",
    "I-Food": "طعام",
    "I-Continent": "قارة",
    "B-Nuclear": "نووي",
    "I-Nuclear": "نووي",
    "B-Entertainment": "ترفيه",
    "I-Entertainment": "ترفيه",
    "I-Projectile": "قذيفة",
    "B-Land": "أرض",
    "B-Sharp": "حاد",
    "I-Airport": "مطار",
    "I-Land": "أرض",
    "B-Plant": "نبات",
    "I-Plant": "نبات",
    "B-Exploding": "منفجر",
    "I-Exploding": "منفجر",
    "B-Chemical": "كيميائي",
    "I-Chemical": "كيميائي",
}
def get_separate_entities(labels, tokens):
    """Merge BIO-tagged tokens into whole entity strings.

    Consecutive tokens tagged ``B-...`` / ``I-...`` are joined with single
    spaces into one entity (e.g. ``mohamed``, ``salah`` -> ``"mohamed salah"``).
    Per the original author's note, the merged names are used to search
    Wikipedia.

    Args:
        labels: sequence of tag strings, one per token. A tag starting with
            ``"B-"`` opens a new entity, one starting with ``"I-"`` continues
            the current entity, and anything else marks a non-entity token.
        tokens: sequence of token strings aligned with ``labels``. It must be
            at least as long as ``labels``; extra tokens are ignored.

    Returns:
        A list of ``(text, flag)`` tuples in input order. ``flag`` is ``1``
        for a merged entity and ``0`` for a single non-entity token.

    Raises:
        IndexError: if ``tokens`` is shorter than ``labels``.
    """
    res = []
    span = []  # tokens of the entity currently being assembled

    for i, label in enumerate(labels):
        token = tokens[i]
        if label.startswith("B-"):
            # A "B-" tag always starts a fresh entity, so close whatever is
            # open first -- including a span opened by a stray "I-" tag.
            if span:
                res.append((" ".join(span), 1))
            span = [token]
        elif label.startswith("I-"):
            # Continue the open entity; a stray "I-" with nothing open
            # starts one, as before.
            span.append(token)
        else:
            if span:
                res.append((" ".join(span), 1))
                span = []
            res.append((token, 0))

    # Emit an entity that runs up to the end of the input.
    if span:
        res.append((" ".join(span), 1))
    return res
|