samariddin committed
Commit 9415e6f · 1 Parent(s): defebef
app.py CHANGED
@@ -1,571 +1,21 @@
- import requests
- import streamlit as st
- import wikipedia
- from wikipedia import WikipediaPage
- import pandas as pd
- import spacy
- import unicodedata
- from nltk.corpus import stopwords
- import numpy as np
- import nltk
- from newspaper import Article
-
- nltk.download('stopwords')
- from string import punctuation
- import json
- import time
- from datetime import datetime, timedelta
- import urllib
- from io import BytesIO
- from PIL import Image, UnidentifiedImageError
- from SPARQLWrapper import SPARQLWrapper, JSON, N3
- from fuzzywuzzy import process, fuzz
- from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode
  from transformers import pipeline
- import en_core_web_lg
-
- sparql = SPARQLWrapper('https://dbpedia.org/sparql')
-
-
- class ExtractArticleEntities:
-     """ Extract article entities from a document using natural language processing (NLP) and fuzzy matching.
-     Parameters
-     - text: a string or the text of a news article to be parsed
-     Usage:
-     import ExtractArticleEntities
-     instantiate with text parameter ie. entities = ExtractArticleEntities(text)
-     retrieve Who, What, When, Where entities with entities.www_json
-     Non-organised entities with entiities.json
-     """
-
-     def __init__(self, text):
-         self.text = text  # preprocess text at initialisation
-         self.text = self.preprocessing(self.text)
-         print(self.text)
-         print('_____text_____')
-         self.json = {}
-         # Create empty dataframe to hold entity data for ease of processing
-         self.entity_df = pd.DataFrame(columns=["entity", "description"])
-         # Load the spacy model
-
-         # self.nlp = en_core_web_lg.load()
-         self.nlp = pipeline(model="51la5/roberta-large-NER")
-
-         # Parse the text
-         self.entity_df = self.get_who_what_where_when()
-         # Disambiguate entities
-
-         self.entity_df = self.fuzzy_disambiguation()
-         self.get_related_entity()
-         self.get_popularity()
-         # Create JSON representation of entities
-         self.entity_df = self.entity_df.drop_duplicates(subset=["description"])
-
-         self.entity_df = self.entity_df.reset_index(drop=True)
-
-         # ungrouped entity returned as json
-         self.json = self.entity_json()
-         # return json with entities grouped into who, what, where, when keys
-         self.www_json = self.get_wwww_json()
-
-     # def get_related_entity(self):
-     #     entities = self.entity_df.description
-     #     labels = self.entity_df.entity
-     #     related_entity = []
-     #     for entity, label in zip(entities, labels):
-     #         if label in ('PERSON', 'ORG','GPE','NORP','LOC'):
-     #             related_entity.append(wikipedia.search(entity, 3))
-     #         else:
-     #             related_entity.append([None])
-
-     #     self.entity_df['Wikipedia Entity'] = related_entity
-
-     def get_popularity(self):
-         # names = self.entity_df.description
-         # related_names = self.entity_df['Matched Entity']
-         # for name, related_name in zip(names, related_names):
-         #     if related_name:
-         #         related_name.append(name)
-         #         pytrends.build_payload(related_name, timeframe='now 4-d')
-         #         st.dataframe(pytrends.interest_over_time())
-         #         time.sleep(2)
-         master_df = pd.DataFrame()
-         view_list = []
-         for entity in self.entity_df['Matched Entity']:
-             if entity:
-                 entity_to_look = entity[0]
-                 # print(entity_to_look, '_______')
-                 entity_to_look = entity_to_look.replace(' ', '_')
-                 print(entity_to_look, '_______')
-                 headers = {
-                     'accept': 'application/json',
-                     'User-Agent': 'Foo bar'
-                 }
-
-                 now = datetime.now()
-                 now_dt = now.strftime(r'%Y%m%d')
-                 week_back = now - timedelta(days=7)
-                 week_back_dt = week_back.strftime(r'%Y%m%d')
-                 resp = requests.get(
-                     f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{entity_to_look}/daily/{week_back_dt}/{now_dt}',
-                     headers=headers)
-                 data = resp.json()
-                 # print(data)
-                 df = pd.json_normalize(data['items'])
-                 view_count = sum(df['views'])
-
-             else:
-                 view_count = 0
-             view_list.append(view_count)
-
-         self.entity_df['Views'] = view_list
-
-         for entity in ('PERSON', 'ORG', 'GPE', 'NORP', 'LOC'):
-             related_entity_view_list = []
-             grouped_df = self.entity_df[self.entity_df['entity'] == entity]
-             grouped_df['Matched count'] = grouped_df['fuzzy_match'].apply(len)
-             grouped_df['Wiki count'] = grouped_df['Matched Entity'].apply(len)
-
-             grouped_df = grouped_df.sort_values(by=['Views', 'Matched count', 'Wiki count'],
-                                                 ascending=False).reset_index(drop=True)
-             if not grouped_df.empty:
-                 # st.dataframe(grouped_df)
-                 master_df = pd.concat([master_df, grouped_df])
-
-         self.sorted_entity_df = master_df
-         if 'Views' in self.sorted_entity_df:
-             self.sorted_entity_df = self.sorted_entity_df.sort_values(by=['Views'], ascending=False).reset_index(
-                 drop=True)
-         # st.dataframe(self.sorted_entity_df)
-         # names = grouped_df['description'][:5].values
-         # print(names, type(names))
-         # if names.any():
-         #     # pytrends.build_payload(names, timeframe='now 1-m')
-         #     st.dataframe(pytrends.get_historical_interest(names,
-         #                                                   year_start=2022, month_start=10, day_start=1,
-         #                                                   hour_start=0,
-         #                                                   year_end=2022, month_end=10, day_end=21,
-         #                                                   hour_end=0, cat=0, geo='', gprop='', sleep=0))
-         #     st.dataframe()
-         #     time.sleep(2)
-         # st.dataframe(grouped_df)
-
-     def get_related_entity(self):
-         names = self.entity_df.description
-         entities = self.entity_df.entity
-         self.related_entity = []
-         match_scores = []
-         for name, entity in zip(names, entities):
-             if entity in ('PERSON', 'ORG', 'GPE', 'NORP', 'LOC'):
-                 related_names = wikipedia.search(name, 10)
-                 self.related_entity.append(related_names)
-                 matches = process.extract(name, related_names)
-                 match_scores.append([match[0] for match in matches if match[1] >= 90])
-             else:
-                 self.related_entity.append([None])
-                 match_scores.append([])
-         # Remove nulls
-
-         self.entity_df['Wikipedia Entity'] = self.related_entity
-         self.entity_df['Matched Entity'] = match_scores
-     def fuzzy_disambiguation(self):
-         # Load the entity data
-         self.entity_df['fuzzy_match'] = ''
-         # Load the entity data
-         person_choices = self.entity_df.loc[self.entity_df['entity'] == 'PERSON']
-         org_choices = self.entity_df.loc[self.entity_df['entity'] == 'ORG']
-         where_choices = self.entity_df.loc[self.entity_df['entity'] == 'GPE']
-         norp_choices = self.entity_df.loc[self.entity_df['entity'] == 'NORP']
-         loc_choices = self.entity_df.loc[self.entity_df['entity'] == 'LOC']
-         date_choices = self.entity_df.loc[self.entity_df['entity'] == 'DATE']
-
-         def fuzzy_match(row, choices):
-             '''This function disambiguates entities by looking for maximum three matches with a score of 80 or more
-             for each of the entity types. If there is no match, then the function returns None. '''
-             match = process.extract(row["description"], choices["description"], limit=3)
-
-             match = [m[0] for m in match if m[1] > 80 and m[1] != 100]
-
-             if len(match) == 0:
-                 match = []
-
-             if match:
-                 self.fuzzy_match_dict[row["description"]] = match
-
-             return match
-
-         # Apply the fuzzy matching function to the entity dataframe
-
-         self.fuzzy_match_dict = {}
-
-         for i, row in self.entity_df.iterrows():
-
-             if row['entity'] == 'PERSON':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, person_choices)
-
-             elif row['entity'] == 'ORG':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, org_choices)
-             elif row['entity'] == 'GPE':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, where_choices)
-
-             elif row['entity'] == 'NORP':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, norp_choices)
-             elif row['entity'] == 'LOC':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, loc_choices)
-             elif row['entity'] == 'DATE':
-
-                 self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, date_choices)
-
-         return self.entity_df
-
-     def preprocessing(self, text):
-         """This function takes a text string and strips out all punctuation. It then normalizes the string to a
-         normalized form (using the "NFKD" normalization algorithm). Finally, it strips any special characters and
-         converts them to their unicode equivalents. """
-
-         # remove punctuation
-         text = text.translate(str.maketrans("", "", punctuation))
-         # normalize the text
-         stop_words = stopwords.words('english')
-
-         # Removing Stop words can cause losing context, instead stopwords can be utilized for knowledge
-         filtered_words = [word for word in self.text.split()]  # if word not in stop_words]
-
-         # This is very hacky. Need a better way of handling bad encoding
-         pre_text = " ".join(filtered_words)
-         pre_text = pre_text = pre_text.replace(' ', ' ')
-         pre_text = pre_text.replace('’', "'")
-         pre_text = pre_text.replace('“', '"')
-         pre_text = pre_text.replace('â€', '"')
-         pre_text = pre_text.replace('‘', "'")
-         pre_text = pre_text.replace('…', '...')
-         pre_text = pre_text.replace('–', '-')
-         pre_text = pre_text.replace("\x9d", '-')
-         # normalize the text
-         pre_text = unicodedata.normalize("NFKD", pre_text)
-         # strip punctuation again as some remains in first pass
-         pre_text = pre_text.translate(str.maketrans("", "", punctuation))
-
-         return pre_text
-
-     def get_who_what_where_when(self):
-         """Get entity information in a document.
-         This function will return a DataFrame with the following columns:
-         - entity: the entity being queried
-         - description: a brief description of the entity
-         Usage:
-         get_who_what_where_when(text)
-         Example:
-         > get_who_what_where_when('This is a test')
-         PERSON
-         ORG
-         GPE
-         LOC
-         PRODUCT
-         EVENT
-         LAW
-         LANGUAGE
-         NORP
-         DATE
-         GPE
-         TIME"""
-
-         # list to hold entity data
-         article_entity_list = []
-         # tokenize the text
-         doc = self.nlp(self.text)
-         # iterate over the entities in the document but only keep those which are meaningful
-         desired_entities = ['PERSON', 'ORG', 'GPE', 'LOC', 'PRODUCT', 'EVENT', 'LAW', 'LANGUAGE', 'NORP', 'DATE', 'GPE',
-                             'TIME']
-         self.label_dict = {}
-
-         # stop_words = stopwords.words('english')
-         for ent in doc.ents:
-
-             self.label_dict[ent] = ent.label_
-             if ent.label_ in desired_entities:
-                 # add the entity to the list
-                 entity_dict = {ent.label_: ent.text}
-
-                 article_entity_list.append(entity_dict)
-
-         # dedupe the entities but only on exact match of values as occasional it will assign an ORG entity to PER
-         deduplicated_entities = {frozenset(item.values()):
-                                      item for item in article_entity_list}.values()
-         # create a dataframe from the entities
-         for record in deduplicated_entities:
-             record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
-             self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
-
-         print(self.entity_df)
-         print('______________________')
-         return self.entity_df
-
-     def entity_json(self):
-         """Returns a JSON representation of an entity defined by the `entity_df` dataframe. The `entity_json` function
-         will return a JSON object with the following fields:
-         - entity: The type of the entity in the text
-         - description: The name of the entity as described in the input text
-         - fuzzy_match: A list of fuzzy matches for the entity. This is useful for disambiguating entities that are similar
-         """
-
-         self.json = json.loads(self.entity_df.to_json(orient='records'))
-         # self.json = json.dumps(self.json, indent=2)
-         return self.json
-
-     def get_wwww_json(self):
-         """This function returns a JSON representation of the `get_who_what_where_when` function. The `get_www_json`
-         function will return a JSON object with the following fields:
-         - entity: The type of the entity in the text
-         - description: The name of the entity as described in the input text
-         - fuzzy_match: A list of fuzzy matches for the entity. This is useful for disambiguating entities that are similar
-         """
-
-         # create a json object from the entity dataframe
-         who_dict = {"who": [ent for ent in self.entity_json() if ent['entity'] in ['ORG', 'PERSON']]}
-         where_dict = {"where": [ent for ent in self.entity_json() if ent['entity'] in ['GPE', 'LOC']]}
-         when_dict = {"when": [ent for ent in self.entity_json() if ent['entity'] in ['DATE', 'TIME']]}
-         what_dict = {
-             "what": [ent for ent in self.entity_json() if ent['entity'] in ['PRODUCT', 'EVENT', 'LAW', 'LANGUAGE',
-                                                                             'NORP']]}
-         article_wwww = [who_dict, where_dict, when_dict, what_dict]
-         self.wwww_json = json.dumps(article_wwww, indent=2)
-
-         return self.wwww_json
-
-
- news_article = st.text_input('Paste an Article here to be parsed')
- if 'parsed' not in st.session_state:
-     st.session_state['parsed'] = None
-     st.session_state['article'] = None
- if news_article:
-     st.write('Your news article is')
-     st.write(news_article)
-
-     if st.button('Get details'):
-
-         parsed = ExtractArticleEntities(news_article)
-         if parsed:
-             st.session_state['article'] = parsed.sorted_entity_df
-             st.session_state['parsed'] = True
-             st.session_state['json'] = parsed.www_json
-
-
- # if not st.session_state['article'].empty:
-
- def preprocessing(text):
-     """This function takes a text string and strips out all punctuation. It then normalizes the string to a
-     normalized form (using the "NFKD" normalization algorithm). Finally, it strips any special characters and
-     converts them to their unicode equivalents. """
-
-     # remove punctuation
-     if text:
-         text = text.translate(str.maketrans("", "", punctuation))
-         # normalize the text
-         stop_words = stopwords.words('english')
-
-         # Removing Stop words can cause losing context, instead stopwords can be utilized for knowledge
-         filtered_words = [word for word in text.split()]  # if word not in stop_words]
-
-         # This is very hacky. Need a better way of handling bad encoding
-         pre_text = " ".join(filtered_words)
-         pre_text = pre_text = pre_text.replace(' ', ' ')
-         pre_text = pre_text.replace('’', "'")
-         pre_text = pre_text.replace('“', '"')
-         pre_text = pre_text.replace('â€', '"')
-         pre_text = pre_text.replace('‘', "'")
-         pre_text = pre_text.replace('…', '...')
-         pre_text = pre_text.replace('–', '-')
-         pre_text = pre_text.replace("\x9d", '-')
-         # normalize the text
-         pre_text = unicodedata.normalize("NFKD", pre_text)
-         # strip punctuation again as some remains in first pass
-         pre_text = pre_text.translate(str.maketrans("", "", punctuation))
-
-     else:
-         pre_text = None
-     return pre_text
-
-
- def filter_wiki_df(df):
-     key_list = df.keys()[:2]
-     # df.to_csv('test.csv')
-     df = df[key_list]
-     # if len(df.keys()) == 2:
-     df['Match Check'] = np.where(df[df.keys()[0]] != df[df.keys()[1]], True, False)
-
-     df = df[df['Match Check'] != False]
-     df = df[key_list]
-     df = df.dropna(how='any').reset_index(drop=True)
-     # filtered_term = []
-     # for terms in df[df.keys()[0]]:
-     #     if isinstance(terms, str):
-     #         filtered_term.append(preprocessing(terms))
-     #     else:
-     #         filtered_term.append(None)
-     # df[df.keys()[0]] = filtered_term
-     df.rename(columns={key_list[0]: 'Attribute', key_list[1]: 'Value'}, inplace=True)
-
-     return df
-
-
- def get_entity_from_selectbox(related_entity):
-     entity = st.selectbox('Please select the term:', related_entity, key='foo')
-     if entity:
-         summary_entity = wikipedia.summary(entity, 3)
-         return summary_entity
-
-
- if st.session_state['parsed']:
-     df = st.session_state['article']
-     # left, right = st.columns(2)
-     # with left:
-     df_to_st = pd.DataFrame()
-
-     df_to_st['Name'] = df['description']
-     df_to_st['Is a type of'] = df['entity']
-     df_to_st['Related to'] = df['Matched Entity']
-     df_to_st['Is a type of'] = df_to_st['Is a type of'].replace({'PERSON': 'Person',
-                                                                  'ORG': 'Organization',
-                                                                  'GPE': 'Political Location',
-                                                                  'NORP': 'Political or Religious Groups',
-                                                                  'LOC': 'Non Political Location'})
-     gb = GridOptionsBuilder.from_dataframe(df_to_st)
-     gb.configure_pagination(paginationAutoPageSize=True)  # Add pagination
-     gb.configure_side_bar()  # Add a sidebar
-     gb.configure_selection('multiple', use_checkbox=True,
-                            groupSelectsChildren="Group checkbox select children")  # Enable multi-row selection
-     gridOptions = gb.build()
-
-     # st.dataframe(df_to_st)
-     grid_response = AgGrid(
-         df_to_st,
-         gridOptions=gridOptions,
-         data_return_mode='AS_INPUT',
-         update_mode='MODEL_CHANGED',
-         fit_columns_on_grid_load=False,
-         enable_enterprise_modules=True,
-         height=350,
-         width='100%',
-         reload_data=True
-     )
-
-     data = grid_response['data']
-     selected = grid_response['selected_rows']
-     selected_df = pd.DataFrame(selected)
-     if not selected_df.empty:
-         selected_entity = selected_df[['Name', 'Is a type of', 'Related to']]
-         st.dataframe(selected_entity)
-
-     # with right:
-     #     st.json(st.session_state['json'])
-
-     entities_list = df['description']
-     # selected_entity = st.selectbox('Which entity you want to choose?',
-     #                                entities_list)
-     if not selected_df.empty and selected_entity['Name'].any():
-
-         # lookup_url = rf'https://lookup.dbpedia.org/api/search?query={selected_entity}'
-         # r = requests.get(lookup_url)
-
-         selected_row = df.loc[df['description'] == selected_entity['Name'][0]]
-
-         entity_value = selected_row.values
-         # st.write('Entity is a ', entity_value[0][0])
-         label, name, fuzzy, related, related_match, _, _, _ = entity_value[0]
-         not_matched = [word for word in related if word not in related_match]
-         fuzzy = fuzzy[0] if len(fuzzy) > 0 else ''
-         related = related[0] if len(related) > 0 else ''
-         not_matched = not_matched[0] if len(not_matched) > 0 else related
-
-         related_entity_list = [name, fuzzy, not_matched]
-         related_entity = entity_value[0][1:]
-
-         google_query_term = ' '.join(related_entity_list)
-         # search()
-         try:
-             urls = [i for i in search(google_query_term, stop=10, pause=2.0, tld='com', lang='en', tbs='0',
-                                       user_agent=get_random_user_agent())]
-         except:
-             urls = []
-         # urls = search(google_query_term+' news latest', num_results=10)
-         st.session_state['wiki_summary'] = False
-         all_related_entity = []
-         for el in related_entity[:-2]:
-             if isinstance(el, str):
-                 all_related_entity.append(el)
-             elif isinstance(el, int):
-                 all_related_entity.append(str(el))
-             else:
-                 all_related_entity.extend(el)
-         # [ if type(el) == 'int' all_related_entity.extend(el) else all_related_entity.extend([el])for el in related_entity]
-         for entity in all_related_entity:
-             # try:
-             if True:
-                 if entity:
-                     entity = entity.replace(' ', '_')
-                     query = f'''
-                     SELECT ?name ?comment ?image
-                     WHERE {{ dbr:{entity} rdfs:label ?name.
-                              dbr:{entity} rdfs:comment ?comment.
-                              dbr:{entity} dbo:thumbnail ?image.
-
-                              FILTER (lang(?name) = 'en')
-                              FILTER (lang(?comment) = 'en')
-                     }}'''
-                     sparql.setQuery(query)
-
-                     sparql.setReturnFormat(JSON)
-                     qres = sparql.query().convert()
-                     if qres['results']['bindings']:
-                         result = qres['results']['bindings'][0]
-                         name, comment, image_url = result['name']['value'], result['comment']['value'], result['image'][
-                             'value']
-                         # urllib.request.urlretrieve(image_url, "img.jpg")
-
-                         # img = Image.open("/Users/anujkarn/NER/img.jpg")
-                         wiki_url = f'https://en.wikipedia.org/wiki/{entity}'
-
-                         st.write(name)
-                         # st.image(img)
-                         st.write(image_url)
-                         # try:
-                         response = requests.get(image_url)
-                         try:
-                             related_image = Image.open(BytesIO(response.content))
-                             st.image(related_image)
-                         except UnidentifiedImageError:
-                             st.write('Not able to get image')
-                             pass
-
-                         # except error as e:
-                         #     st.write(f'Image not parsed because of : {e}')
-                         summary_entity = comment
-                         wiki_knowledge_df = pd.read_html(wiki_url)[0]
-                         wiki_knowledge_df = filter_wiki_df(wiki_knowledge_df)
-
-                         st.write('Showing desciption for entity:', name)
-                         st.dataframe(wiki_knowledge_df)
-                         # if st.button('Want something else?'):
-                         #     summary_entity = get_entity_from_selectbox(all_related_entity)
-                         break
-                     # summary_entity = wikipedia.summary(entity, 3)
-                 else:
-                     summary_entity = None
-         if not summary_entity:
-             try:
-                 summary_entity = get_entity_from_selectbox(all_related_entity)
-                 # page = WikipediaPage(entity)
-
-             except wikipedia.exceptions.DisambiguationError:
-                 st.write('Disambiguation is there for term')
-
-         if selected_entity['Name'].any():
-             st.write(f'Summary for {selected_entity["Name"][0]}')
-             st.write(summary_entity)
+ import gradio as gr
  from transformers import pipeline
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
+ tokenizer = AutoTokenizer.from_pretrained("51la5/roberta-large-NER")
+ model = AutoModelForTokenClassification.from_pretrained("51la5/roberta-large-NER")
+ classifier = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
+
+ def get_ner(text):
+     output = classifier(text)
+     for elm in output:
+         elm['entity'] = elm['entity_group']
+     return {"text": text, "entities": output}
+
+
+ demo = gr.Interface(fn=get_ner,
+                     title="Atoqli nomlarni topish(NER)",
+                     inputs=gr.Textbox(lines=4, placeholder="Matinni kiriting!", label="Matn*"),
+                     outputs=gr.HighlightedText(label="Natija:")
+                     )
+
+ demo.launch()
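
The new app.py wraps the 51la5/roberta-large-NER checkpoint in a token-classification pipeline with grouped_entities=True, so each prediction carries an entity_group label that get_ner copies into the entity key expected by gr.HighlightedText. The Uzbek UI strings translate roughly as: "Atoqli nomlarni topish (NER)" = "Find named entities (NER)", "Matinni kiriting!" = "Enter the text!", "Matn*" = "Text*", "Natija:" = "Result:". A minimal sketch of calling get_ner outside the Gradio interface; the sample sentence comes from the flagged logs below, and the exact scores and offsets are illustrative:

sample = "Alya told Jasmine that Andrew could pay with cash.."
result = get_ner(sample)
for ent in result["entities"]:
    # Each grouped prediction includes entity_group, word, score, start and end;
    # get_ner mirrors entity_group into 'entity' for gr.HighlightedText.
    print(ent["entity"], ent["word"], round(float(ent["score"]), 3))
# Per the flagged outputs below, Alya, Jasmine and Andrew are each labelled PER.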
 
 
flagged/Output/tmp0n4o_4xk.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmp53qk_28w.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmp5geu50qc.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmpilj11fim.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmpir2yb79m.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmpl1qism4a.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmps39i7gz8.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/Output/tmpu4xpbklk.json ADDED
@@ -0,0 +1 @@
+ [["", null], ["Alya", "PER"], [" told ", null], ["Jasmine", "PER"], [" that ", null], ["Andrew", "PER"], [" could pay with cash..", null]]
flagged/log.csv ADDED
@@ -0,0 +1,9 @@
+ Input,Output,flag,username,timestamp
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmp5geu50qc.json,,,2022-12-27 12:42:53.593547
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmp53qk_28w.json,,,2022-12-27 12:42:58.863873
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmpir2yb79m.json,,,2022-12-27 12:42:59.338345
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmpu4xpbklk.json,,,2022-12-27 12:42:59.579644
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmpl1qism4a.json,,,2022-12-27 12:42:59.767400
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmp0n4o_4xk.json,,,2022-12-27 12:43:01.036641
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmpilj11fim.json,,,2022-12-27 12:43:02.413494
+ Alya told Jasmine that Andrew could pay with cash..,/home/pc-work/Documents/Git/HuggingFace/Uz-NER/flagged/Output/tmps39i7gz8.json,,,2022-12-27 12:43:02.875712
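
The flagged/ directory appears to be what Gradio's default flagging callback writes: log.csv records each flagged input alongside the path of the serialized HighlightedText output, and each JSON file stores that output as [text, label] segments, with label null for non-entity spans. A minimal sketch for reading one of the files added in this commit back into Python:

import json

# Any of the flagged/Output/*.json files in this commit has the same shape.
with open("flagged/Output/tmp0n4o_4xk.json") as f:
    segments = json.load(f)  # list of [text, label] pairs

# Keep only the labelled spans.
entities = [(text, label) for text, label in segments if label]
print(entities)  # [('Alya', 'PER'), ('Jasmine', 'PER'), ('Andrew', 'PER')]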
requirements.txt CHANGED
@@ -1,16 +1,3 @@
- https://huggingface.co/spacy/en_core_web_lg/resolve/main/en_core_web_lg-any-py3-none-any.whl
- fastapi==0.88.0
- fuzzywuzzy==0.18.0
- matplotlib==3.3.4
- newspaper3k==0.2.8
- nltk==3.6.1
- numpy==1.19.5
- pandas==1.2.4
- Pillow==9.3.0
- requests==2.25.1
- spacy
- SPARQLWrapper==2.0.0
- streamlit==1.11.1
- wikipedia==1.4.0
- streamlit-aggrid
- transformers==2.5.0
+ torch
+ transformers
+ gradio