Spaces:
Sleeping
Sleeping
Farhan1572
committed on
Commit
•
c2e0593
1
Parent(s):
e4bdfb9
Update app.py
Browse files
app.py
CHANGED
@@ -1,31 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import spacy
|
2 |
import gradio as gr
|
3 |
from spacy import displacy
|
4 |
from pdfminer.high_level import extract_text
|
5 |
|
|
|
6 |
nlp = spacy.load("en_cv_info_extr")
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
colors[label] = "linear-gradient(90deg, #aa9cfc, #fc9ce7)"
|
11 |
-
|
12 |
-
options = {"ents": list(nlp.get_pipe('ner').labels), "colors": colors}
|
13 |
|
14 |
def resume_ner(file):
|
|
|
15 |
resume = extract_text(file.name)
|
|
|
|
|
16 |
doc = nlp(resume)
|
|
|
|
|
17 |
html = displacy.render(doc, style="ent", page=True, options=options)
|
|
|
|
|
18 |
html = (
|
19 |
-
"<div style='max-width:100%; max-height:500px; overflow:auto'>"
|
20 |
+ html
|
21 |
+ "</div>"
|
22 |
)
|
|
|
23 |
return html
|
24 |
|
|
|
25 |
demo = gr.Interface(
|
26 |
resume_ner,
|
27 |
gr.File(file_types=[".pdf"]),
|
28 |
["html"],
|
29 |
)
|
30 |
|
31 |
-
|
|
|
|
1 |
+
# import spacy
|
2 |
+
# import gradio as gr
|
3 |
+
# from spacy import displacy
|
4 |
+
# from pdfminer.high_level import extract_text
|
5 |
+
|
6 |
+
# nlp = spacy.load("en_cv_info_extr")
|
7 |
+
|
8 |
+
# colors = {}
|
9 |
+
# for label in nlp.get_pipe('ner').labels:
|
10 |
+
# colors[label] = "linear-gradient(90deg, #aa9cfc, #fc9ce7)"
|
11 |
+
|
12 |
+
# options = {"ents": list(nlp.get_pipe('ner').labels), "colors": colors}
|
13 |
+
|
14 |
+
# def resume_ner(file):
|
15 |
+
# resume = extract_text(file.name)
|
16 |
+
# doc = nlp(resume)
|
17 |
+
# html = displacy.render(doc, style="ent", page=True, options=options)
|
18 |
+
# html = (
|
19 |
+
# "<div style='max-width:100%; max-height:500px; overflow:auto'>"
|
20 |
+
# + html
|
21 |
+
# + "</div>"
|
22 |
+
# )
|
23 |
+
# return html
|
24 |
+
|
25 |
+
# demo = gr.Interface(
|
26 |
+
# resume_ner,
|
27 |
+
# gr.File(file_types=[".pdf"]),
|
28 |
+
# ["html"],
|
29 |
+
# )
|
30 |
+
|
31 |
+
# demo.launch()
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
import spacy
|
40 |
import gradio as gr
|
41 |
from spacy import displacy
|
42 |
from pdfminer.high_level import extract_text
|
43 |
|
44 |
+
# Load the custom NER model
|
45 |
nlp = spacy.load("en_cv_info_extr")
|
46 |
|
47 |
+
# Define the options for displacy.render with no custom colors (displaCy falls back to its default label colors)
|
48 |
+
options = {"ents": list(nlp.get_pipe('ner').labels), "colors": {}}
|
|
|
|
|
|
|
49 |
|
50 |
def resume_ner(file):
|
51 |
+
# Extract text from the PDF
|
52 |
resume = extract_text(file.name)
|
53 |
+
|
54 |
+
# Process the text with the NLP model
|
55 |
doc = nlp(resume)
|
56 |
+
|
57 |
+
# Render the entities as HTML using displaCy's default colors (no custom color overrides)
|
58 |
html = displacy.render(doc, style="ent", page=True, options=options)
|
59 |
+
|
60 |
+
# Wrap the HTML in a div for better display
|
61 |
html = (
|
62 |
+
"<div style='max-width:100%; max-height:500px; overflow:auto; font-family: Arial, sans-serif;'>"
|
63 |
+ html
|
64 |
+ "</div>"
|
65 |
)
|
66 |
+
|
67 |
return html
|
68 |
|
69 |
+
# Create the Gradio interface
|
70 |
demo = gr.Interface(
|
71 |
resume_ner,
|
72 |
gr.File(file_types=[".pdf"]),
|
73 |
["html"],
|
74 |
)
|
75 |
|
76 |
+
# Launch the Gradio app
|
77 |
+
demo.launch()
|