Chris Finlayson
committed on
Commit
·
bef0c15
1
Parent(s):
2f9755f
Update to deps
Browse files
- app.py +11 -9
- graph.png +0 -0
- requirements.txt +11 -5
app.py
CHANGED
@@ -92,15 +92,17 @@ def get_relation(sent): # Define a function to get the relation from a sentence
|
|
92 |
def execute_process(file, edge): # Define a function to execute the process
|
93 |
candidate_sentences = read_pdf(file) # Read the PDF file
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
104 |
unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None # Get the unique edges
|
105 |
edge_counts = kg_df['edge'].value_counts() # Get the counts of the edges
|
106 |
unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values}) # Create a DataFrame of the unique edges and their counts
|
|
|
92 |
def execute_process(file, edge): # Define a function to execute the process
|
93 |
candidate_sentences = read_pdf(file) # Read the PDF file
|
94 |
|
95 |
+
if 'kg_df' not in globals() or 'file' not in globals() or file != globals()['file']: # Only execute if kg_df is not defined or if the file is not consistent with the persisted global
|
96 |
+
entity_pairs = [] # Initialize an empty list for the entity pairs
|
97 |
+
for i in tqdm(candidate_sentences["sentence"]): # For each sentence in the DataFrame
|
98 |
+
entity_pairs.append(get_entities(i)) # Append the entities to the list
|
99 |
+
relations = [get_relation(i) for i in tqdm(candidate_sentences['sentence'])] # Get the relations for each sentence
|
100 |
+
|
101 |
+
source = [i[0] for i in entity_pairs] # Extract the subjects
|
102 |
+
target = [i[1] for i in entity_pairs] # Extract the objects
|
103 |
+
globals()['kg_df'] = pd.DataFrame({'source':source, 'target':target, 'edge':relations}) # Create a DataFrame of the sources, targets, and edges
|
104 |
+
globals()['file'] = file # Persist the file into a global variable
|
105 |
+
|
106 |
unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None # Get the unique edges
|
107 |
edge_counts = kg_df['edge'].value_counts() # Get the counts of the edges
|
108 |
unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values}) # Create a DataFrame of the unique edges and their counts
|
graph.png
CHANGED
![]() |
![]() |
requirements.txt
CHANGED
@@ -1,5 +1,11 @@
|
|
1 |
-
gradio
|
2 |
-
PyMuPDF
|
3 |
-
transformers
|
4 |
-
plotly
|
5 |
-
spacy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==1.7.7
|
2 |
+
PyMuPDF==1.18.14
|
3 |
+
transformers==4.6.1
|
4 |
+
plotly==4.14.3
|
5 |
+
spacy==3.0.6
|
6 |
+
beautifulsoup4==4.9.3
|
7 |
+
pandas==1.2.4
|
8 |
+
requests==2.25.1
|
9 |
+
networkx==2.5.1
|
10 |
+
matplotlib==3.4.2
|
11 |
+
tqdm==4.61.1
|