johaunh committed
Commit bf917cb · Parents (2): c72ce99, 90c8aa5

Merge branch 'main' of https://huggingface.co/spaces/jhatchett/Text2KG

Files changed (2):
  1. main.py +16 -0
  2. requirements.txt +1 -1
main.py CHANGED
@@ -3,6 +3,7 @@ import re
 import secrets
 import string
 import yaml
+import yaml
 from datetime import datetime
 from zipfile import ZipFile
 
@@ -10,6 +11,7 @@ import gradio as gr
 import nltk
 import pandas as pd
 from langchain.embeddings import OpenAIEmbeddings
+from langchain.embeddings import OpenAIEmbeddings
 from langchain.chains import SimpleSequentialChain
 from langchain.chat_models import ChatOpenAI
 from nltk.tokenize import sent_tokenize
@@ -139,6 +141,14 @@ def extract_knowledge_graph(api_key: str, batch_size: int, modules: list[str], t
 
     steps = []
 
+    for module in modules:
+        m = module.lower().replace(' ', '_')
+        steps.append(m)
+
+    if (len(steps) == 0) or (steps[-1] != "triplet_extraction"):
+        steps.append("triplet_extraction")
+    steps = []
+
     for module in modules:
         m = module.lower().replace(' ', '_')
         steps.append(m)
@@ -148,10 +158,13 @@ def extract_knowledge_graph(api_key: str, batch_size: int, modules: list[str], t
 
     pipeline.init(steps)
 
+    # split text into batches
     # split text into batches
     sentences = sent_tokenize(text)
     batches = [" ".join(sentences[i:i+batch_size])
                for i in range(0, len(sentences), batch_size)]
+    batches = [" ".join(sentences[i:i+batch_size])
+               for i in range(0, len(sentences), batch_size)]
 
     # create KG
     knowledge_graph = []
@@ -179,6 +192,9 @@ def extract_knowledge_graph(api_key: str, batch_size: int, modules: list[str], t
         "_timestamp": now,
         "batch_size": batch_size,
         "modules": steps
+        "_timestamp": now,
+        "batch_size": batch_size,
+        "modules": steps
     }
 
     # unique identifier for local saving
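
The hunk at +141 normalizes the UI's module names into pipeline step identifiers, with a guard so that triplet extraction always runs last. A minimal standalone sketch of that logic, assuming `modules` arrives as display strings (the example name "Entity Extraction" is hypothetical; the lowercase/underscore normalization and the "triplet_extraction" identifier come from the diff):

    def build_steps(modules: list[str]) -> list[str]:
        """Normalize display names into pipeline step identifiers."""
        steps = []
        for module in modules:
            # e.g. "Entity Extraction" -> "entity_extraction" (hypothetical name)
            m = module.lower().replace(' ', '_')
            steps.append(m)

        # ensure the pipeline always ends with triplet extraction
        if (len(steps) == 0) or (steps[-1] != "triplet_extraction"):
            steps.append("triplet_extraction")
        return steps

    print(build_steps(["Entity Extraction"]))
    # ['entity_extraction', 'triplet_extraction']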
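The batching hunk groups `sent_tokenize` output into fixed-size runs of sentences, so each chain call sees `batch_size` sentences at a time. A runnable sketch of just that step, assuming NLTK's punkt models are available (the download call is setup for this sketch, not a line from main.py):

    import nltk
    from nltk.tokenize import sent_tokenize

    nltk.download('punkt')  # one-time download of the sentence tokenizer models

    def batch_text(text: str, batch_size: int) -> list[str]:
        """Group consecutive sentences into batches of batch_size sentences."""
        sentences = sent_tokenize(text)
        return [" ".join(sentences[i:i+batch_size])
                for i in range(0, len(sentences), batch_size)]

    print(batch_text("It rains. It pours. It stops. The sun returns. Birds sing.", 2))
    # ['It rains. It pours.', 'It stops. The sun returns.', 'Birds sing.']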
requirements.txt CHANGED
@@ -1,8 +1,8 @@
 gradio==4.2.0
 langchain==0.0.335
-matplotlib==3.7.1
 nltk==3.7
 openai==0.27.4
 pandas==2.0.3
 PyYAML==6.0
+scikit-learn==1.2.2
 tqdm==4.65.0
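
Two of the pinned PyPI names differ from their import names (PyYAML imports as yaml, scikit-learn as sklearn). An illustrative check, not part of the repository, that the updated pins resolve in the active environment:

    # verify the pinned stack imports under the correct module names
    import gradio, langchain, nltk, openai, pandas, sklearn, tqdm, yaml

    for mod in (gradio, langchain, nltk, openai, pandas, sklearn, tqdm, yaml):
        print(mod.__name__, getattr(mod, "__version__", "unknown"))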