johaunh committed on
Commit
6e88b42
·
1 Parent(s): 1644984
Files changed (2) hide show
  1. README.md +6 -3
  2. text2kg.py +52 -33
README.md CHANGED
@@ -23,6 +23,8 @@ infile
23
  optional flag to save intermediary GPT prompts/replies
24
  ```
25
 
 
 
26
  ## File structure
27
 
28
  ### [`data`](./data/)
@@ -34,6 +36,7 @@ Name | Description | Source
34
 
35
  ## References
36
 
37
- 1. A case study in bootstrapping ontology graphs from textbooks (V. K. Chaudhri et al., 2021)
38
- 2. Seq2KG: an end-to-end neural model for domain agnostic knowledge graph (not text graph) construction from text (M. Stewart & W. Liu, 2020)
39
- 3. Language models are open knowledge graphs (C. Wang et al., 2020)
 
 
23
  optional flag to save intermediary GPT prompts/replies
24
  ```
25
 
26
+ In my experiments, processing takes about 2 seconds per sentence per pipeline step.
27
+
28
  ## File structure
29
 
30
  ### [`data`](./data/)
 
36
 
37
  ## References
38
 
39
+ 1. A case study in bootstrapping ontology graphs from textbooks. (V. K. Chaudhri et al., 2021)
40
+ 2. Seq2KG: an end-to-end neural model for domain agnostic knowledge graph (not text graph) construction from text. (M. Stewart & W. Liu, 2020)
41
+ 3. Language models are open knowledge graphs. (C. Wang et al., 2020)
42
+ 4. ProofWriter: generating implications, proofs, and abductive statements over natural language. (O. Tafjord et al., 2020)
text2kg.py CHANGED
@@ -3,25 +3,63 @@ import os
3
  from argparse import ArgumentParser
4
  from datetime import date
5
 
 
 
6
  from nltk.tokenize import sent_tokenize
7
- from tqdm import tqdm
8
 
9
  from pipeline import Text2KG
10
 
11
 
 
 
 
12
  def parse_args():
13
  parser = ArgumentParser()
14
- parser.add_argument("infile", type=str)
15
  parser.add_argument("--output", type=str, default="./output")
16
- parser.add_argument("--cookbook", type=str, default="./recipes.json",
17
- help="path to cookbook")
18
- parser.add_argument("--recipe", type=str, default=None,
19
  help="name of recipe to use"),
20
- parser.add_argument("--thoughts", action="store_true")
 
 
 
21
 
22
  return parser.parse_args()
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def save(name, item, args):
26
 
27
  os.makedirs(args.output, exist_ok=True)
@@ -35,33 +73,14 @@ def save(name, item, args):
35
 
36
 
37
  def main(args):
38
- with open(args.cookbook) as f:
39
- cookbook = json.load(f)
40
-
41
- recipe = None
42
- for item in cookbook:
43
- if item["name"] == args.recipe:
44
- recipe = item
45
- if recipe is None:
46
- raise ValueError(f"Recipe '{args.recipe}' does not exist in cookbook.")
47
-
48
- pipe = Text2KG(recipe)
49
-
50
- with open(args.infile) as f:
51
- text = f.read()
52
-
53
- sentences = sent_tokenize(text.replace('\n', ' '))
54
-
55
- triplets = [pipe(s) for s in tqdm(sentences)]
56
-
57
- output = [{"sentence": s, "triplets": t} for s, t in zip(sentences, triplets)]
58
-
59
- save("triplets", output, args)
60
-
61
- if args.thoughts:
62
- save("thoughts", pipe.history, args)
63
-
64
- return output
65
 
66
 
67
  if __name__ == "__main__":
 
3
  from argparse import ArgumentParser
4
  from datetime import date
5
 
6
+ import gradio as gr
7
+ import tqdm
8
  from nltk.tokenize import sent_tokenize
 
9
 
10
  from pipeline import Text2KG
11
 
12
 
13
+ COOKBOOK = "./recipes.json"
14
+
15
+
16
  def parse_args():
17
  parser = ArgumentParser()
18
+ parser.add_argument("--infile", type=str)
19
  parser.add_argument("--output", type=str, default="./output")
20
+ # parser.add_argument("--cookbook", type=str, default=COOKBOOK,
21
+ # help="path to prompt recipes")
22
+ parser.add_argument("--recipe", type=str, choices=["Direct", "Traditional", "LogicBased"],
23
  help="name of recipe to use"),
24
+ # parser.add_argument("--thoughts", action="store_true",
25
+ # help="whether to save GPT prompt/response chain")
26
+ parser.add_argument("--demo", action="store_true",
27
+ help="execute Gradio app; overrides other arguments")
28
 
29
  return parser.parse_args()
30
 
31
 
32
+ def text2kg(recipe: str, text: str, progress=gr.Progress()):
33
+ with open(COOKBOOK) as f:
34
+ cookbook = json.load(f)
35
+
36
+ for item in cookbook:
37
+ if item["name"] == recipe:
38
+ prompts = item
39
+
40
+ pipe = Text2KG(prompts)
41
+ sentences = sent_tokenize(text.replace("\n", " "))
42
+
43
+ triplets = [pipe(s) for s in progress.tqdm(sentences, desc="Processing")]
44
+ output = [{"sentence": s, "triplets": t} for s, t in zip(sentences, triplets)]
45
+
46
+ return output
47
+
48
+
49
+ class App:
50
+ def __init__(self):
51
+
52
+ demo = gr.Interface(
53
+ fn=text2kg,
54
+ inputs=[
55
+ gr.Radio(["Direct", "Traditional", "LogicBased"], label="Recipe"),
56
+ gr.Textbox(lines=2, placeholder="Text Here...", label="Input Text")
57
+ ],
58
+ outputs=gr.JSON(label="KG Triplets"),
59
+ )
60
+ demo.queue(concurrency_count=10).launch()
61
+
62
+
63
  def save(name, item, args):
64
 
65
  os.makedirs(args.output, exist_ok=True)
 
73
 
74
 
75
  def main(args):
76
+ if args.demo:
77
+ App()
78
+ else:
79
+ with open(args.infile) as f:
80
+ text = f.read()
81
+
82
+ output = text2kg(recipe=args.recipe, text=text, progress=tqdm)
83
+ save("triplets", output, args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  if __name__ == "__main__":