Spaces:

jhatchett
/

Words2Wisdom

Sleeping

App Files Files Community

johaunh committed on Aug 31, 2023

Commit

6e88b42

1 Parent(s): 1644984

Add demo

Browse files

Files changed (2) hide show

README.md +6 -3
text2kg.py +52 -33

README.md CHANGED Viewed

@@ -23,6 +23,8 @@ infile
     optional flag to save intermediary GPT prompts/replies
 ```
 ## File structure
 ### [`data`](./data/)
@@ -34,6 +36,7 @@ Name | Description | Source
 ## References
-1. A case study in bootstrapping ontology graphs from textbooks (V. K. Chaudhri et al., 2021)
-2. Seq2KG: an end-to-end neural model for domain agnostic knowledge graph (not text graph) construction from text (M. Stewart & W. Liu, 2020)
-3. Language models are open knowledge graphs (C. Wang et al., 2020)

     optional flag to save intermediary GPT prompts/replies
 ```
+In my experiments, processing takes about 2 seconds per sentence per pipeline step.
 ## File structure
 ### [`data`](./data/)
 ## References
+1. A case study in bootstrapping ontology graphs from textbooks. (V. K. Chaudhri et al., 2021)
+2. Seq2KG: an end-to-end neural model for domain agnostic knowledge graph (not text graph) construction from text. (M. Stewart & W. Liu, 2020)
+3. Language models are open knowledge graphs. (C. Wang et al., 2020)
+4. ProofWriter: generating implications, proofs, and abductive statements over natural language. (O. Tafjord et al., 2020)

text2kg.py CHANGED Viewed

@@ -3,25 +3,63 @@ import os
 from argparse import ArgumentParser
 from datetime import date
 from nltk.tokenize import sent_tokenize
-from tqdm import tqdm
 from pipeline import Text2KG
 def parse_args():
     parser = ArgumentParser()
-    parser.add_argument("infile", type=str)
     parser.add_argument("--output", type=str, default="./output")
-    parser.add_argument("--cookbook", type=str, default="./recipes.json",
-                        help="path to cookbook")
-    parser.add_argument("--recipe", type=str, default=None,
                         help="name of recipe to use"),
-    parser.add_argument("--thoughts", action="store_true")
     return parser.parse_args()
 def save(name, item, args):
     os.makedirs(args.output, exist_ok=True)
@@ -35,33 +73,14 @@ def save(name, item, args):
 def main(args):
-    with open(args.cookbook) as f:
-        cookbook = json.load(f)
-    recipe = None
-    for item in cookbook:
-        if item["name"] == args.recipe:
-            recipe = item
-    if recipe is None:
-        raise ValueError(f"Recipe '{args.recipe}' does not exist in cookbook.")
-    pipe = Text2KG(recipe)
-    with open(args.infile) as f:
-        text = f.read()
-    sentences = sent_tokenize(text.replace('\n', ' '))
-    triplets = [pipe(s) for s in tqdm(sentences)]
-    output = [{"sentence": s, "triplets": t} for s, t in zip(sentences, triplets)]
-    save("triplets", output, args)
-    if args.thoughts:
-        save("thoughts", pipe.history, args)
-    return output
 if __name__ == "__main__":

 from argparse import ArgumentParser
 from datetime import date
+import gradio as gr
+import tqdm
 from nltk.tokenize import sent_tokenize
 from pipeline import Text2KG
+COOKBOOK = "./recipes.json"
 def parse_args():
     parser = ArgumentParser()
+    parser.add_argument("--infile", type=str)
     parser.add_argument("--output", type=str, default="./output")
+    # parser.add_argument("--cookbook", type=str, default=COOKBOOK,
+    #                     help="path to prompt recipes")
+    parser.add_argument("--recipe", type=str, choices=["Direct", "Traditional", "LogicBased"],
                         help="name of recipe to use"),
+    # parser.add_argument("--thoughts", action="store_true",
+    #                     help="whether to save GPT prompt/response chain")
+    parser.add_argument("--demo", action="store_true",
+                        help="execute Gradio app; overrides other arguments")
     return parser.parse_args()
def text2kg(recipe: str, text: str, progress=gr.Progress()):
    """Extract knowledge-graph triplets from ``text``, sentence by sentence.

    Args:
        recipe: Value of the ``"name"`` field of the cookbook entry to use.
        text: Raw input text; newlines are replaced by spaces before
            sentence tokenization.
        progress: Object exposing ``tqdm(iterable, desc=...)``. Defaults to
            ``gr.Progress()``.

    Returns:
        A list with one dict per sentence, each with the keys
        ``"sentence"`` and ``"triplets"`` (the pipeline's result for that
        sentence).

    Raises:
        ValueError: If no entry in the cookbook is named ``recipe``.
    """
    with open(COOKBOOK) as f:
        cookbook = json.load(f)

    # Index entries by name; on duplicate names the later entry wins, which
    # matches a full scan of the cookbook without an early exit.
    recipes_by_name = {item["name"]: item for item in cookbook}
    if recipe not in recipes_by_name:
        # Without this guard an unknown (or missing) recipe name would only
        # surface later as an UnboundLocalError.
        raise ValueError(f"Recipe '{recipe}' does not exist in cookbook.")

    pipe = Text2KG(recipes_by_name[recipe])

    sentences = sent_tokenize(text.replace("\n", " "))
    triplets = [pipe(s) for s in progress.tqdm(sentences, desc="Processing")]
    output = [{"sentence": s, "triplets": t} for s, t in zip(sentences, triplets)]
    return output
class App:
    """Gradio front end for ``text2kg``.

    Constructing an instance builds the interface (a recipe selector and a
    text box as inputs, a JSON view as output) and launches it with
    queueing enabled.
    """

    def __init__(self):
        # Components are created in the same order as the interface lists them.
        recipe_picker = gr.Radio(["Direct", "Traditional", "LogicBased"], label="Recipe")
        text_input = gr.Textbox(lines=2, placeholder="Text Here...", label="Input Text")
        triplet_view = gr.JSON(label="KG Triplets")

        interface = gr.Interface(
            fn=text2kg,
            inputs=[recipe_picker, text_input],
            outputs=triplet_view,
        )
        queued = interface.queue(concurrency_count=10)
        queued.launch()
 def save(name, item, args):
     os.makedirs(args.output, exist_ok=True)
 def main(args):
+    if args.demo:
+        App()
+    else:
+        with open(args.infile) as f:
+            text = f.read()
+        output = text2kg(recipe=args.recipe, text=text, progress=tqdm)
+        save("triplets", output, args)
 if __name__ == "__main__":