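"""Streamlit app for Words2Wisdom: generate knowledge graphs from unstructured text using GPT."""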
import io
import os
import sys
from zipfile import ZipFile
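# Make the repository root importable so the `words2wisdom` package resolves
# when this app is run from a subdirectory.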
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
import pandas as pd
import streamlit as st
import streamlit.components.v1 as st_components
from pandas import DataFrame
from pyvis.network import Network
from words2wisdom import CONFIG_DIR
from words2wisdom.config import Config as W2WConfig
from words2wisdom.pipeline import Pipeline
def create_graph(df: DataFrame):
    """Render a directed pyvis graph of (subject, relation, object) triples."""
    graph = Network(directed=True)
    # Nodes are the union of all subjects and objects; pyvis expects plain lists.
    entities = pd.concat([df.subject, df.object]).unique().tolist()
    graph.add_nodes(entities, label=entities, title=entities)
    # One edge per unique triple, labeled with its relation.
    df_iterable = (
        df.drop_duplicates(subset=["subject", "relation", "object"])
        .iterrows()
    )
    for _, row in df_iterable:
        graph.add_edge(row.subject, row.object, label=row.relation)
    graph.save_graph("/tmp/graph.html")
    with open("/tmp/graph.html") as html_file:
        return st_components.html(html_file.read(), height=625)
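# Cache the zipped bytes so Streamlit reruns don't rebuild the same archive.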
@st.cache_data
def create_zip_bytes(file_contents):
    """Zip a {filename: content} mapping in memory and return the raw bytes."""
    buffer = io.BytesIO()
    with ZipFile(buffer, "w") as zip_file:
        for filename, content in file_contents.items():
            zip_file.writestr(filename, content)
    return buffer.getvalue()
st.set_page_config(page_title="Words2Wisdom",
                   page_icon="📖")
st.title("📖 Words2Wisdom")
st.write("Generate knowledge graphs from unstructured text using GPT.")
# parameters
with st.sidebar:
st.title("Parameters")
st.write("The API Key is required. Feel free to customize the other parameters, if you'd like!")
openai_api_key = st.text_input(
label="π **OpenAI API Key**",
type="password",
help="Learn how to get your own [here](https://platform.openai.com/docs/api-reference/authentication)."
)
st.divider()
    with st.expander("🎰 **Pipeline parameters**"):
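        # Prettify option names, e.g. "clause_deconstruction" -> "Clause Deconstruction".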
formatter = lambda x: x.replace("_", " ").title()
        words_per_batch = st.number_input(
            label="Words per Batch",
            min_value=0,
            max_value=200,
            value=150,
            help="Group paragraphs into batches of at least N words, where possible."
        )
preprocess = st.selectbox(
label="Preprocess",
options=("None", "clause_deconstruction"),
index=1,
format_func=formatter,
help="Method for text simplification."
)
        extraction = st.selectbox(
            label="Extraction",
            options=("triplet_extraction",),
            index=0,
            format_func=formatter,
            help="Method for KG generation."
        )
    with st.expander("🤖 **LLM parameters**"):
model = st.selectbox(
label="Model",
options=("gpt-3.5-turbo",),
index=0,
help="ID of the model to use."
)
temperature = st.slider(
label="Temperature",
min_value=0.0,
max_value=2.0,
value=1.0,
step=0.1,
format="%.1f",
help=(
"What sampling temperature to use."
" Higher values will make the output more random;"
" lower values will make it more focused/deterministic."
)
)
# API Key warning
if not openai_api_key.startswith("sk-"):
    st.warning("Please enter your OpenAI API key.", icon="⚠️")
# tabs
tab1, tab2 = st.tabs(["Input Text", "File Upload"])
# text input tab
with tab1:
    text1 = st.text_area(label="Enter text:")
    submitted1 = st.button(label="Generate!", use_container_width=True)
# file upload tab
with tab2:
    file2 = st.file_uploader(label="Upload text file:", type="txt")
    submitted2 = st.button(key="filebtn", label="Generate!", use_container_width=True)
# build the Words2Wisdom config: defaults overridden by the sidebar parameters
w2w_config = W2WConfig.read_ini(os.path.join(CONFIG_DIR, "default_config.ini"))
w2w_config.pipeline = {
"words_per_batch": words_per_batch,
"preprocess": [] if preprocess == "None" else [preprocess],
"extraction": extraction
}
w2w_config.llm["openai_api_key"] = openai_api_key
# main logic
if (submitted1 or submitted2) and openai_api_key.startswith("sk-"):
    if submitted2 and file2 is None:
        st.warning("Please upload a text file first.", icon="⚠️")
        st.stop()
    with st.status("Generating knowledge graph..."):
        st.write("Initializing pipeline...")
        pipe = Pipeline(w2w_config)
        st.write("Executing pipeline...")
        # Use the pasted text or the uploaded file, whichever was submitted.
        text = text1 if submitted1 else file2.read().decode()
        text_batches, knowledge_graph = pipe.run(text)
        st.write("Complete.")
st.divider()
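    # Render the interactive pyvis graph inline.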
kg_viz = create_graph(knowledge_graph)
    st.error("**Warning:** The page will refresh when you download the data!", icon="🚨")
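    # Bundle the text batches, KG triples, and resolved config into one zip for download.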
download = st.download_button(
label="Download data",
data=create_zip_bytes({
"text_batches.csv": (
DataFrame(text_batches, columns=["text"])
.to_csv(index_label="batch_id")
),
"kg.csv": knowledge_graph.to_csv(index=False),
"config.ini": pipe.serialize()
}),
file_name="output.zip",
use_container_width=True,
type="primary"
)