Spaces:

hacpdsae2023
/

shortstorynetwork

Runtime error

File size: 3,423 Bytes

3503538
 
101b319
1154940
3503538
 
b4b1fd9
2df8c8c
9884e92
1bce5cd
090671b
 
 
 
 
 
6517c43
da97d65
1fdb11f
b6c7b40
 
090671b
6517c43
b4b1fd9
090671b
 
6517c43
090671b
f0ca479
755eb55
f0ca479
090671b
6517c43
 
e0b660a
665a998
 
090671b
f0ca479
1bce5cd
d1cba40
090671b
 
 
2df8c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0ca479
2df8c8c
 
 
 
 
 
 
 
 
 
 
02a8d5f
2df8c8c

import streamlit as st
from datasets import load_dataset
import networkx as nx
import numpy as np
# Load the TinyStories corpus from the Hugging Face hub.
dataset = load_dataset("roneneldan/TinyStories")

# Show one example story (record 10 of the training split) at the top of the page.
sample_story = dataset['train'][10]['text']
st.write(sample_story)

# Similarity cut-off chosen by the user; read further down when the graph is built.
threshhold = st.slider('Threshhold', min_value=0.0, max_value=1.0, step=0.1)

#-------------------------------------------------------------
#-------------------------------------------------------------

from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

# The first ten records of the training split. The records are kept as the
# dataset rows themselves (each exposes its story under the 'text' key) so
# that later code can keep reading sentences[i]['text'].
sentences = [dataset['train'][ii] for ii in range(10)]

# Compute one embedding per story, passing the plain text strings to the
# encoder rather than the row dicts.
embeddings = model.encode(
    [record['text'] for record in sentences],
    convert_to_tensor=True,
)

# Pairwise cosine similarities between all stories (square matrix).
cosine_scores = util.cos_sim(embeddings, embeddings)

# Bring the scores to host memory before converting: Tensor.numpy() only
# works for CPU tensors, and the model may have run on an accelerator.
scores = cosine_scores.cpu().numpy().astype(np.float64)

# Adjacency weights: mirror the strict lower triangle so the matrix is
# exactly symmetric and the diagonal stays zero. A story's similarity with
# itself (~1.0) must not count as a connection.
lower = np.tril(scores, k=-1)
A = lower + lower.T

# Connect two stories when their similarity exceeds the slider threshold.
# Thresholding A (not the raw score matrix) keeps the graph free of the
# self-loops that the ~1.0 diagonal would otherwise add to every node.
G = nx.from_numpy_array(A > threshhold)


#-------------------------------------------------------------
#-------------------------------------------------------------
# ego_graph.py
# An example of how to plot a node's ego network 
# (egonet). This indirectly showcases slightly more involved 
# interoperability between streamlit-agraph and networkx.

# An egonet can be created from (almost) any network (graph),
# and exemplifies the concept of a subnetwork (subgraph):
# A node's egonet is the (sub)network comprised of the focal node 
# and all the nodes to which it is adjacent. The edges included
# in the egonet are those whose endpoints are both among the
# aforementioned nodes.

# Use the following command to launch the app
# streamlit run <path-to-script>.py

# standard library dependencies
from operator import itemgetter

# external dependencies
import networkx as nx
from streamlit_agraph import agraph, Node, Edge, Config

# The graph G built above from the story similarities is visualised here.

# Find the node with the largest degree; this node's egonet is the focus of
# the visualisation. G.degree yields (node, degree) pairs, and max() returns
# the first pair with the highest degree.
most_connected_node = max(G.degree, key=itemgetter(1))
degree = most_connected_node[1]

# Create egonet for the focal node: the focal node, its neighbours, and the
# edges among them.
hub_ego = nx.ego_graph(G, most_connected_node[0])

# Build the membership set once instead of querying the node view per edge.
ego_nodes = set(hub_ego.nodes)

# Now create the equivalent Node and Edge lists for streamlit-agraph.
# Each node carries its story text as the title and a short 'node_<i>' label.
nodes = [
    Node(title=str(sentences[i]['text']), id=i, label=f'node_{i}', size=20)
    for i in hub_ego.nodes
]
# Keep only the edges of G whose two endpoints both belong to the egonet.
edges = [
    Edge(source=i, target=j, type="CURVE_SMOOTH")
    for (i, j) in G.edges
    if i in ego_nodes and j in ego_nodes
]

# NOTE(review): G is an undirected graph, yet directed=True is requested
# here — confirm whether directed rendering is actually wanted.
config = Config(
    width=500,
    height=500,
    directed=True,
    nodeHighlightBehavior=False,
    highlightColor="#F7A7A6",  # or "blue"
    collapsible=False,
    node={'labelProperty': 'label'},
    # **kwargs e.g. node_size=1000 or node_color="blue"
)

# Render the egonet in the Streamlit page.
return_value = agraph(nodes=nodes,
                      edges=edges,
                      config=config)