# Hugging Face Space: varun500/knowledge_graph
# Space status: Runtime error
# File size: 3,687 bytes

import streamlit as st
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans


def _draw_similarity_graph(graph, cluster_colors, sentence_color="lightcoral"):
    """Draw *graph* on a fresh Matplotlib figure and return that figure.

    Nodes that carry a ``"cluster"`` attribute are coloured with
    ``cluster_colors[cluster]``; nodes without one (the sentence node) are
    drawn in *sentence_color*. Edges are labelled with their ``"weight"``
    attribute formatted to two decimals.

    Args:
        graph: ``networkx.Graph`` whose edges have a numeric ``"weight"``.
        cluster_colors: Sequence of colour names indexed by cluster label.
        sentence_color: Colour used for nodes that have no cluster label.

    Returns:
        The ``matplotlib.figure.Figure`` the graph was drawn on.
    """
    # An explicit figure/axes pair avoids relying on pyplot's global state.
    fig, ax = plt.subplots()
    pos = nx.spring_layout(graph)

    node_colors = []
    for node in graph.nodes:
        cluster = graph.nodes[node].get("cluster")
        if cluster is None:
            node_colors.append(sentence_color)
        else:
            node_colors.append(cluster_colors[cluster])

    nx.draw_networkx_nodes(
        graph, pos, node_color=node_colors, node_size=800, ax=ax
    )
    nx.draw_networkx_edges(
        graph, pos, edge_color="gray", width=1, alpha=0.7, ax=ax
    )
    nx.draw_networkx_labels(graph, pos, font_size=10, font_weight="bold", ax=ax)

    # Edge labels show the cosine similarity stored as the edge weight.
    edge_labels = {
        edge: f"{weight:.2f}"
        for edge, weight in nx.get_edge_attributes(graph, "weight").items()
    }
    nx.draw_networkx_edge_labels(
        graph, pos, edge_labels=edge_labels, font_size=8, ax=ax
    )

    ax.set_title("Financial Context and Keywords")
    ax.axis("off")
    return fig


def main():
    """Render the Streamlit page.

    Reads a sentence from a text area, embeds it together with a fixed list
    of financial keywords, computes the cosine similarity between the
    sentence and each keyword, clusters the keyword embeddings with KMeans,
    and displays the result as a graph plus a table of similarity scores.
    The graph is also written to ``financial_graph.png``.
    """
    st.title("Financial Graph App")
    st.write("Enter a financial sentence and see its similarity to predefined keywords.")

    # User input
    financial_sentence = st.text_area("Enter the financial sentence", value="")

    # Nothing to do until the user has typed a non-blank sentence.
    if not financial_sentence.strip():
        return

    # Predefined keywords
    keywords = [
        "Finance",
        "Fiscal",
        "Quarterly results",
        "Revenue",
        "Profit",
    ]

    # One colour per keyword cluster; its length also fixes the cluster count.
    cluster_colors = ["lightblue", "lightgreen", "lightyellow"]

    # Load the pre-trained Sentence-Transformers model.
    # NOTE(review): the model is re-instantiated on every call to main();
    # consider caching it (e.g. st.cache_resource) if the Streamlit version allows.
    model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

    # Embed the sentence and the keywords.
    sentence_embedding = model.encode([financial_sentence])
    keyword_embeddings = model.encode(keywords)

    # Cosine similarity between the sentence and each keyword (row 0 of the
    # 1 x len(keywords) result).
    similarity_scores = cosine_similarity(sentence_embedding, keyword_embeddings)[0]

    # Build a star graph: the sentence in the middle, one node per keyword,
    # and one edge per keyword weighted by its similarity score.
    G = nx.Graph()
    G.add_node(financial_sentence, embedding=sentence_embedding[0])
    for keyword, embedding, similarity in zip(
        keywords, keyword_embeddings, similarity_scores
    ):
        G.add_node(keyword, embedding=embedding, similarity=similarity)
        G.add_edge(financial_sentence, keyword, weight=similarity)

    # KMeans clustering of the keyword embeddings. random_state makes the
    # cluster assignment (and therefore the colours) reproducible.
    n_clusters = min(len(cluster_colors), len(keywords))
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    cluster_labels = kmeans.fit_predict(keyword_embeddings)

    # Labels are produced for the keywords only, in keyword order, so they
    # must be paired with `keywords` rather than with all graph nodes.
    for keyword, cluster_label in zip(keywords, cluster_labels):
        G.nodes[keyword]["cluster"] = int(cluster_label)

    fig = _draw_similarity_graph(G, cluster_colors)

    # Save the graph as an image
    fig.savefig("financial_graph.png")

    # Show the graph, then release the figure so it is not kept in pyplot's
    # figure registry after rendering.
    st.pyplot(fig)
    plt.close(fig)

    # Show the similarity scores as a table
    df = pd.DataFrame({"Keyword": keywords, "Cosine Similarity": similarity_scores})
    st.write("Similarity Scores:")
    st.dataframe(df)


# Entry point: call main() only when this file is run as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()