Spaces:

varun500
/

knowledge_graph

Runtime error

App Files Files Community

varun500 committed on Jun 26, 2023

Commit

fb19adb

1 Parent(s): cb0541c

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import streamlit as st
+import networkx as nx
+import matplotlib.pyplot as plt
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.cluster import KMeans
+def main():
+    st.title("Financial Graph App")
+    st.write("Enter a financial sentence and see its similarity to predefined keywords.")
+    # User input
+    financial_sentence = st.text_area("Enter the financial sentence", value="")
+    # Check if the user entered a sentence
+    if financial_sentence.strip() != "":
+        # Predefined keywords
+        keywords = [
+            "Finance",
+            "Fiscal",
+            "Quarterly results",
+            "Revenue",
+            "Profit",
+        ]
+        # Load the pre-trained Sentence-Transformers model
+        model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+        # Generate word embeddings for the financial sentence and keywords
+        sentence_embedding = model.encode([financial_sentence])
+        keyword_embeddings = model.encode(keywords)
+        # Calculate cosine similarity between the sentence embedding and keyword embeddings
+        similarity_scores = cosine_similarity(sentence_embedding, keyword_embeddings)[0]
+        # Create a graph
+        G = nx.Graph()
+        # Add the sentence embedding as a node to the graph
+        G.add_node(financial_sentence, embedding=sentence_embedding[0])
+        # Add the keyword embeddings as nodes to the graph
+        for keyword, embedding, similarity in zip(keywords, keyword_embeddings, similarity_scores):
+            G.add_node(keyword, embedding=embedding, similarity=similarity)
+        # Add edges between the sentence and keywords with their similarity scores as weights
+        for keyword, similarity in zip(keywords, similarity_scores):
+            G.add_edge(financial_sentence, keyword, weight=similarity)
+        # Perform KNN clustering on the keyword embeddings
+        kmeans = KMeans(n_clusters=3)
+        cluster_labels = kmeans.fit_predict(keyword_embeddings)
+        # Add cluster labels as node attributes
+        for node, cluster_label in zip(G.nodes, cluster_labels):
+            G.nodes[node]["cluster"] = cluster_label
+        # Set node positions using spring layout
+        pos = nx.spring_layout(G)
+        # Get unique cluster labels
+        unique_clusters = set(cluster_labels)
+        # Assign colors to clusters
+        cluster_colors = ["lightblue", "lightgreen", "lightyellow"]
+        # Draw nodes with cluster colors
+        nx.draw_networkx_nodes(
+            G,
+            pos,
+            node_color=[cluster_colors[G.nodes[node].get("cluster", 0)] for node in G.nodes],
+            node_size=800,
+        )
+        # Draw edges
+        nx.draw_networkx_edges(G, pos, edge_color="gray", width=1, alpha=0.7)
+        # Draw labels
+        nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold")
+        # Draw edge labels (cosine similarity scores)
+        edge_labels = nx.get_edge_attributes(G, "weight")
+        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
+        # Set plot attributes
+        plt.title("Financial Context and Keywords")
+        plt.axis("off")
+        # Save the graph as an image
+        plt.savefig("financial_graph.png")
+        # Show the graph
+        st.pyplot()
+        # Save the similarity scores in a CSV file
+        df = pd.DataFrame({"Keyword": keywords, "Cosine Similarity": similarity_scores})
+        st.write("Similarity Scores:")
+        st.dataframe(df)
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()