Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

eaglelandsonce committed on Jul 9, 2024

Commit

97aae78

verified ·

1 Parent(s): c672b82

Update pages/21_GraphRag.py

Browse files

Files changed (1) hide show

pages/21_GraphRag.py +60 -46

pages/21_GraphRag.py CHANGED Viewed

@@ -1,46 +1,60 @@
-import streamlit as st
-import graphrag
-import inspect
-st.title("GraphRAG Module Explorer")
-# Display all attributes and functions in the graphrag module
-st.header("GraphRAG Module Contents")
-graphrag_contents = dir(graphrag)
-for item in graphrag_contents:
-    attr = getattr(graphrag, item)
-    st.subheader(f"{item}")
-    st.write(f"Type: {type(attr)}")
-    if inspect.isclass(attr):
-        st.write("Class Methods:")
-        for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
-            st.write(f"- {name}")
-            st.write(f"  Signature: {inspect.signature(method)}")
-            st.write(f"  Docstring: {method.__doc__}")
-    elif inspect.isfunction(attr):
-        st.write("Function:")
-        st.write(f"Signature: {inspect.signature(attr)}")
-        st.write(f"Docstring: {attr.__doc__}")
-    elif isinstance(attr, (int, float, str, bool)):
-        st.write(f"Value: {attr}")
-    st.write("---")
-# Display the module's docstring if available
-if graphrag.__doc__:
-    st.header("GraphRAG Module Documentation")
-    st.write(graphrag.__doc__)
-st.header("Next Steps")
-st.write("""
-Based on the information above, we need to determine:
-1. How to create a graph representation of text using graphrag.
-2. How to process this graph representation for analysis.
-3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.
-Please review the module contents and let me know which components seem most relevant for our text analysis task.
-""")

+import torch
+from transformers import AutoTokenizer, AutoModel
+from sentence_transformers import SentenceTransformer
+import networkx as nx
+import matplotlib.pyplot as plt
+# Load pre-trained model and tokenizer
+model_name = "bert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
+# Function to get embeddings
+def get_embeddings(texts):
+    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state[:, 0, :].numpy()
+# Sample data (replace with your own data import)
+documents = [
+    "The quick brown fox jumps over the lazy dog.",
+    "A journey of a thousand miles begins with a single step.",
+    "To be or not to be, that is the question.",
+    "All that glitters is not gold.",
+]
+# Get embeddings for documents
+embeddings = get_embeddings(documents)
+# Create graph
+G = nx.Graph()
+# Add nodes and edges based on cosine similarity
+threshold = 0.5  # Adjust this threshold as needed
+for i in range(len(documents)):
+    G.add_node(i, text=documents[i])
+    for j in range(i+1, len(documents)):
+        similarity = torch.cosine_similarity(torch.tensor(embeddings[i]), torch.tensor(embeddings[j]), dim=0)
+        if similarity > threshold:
+            G.add_edge(i, j, weight=similarity.item())
+# Visualize the graph
+pos = nx.spring_layout(G)
+nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=500, font_size=8, font_weight='bold')
+edge_labels = nx.get_edge_attributes(G, 'weight')
+nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
+plt.title("Document Similarity Graph")
+plt.show()
+# Example of querying the graph
+query = "What is the meaning of life?"
+query_embedding = get_embeddings([query])[0]
+# Find most similar document
+similarities = [torch.cosine_similarity(torch.tensor(query_embedding), torch.tensor(emb), dim=0) for emb in embeddings]
+most_similar_idx = max(range(len(similarities)), key=similarities.__getitem__)
+print(f"Most similar document to the query: {documents[most_similar_idx]}")
+# You can extend this to implement more complex graph-based retrieval algorithms