Spaces:
Sleeping
Sleeping
Update pages/21_GraphRag.py
Browse files- pages/21_GraphRag.py +32 -35
pages/21_GraphRag.py
CHANGED
@@ -3,19 +3,34 @@ import pandas as pd
|
|
3 |
from transformers import AutoTokenizer, AutoModel
|
4 |
import torch
|
5 |
import graphrag
|
|
|
6 |
|
7 |
# Diagnostic Section
|
8 |
st.title("Graphrag Module Investigation")
|
9 |
|
10 |
-
st.write("Graphrag version:", graphrag.__version__)
|
11 |
st.write("Contents of graphrag module:")
|
12 |
-
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Main Application Section
|
21 |
st.title("Graphrag Text Analysis")
|
@@ -29,18 +44,15 @@ def load_model():
|
|
29 |
# Initialize Graphrag model
|
30 |
# Note: This part may need to be adjusted based on the actual structure of graphrag
|
31 |
model = None
|
32 |
-
for item in
|
33 |
if 'model' in item.lower() or 'rag' in item.lower():
|
34 |
model_class = getattr(graphrag, item)
|
35 |
-
if
|
36 |
try:
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
hidden_size=768,
|
42 |
-
intermediate_size=3072,
|
43 |
-
)
|
44 |
break
|
45 |
except Exception as e:
|
46 |
st.write(f"Tried initializing {item}, but got error: {str(e)}")
|
@@ -57,6 +69,7 @@ def process_text(text, tokenizer, model):
|
|
57 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
|
58 |
with torch.no_grad():
|
59 |
outputs = model(**inputs)
|
|
|
60 |
# Process outputs based on your specific task
|
61 |
# This is a placeholder; adjust according to your model's output
|
62 |
if hasattr(outputs, 'logits'):
|
@@ -64,29 +77,13 @@ def process_text(text, tokenizer, model):
|
|
64 |
elif isinstance(outputs, torch.Tensor):
|
65 |
logits = outputs
|
66 |
else:
|
67 |
-
return "Unexpected output format"
|
68 |
|
69 |
probabilities = torch.softmax(logits, dim=1)
|
70 |
return probabilities.tolist()[0]
|
71 |
|
72 |
tokenizer, model = load_model()
|
73 |
|
74 |
-
# File uploader
|
75 |
-
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
76 |
-
|
77 |
-
if uploaded_file is not None:
|
78 |
-
data = pd.read_csv(uploaded_file)
|
79 |
-
st.write(data.head())
|
80 |
-
|
81 |
-
if st.button("Process Data"):
|
82 |
-
results = []
|
83 |
-
for text in data['text']: # Assuming your CSV has a 'text' column
|
84 |
-
result = process_text(text, tokenizer, model)
|
85 |
-
results.append(result)
|
86 |
-
|
87 |
-
data['results'] = results
|
88 |
-
st.write(data)
|
89 |
-
|
90 |
# Text input for single prediction
|
91 |
text_input = st.text_area("Enter text for analysis:")
|
92 |
if st.button("Analyze Text"):
|
@@ -96,5 +93,5 @@ if st.button("Analyze Text"):
|
|
96 |
else:
|
97 |
st.write("Please enter some text to analyze.")
|
98 |
|
99 |
-
#
|
100 |
-
st.markdown("
|
|
|
3 |
from transformers import AutoTokenizer, AutoModel
|
4 |
import torch
|
5 |
import graphrag
|
6 |
+
import inspect
|
7 |
|
8 |
# Diagnostic Section
# Renders everything the imported `graphrag` package exposes at top level:
# the raw dir() listing first, then one entry per attribute with its type,
# public class members, call signature and docstring.
st.title("Graphrag Module Investigation")

st.write("Contents of graphrag module:")
# Kept as a module-level name: the model lookup further down iterates it too.
module_contents = dir(graphrag)
st.write(module_contents)

st.write("Detailed information about graphrag module contents:")
for item in module_contents:
    attr = getattr(graphrag, item)
    st.write(f"Name: {item}")
    st.write(f"Type: {type(attr)}")

    if inspect.isclass(attr):
        st.write("Class Methods and Attributes:")
        for name, value in inspect.getmembers(attr):
            if not name.startswith('_'):  # Exclude private methods/attributes
                st.write(f" - {name}: {type(value)}")

    if callable(attr):
        st.write("Signature:")
        # inspect.signature raises ValueError when no signature can be
        # provided and TypeError for unsupported objects; report that for
        # this item instead of letting it abort the rest of the listing.
        try:
            st.write(inspect.signature(attr))
        except (TypeError, ValueError) as e:
            st.write(f"Signature unavailable: {e}")
        st.write("Docstring:")
        st.write(inspect.getdoc(attr))

    st.write("---")
|
34 |
|
35 |
# Main Application Section
|
36 |
st.title("Graphrag Text Analysis")
|
|
|
44 |
# Initialize Graphrag model
|
45 |
# Note: This part may need to be adjusted based on the actual structure of graphrag
|
46 |
model = None
|
47 |
+
for item in module_contents:
|
48 |
if 'model' in item.lower() or 'rag' in item.lower():
|
49 |
model_class = getattr(graphrag, item)
|
50 |
+
if inspect.isclass(model_class):
|
51 |
try:
|
52 |
+
# Attempt to initialize the model
|
53 |
+
# You may need to adjust the parameters based on the actual class signature
|
54 |
+
model = model_class(bert_model)
|
55 |
+
st.success(f"Successfully initialized {item}")
|
|
|
|
|
|
|
56 |
break
|
57 |
except Exception as e:
|
58 |
st.write(f"Tried initializing {item}, but got error: {str(e)}")
|
|
|
69 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
|
70 |
with torch.no_grad():
|
71 |
outputs = model(**inputs)
|
72 |
+
|
73 |
# Process outputs based on your specific task
|
74 |
# This is a placeholder; adjust according to your model's output
|
75 |
if hasattr(outputs, 'logits'):
|
|
|
77 |
elif isinstance(outputs, torch.Tensor):
|
78 |
logits = outputs
|
79 |
else:
|
80 |
+
return f"Unexpected output format: {type(outputs)}"
|
81 |
|
82 |
probabilities = torch.softmax(logits, dim=1)
|
83 |
return probabilities.tolist()[0]
|
84 |
|
85 |
tokenizer, model = load_model()
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
# Text input for single prediction
|
88 |
text_input = st.text_area("Enter text for analysis:")
|
89 |
if st.button("Analyze Text"):
|
|
|
93 |
else:
|
94 |
st.write("Please enter some text to analyze.")
|
95 |
|
96 |
+
# Note about sample data
|
97 |
+
st.markdown("Note: To use a CSV file, you would typically upload it and process each row. For simplicity, we're using direct text input in this example.")
|