Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

eaglelandsonce committed on Jul 9, 2024

Commit

c178699

verified ·

1 Parent(s): 8436e7c

Update pages/21_GraphRag.py

Browse files

Files changed (1) hide show

pages/21_GraphRag.py +32 -35

pages/21_GraphRag.py CHANGED Viewed

@@ -3,19 +3,34 @@ import pandas as pd
 from transformers import AutoTokenizer, AutoModel
 import torch
 import graphrag
 # Diagnostic Section
 st.title("Graphrag Module Investigation")
-st.write("Graphrag version:", graphrag.__version__)
 st.write("Contents of graphrag module:")
-st.write(dir(graphrag))
-for item in dir(graphrag):
-    st.write(f"Type of {item}: {type(getattr(graphrag, item))}")
-    if callable(getattr(graphrag, item)):
-        st.write(f"Docstring of {item}:")
-        st.write(getattr(graphrag, item).__doc__)
 # Main Application Section
 st.title("Graphrag Text Analysis")
@@ -29,18 +44,15 @@ def load_model():
     # Initialize Graphrag model
     # Note: This part may need to be adjusted based on the actual structure of graphrag
     model = None
-    for item in dir(graphrag):
         if 'model' in item.lower() or 'rag' in item.lower():
             model_class = getattr(graphrag, item)
-            if callable(model_class):
                 try:
-                    model = model_class(
-                        bert_model,
-                        num_labels=2,  # Adjust based on your task
-                        num_hidden_layers=2,
-                        hidden_size=768,
-                        intermediate_size=3072,
-                    )
                     break
                 except Exception as e:
                     st.write(f"Tried initializing {item}, but got error: {str(e)}")
@@ -57,6 +69,7 @@ def process_text(text, tokenizer, model):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
     # Process outputs based on your specific task
     # This is a placeholder; adjust according to your model's output
     if hasattr(outputs, 'logits'):
@@ -64,29 +77,13 @@ def process_text(text, tokenizer, model):
     elif isinstance(outputs, torch.Tensor):
         logits = outputs
     else:
-        return "Unexpected output format"
     probabilities = torch.softmax(logits, dim=1)
     return probabilities.tolist()[0]
 tokenizer, model = load_model()
-# File uploader
-uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
-if uploaded_file is not None:
-    data = pd.read_csv(uploaded_file)
-    st.write(data.head())
-    if st.button("Process Data"):
-        results = []
-        for text in data['text']:  # Assuming your CSV has a 'text' column
-            result = process_text(text, tokenizer, model)
-            results.append(result)
-        data['results'] = results
-        st.write(data)
 # Text input for single prediction
 text_input = st.text_area("Enter text for analysis:")
 if st.button("Analyze Text"):
@@ -96,5 +93,5 @@ if st.button("Analyze Text"):
     else:
         st.write("Please enter some text to analyze.")
-# Add a link to sample data
-st.markdown("[Download Sample CSV](https://raw.githubusercontent.com/your_username/your_repo/main/sample_data.csv)")

 from transformers import AutoTokenizer, AutoModel
 import torch
 import graphrag
+import inspect
 # Diagnostic Section
 st.title("Graphrag Module Investigation")
 st.write("Contents of graphrag module:")
+module_contents = dir(graphrag)
+st.write(module_contents)
+st.write("Detailed information about graphrag module contents:")
+for item in module_contents:
+    attr = getattr(graphrag, item)
+    st.write(f"Name: {item}")
+    st.write(f"Type: {type(attr)}")
+    if inspect.isclass(attr):
+        st.write("Class Methods and Attributes:")
+        for name, value in inspect.getmembers(attr):
+            if not name.startswith('_'):  # Exclude private methods/attributes
+                st.write(f"  - {name}: {type(value)}")
+    if callable(attr):
+        st.write("Signature:")
+        st.write(inspect.signature(attr))
+        st.write("Docstring:")
+        st.write(inspect.getdoc(attr))
+    st.write("---")
 # Main Application Section
 st.title("Graphrag Text Analysis")
     # Initialize Graphrag model
     # Note: This part may need to be adjusted based on the actual structure of graphrag
     model = None
+    for item in module_contents:
         if 'model' in item.lower() or 'rag' in item.lower():
             model_class = getattr(graphrag, item)
+            if inspect.isclass(model_class):
                 try:
+                    # Attempt to initialize the model
+                    # You may need to adjust the parameters based on the actual class signature
+                    model = model_class(bert_model)
+                    st.success(f"Successfully initialized {item}")
                     break
                 except Exception as e:
                     st.write(f"Tried initializing {item}, but got error: {str(e)}")
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
     # Process outputs based on your specific task
     # This is a placeholder; adjust according to your model's output
     if hasattr(outputs, 'logits'):
     elif isinstance(outputs, torch.Tensor):
         logits = outputs
     else:
+        return f"Unexpected output format: {type(outputs)}"
     probabilities = torch.softmax(logits, dim=1)
     return probabilities.tolist()[0]
 tokenizer, model = load_model()
 # Text input for single prediction
 text_input = st.text_area("Enter text for analysis:")
 if st.button("Analyze Text"):
     else:
         st.write("Please enter some text to analyze.")
+# Note about sample data
+st.markdown("Note: To use a CSV file, you would typically upload it and process each row. For simplicity, we're using direct text input in this example.")