"""Streamlit app: inspect the ``graphrag`` module and run text through a model.

The script has two parts:

1. A diagnostic section that dumps the public surface of the imported
   ``graphrag`` package to the page.
2. A text-analysis section that tries to build a model from ``graphrag`` on
   top of ``bert-base-uncased`` and exposes CSV-batch and single-text
   prediction through the UI.
"""

import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import graphrag

# ---------------------------------------------------------------------------
# Diagnostic Section
# ---------------------------------------------------------------------------
st.title("Graphrag Module Investigation")
# Not every package defines __version__; fall back instead of crashing the app.
st.write("Graphrag version:", getattr(graphrag, "__version__", "unknown"))
st.write("Contents of graphrag module:")
st.write(dir(graphrag))

for item in dir(graphrag):
    # Resolve the attribute once; the value is reused for every check below.
    attr = getattr(graphrag, item)
    st.write(f"Type of {item}: {type(attr)}")
    if callable(attr):
        st.write(f"Docstring of {item}:")
        st.write(attr.__doc__)

# ---------------------------------------------------------------------------
# Main Application Section
# ---------------------------------------------------------------------------
st.title("Graphrag Text Analysis")


@st.cache_resource
def load_model():
    """Load the BERT tokenizer and try to build a model from ``graphrag``.

    Every attribute of ``graphrag`` whose name contains ``"model"`` or
    ``"rag"`` (case-insensitive) and is callable is tried in ``dir()`` order
    with a fixed set of constructor arguments; the first call that does not
    raise wins.

    NOTE(review): the constructor signature used here is a guess -- nothing in
    this file establishes that ``graphrag`` exposes a callable accepting these
    arguments. Confirm against the installed package.

    Returns:
        A ``(tokenizer, model)`` tuple. ``model`` is ``None`` when no candidate
        could be instantiated; an error is shown on the page in that case.
    """
    bert_model_name = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
    bert_model = AutoModel.from_pretrained(bert_model_name)

    # Initialize Graphrag model.
    # Note: this part may need to be adjusted based on the actual structure
    # of graphrag.
    model = None
    for item in dir(graphrag):
        lowered = item.lower()
        if "model" not in lowered and "rag" not in lowered:
            continue
        model_class = getattr(graphrag, item)
        if not callable(model_class):
            continue
        try:
            model = model_class(
                bert_model,
                num_labels=2,  # Adjust based on your task
                num_hidden_layers=2,
                hidden_size=768,
                intermediate_size=3072,
            )
            break
        except Exception as e:
            # Deliberate best-effort probing: the candidate's signature is
            # unknown, so any failure is reported and the next one is tried.
            st.write(f"Tried initializing {item}, but got error: {str(e)}")

    if model is None:
        st.error(
            "Could not initialize any Graphrag model. "
            "Please check the module structure."
        )
    return tokenizer, model


def process_text(text, tokenizer, model):
    """Run ``text`` through ``model`` and return softmax probabilities.

    Args:
        text: Input to analyse. Non-string values are converted with ``str``
            before tokenization.
        tokenizer: Callable tokenizer; invoked with ``return_tensors="pt"``,
            ``truncation=True`` and ``max_length=512``.
        model: Callable model invoked as ``model(**inputs)``, or ``None``.

    Returns:
        The first row of ``softmax(logits, dim=1)`` as a Python list, or one
        of the strings ``"Model not initialized"`` / ``"Unexpected output
        format"`` when no prediction can be produced.
    """
    if model is None:
        return "Model not initialized"

    if not isinstance(text, str):
        # CSV cells may arrive as numbers or other objects.
        text = str(text)

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Process outputs based on your specific task.
    # This is a placeholder; adjust according to your model's output.
    if hasattr(outputs, "logits"):
        logits = outputs.logits
    elif isinstance(outputs, torch.Tensor):
        logits = outputs
    else:
        return "Unexpected output format"

    probabilities = torch.softmax(logits, dim=1)
    return probabilities.tolist()[0]


tokenizer, model = load_model()

# File uploader
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write(data.head())

    if st.button("Process Data"):
        if "text" not in data.columns:
            # Fail with a readable message instead of a KeyError traceback.
            st.error("The uploaded CSV must contain a 'text' column.")
        else:
            # Missing cells are read as NaN (a float); normalise to strings so
            # the tokenizer always receives text.
            texts = data["text"].fillna("").astype(str)
            data["results"] = [
                process_text(text, tokenizer, model) for text in texts
            ]
            st.write(data)

# Text input for single prediction
text_input = st.text_area("Enter text for analysis:")
if st.button("Analyze Text"):
    if text_input:
        result = process_text(text_input, tokenizer, model)
        st.write(f"Analysis Result: {result}")
    else:
        st.write("Please enter some text to analyze.")

# Add a link to sample data
st.markdown("[Download Sample CSV](https://raw.githubusercontent.com/your_username/your_repo/main/sample_data.csv)")