Spaces:
Sleeping
Sleeping
File size: 3,344 Bytes
41f73cb 4a2750f 41f73cb 4a2750f 41f73cb 8436e7c 41f73cb 4a2750f 8436e7c 4a2750f 8436e7c eb6725a 41f73cb 8436e7c 41f73cb 4a2750f 8436e7c 41f73cb 4a2750f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import graphrag
# Diagnostic Section
# Dumps what the installed ``graphrag`` package exposes so the rest of the app
# can be adapted to its real structure.
st.title("Graphrag Module Investigation")
# Not every package defines ``__version__``; fall back to a placeholder instead
# of crashing the whole page with an AttributeError.
st.write("Graphrag version:", getattr(graphrag, "__version__", "unknown"))
st.write("Contents of graphrag module:")
st.write(dir(graphrag))
for item in dir(graphrag):
    # Resolve the attribute once and reuse it for the type, callable and
    # docstring reports below.
    attribute = getattr(graphrag, item)
    st.write(f"Type of {item}: {type(attribute)}")
    if callable(attribute):
        st.write(f"Docstring of {item}:")
        st.write(attribute.__doc__)

# Main Application Section
st.title("Graphrag Text Analysis")
@st.cache_resource
def load_model():
    """Load the BERT tokenizer and try to build a model from ``graphrag``.

    Every attribute of the ``graphrag`` module whose name contains "model" or
    "rag" (case-insensitive) and that is callable is tried in ``dir()`` order.
    The first one that can be called with the BERT encoder and the keyword
    arguments below is kept. Failed attempts are reported on the page.

    Returns:
        A ``(tokenizer, model)`` tuple. ``model`` is ``None`` when no candidate
        could be initialized; an error is shown on the page in that case.
    """
    checkpoint = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoder = AutoModel.from_pretrained(checkpoint)

    # NOTE(review): these arguments are a guess at the constructor signature and
    # may need adjusting to the actual structure of graphrag.
    init_kwargs = {
        "num_labels": 2,  # Adjust based on your task
        "num_hidden_layers": 2,
        "hidden_size": 768,
        "intermediate_size": 3072,
    }

    model = None
    for item in dir(graphrag):
        lowered = item.lower()
        if "model" not in lowered and "rag" not in lowered:
            continue
        candidate = getattr(graphrag, item)
        if not callable(candidate):
            continue
        try:
            model = candidate(encoder, **init_kwargs)
        except Exception as e:
            # Best-effort probing: report the failure and try the next name.
            st.write(f"Tried initializing {item}, but got error: {str(e)}")
        else:
            break

    if model is None:
        st.error("Could not initialize any Graphrag model. Please check the module structure.")
    return tokenizer, model
def process_text(text, tokenizer, model):
    """Run ``text`` through ``model`` and return class probabilities.

    Args:
        text: The input passed to ``tokenizer``.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True,
            max_length=512)``; its result is unpacked into ``model`` as
            keyword arguments.
        model: Callable returning either an object with a ``logits``
            attribute or a ``torch.Tensor``. May be ``None``.

    Returns:
        The softmax probabilities of the first batch element as a (possibly
        nested) list, or one of the strings ``"Model not initialized"`` /
        ``"Unexpected output format"`` when no prediction can be produced.
    """
    if model is None:
        return "Model not initialized"

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Process outputs based on your specific task.
    # This is a placeholder; adjust according to your model's output.
    if hasattr(outputs, 'logits'):
        logits = outputs.logits
    elif isinstance(outputs, torch.Tensor):
        logits = outputs
    else:
        return "Unexpected output format"

    # A model that returns un-batched logits would make softmax over dim 1
    # fail; promote them to a batch of one so the indexing below still works.
    if logits.dim() < 2:
        logits = logits.reshape(1, -1)

    # Normalise over the last axis. For (batch, classes) logits this is the
    # same as dim=1, and it stays correct when extra leading axes are present.
    probabilities = torch.softmax(logits, dim=-1)
    return probabilities.tolist()[0]
tokenizer, model = load_model()

# File uploader: batch-process every row of an uploaded CSV.
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write(data.head())
    if st.button("Process Data"):
        if 'text' not in data.columns:
            # Without this check a CSV lacking the column raises KeyError and
            # the user only sees a traceback.
            st.error("The uploaded CSV must contain a 'text' column.")
        else:
            # Empty cells are read as NaN and numeric cells as numbers; coerce
            # each one to a string before it reaches the tokenizer.
            texts = ["" if pd.isna(value) else str(value) for value in data['text']]
            data['results'] = [
                process_text(text, tokenizer, model) for text in texts
            ]
            st.write(data)

# Text input for single prediction
text_input = st.text_area("Enter text for analysis:")
if st.button("Analyze Text"):
    if text_input:
        result = process_text(text_input, tokenizer, model)
        st.write(f"Analysis Result: {result}")
    else:
        st.write("Please enter some text to analyze.")

# Add a link to sample data
st.markdown("[Download Sample CSV](https://raw.githubusercontent.com/your_username/your_repo/main/sample_data.csv)")