data_eng / app.py
jinghan23's picture
Update app.py
f747d84 verified
raw
history blame
6.78 kB
import html
import json
from typing import Dict, List

import gradio as gr
import numpy as np
import pandas as pd
def flatten_dict(d: Dict, parent_key: str = '', sep: str = '.') -> Dict:
    """Collapse a nested dictionary into a single level.

    Keys of nested dictionaries are joined to their ancestors' keys with
    *sep*; a top-level key is kept as-is when *parent_key* is empty.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix prepended to every key produced by this call.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new flat dictionary. Nested dictionaries that are empty contribute
        no entries; when two paths collapse to the same key the later one
        wins.
    """
    flat = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if isinstance(value, dict):
            # Recurse with the accumulated path as the new prefix.
            flat.update(flatten_dict(value, path, sep=sep))
        else:
            flat[path] = value
    return flat
def create_preview(text: str, max_length: int = 50) -> str:
    """Return a shortened, display-friendly version of *text*.

    Non-string input is converted with ``str()`` first. Text longer than
    *max_length* characters is cut to that length and suffixed with
    ``"..."``; shorter text is returned unchanged.
    """
    rendered = text if isinstance(text, str) else str(text)
    return rendered if len(rendered) <= max_length else rendered[:max_length] + "..."
class JsonViewer:
    """Load a JSON document and expose it as a pandas DataFrame.

    ``data`` holds the parsed JSON value and ``df`` the DataFrame built from
    it. Both are ``None`` until a load succeeds and are reset whenever a load
    fails or is cleared, so a rejected upload never leaves the previous
    document reachable through ``get_full_row``.
    """

    def __init__(self):
        self.data = None  # parsed JSON value from the last successful load
        self.df = None  # DataFrame built from ``data``

    @staticmethod
    def _read_json(file):
        """Parse JSON from raw bytes, a filesystem path, or an object with ``.name``.

        Raises:
            TypeError: If *file* is none of the supported kinds.
            OSError: If the path cannot be opened.
            ValueError: If the content is not valid UTF-8 encoded JSON.
        """
        if isinstance(file, (bytes, bytearray)):
            # "utf-8-sig" decodes plain UTF-8 unchanged and additionally drops
            # a leading byte-order mark, which the json module refuses.
            return json.loads(file.decode("utf-8-sig"))
        if isinstance(file, str) or hasattr(file, "__fspath__"):
            path = file
        else:
            # File wrappers (e.g. tempfile objects, as handed over by some
            # Gradio File configurations) expose their location as ``.name``.
            path = getattr(file, "name", None)
            if not isinstance(path, str):
                raise TypeError(f"unsupported upload type: {type(file).__name__}")
        with open(path, "r", encoding="utf-8-sig") as f:
            return json.load(f)

    def load_json(self, file):
        """Load *file* and build a truncated preview of its contents.

        Args:
            file: ``None``, raw ``bytes``/``bytearray``, a path (``str`` or
                path-like), or an object whose ``name`` attribute is a path.

        Returns:
            ``(None, None)`` when *file* is ``None``;
            ``(preview_df, row_indices)`` on success, where every cell of
            ``preview_df`` has been passed through ``create_preview``;
            ``("Error loading JSON: ...", None)`` on any failure.
        """
        # Drop the previous document up front so that a failed or cleared
        # upload cannot leave stale rows behind.
        self.data = None
        self.df = None
        if file is None:
            return None, None

        try:
            data = self._read_json(file)

            # A single object becomes a one-row table; a list becomes one row
            # per element.
            if isinstance(data, dict):
                df = pd.DataFrame([data])
            elif isinstance(data, list):
                df = pd.DataFrame(data)
            else:
                raise ValueError("JSON must contain either a dictionary or a list of dictionaries")

            preview_df = df.copy()
            for col in preview_df.columns:
                preview_df[col] = preview_df[col].apply(create_preview)
        except Exception as e:
            # Callers (UI callbacks) expect an error string, not an exception.
            return f"Error loading JSON: {e}", None

        # Commit state only once the whole load has succeeded.
        self.data = data
        self.df = df
        return preview_df, df.index.tolist()

    def get_full_row(self, index):
        """Return the row at positional *index* as indented JSON text.

        Returns a human-readable message instead of raising when nothing is
        loaded, *index* is ``None``, or the row cannot be produced.
        """
        if self.df is None or index is None:
            return "Please load a JSON file first"

        try:
            row = self.df.iloc[int(index)]
            return json.dumps(row.to_dict(), indent=2)
        except Exception as e:
            return f"Error displaying row: {e}"
# Stylesheet for the detail panel rendered below the preview table.
_VIEWER_CSS = """
#full_content {
    font-family: monospace;
}
.content-container {
    display: flex;
    flex-wrap: wrap;
    gap: 20px;
    padding: 15px;
    background: #f5f5f5;
    border-radius: 5px;
}
.content-section {
    flex: 1;
    min-width: 300px;
    background: white;
    padding: 15px;
    border-radius: 5px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.section-title {
    font-weight: bold;
    color: #444;
    margin-bottom: 10px;
    padding-bottom: 5px;
    border-bottom: 2px solid #eee;
}
.section-content {
    white-space: pre-wrap;
    line-height: 1.5;
}
.content-key {
    font-weight: bold;
    color: #444;
    padding-right: 10px;
}
.content-value {
    display: block;
    padding-left: 0;
    margin-top: 5px;
}
"""

# Default layout of the detail panel: section title -> row keys shown in it.
_DEFAULT_SECTIONS = {
    'Video Info': ['vid', 'video_url', 'title', 'category', 'extended_description'],
    'Content': ['rephrased_description'],
    'Additional Info': ['vqa', 'vqa_1'],
}


def _format_value_html(value):
    """Return *value* as HTML-safe text for the detail panel."""
    # Escape first: the value comes from an uploaded file and must never be
    # interpreted as markup.
    text = html.escape(value if isinstance(value, str) else str(value), quote=False)
    if isinstance(value, str):
        # Literal backslash-n / backslash-quote sequences in the data are shown
        # as an indented line break and a plain quote respectively.
        text = text.replace('\\n', '<br>&nbsp;&nbsp;&nbsp;&nbsp;')
        text = text.replace('\\"', '"')
    return text


def _render_row_html(row_dict, sections):
    """Render the keys of *row_dict* selected by *sections* as grouped HTML cards."""
    cards = []
    for section_title, keys in sections.items():
        entries = [
            f'<span class="content-key">{html.escape(str(key), quote=False)}:</span>'
            f'<span class="content-value">{_format_value_html(row_dict[key])}</span>'
            for key in keys
            if key in row_dict
        ]
        if not entries:
            # Skip sections none of whose keys exist in this row.
            continue
        # No whitespace is placed inside .section-content because its
        # "white-space: pre-wrap" rule would render it verbatim.
        cards.append(
            '<div class="content-section">'
            f'<div class="section-title">{html.escape(str(section_title), quote=False)}</div>'
            f'<div class="section-content">{"<br>".join(entries)}</div>'
            '</div>'
        )
    return f'<div class="content-container">{"".join(cards)}</div>'


def create_interface(sections=None):
    """Build the Gradio Blocks UI for browsing an uploaded JSON file.

    The UI has a file upload, a preview table with truncated cells, and a
    detail panel that shows the selected row grouped into sections.

    Args:
        sections: Optional mapping of section title to the list of row keys
            displayed under it. Defaults to the video-metadata layout
            (``Video Info`` / ``Content`` / ``Additional Info``).

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    viewer = JsonViewer()
    active_sections = _DEFAULT_SECTIONS if sections is None else sections

    with gr.Blocks(css=_VIEWER_CSS) as interface:
        gr.Markdown("# JSON Data Viewer")

        with gr.Row():
            file_input = gr.File(label="Upload JSON file")

        with gr.Row():
            preview_table = gr.Dataframe(
                value=pd.DataFrame(),
                label="Data Preview",
                interactive=True,
                wrap=True,
                row_count=(5, "dynamic"),
            )

        with gr.Row():
            full_content = gr.HTML(
                label="Full Content",
                elem_id="full_content",
            )

        def update_table(file):
            """Reload the table; surface load errors in the detail panel."""
            preview, _ = viewer.load_json(file)
            if isinstance(preview, pd.DataFrame):
                # A fresh file invalidates whatever row was shown before.
                return preview, ""
            # ``preview`` is None (no file) or an error message string.
            message = "" if preview is None else html.escape(str(preview), quote=False)
            return pd.DataFrame(), message

        def show_full_content(evt: gr.SelectData):
            """Render the row of the clicked cell in the detail panel."""
            if viewer.df is None:
                return "Please load a JSON file first"
            try:
                # Index into evt.index as the original code did, but also
                # tolerate a bare integer index.
                position = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
                row_dict = viewer.df.iloc[position].to_dict()
                return _render_row_html(row_dict, active_sections)
            except Exception as e:
                return f"Error displaying row: {html.escape(str(e), quote=False)}"

        file_input.change(
            fn=update_table,
            inputs=[file_input],
            outputs=[preview_table, full_content],
        )

        preview_table.select(
            fn=show_full_content,
            inputs=None,
            outputs=[full_content],
        )

    return interface
# Script entry point: build the UI and start the Gradio server. Nothing runs
# when this module is merely imported.
if __name__ == "__main__":
    demo = create_interface()
    # share=True is forwarded to Gradio's launch(); per the Gradio docs this
    # requests a publicly reachable share link in addition to the local server.
    demo.launch(share=True)