# data_eng / app.py
# jinghan23's picture
# Update app.py
# 14d4745 verified
import html
import json
from typing import Dict, List

import gradio as gr
import numpy as np
import pandas as pd
def flatten_dict(d: Dict, parent_key: str = '', sep: str = '.') -> Dict:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined onto their ancestors' keys with
    ``sep``; values that are not dictionaries are copied through unchanged.
    A nested dictionary that is empty contributes no entries.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix prepended to every key; empty for the top level.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new dictionary whose values are the non-dict leaves of ``d``.
    """
    flattened = {}
    for key, value in d.items():
        # Top-level keys are kept as-is (no prefix, no string conversion).
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flattened[path] = value
            continue
        flattened.update(flatten_dict(value, path, sep=sep))
    return flattened
def create_preview(text: str, max_length: int = 50) -> str:
    """Return a shortened, single-cell preview of ``text``.

    Non-string input is converted with ``str()`` first.  Text longer than
    ``max_length`` characters is cut to that length and suffixed with
    ``"..."``; shorter text is returned unchanged.

    Args:
        text: The value to preview.
        max_length: Maximum number of characters kept before the ellipsis.

    Returns:
        The preview string.
    """
    rendered = text if isinstance(text, str) else str(text)
    if len(rendered) > max_length:
        return rendered[:max_length] + "..."
    return rendered
class JsonViewer:
    """Keeps the most recently loaded JSON document and its tabular form."""

    def __init__(self):
        # Parsed JSON payload of the last successful load (None otherwise).
        self.data = None
        # DataFrame built from ``data`` (None until a load succeeds).
        self.df = None

    @staticmethod
    def _read_json(file):
        """Parse JSON from any of the supported upload representations.

        Args:
            file: Raw ``bytes``/``bytearray``, a filesystem path (``str`` or
                path-like), or an object whose ``name`` attribute is the
                path of the file on disk (e.g. a temporary-file wrapper).

        Returns:
            The decoded JSON value.

        Raises:
            TypeError: If ``file`` is none of the supported kinds.
            OSError: If the path cannot be opened.
            ValueError: If the content is not valid UTF-8 JSON.
        """
        # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM,
        # which the json module would otherwise reject.
        if isinstance(file, (bytes, bytearray)):
            return json.loads(file.decode('utf-8-sig'))
        if isinstance(file, str) or hasattr(file, '__fspath__'):
            path = file
        else:
            path = getattr(file, 'name', None)
            if not isinstance(path, str):
                raise TypeError(
                    f"Unsupported file input of type {type(file).__name__}"
                )
        with open(path, 'r', encoding='utf-8-sig') as f:
            return json.load(f)

    def load_json(self, file):
        """Load a JSON upload and build a truncated preview table.

        Args:
            file: The uploaded file; see ``_read_json`` for accepted forms.
                ``None`` means "nothing uploaded".

        Returns:
            ``(None, None)`` when ``file`` is ``None``;
            ``(preview_df, row_indices)`` on success, where every cell of
            ``preview_df`` has been passed through ``create_preview``;
            ``(error_message, None)`` when loading fails.
        """
        if file is None:
            return None, None
        try:
            data = self._read_json(file)
            # Convert to DataFrame
            if isinstance(data, dict):
                df = pd.DataFrame([data])
            elif isinstance(data, list):
                df = pd.DataFrame(data)
            else:
                raise ValueError("JSON must contain either a dictionary or a list of dictionaries")
            # Create preview for all columns
            preview_df = df.copy()
            for col in preview_df.columns:
                preview_df[col] = preview_df[col].apply(create_preview)
        except Exception as e:
            # Drop any earlier document so callers never see rows that no
            # longer correspond to what the user uploaded.
            self.data = None
            self.df = None
            return f"Error loading JSON: {str(e)}", None
        # Commit state only once everything succeeded, so ``data`` and ``df``
        # always describe the same document.
        self.data = data
        self.df = df
        return preview_df, df.index.tolist()

    def get_full_row(self, index):
        """Return one row of the loaded data as indented JSON text.

        Args:
            index: Positional row index; anything accepted by ``int()``.

        Returns:
            The row serialized with ``json.dumps(..., indent=2)``, or a
            human-readable message when no data is loaded, ``index`` is
            ``None``, or the row cannot be displayed.
        """
        if self.df is None or index is None:
            return "Please load a JSON file first"
        try:
            row = self.df.iloc[int(index)]
            return json.dumps(row.to_dict(), indent=2)
        except Exception as e:
            return f"Error displaying row: {str(e)}"
# Stylesheet passed to gr.Blocks; styles the HTML emitted by _render_row_html.
_VIEWER_CSS = """
#full_content {
font-family: monospace;
}
.content-container {
display: flex;
flex-wrap: wrap;
gap: 20px;
padding: 15px;
background: #f5f5f5;
border-radius: 5px;
}
.content-section {
flex: 1;
min-width: 300px;
background: white;
padding: 15px;
border-radius: 5px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.section-title {
font-weight: bold;
color: #444;
margin-bottom: 10px;
padding-bottom: 5px;
border-bottom: 2px solid #eee;
}
.section-content {
white-space: pre-wrap;
line-height: 1.5;
}
.content-key {
font-weight: bold;
color: #444;
padding-right: 10px;
}
.content-value {
display: block;
padding-left: 0;
margin-top: 5px;
}
"""

# Detail-pane layout: section title -> record keys shown in that section.
# Keys missing from a record are skipped; keys not listed are not shown.
_DETAIL_SECTIONS = {
    'Video Info': ['vid', 'video_url', 'title', 'category', 'extended_description'],
    'Content': ['rephrased_description'],
    'Additional Info': ['vqa', 'vqa_v1'],
}


def _format_value_html(value):
    """Return ``value`` as HTML-safe text for the detail pane."""
    if not isinstance(value, str):
        return html.escape(str(value), quote=False)
    # Escape first: the text comes from an uploaded file, so markup in it
    # must be displayed rather than interpreted by the browser.
    text = html.escape(value, quote=False)
    # Literal backslash-n sequences become indented HTML line breaks and
    # literal backslash-quote sequences become plain quotes.
    text = text.replace('\\n', '<br>&nbsp;&nbsp;&nbsp;&nbsp;')
    return text.replace('\\"', '"')


def _render_row_html(row_dict):
    """Render one record as the sectioned HTML shown in the detail pane."""
    rendered_sections = []
    for section_title, keys in _DETAIL_SECTIONS.items():
        entries = [
            f'<span class="content-key">{key}:</span>'
            f'<span class="content-value">{_format_value_html(row_dict[key])}</span>'
            for key in keys
            if key in row_dict
        ]
        if not entries:
            continue
        # Template text is kept flush-left: .section-content uses
        # "white-space: pre-wrap", so whitespace here is visible output.
        rendered_sections.append(f"""
<div class="content-section">
<div class="section-title">{section_title}</div>
<div class="section-content">
{'<br>'.join(entries)}
</div>
</div>
""")
    return f"""
<div class="content-container">
{''.join(rendered_sections)}
</div>
"""


def create_interface():
    """Build the Gradio UI: file upload, preview table and row detail pane.

    Uploading a file fills the preview table (or shows the load error in the
    detail pane); selecting a table cell renders that row's full content.

    Returns:
        The configured ``gr.Blocks`` instance, not yet launched.
    """
    # NOTE(review): this single viewer is captured by both handlers below, so
    # its state is presumably shared by every browser session connected to
    # the app -- confirm whether per-session state (gr.State) is needed.
    viewer = JsonViewer()

    with gr.Blocks(css=_VIEWER_CSS) as interface:
        gr.Markdown("# JSON Data Viewer")
        with gr.Row():
            file_input = gr.File(label="Upload JSON file")
        with gr.Row():
            preview_table = gr.Dataframe(
                value=pd.DataFrame(),
                label="Data Preview",
                interactive=True,
                wrap=True,
                row_count=(5, "dynamic")
            )
        with gr.Row():
            full_content = gr.HTML(
                label="Full Content",
                elem_id="full_content"
            )

        def update_table(file):
            """Reload the table; return (preview table, detail-pane HTML)."""
            preview, _ = viewer.load_json(file)
            if isinstance(preview, pd.DataFrame):
                # Clear the pane so a row of the previous file does not linger.
                return preview, ""
            # On failure load_json returns the error text in the first slot;
            # surface it instead of silently showing an empty table.
            message = preview if isinstance(preview, str) else ""
            return pd.DataFrame(), html.escape(message, quote=False)

        def show_full_content(evt: gr.SelectData):
            """Render the row of the selected table cell as HTML."""
            if viewer.df is None:
                return "Please load a JSON file first"
            try:
                # evt.index[0] is used as the positional row number.
                row = viewer.df.iloc[evt.index[0]]
                return _render_row_html(row.to_dict())
            except Exception as e:
                return html.escape(f"Error displaying row: {str(e)}", quote=False)

        file_input.change(
            fn=update_table,
            inputs=[file_input],
            outputs=[preview_table, full_content]
        )
        preview_table.select(
            fn=show_full_content,
            inputs=None,
            outputs=[full_content]
        )
    return interface
if __name__ == "__main__":
    # Script entry point: build the UI and start serving it.
    demo = create_interface()
    # NOTE(review): share=True asks Gradio for a public share link in
    # addition to the local server -- confirm this is wanted where deployed.
    demo.launch(share=True)