Spaces:

awacke1
/

TransformersDiffusersNDatasets

Running

App Files Files Community

awacke1 committed on Mar 10

Commit

d8d14b1

verified ·

1 Parent(s): 1e53277

Create app.py

Browse files

Files changed (1) hide show

app.py +494 -0

app.py ADDED Viewed

	@@ -0,0 +1,494 @@

+#!/usr/bin/env python3
+import os
+import re
+import streamlit as st
+import streamlit.components.v1 as components
+from urllib.parse import quote
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+import base64
+import glob
+import time
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from mergekit.config import MergeConfiguration
+from mergekit.merge import Mergekit
+from spectrum import SpectrumAnalyzer
+import distilkit
+import yaml
+from dataclasses import dataclass
+from typing import Optional, List
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Page Configuration
+st.set_page_config(
+    page_title="AI Knowledge Tree Builder 📈🌿",
+    page_icon="🌳✨",
+    layout="wide",
+    initial_sidebar_state="auto",
+)
+# Predefined Knowledge Trees
+# Maps a tree name to its outline text. Each outline begins with a "0." title
+# line, followed by numbered section lines and "-" item lines; none of the
+# lines is indented.
+trees = {
+    "ML Engineering": """
+0. ML Engineering 🌐
+1. Data Preparation
+- Load Data 📊
+- Preprocess Data 🛠️
+2. Model Building
+- Train Model 🤖
+- Evaluate Model 📈
+3. Deployment
+- Deploy Model 🚀
+    """,
+    "Health": """
+0. Health and Wellness 🌿
+1. Physical Health
+- Exercise 🏋️
+- Nutrition 🍎
+2. Mental Health
+- Meditation 🧘
+- Therapy 🛋️
+    """,
+}
+# Project Seeds
+# Starter outlines keyed by project type; the keys match the options of the
+# "Select Project Type" selectbox. Same outline format as `trees`: a "0." title
+# line, numbered lines, and optional "-" item lines.
+project_seeds = {
+    "Code Project": """
+0. Code Project 📂
+1. app.py 🐍
+2. requirements.txt 📦
+3. README.md 📄
+    """,
+    "Papers Project": """
+0. Papers Project 📚
+1. markdown 📝
+2. mermaid 🖼️
+3. huggingface.co 🤗
+    """,
+    "AI Project": """
+0. AI Project 🤖
+1. Streamlit Torch Transformers
+- Streamlit 🌐
+- Torch 🔥
+- Transformers 🤖
+2. DistillKit MergeKit Spectrum
+- DistillKit 🧪
+- MergeKit 🔄
+- Spectrum 📊
+3. Transformers Diffusers Datasets
+- Transformers 🤖
+- Diffusers 🎨
+- Datasets 📊
+    """,
+}
+# Meta class for model configuration
+class ModelMeta(type):
+    def __new__(cls, name, bases, attrs):
+        attrs['registry'] = {}
+        return super().__new__(cls, name, bases, attrs)
+# Base Model Configuration Class
+@dataclass
+class ModelConfig(metaclass=ModelMeta):
+    name: str
+    base_model: str
+    size: str
+    domain: Optional[str] = None
+    def __init_subclass__(cls):
+        ModelConfig.registry[cls.__name__] = cls
+    @property
+    def model_path(self):
+        return f"models/{self.name}"
+# Decorator for pipeline stages
+def pipeline_stage(func):
+    def wrapper(*args, **kwargs):
+        st.spinner(f"Running {func.__name__}...")
+        result = func(*args, **kwargs)
+        st.success(f"Completed {func.__name__}!")
+        return result
+    return wrapper
+# Model Builder Class
+class ModelBuilder:
+    """Step-by-step builder holding a model and tokenizer that each stage updates.
+    Every stage method returns ``self`` so calls can be chained; ``save_model``
+    writes the current model and tokenizer to disk.
+    NOTE(review): the distilkit, mergekit and spectrum calls below use
+    constructor and method names that cannot be confirmed from this file —
+    TODO verify them against the installed packages.
+    """
+    def __init__(self):
+        # Nothing is loaded up front; load_base_model() fills model and tokenizer.
+        # `config` is initialised here but not read by any method of this class.
+        self.config = None
+        self.model = None
+        self.tokenizer = None
+    @pipeline_stage
+    def load_base_model(self, model_name: str):
+        """Load the causal-LM model and its tokenizer for ``model_name`` via transformers."""
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        return self
+    @pipeline_stage
+    def apply_distillation(self, teacher_model: str, output_dir: str):
+        """Distil ``teacher_model`` into the current model, then adopt the distiller's student model."""
+        # The current model acts as the student; the teacher is passed as given.
+        distiller = distilkit.Distiller(
+            teacher_model=teacher_model,
+            student_model=self.model,
+            tokenizer=self.tokenizer
+        )
+        distiller.distill(output_dir=output_dir)
+        self.model = distiller.student_model
+        return self
+    @pipeline_stage
+    def apply_merge(self, models_to_merge: List[str], output_dir: str):
+        """Merge ``models_to_merge`` with the "linear" method and reload the result from ``output_dir``."""
+        merge_config = MergeConfiguration(
+            models=models_to_merge,
+            merge_method="linear",
+            output_dir=output_dir
+        )
+        merger = Mergekit(merge_config)
+        merger.run()
+        # The merged weights replace the current model; the tokenizer is left as it was.
+        self.model = AutoModelForCausalLM.from_pretrained(output_dir)
+        return self
+    @pipeline_stage
+    def apply_spectrum(self, domain_data: str):
+        """Fit a SpectrumAnalyzer on ``domain_data`` and adopt its specialized model."""
+        analyzer = SpectrumAnalyzer(self.model)
+        analyzer.fit(domain_data)
+        self.model = analyzer.specialized_model
+        return self
+    def save_model(self, path: str):
+        """Save the current model and tokenizer to ``path``.
+        Both must already be set: while either is still None, the
+        ``save_pretrained`` call on it raises AttributeError.
+        """
+        self.model.save_pretrained(path)
+        self.tokenizer.save_pretrained(path)
+# Utility Functions
+def sanitize_label(label):
+    """Remove invalid characters for Mermaid labels."""
+    return re.sub(r'[^\w\s-]', '', label).replace(' ', '_')
+def sanitize_filename(label):
+    """Make a valid filename from a label."""
+    return re.sub(r'[^\w\s-]', '', label).replace(' ', '_')
+def parse_outline_to_mermaid(outline_text, search_agent):
+    """Convert tree outline to Mermaid syntax with clickable nodes."""
+    lines = outline_text.strip().split('\n')
+    nodes, edges, clicks, stack = [], [], [], []
+    for line in lines:
+        indent = len(line) - len(line.lstrip())
+        level = indent // 4
+        label = re.sub(r'^[#*\->\d\.\s]+', '', line.strip())
+        if label:
+            node_id = f"N{len(nodes)}"
+            sanitized_label = sanitize_label(label)
+            nodes.append(f'{node_id}["{label}"]')
+            search_url = search_urls[search_agent](label)
+            clicks.append(f'click {node_id} "{search_url}" _blank')
+            if stack:
+                parent_level = stack[-1][0]
+                if level > parent_level:
+                    edges.append(f"{stack[-1][1]} --> {node_id}")
+                    stack.append((level, node_id))
+                else:
+                    while stack and stack[-1][0] >= level:
+                        stack.pop()
+                    if stack:
+                        edges.append(f"{stack[-1][1]} --> {node_id}")
+                    stack.append((level, node_id))
+            else:
+                stack.append((level, node_id))
+    return "%%{init: {'themeVariables': {'fontSize': '18px'}}}%%\nflowchart LR\n" + "\n".join(nodes + edges + clicks)
+def generate_mermaid_html(mermaid_code):
+    """Generate HTML to display Mermaid diagram."""
+    return f"""
+    <html><head><script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
+    <style>.centered-mermaid{{display:flex;justify-content:center;margin:20px auto;}}</style></head>
+    <body><div class="mermaid centered-mermaid">{mermaid_code}</div>
+    <script>mermaid.initialize({{startOnLoad:true}});</script></body></html>
+    """
+def grow_tree(base_tree, new_node_name, parent_node):
+    """Add a new node to the tree under a specified parent."""
+    lines = base_tree.strip().split('\n')
+    new_lines = []
+    added = False
+    for line in lines:
+        new_lines.append(line)
+        if parent_node in line and not added:
+            indent = len(line) - len(line.lstrip())
+            new_lines.append(f"{' ' * (indent + 4)}- {new_node_name} 🌱")
+            added = True
+    return "\n".join(new_lines)
+def get_download_link(file_path, mime_type="text/plain"):
+    """Generate a download link for a file."""
+    with open(file_path, 'rb') as f:
+        data = f.read()
+    b64 = base64.b64encode(data).decode()
+    return f'<a href="data:{mime_type};base64,{b64}" download="{file_path}">Download {file_path}</a>'
+def save_tree_to_file(tree_text, parent_node, new_node):
+    """Save tree to a markdown file with name based on nodes."""
+    root_node = tree_text.strip().split('\n')[0].split('.')[1].strip() if tree_text.strip() else "Knowledge_Tree"
+    filename = f"{sanitize_filename(root_node)}_{sanitize_filename(parent_node)}_{sanitize_filename(new_node)}_{int(time.time())}.md"
+    mermaid_code = parse_outline_to_mermaid(tree_text, "🔮Google")  # Default search engine for saved trees
+    export_md = f"# Knowledge Tree: {root_node}\n\n## Outline\n{tree_text}\n\n## Mermaid Diagram\n```mermaid\n{mermaid_code}\n```"
+    with open(filename, "w") as f:
+        f.write(export_md)
+    return filename
+def load_trees_from_files():
+    """Load all saved tree markdown files."""
+    tree_files = glob.glob("*.md")
+    trees_dict = {}
+    for file in tree_files:
+        if file != "README.md" and file != "knowledge_tree.md":  # Skip project README and temp export
+            try:
+                with open(file, 'r') as f:
+                    content = f.read()
+                    # Extract the tree name from the first line
+                    match = re.search(r'# Knowledge Tree: (.*)', content)
+                    if match:
+                        tree_name = match.group(1)
+                    else:
+                        tree_name = os.path.splitext(file)[0]
+                    # Extract the outline section
+                    outline_match = re.search(r'## Outline\n(.*?)(?=\n## |$)', content, re.DOTALL)
+                    if outline_match:
+                        tree_outline = outline_match.group(1).strip()
+                        trees_dict[f"{tree_name} ({file})"] = tree_outline
+            except Exception as e:
+                print(f"Error loading {file}: {e}")
+    return trees_dict
+# Search Agents (Highest resolution social network default: X)
+search_urls = {
+    "📚📖ArXiv": lambda k: f"/?q={quote(k)}",
+    "🔮Google": lambda k: f"https://www.google.com/search?q={quote(k)}",
+    "📺Youtube": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
+    "🔭Bing": lambda k: f"https://www.bing.com/search?q={quote(k)}",
+    "💡Truth": lambda k: f"https://truthsocial.com/search?q={quote(k)}",
+    "📱X": lambda k: f"https://twitter.com/search?q={quote(k)}",
+}
+# Main App
+st.title("🌳 AI Knowledge Tree Builder 🌱")
+# Sidebar with saved trees
+# Offers every outline returned by load_trees_from_files(), after a "None" placeholder.
+st.sidebar.title("Saved Trees")
+saved_trees = load_trees_from_files()
+selected_saved_tree = st.sidebar.selectbox("Select a saved tree", ["None"] + list(saved_trees.keys()))
+# Select Project Type
+project_type = st.selectbox("Select Project Type", ["Code Project", "Papers Project", "AI Project"])
+# Initialize or load tree
+# The working outline is kept in st.session_state['current_tree'].
+if 'current_tree' not in st.session_state:
+    if selected_saved_tree != "None" and selected_saved_tree in saved_trees:
+        st.session_state['current_tree'] = saved_trees[selected_saved_tree]
+    else:
+        # NOTE(review): "ML Engineering" is always a key of `trees`, so the
+        # project_seeds[project_type] fallback is never used — confirm which
+        # default outline is intended.
+        st.session_state['current_tree'] = trees.get("ML Engineering", project_seeds[project_type])
+elif selected_saved_tree != "None" and selected_saved_tree in saved_trees:
+    # While a saved tree is selected, it replaces the working outline each time this branch runs.
+    st.session_state['current_tree'] = saved_trees[selected_saved_tree]
+# Select Search Agent for Node Links
+# index=5 selects the sixth key of search_urls.
+search_agent = st.selectbox("Select Search Agent for Node Links", list(search_urls.keys()), index=5)  # Default to X
+# Tree Growth
+new_node = st.text_input("Add New Node")
+parent_node = st.text_input("Parent Node")
+# Grows the tree only when the button is pressed and both text inputs are non-empty.
+if st.button("Grow Tree 🌱") and new_node and parent_node:
+    st.session_state['current_tree'] = grow_tree(st.session_state['current_tree'], new_node, parent_node)
+    # Save to a new file with the node names
+    saved_file = save_tree_to_file(st.session_state['current_tree'], parent_node, new_node)
+    # NOTE(review): grow_tree adds nothing when no outline line contains
+    # parent_node, yet the success message below is shown regardless.
+    st.success(f"Added '{new_node}' under '{parent_node}' and saved to {saved_file}!")
+    # Also update the temporary current_tree.md for compatibility
+    with open("current_tree.md", "w") as f:
+        f.write(st.session_state['current_tree'])
+# Display Mermaid Diagram
+# The diagram is rebuilt from the working outline with the selected search agent.
+st.markdown("### Knowledge Tree Visualization")
+mermaid_code = parse_outline_to_mermaid(st.session_state['current_tree'], search_agent)
+components.html(generate_mermaid_html(mermaid_code), height=600)
+# Export Tree
+# Writes outline plus diagram to knowledge_tree.md and shows a download link for it.
+if st.button("Export Tree as Markdown"):
+    export_md = f"# Knowledge Tree\n\n## Outline\n{st.session_state['current_tree']}\n\n## Mermaid Diagram\n```mermaid\n{mermaid_code}\n```"
+    with open("knowledge_tree.md", "w") as f:
+        f.write(export_md)
+    st.markdown(get_download_link("knowledge_tree.md", "text/markdown"), unsafe_allow_html=True)
+# AI Project: Model Building Options
+if project_type == "AI Project":
+    st.subheader("AI Model Building Options")
+    model_option = st.radio("Choose Model Building Method", ["Minimal ML Model from CSV", "Advanced Model Pipeline"])
+    if model_option == "Minimal ML Model from CSV":
+        st.write("### Build Minimal ML Model from CSV")
+        uploaded_file = st.file_uploader("Upload CSV", type="csv")
+        if uploaded_file:
+            df = pd.read_csv(uploaded_file)
+            st.write("Columns:", df.columns.tolist())
+            feature_cols = st.multiselect("Select feature columns", df.columns)
+            target_col = st.selectbox("Select target column", df.columns)
+            if st.button("Train Model"):
+                X = df[feature_cols].values
+                y = df[target_col].values
+                X_tensor = torch.tensor(X, dtype=torch.float32)
+                y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)
+                dataset = TensorDataset(X_tensor, y_tensor)
+                loader = DataLoader(dataset, batch_size=32, shuffle=True)
+                model = nn.Linear(X.shape[1], 1)
+                criterion = nn.MSELoss()
+                optimizer = optim.Adam(model.parameters(), lr=0.01)
+                for epoch in range(10):
+                    for batch_X, batch_y in loader:
+                        optimizer.zero_grad()
+                        outputs = model(batch_X)
+                        loss = criterion(outputs, batch_y)
+                        loss.backward()
+                        optimizer.step()
+                torch.save(model.state_dict(), "model.pth")
+                app_code = f"""
+import streamlit as st
+import torch
+import torch.nn as nn
+model = nn.Linear({len(feature_cols)}, 1)
+model.load_state_dict(torch.load("model.pth"))
+model.eval()
+st.title("ML Model Demo")
+inputs = []
+for col in {feature_cols}:
+    inputs.append(st.number_input(col))
+if st.button("Predict"):
+    input_tensor = torch.tensor([inputs], dtype=torch.float32)
+    prediction = model(input_tensor).item()
+    st.write(f"Predicted {target_col}: {{prediction}}")
+"""
+                with open("app.py", "w") as f:
+                    f.write(app_code)
+                reqs = "streamlit\ntorch\npandas\n"
+                with open("requirements.txt", "w") as f:
+                    f.write(reqs)
+                readme = """
+# ML Model Demo
+## How to run
+1. Install requirements: `pip install -r requirements.txt`
+2. Run the app: `streamlit run app.py`
+3. Input feature values and click "Predict".
+"""
+                with open("README.md", "w") as f:
+                    f.write(readme)
+                st.markdown(get_download_link("model.pth", "application/octet-stream"), unsafe_allow_html=True)
+                st.markdown(get_download_link("app.py", "text/plain"), unsafe_allow_html=True)
+                st.markdown(get_download_link("requirements.txt", "text/plain"), unsafe_allow_html=True)
+                st.markdown(get_download_link("README.md", "text/markdown"), unsafe_allow_html=True)
+    elif model_option == "Advanced Model Pipeline":
+        st.write("### Advanced Model Building Pipeline")
+        # Model Configuration
+        with st.expander("Model Configuration", expanded=True):
+            base_model = st.selectbox(
+                "Select Base Model",
+                ["mistral-7b", "llama-2-7b", "gpt2-medium"]
+            )
+            model_name = st.text_input("Model Name", "custom-model")
+            domain = st.text_input("Target Domain", "general")
+            use_distillation = st.checkbox("Apply Distillation", True)
+            use_merging = st.checkbox("Apply Model Merging", False)
+            use_spectrum = st.checkbox("Apply Spectrum Specialization", True)
+        # Build Model
+        if st.button("Build Advanced Model"):
+            config = ModelConfig(
+                name=model_name,
+                base_model=base_model,
+                size="7B",
+                domain=domain
+            )
+            builder = ModelBuilder()
+            with st.status("Building advanced model...", expanded=True) as status:
+                builder.load_base_model(config.base_model)
+                if use_distillation:
+                    teacher_model = st.selectbox(
+                        "Select Teacher Model",
+                        ["mistral-13b", "llama-2-13b"]
+                    )
+                    builder.apply_distillation(teacher_model, f"distilled_{config.name}")
+                if use_merging:
+                    models_to_merge = st.multiselect(
+                        "Select Models to Merge",
+                        ["mistral-7b", "llama-2-7b", "gpt2-medium"]
+                    )
+                    builder.apply_merge(models_to_merge, f"merged_{config.name}")
+                if use_spectrum:
+                    domain_data = st.text_area("Enter domain-specific data", "Sample domain data")
+                    builder.apply_spectrum(domain_data)
+                builder.save_model(config.model_path)
+                status.update(label="Advanced model built successfully!", state="complete")
+            # Generate deployment files
+            app_code = f"""
+import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained("{config.model_path}")
+tokenizer = AutoTokenizer.from_pretrained("{config.model_path}")
+st.title("Advanced Model Demo")
+input_text = st.text_area("Enter text")
+if st.button("Generate"):
+    inputs = tokenizer(input_text, return_tensors="pt")
+    outputs = model.generate(**inputs)
+    st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))
+"""
+            with open("advanced_app.py", "w") as f:
+                f.write(app_code)
+            reqs = "streamlit\ntorch\ntransformers\n"
+            with open("advanced_requirements.txt", "w") as f:
+                f.write(reqs)
+            readme = f"""
+# Advanced Model Demo
+## How to run
+1. Install requirements: `pip install -r advanced_requirements.txt`
+2. Run the app: `streamlit run advanced_app.py`
+3. Input text and click "Generate".
+"""
+            with open("advanced_README.md", "w") as f:
+                f.write(readme)
+            st.markdown(get_download_link("advanced_app.py", "text/plain"), unsafe_allow_html=True)
+            st.markdown(get_download_link("advanced_requirements.txt", "text/plain"), unsafe_allow_html=True)
+            st.markdown(get_download_link("advanced_README.md", "text/markdown"), unsafe_allow_html=True)
+            st.write(f"Model saved at: {config.model_path}")
+if __name__ == "__main__":
+    st.run()