<!--
  Provenance (repository page header, kept for reference):
  Qwen2vl-Flux / flux-architecture.svg
  youknownothing's picture
  Duplicate from Djrango/Qwen2vl-Flux
  ed57de1 verified
-->
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 1000"
     role="img" aria-labelledby="diagramTitle diagramDesc">
<!-- Accessible name and description; the wording mirrors the labels drawn in the diagram. -->
<title id="diagramTitle">Qwen2VL-Flux architecture</title>
<desc id="diagramDesc">Top-to-bottom pipeline: Input Image, Qwen2VL-7B Transformer (stacked Self-Attention Layers), Qwen2VL Image Embeddings, Connector, FLUX Transformer (Double Block Layer ×N, Single Block Layer ×M), Generated Image. An optional GridDot Panel and optional T5 Text Embeddings are shown as dashed side inputs.</desc>
<defs>
  <!-- Gradient fills: translucent diagonal ramps (top-left to bottom-right), opacity 0.15 to 0.25 -->
  <!-- Blue ramp, used by the processing blocks -->
  <linearGradient id="blockGradient" x1="0%" y1="0%" x2="100%" y2="100%">
    <stop offset="0%" style="stop-color:#5B8FF9;stop-opacity:0.15"/>
    <stop offset="100%" style="stop-color:#5B8FF9;stop-opacity:0.25"/>
  </linearGradient>
  <!-- Green ramp, used by the input/output nodes -->
  <linearGradient id="inputGradient" x1="0%" y1="0%" x2="100%" y2="100%">
    <stop offset="0%" style="stop-color:#3CC780;stop-opacity:0.15"/>
    <stop offset="100%" style="stop-color:#3CC780;stop-opacity:0.25"/>
  </linearGradient>

  <!-- Arrowhead markers: a triangle whose tip sits near the end of the path (refX=9 of 10) -->
  <!-- Blue arrowhead for the main flow -->
  <marker id="arrow" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
    <path d="M 0 0 L 10 5 L 0 10 z" fill="#5B8FF9"/>
  </marker>
  <!-- Purple arrowhead for the GridDot control connection -->
  <marker id="controlArrow" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
    <path d="M 0 0 L 10 5 L 0 10 z" fill="#9C27B0"/>
  </marker>

  <!-- Drop shadow: blur the alpha channel, shift it by (2,2), scale its opacity by 0.2,
       then paint the original graphic on top of it -->
  <filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
    <feGaussianBlur in="SourceAlpha" stdDeviation="2" result="blur"/>
    <feOffset in="blur" dx="2" dy="2" result="offsetblur"/>
    <feComponentTransfer in="offsetblur" result="fadedShadow">
      <feFuncA type="linear" slope="0.2"/>
    </feComponentTransfer>
    <feMerge>
      <feMergeNode in="fadedShadow"/>
      <feMergeNode in="SourceGraphic"/>
    </feMerge>
  </filter>
</defs>
<!-- Reference image input node (position and size adjusted): green 300x50 box at (250,30) -->
<g filter="url(#shadow)" transform="translate(250,30)">
  <rect width="300" height="50" rx="8" stroke="#3CC780" stroke-width="2" fill="url(#inputGradient)"/>
  <text x="150" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" text-anchor="middle">Input Image</text>
</g>
<!-- Qwen2VL Transformer block (layout tidied): 400x250 container at (200,120) holding four layer rows -->
<g transform="translate(200,120)" filter="url(#shadow)">
  <rect x="0" y="0" width="400" height="250" rx="8" fill="url(#blockGradient)" stroke="#5B8FF9" stroke-width="2"/>
  <text x="200" y="35" text-anchor="middle" font-family="Arial" font-size="20" font-weight="bold" fill="#1A1A1A">Qwen2VL-7B Transformer</text>
  <!-- Self-attention layers, compact layout: four 300x35 rows spaced 45 units apart.
       The shared text attributes live on this wrapper so the row labels use the same
       Arial / #1A1A1A styling as every other label in the diagram (they previously fell
       back to the renderer's default font and colour). Each rect sets its own fill,
       so the inherited fill only affects the text. -->
  <g font-family="Arial" font-size="14" fill="#1A1A1A" text-anchor="middle">
    <g transform="translate(50,60)">
      <rect x="0" y="0" width="300" height="35" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="150" y="23">Self-Attention Layer</text>
    </g>
    <g transform="translate(50,105)">
      <rect x="0" y="0" width="300" height="35" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="150" y="23">Self-Attention Layer</text>
    </g>
    <g transform="translate(50,150)">
      <rect x="0" y="0" width="300" height="35" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="150" y="23">Self-Attention Layer</text>
    </g>
    <g transform="translate(50,195)">
      <rect x="0" y="0" width="300" height="35" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="150" y="23">Self-Attention Layer</text>
    </g>
  </g>
</g>
<!-- GridDot Panel (purple theme): dashed 140x140 side panel at (620,200) -->
<g transform="translate(620,200)">
  <rect width="140" height="140" rx="8" fill="#F3E5F5" stroke="#9C27B0" stroke-width="2" stroke-dasharray="4,4"/>
  <!-- "Optional" pill badge overlapping the panel's top-right corner -->
  <g transform="translate(110,-10)">
    <rect width="60" height="20" rx="10" fill="#9C27B0" filter="url(#shadow)"/>
    <text x="30" y="14" fill="white" font-family="Arial" font-size="12" text-anchor="middle">Optional</text>
  </g>
  <text x="70" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" font-weight="bold" text-anchor="middle">GridDot Panel</text>
  <!-- Grid visualisation (purple theme): 100x80 frame with a 3x3 arrangement of dots -->
  <g transform="translate(20,45)">
    <rect width="100" height="80" stroke="#9C27B0" fill="none"/>
    <g fill="#9C27B0">
      <!-- top row -->
      <circle r="3.5" cx="25" cy="20"/>
      <circle r="3.5" cx="50" cy="20"/>
      <circle r="3.5" cx="75" cy="20"/>
      <!-- middle row -->
      <circle r="3.5" cx="25" cy="40"/>
      <circle r="3.5" cx="50" cy="40"/>
      <circle r="3.5" cx="75" cy="40"/>
      <!-- bottom row -->
      <circle r="3.5" cx="25" cy="60"/>
      <circle r="3.5" cx="50" cy="60"/>
      <circle r="3.5" cx="75" cy="60"/>
    </g>
  </g>
</g>
<!-- Qwen2VL image embeddings node: blue 300x50 box at (250,410) -->
<g filter="url(#shadow)" transform="translate(250,410)">
  <rect width="300" height="50" rx="8" stroke="#5B8FF9" stroke-width="2" fill="url(#blockGradient)"/>
  <text x="150" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" text-anchor="middle">Qwen2VL Image Embeddings</text>
</g>
<!-- Connector network node: blue 300x50 box at (250,500) -->
<g filter="url(#shadow)" transform="translate(250,500)">
  <rect width="300" height="50" rx="8" stroke="#5B8FF9" stroke-width="2" fill="url(#blockGradient)"/>
  <text x="150" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" text-anchor="middle">Connector</text>
</g>
<!-- T5XXL text embeddings node (optional): dashed green 200x50 box at (50,590), no drop shadow -->
<g transform="translate(50,590)">
  <!-- Box body; the dashed outline marks the input as optional -->
  <rect width="200" height="50" rx="8" fill="url(#inputGradient)" stroke="#3CC780" stroke-width="2" stroke-dasharray="4,4"/>
  <!-- "Optional" pill badge overlapping the box's top-right corner -->
  <g transform="translate(170,-10)">
    <rect width="60" height="20" rx="10" fill="#FFB74D" filter="url(#shadow)"/>
    <text x="30" y="14" fill="white" font-family="Arial" font-size="12" text-anchor="middle">Optional</text>
  </g>
  <!-- Node label -->
  <text x="100" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" text-anchor="middle">T5 Text Embeddings</text>
</g>
<!-- FLUX Transformer block: 500x220 container at (150,680) holding two layer rows -->
<g transform="translate(150,680)" filter="url(#shadow)">
  <rect x="0" y="0" width="500" height="220" rx="8" fill="url(#blockGradient)" stroke="#5B8FF9" stroke-width="2"/>
  <text x="250" y="35" text-anchor="middle" font-family="Arial" font-size="20" font-weight="bold" fill="#1A1A1A">FLUX Transformer</text>
  <!-- Block layers: two 400x50 rows.
       The shared text attributes live on this wrapper so the row labels use the same
       Arial / #1A1A1A styling as every other label in the diagram (they previously fell
       back to the renderer's default font and colour). Each rect sets its own fill,
       so the inherited fill only affects the text. -->
  <g font-family="Arial" font-size="16" fill="#1A1A1A" text-anchor="middle">
    <g transform="translate(50,60)">
      <rect x="0" y="0" width="400" height="50" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="200" y="30">Double Block Layer</text>
    </g>
    <g transform="translate(50,130)">
      <rect x="0" y="0" width="400" height="50" rx="4" fill="white" stroke="#5B8FF9"/>
      <text x="200" y="30">Single Block Layer</text>
    </g>
  </g>
</g>
<!-- Output node: green 300x50 box at (250,940) -->
<g filter="url(#shadow)" transform="translate(250,940)">
  <rect width="300" height="50" rx="8" stroke="#3CC780" stroke-width="2" fill="url(#inputGradient)"/>
  <text x="150" y="30" fill="#1A1A1A" font-family="Arial" font-size="16" text-anchor="middle">Generated Image</text>
</g>
<!-- Connections (routing tidied). Declared after the block groups, so the lines paint on top of them. -->
<g fill="none" stroke-width="2">
  <!-- Main flow: blue arrows; stroke colour and arrowhead are inherited from this group -->
  <g stroke="#5B8FF9" marker-end="url(#arrow)">
    <!-- Input Image to Qwen2VL Transformer -->
    <path d="M400,80 V120"/>
    <!-- Qwen2VL Transformer to Image Embeddings -->
    <path d="M400,370 V410"/>
    <!-- Image Embeddings to Connector -->
    <path d="M400,460 V500"/>
    <!-- Connector to FLUX Transformer -->
    <path d="M400,550 V680"/>
    <!-- T5 Text Embeddings (right edge) to FLUX Transformer; dashed like the optional box -->
    <path d="M250,615 L400,680" stroke-dasharray="4,4"/>
    <!-- FLUX Transformer to Generated Image -->
    <path d="M400,900 V940"/>
  </g>
  <!-- GridDot Panel control: dashed purple arrow leaving the panel's left edge.
       NOTE(review): it ends at (550,400), 10 units above the top-right corner of the
       image-embeddings box rather than on an edge; confirm the intended target. -->
  <path d="M620,270 L550,400" stroke="#9C27B0" stroke-dasharray="5,5" marker-end="url(#controlArrow)"/>
</g>
<!-- Repeat-count labels for the FLUX block layers; shared text styling is inherited from the group -->
<g font-family="Arial" font-size="14" font-style="italic" fill="#666">
  <!-- Double Block Layer repeat indicator -->
  <text x="560" y="740">×N</text>
  <!-- Single Block Layer repeat indicator -->
  <text x="560" y="810">×M</text>
</g>
</svg>