Spaces:

arittrabag
/

zocket-backend

Sleeping

App Files Files Community

arittrabag committed on Jun 27

Commit

2b88d9f

verified ·

1 Parent(s): 91b91a6

Upload 3 files

Browse files

Files changed (2) hide show

enhanced_knowledge_graph.py +253 -0
enhanced_retriever.py +128 -0

enhanced_knowledge_graph.py ADDED Viewed

	@@ -0,0 +1,253 @@

+from typing import Dict, List, Set, Tuple, Optional
+from collections import defaultdict, deque
class EnhancedKnowledgeGraph:
    """Weighted, directed knowledge graph of tones, platforms and creative types.

    Attributes:
        nodes: Maps a node name to ``{"type": ..., "properties": {...}}``.
        edges: Maps a source node name to a list of
            ``{"to": ..., "relationship": ..., "weight": ...}`` dictionaries.
    """

    def __init__(self):
        """Create the built-in nodes and wire up the default relationships."""
        # Node properties
        self.nodes = {
            # Tones
            "fun": {
                "type": "tone",
                "properties": {
                    "formality": 0.2,
                    "energy": 0.9,
                    "creativity": 0.8,
                },
            },
            "professional": {
                "type": "tone",
                "properties": {
                    "formality": 0.9,
                    "energy": 0.5,
                    "creativity": 0.3,
                },
            },
            "semi-fun": {
                "type": "tone",
                "properties": {
                    "formality": 0.5,
                    "energy": 0.7,
                    "creativity": 0.6,
                },
            },
            # Platforms
            "Meta": {
                "type": "platform",
                "properties": {
                    "char_limit": 2200,
                    "emoji_friendly": True,
                    "hashtag_friendly": True,
                    "visual_emphasis": 0.9,
                },
            },
            "Google": {
                "type": "platform",
                "properties": {
                    "char_limit": 90,
                    "emoji_friendly": False,
                    "hashtag_friendly": False,
                    "visual_emphasis": 0.2,
                },
            },
            "LinkedIn": {
                "type": "platform",
                "properties": {
                    "char_limit": 3000,
                    "emoji_friendly": False,
                    "hashtag_friendly": True,
                    "visual_emphasis": 0.4,
                },
            },
            # Creative Types
            "awareness": {
                "type": "creative_type",
                "properties": {
                    "goal": "brand_visibility",
                    "cta_strength": 0.3,
                },
            },
            "engagement": {
                "type": "creative_type",
                "properties": {
                    "goal": "interaction",
                    "cta_strength": 0.7,
                },
            },
            "conversion": {
                "type": "creative_type",
                "properties": {
                    "goal": "sales",
                    "cta_strength": 1.0,
                },
            },
        }
        # Edges (relationships), keyed by source node name.
        self.edges = defaultdict(list)
        self._build_relationships()

    def _build_relationships(self):
        """Populate ``self.edges`` with the default weighted relationships."""
        # Tone -> Platform compatibility
        self.add_edge("fun", "Meta", "highly_compatible", weight=0.9)
        self.add_edge("fun", "LinkedIn", "moderately_compatible", weight=0.3)
        self.add_edge("fun", "Google", "poorly_compatible", weight=0.1)
        self.add_edge("professional", "LinkedIn", "highly_compatible", weight=0.95)
        self.add_edge("professional", "Google", "highly_compatible", weight=0.9)
        self.add_edge("professional", "Meta", "moderately_compatible", weight=0.5)
        self.add_edge("semi-fun", "Meta", "highly_compatible", weight=0.8)
        self.add_edge("semi-fun", "LinkedIn", "highly_compatible", weight=0.7)
        self.add_edge("semi-fun", "Google", "moderately_compatible", weight=0.5)
        # Tone -> Creative Type
        self.add_edge("fun", "awareness", "suitable_for", weight=0.9)
        self.add_edge("fun", "engagement", "suitable_for", weight=0.95)
        self.add_edge("professional", "conversion", "suitable_for", weight=0.9)
        self.add_edge("semi-fun", "engagement", "suitable_for", weight=0.8)
        # Platform -> Creative Type preferences
        self.add_edge("Meta", "engagement", "prefers", weight=0.9)
        self.add_edge("LinkedIn", "conversion", "prefers", weight=0.8)
        self.add_edge("Google", "conversion", "prefers", weight=0.95)

    def add_edge(self, from_node: str, to_node: str, relationship: str, weight: float = 1.0):
        """Append a directed edge ``from_node -> to_node`` to the graph.

        Args:
            from_node: Name of the source node.
            to_node: Name of the target node.
            relationship: Label stored on the edge.
            weight: Strength stored on the edge; higher means stronger.
        """
        self.edges[from_node].append({
            "to": to_node,
            "relationship": relationship,
            "weight": weight,
        })

    def traverse_bfs(self, start_node: str, max_depth: int = 2) -> Dict[str, List[Tuple[str, str, float]]]:
        """Collect incoming edges of nodes reachable from ``start_node``.

        The outgoing edges of every node at depth ``0..max_depth`` (the start
        node is depth 0) are recorded, so the result can contain nodes up to
        ``max_depth + 1`` hops away. A negative ``max_depth`` yields ``{}``.

        Args:
            start_node: Node to start the breadth-first walk from.
            max_depth: Deepest level whose outgoing edges are still expanded.

        Returns:
            A dict mapping each reached node to a list of
            ``(source_node, relationship, weight)`` tuples.
        """
        visited = set()
        queue = deque([(start_node, 0)])
        paths = defaultdict(list)
        while queue:
            current_node, depth = queue.popleft()
            if current_node in visited or depth > max_depth:
                continue
            visited.add(current_node)
            for edge in self.edges.get(current_node, []):
                to_node = edge["to"]
                paths[to_node].append((current_node, edge["relationship"], edge["weight"]))
                if depth < max_depth:
                    queue.append((to_node, depth + 1))
        return dict(paths)

    def find_best_path(self, start: str, end: str) -> Optional[List[Tuple[str, str, float]]]:
        """Find the strongest path from ``start`` to ``end``.

        Runs Dijkstra's algorithm with an edge cost of ``1 - weight``, so
        stronger relationships are cheaper to follow.

        Args:
            start: Name of the node the path starts at.
            end: Name of the node the path should reach.

        Returns:
            The path as a list of ``(source_node, relationship, weight)``
            tuples ordered from ``start`` towards ``end``, or ``None`` when
            ``end`` cannot be reached (including ``start == end``).
        """
        # Edges may reference nodes that were never registered in self.nodes
        # (add_edge does not validate), so build the node set from both.
        universe = set(self.nodes)
        universe.add(start)
        for source, outgoing in self.edges.items():
            universe.add(source)
            universe.update(edge["to"] for edge in outgoing)

        infinity = float("inf")
        distances = {node: infinity for node in universe}
        distances[start] = 0.0
        previous = {}
        unvisited = set(universe)
        while unvisited:
            current = min(unvisited, key=distances.__getitem__)
            if distances[current] == infinity:
                break  # everything left is unreachable
            unvisited.remove(current)
            if current == end:
                break  # the distance to ``end`` is final once it is selected
            for edge in self.edges.get(current, []):
                neighbor = edge["to"]
                # Convert strength to a cost; clamp so weights above 1 cannot
                # create negative costs, which Dijkstra does not support.
                cost = max(0.0, 1 - edge["weight"])
                candidate = distances[current] + cost
                if candidate < distances[neighbor]:
                    distances[neighbor] = candidate
                    previous[neighbor] = (current, edge["relationship"], edge["weight"])

        # Reconstruct the path by walking the predecessor links back to start.
        if end not in previous:
            return None
        path = []
        node = end
        while node != start:
            step = previous.get(node)
            if step is None:
                return None
            path.append(step)
            node = step[0]
        path.reverse()
        return path

    def get_recommendations(self, tone: str, platform: str) -> Dict[str, object]:
        """Build recommendations for using ``tone`` on ``platform``.

        Args:
            tone: Name of the tone node.
            platform: Name of the platform node.

        Returns:
            A dict with the keys ``compatibility_score`` (weight of the first
            direct tone -> platform edge, ``0`` if there is none),
            ``suggested_elements``, ``warnings`` and ``creative_types``.
            Unknown tones or platforms simply produce empty suggestions.
        """
        recommendations = {
            "compatibility_score": 0,
            "suggested_elements": [],
            "warnings": [],
            "creative_types": [],
        }
        # Check direct compatibility
        for edge in self.edges.get(tone, []):
            if edge["to"] == platform:
                recommendations["compatibility_score"] = edge["weight"]
                break

        # Creative types the tone is strongly suitable for. Each node is
        # listed once even if several qualifying edges point at it.
        tone_paths = self.traverse_bfs(tone, max_depth=1)
        for node, incoming in tone_paths.items():
            if self.nodes.get(node, {}).get("type") != "creative_type":
                continue
            if any(rel == "suitable_for" and weight > 0.7 for _, rel, weight in incoming):
                recommendations["creative_types"].append(node)

        # Platform-specific suggestions
        platform_props = self.nodes.get(platform, {}).get("properties", {})
        tone_props = self.nodes.get(tone, {}).get("properties", {})
        if platform_props.get("emoji_friendly") and tone_props.get("creativity", 0) > 0.7:
            recommendations["suggested_elements"].append("Use emojis to enhance engagement")
        elif not platform_props.get("emoji_friendly") and tone == "fun":
            recommendations["warnings"].append("Platform doesn't support emojis well - adjust tone")
        if platform_props.get("char_limit", float('inf')) < 100:
            recommendations["suggested_elements"].append("Keep message extremely concise")
        return recommendations

    def explain_relationship(self, node1: str, node2: str) -> str:
        """Describe how ``node1`` relates to ``node2`` in plain text.

        A direct edge is reported on its own; otherwise the strongest
        multi-hop path is rendered hop by hop, joined with arrows.

        Args:
            node1: Name of the node the explanation starts from.
            node2: Name of the node the explanation should reach.

        Returns:
            A human-readable description, or a "no relationship" message when
            ``node2`` is not reachable from ``node1``.
        """
        # Check direct connection first
        for edge in self.edges.get(node1, []):
            if edge["to"] == node2:
                return f"{node1} is {edge['relationship']} with {node2} (strength: {edge['weight']:.2f})"
        # If no direct connection, find path
        path = self.find_best_path(node1, node2)
        if not path:
            return f"No direct relationship found between {node1} and {node2}"
        # Each path entry only stores the hop's source; its target is the
        # source of the following hop, or node2 for the final hop.
        targets = [step[0] for step in path[1:]] + [node2]
        explanation = [
            f"{source} {relationship} {target} (strength: {weight:.2f})"
            for (source, relationship, weight), target in zip(path, targets)
        ]
        return " → ".join(explanation)

enhanced_retriever.py ADDED Viewed

	@@ -0,0 +1,128 @@

+from typing import List, Dict, Tuple
+import numpy as np
+from collections import defaultdict
+import re
class EnhancedRetriever:
    """Guideline retriever that ranks entries by a hand-crafted feature similarity.

    Attributes:
        guideline_path: Path of the guideline text file that was loaded.
        guidelines: Maps a lowercase category name to its guideline strings.
        embeddings_cache: Memoised feature vectors keyed by guideline text.
    """

    # Characters in the range U+1F600 to U+1F64F.
    _EMOJI_PATTERN = re.compile(r'[😀-🙏]')
    _FORMAL_WORDS = ('professional', 'value', 'benefits', 'business')
    _CASUAL_WORDS = ('fun', 'playful', 'emoji', 'snappy')
    _CTA_WORDS = ('cta', 'button', 'click')

    def __init__(self, guideline_path: str = "tone_guidelines.txt"):
        """Load the guidelines stored at ``guideline_path``.

        Args:
            guideline_path: Text file to read the guidelines from.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        self.guideline_path = guideline_path
        self.guidelines = self._load_guidelines()
        self.embeddings_cache = {}

    def _load_guidelines(self) -> Dict[str, List[str]]:
        """Parse the guideline file into ``{category: [guideline, ...]}``.

        A non-bullet line containing ``:`` starts a new category (colons are
        removed and the name is lowercased). Every other non-empty line is
        stored under the current category with surrounding ``-`` and spaces
        stripped. Lines before the first category are ignored.
        """
        guidelines = defaultdict(list)
        current_key = None
        with open(self.guideline_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                # A bullet is always an item, even when its text contains a
                # colon; otherwise it would be mistaken for a category header.
                is_bullet = line.startswith("-")
                if ":" in line and not is_bullet:
                    current_key = line.replace(":", "").strip().lower()
                elif current_key:
                    guidelines[current_key].append(line.strip("- ").strip())
        return dict(guidelines)

    def _simple_embedding(self, text: str) -> np.ndarray:
        """Turn ``text`` into a small vector of hand-picked features.

        The vector holds, in order: word count, emoji flag, exclamation flag,
        number of formal keywords present, number of casual keywords present,
        call-to-action flag and hashtag flag. Keyword checks are
        case-insensitive substring matches.

        Returns:
            A ``float32`` array with seven entries.
        """
        text = text.lower()
        features = [
            len(text.split()),
            int(bool(self._EMOJI_PATTERN.search(text))),
            int('!' in text),
            sum(1 for word in self._FORMAL_WORDS if word in text),
            sum(1 for word in self._CASUAL_WORDS if word in text),
            int(any(word in text for word in self._CTA_WORDS)),
            int('#' in text or 'hashtag' in text),
        ]
        return np.array(features, dtype=np.float32)

    def _cosine_similarity(self, vec1: np.ndarray, vec2: np.ndarray) -> float:
        """Return the cosine similarity of two vectors as a Python ``float``.

        Returns ``0.0`` when either vector has zero length.
        """
        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        if norm1 == 0 or norm2 == 0:
            return 0.0
        # Convert the NumPy scalar so the score is a plain, JSON-friendly float.
        return float(np.dot(vec1, vec2) / (norm1 * norm2))

    def semantic_search(self, query: str, top_k: int = 5) -> List[Tuple[str, str, float]]:
        """Rank every guideline by similarity to ``query``.

        Args:
            query: Free text to compare the guidelines against.
            top_k: Maximum number of results; values ``<= 0`` give ``[]``.

        Returns:
            Up to ``top_k`` ``(category, guideline, similarity)`` tuples,
            most similar first.
        """
        if top_k <= 0:
            return []
        query_embedding = self._simple_embedding(query)
        results = []
        for category, items in self.guidelines.items():
            for item in items:
                # Guideline texts are fixed after loading, so reuse their vectors.
                item_embedding = self.embeddings_cache.get(item)
                if item_embedding is None:
                    item_embedding = self._simple_embedding(item)
                    self.embeddings_cache[item] = item_embedding
                similarity = self._cosine_similarity(query_embedding, item_embedding)
                results.append((category, item, similarity))
        # Sort by similarity score
        results.sort(key=lambda result: result[2], reverse=True)
        return results[:top_k]

    def retrieve_with_relevance(self, tone: str, platforms: List[str]) -> Dict[str, object]:
        """Retrieve guidelines for a tone and a set of platforms.

        Args:
            tone: Tone name; matched against categories case-insensitively.
            platforms: Platform names; matched case-insensitively.

        Returns:
            A dict with ``direct_matches`` (guidelines keyed by the tone and
            platform names as passed in), ``semantic_matches`` (ranked
            guidelines from categories that were not matched directly) and
            ``relevance_scores`` (``1.0`` for each direct match).
        """
        context_query = f"{tone} tone for {' '.join(platforms)} platforms"
        semantic_results = self.semantic_search(context_query)
        response = {
            "direct_matches": {},
            "semantic_matches": [],
            "relevance_scores": {},
        }
        # Categories are stored lowercase while callers may pass any casing,
        # so remember the lowercase names that were matched directly.
        matched_categories = set()
        for requested in [tone, *platforms]:
            category = requested.lower()
            if category in self.guidelines:
                response["direct_matches"][requested] = self.guidelines[category]
                response["relevance_scores"][requested] = 1.0
                matched_categories.add(category)
        # Add semantic matches from categories not already returned in full.
        for category, item, score in semantic_results:
            if category not in matched_categories:
                response["semantic_matches"].append({
                    "category": category,
                    "guideline": item,
                    "relevance": score,
                })
        return response

    def format_guidance_with_scores(self, retrieval_result: Dict) -> str:
        """Render a ``retrieve_with_relevance`` result as readable text.

        Direct matches are listed in full with their relevance score, followed
        by at most the first three semantic matches.
        """
        output = []
        # Direct matches
        for key, guidelines in retrieval_result["direct_matches"].items():
            score = retrieval_result["relevance_scores"].get(key, 0)
            output.append(f"\n{key} Guidelines (Relevance: {score:.2f}):")
            output.extend(f"  - {guideline}" for guideline in guidelines)
        # Semantic matches
        if retrieval_result["semantic_matches"]:
            output.append("\nAdditional Relevant Guidelines:")
            for match in retrieval_result["semantic_matches"][:3]:  # Top 3
                output.append(f"  - [{match['category']}] {match['guideline']} (Score: {match['relevance']:.2f})")
        return "\n".join(output)