File size: 3,953 Bytes
df2b222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Async utilities for improved performance in concurrent operations."""

import asyncio
import aiohttp
from typing import Dict, Any, List
from concurrent.futures import ThreadPoolExecutor
from .config import api_config, app_config
from .exceptions import APIError
from .logging_config import logger

class AsyncWebSearchAgent:
    """Run several web searches concurrently from asyncio code.

    Intended to be used as an async context manager::

        async with AsyncWebSearchAgent() as agent:
            results = await agent.search_multiple_queries(["a", "b"])

    Entering the context opens an ``aiohttp.ClientSession`` and leaving it
    closes that session again. The searches themselves currently go through
    the synchronous ``TavilyClient`` on a worker thread, so in this class the
    session only serves as the "context is active" marker that
    ``search_multiple_queries`` checks before doing any work.
    """

    def __init__(self):
        # Populated by __aenter__; None means "not inside the context".
        self.session = None

    async def __aenter__(self):
        """Open the HTTP session and return the agent itself."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session; exceptions from the body are not suppressed."""
        # Detach first so a closed session is never left on the instance:
        # the "session not initialized" guard then also rejects use after exit.
        session, self.session = self.session, None
        if session:
            await session.close()

    async def search_multiple_queries(self, queries: List[str]) -> List[Dict[str, Any]]:
        """Search all ``queries`` concurrently and return one entry per query.

        Args:
            queries: Search strings; results are returned in the same order.

        Returns:
            A list with one dict per query. A successful entry is whatever
            ``_search_single_query`` returned. A failed entry has the shape
            ``{"error": <message>, "query": <query>, "results": []}``, so a
            single failing search never aborts the whole batch.

        Raises:
            APIError: If the agent is used outside its async context.
        """
        if not self.session:
            raise APIError("AsyncWebSearch", "Session not initialized. Use as async context manager.")

        logger.info(f"Starting concurrent search for {len(queries)} queries")

        # return_exceptions=True makes gather hand failures back as values
        # instead of raising the first one, so every query gets an outcome.
        outcomes = await asyncio.gather(
            *(self._search_single_query(query) for query in queries),
            return_exceptions=True,
        )

        processed_results = []
        successful = 0
        for i, (query, outcome) in enumerate(zip(queries, outcomes)):
            # BaseException, not Exception: a cancelled sub-search comes back
            # as asyncio.CancelledError, which is not an Exception subclass on
            # Python 3.8+ and would otherwise be returned as if it were a
            # result dict.
            if isinstance(outcome, BaseException):
                # Some exceptions stringify to "", which callers testing
                # result.get("error") would mistake for success.
                message = str(outcome) or type(outcome).__name__
                logger.error(f"Search failed for query {i}: {message}")
                processed_results.append({
                    "error": message,
                    "query": query,
                    "results": [],
                })
            else:
                successful += 1
                processed_results.append(outcome)

        logger.info(f"Completed concurrent searches: {successful} successful")
        return processed_results

    async def _search_single_query(self, query: str) -> Dict[str, Any]:
        """Run one Tavily search without blocking the event loop.

        Args:
            query: The search string to send to Tavily.

        Returns:
            A dict with the keys ``query``, ``tavily_answer``, ``results`` and
            ``data_source``, filled from the Tavily response.

        Raises:
            APIError: If importing the client, building it, or the search
                itself fails; the original exception is attached as the cause.
        """
        try:
            # Imported lazily so a missing `tavily` package is reported as an
            # APIError for this query rather than at module import time.
            from tavily import TavilyClient
            client = TavilyClient(api_key=api_config.tavily_api_key)

            def run_search():
                return client.search(
                    query=query,
                    search_depth="basic",
                    max_results=app_config.max_search_results,
                    include_answer=True,
                )

            # The Tavily client is synchronous, so run it on the event loop's
            # default thread pool. A per-call ThreadPoolExecutor used as a
            # context manager would call shutdown(wait=True) on the event-loop
            # thread, stalling every other task (notably on cancellation).
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, run_search)

            return {
                "query": response.get("query", query),
                "tavily_answer": response.get("answer"),
                "results": response.get("results", []),
                "data_source": "Tavily Search API (Async)",
            }

        except Exception as e:
            # Chain the cause so the underlying traceback stays available.
            raise APIError("Tavily", f"Async search failed: {str(e)}") from e

async def process_subquestions_concurrently(sub_questions: List[str]) -> List[Dict[str, Any]]:
    """Search every sub-question concurrently and return the results.

    Opens an ``AsyncWebSearchAgent`` for the duration of the call and hands
    the whole list to its ``search_multiple_queries`` method.

    Args:
        sub_questions: The sub-questions to search for.

    Returns:
        The list returned by ``search_multiple_queries`` for these questions.
    """
    logger.info(f"Processing {len(sub_questions)} sub-questions concurrently")

    # The agent is closed by the context manager once the awaited batch
    # finishes, including when it is left via this return.
    async with AsyncWebSearchAgent() as agent:
        return await agent.search_multiple_queries(sub_questions)