File size: 5,546 Bytes
fd52f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import json
import os
from typing import AsyncGenerator, List

from decouple import config
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

# Get API keys from environment (.env file, via python-decouple)
TAVILY_API_KEY = config('TAVILY_API_KEY')
OPENAI_API_KEY = config('OPENAI_API_KEY')

# Debug logging.
# SECURITY: never print API-key material (even a truncated prefix) — stdout/logs
# are a common secret-leak vector. Report only presence and expected shape.
print(f"\nOpenAI API key loaded: {bool(OPENAI_API_KEY)}")
print(f"Key starts with 'sk-': {OPENAI_API_KEY.startswith('sk-')}\n")

# Set Tavily API key in the environment; some langchain-community code paths
# read it from os.environ rather than taking it as a constructor argument.
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

# Initialize the web-search tool used by the researcher agent.
search_tool = TavilySearchResults(tavily_api_key=TAVILY_API_KEY)

# Human-readable tool list, injected into the system prompt via {tools}.
tools_description = """
Available tools:
- TavilySearchResults: A search tool that provides comprehensive web search results. Use this to gather information about topics.
"""

# Create the prompt template.
# BUGFIX: the agent scratchpad must be a MessagesPlaceholder, not an
# ("assistant", "{agent_scratchpad}") string slot — create_openai_functions_agent
# fills agent_scratchpad with a *list of messages* (formatted function calls and
# tool observations), and stringifying that list into a single assistant message
# breaks multi-step tool use.
researcher_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert researcher tasked with gathering comprehensive information on given topics.
    Your goal is to provide detailed, factual information limited to 500 words.
    Focus on key points, recent developments, and verified facts.
    Structure your response clearly with main points and supporting details.
    Keep your response concise and focused.
    
    {tools}
    
    Remember to provide accurate and up-to-date information."""),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Initialize the LLM with token streaming enabled (used by astream below).
researcher_llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.3,
    api_key=OPENAI_API_KEY,
    streaming=True
)

# Create the OpenAI-functions agent that decides when to call the search tool.
researcher_agent = create_openai_functions_agent(
    llm=researcher_llm,
    prompt=researcher_prompt,
    tools=[search_tool]
)

# Executor that runs the agent loop (LLM turns + tool calls), surfacing
# intermediate steps so callers can stream them.
researcher_executor = AgentExecutor(
    agent=researcher_agent,
    tools=[search_tool],
    verbose=True,
    handle_parsing_errors=True,
    return_intermediate_steps=True
)

def chunk_text(text: str, max_length: int = 3800) -> List[str]:
    """Split *text* into chunks of at most *max_length* characters.

    Sentences (naively delimited by '.') are packed greedily into chunks;
    a single sentence longer than *max_length* falls back to splitting on
    word boundaries. Unlike a plain character slice, sentence/word
    boundaries are never cut mid-token.

    Fixes over the previous version:
    - the length accounting now includes the ' ' separator that ``' '.join``
      inserts, so returned chunks genuinely respect *max_length*;
    - removed an unreachable ``else`` branch (it was only entered when the
      running length was 0, which implied the sentence was oversized).

    Args:
        text: The text to split. May be empty.
        max_length: Maximum length of each returned chunk.

    Returns:
        List of chunk strings; empty list for empty/whitespace-only input.
    """
    # Normalize into trimmed sentences, each re-suffixed with '.'.
    sentences = [s.strip() + '.' for s in text.split('.') if s.strip()]

    chunks: List[str] = []
    current: List[str] = []   # sentences accumulated for the chunk in progress
    current_len = 0           # exact length of ' '.join(current)

    def _flush() -> None:
        # Emit the accumulated sentences as one chunk, if any.
        if current:
            chunks.append(' '.join(current))
            current.clear()

    for sentence in sentences:
        # +1 accounts for the space join() inserts before this sentence.
        extra = len(sentence) + (1 if current else 0)
        if current_len + extra <= max_length:
            current.append(sentence)
            current_len += extra
            continue

        _flush()
        current_len = 0
        if len(sentence) <= max_length:
            # Sentence fits on its own; start a new chunk with it.
            current.append(sentence)
            current_len = len(sentence)
        else:
            # Oversized sentence: split on word boundaries instead.
            piece: List[str] = []
            piece_len = 0
            for word in sentence.split():
                word_extra = len(word) + (1 if piece else 0)
                if piece_len + word_extra > max_length:
                    chunks.append(' '.join(piece))
                    piece = [word]
                    piece_len = len(word)
                else:
                    piece.append(word)
                    piece_len += word_extra
            if piece:
                chunks.append(' '.join(piece))

    _flush()
    return chunks

async def research_topic_stream(topic: str) -> AsyncGenerator[str, None]:
    """Stream research results for *topic* as newline-delimited JSON events.

    Each yielded line is a JSON object with a "type" key:
    - "intermediate": a stringified agent step (tool call / observation),
    - "final": the agent's final answer,
    - "chunk": any other streamed payload,
    - "error": the stringified exception if the run fails.
    """
    request = {
        "input": f"Research this topic thoroughly: {topic}",
        "tools": tools_description,
    }
    try:
        async for event in researcher_executor.astream(request):
            if not isinstance(event, dict):
                # Non-dict payloads are forwarded verbatim as raw chunks.
                yield json.dumps({"type": "chunk", "content": str(event)}) + "\n"
                continue
            # Surface intermediate steps for transparency.
            for step in event.get("intermediate_steps", []):
                yield json.dumps({"type": "intermediate", "content": str(step)}) + "\n"
            # Surface the final output when present.
            if "output" in event:
                yield json.dumps({"type": "final", "content": event["output"]}) + "\n"
    except Exception as exc:
        # Best-effort boundary: report the failure as a stream event.
        yield json.dumps({"type": "error", "content": str(exc)}) + "\n"

async def research_topic(topic: str) -> str:
    """Research *topic* and return the agent's final answer as one string.

    Kept for compatibility with existing code. On any failure (including a
    missing "output" key in the agent result) the error is printed and a
    fixed fallback message is returned instead of raising.
    """
    request = {
        "input": f"Research this topic thoroughly: {topic}",
        "tools": tools_description,
    }
    try:
        # "output" lookup stays inside the try so a malformed result is
        # handled the same way as an executor failure.
        result = await researcher_executor.ainvoke(request)
        return result["output"]
    except Exception as exc:
        print(f"Error in research: {str(exc)}")
        return "Error occurred during research."