import json
import os
from typing import AsyncGenerator, List

from decouple import config
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
# Load API keys via python-decouple (from .env or the environment)
TAVILY_API_KEY = config('TAVILY_API_KEY')
OPENAI_API_KEY = config('OPENAI_API_KEY')
# Debug logging
print(f"\nLoaded OpenAI API Key: {OPENAI_API_KEY[:7]}...")
print(f"Key starts with 'sk-proj-': {OPENAI_API_KEY.startswith('sk-proj-')}")
print(f"Key starts with 'sk-': {OPENAI_API_KEY.startswith('sk-')}\n")
# Set Tavily API key in environment
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
# Initialize the search tool
search_tool = TavilySearchResults(tavily_api_key=TAVILY_API_KEY)
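# Usage note (illustrative, kept as a comment so nothing runs at import time):
# like any LangChain tool, the search tool can be invoked directly through the
# Runnable interface, e.g.
#   search_tool.invoke({"query": "latest LangChain release"})
# which typically returns a list of result dicts with "url" and "content"
# keys; the exact shape depends on the installed langchain_community version.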
# List of available tools for the prompt
tools_description = """
Available tools:
- TavilySearchResults: A search tool that provides comprehensive web search results. Use this to gather information about topics.
"""
# Create the prompt template.
# create_openai_functions_agent expects an "agent_scratchpad" slot that receives
# a *list of messages*, so it must be a MessagesPlaceholder rather than a plain
# string variable inside an assistant message.
researcher_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert researcher tasked with gathering comprehensive information on given topics.
Your goal is to provide detailed, factual information limited to 500 words.
Focus on key points, recent developments, and verified facts.
Structure your response clearly with main points and supporting details.
Keep your response concise and focused.
{tools}
Remember to provide accurate and up-to-date information."""),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])
# Initialize the LLM with streaming
researcher_llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0.3,
api_key=OPENAI_API_KEY,
streaming=True
)
# Create the agent
researcher_agent = create_openai_functions_agent(
llm=researcher_llm,
prompt=researcher_prompt,
tools=[search_tool]
)
# Create the agent executor
researcher_executor = AgentExecutor(
agent=researcher_agent,
tools=[search_tool],
verbose=True,
handle_parsing_errors=True,
return_intermediate_steps=True
)
def chunk_text(text: str, max_length: int = 3800) -> List[str]:
    """Split text into chunks of at most max_length characters while preserving sentence boundaries."""
    # Naive sentence split on periods; restore the period on each piece.
    sentences = [s.strip() + '.' for s in text.split('.') if s.strip()]
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        sentence_length = len(sentence)
        if sentence_length > max_length:
            # Oversized sentence: flush whatever has accumulated, then split
            # the sentence itself on word boundaries.
            if current_chunk:
                chunks.append(' '.join(current_chunk))
                current_chunk, current_length = [], 0
            temp_chunk = []
            temp_length = 0
            for word in sentence.split():
                # The +1 accounts for the joining space added by ' '.join().
                if temp_chunk and temp_length + len(word) + 1 > max_length:
                    chunks.append(' '.join(temp_chunk))
                    temp_chunk, temp_length = [word], len(word)
                else:
                    temp_chunk.append(word)
                    temp_length += len(word) + 1
            if temp_chunk:
                chunks.append(' '.join(temp_chunk))
        elif current_length + sentence_length + 1 > max_length:
            # Sentence fits on its own but not in the current chunk.
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            current_chunk, current_length = [sentence], sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
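# Usage sketch for chunk_text (hypothetical input, shown as a comment so it
# does not execute at import time):
#   chunk_text("First sentence. Second sentence.", max_length=20)
#   -> ["First sentence.", "Second sentence."]
# Chunks stay under max_length except when a single word already exceeds it.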
async def research_topic_stream(topic: str) -> AsyncGenerator[str, None]:
"""
Research a topic and stream the results as they are generated.
"""
try:
async for chunk in researcher_executor.astream(
{
"input": f"Research this topic thoroughly: {topic}",
"tools": tools_description
}
):
if isinstance(chunk, dict):
# Stream intermediate steps for transparency
if "intermediate_steps" in chunk:
for step in chunk["intermediate_steps"]:
yield json.dumps({"type": "intermediate", "content": str(step)}) + "\n"
# Stream the final output
if "output" in chunk:
yield json.dumps({"type": "final", "content": chunk["output"]}) + "\n"
else:
yield json.dumps({"type": "chunk", "content": str(chunk)}) + "\n"
except Exception as e:
yield json.dumps({"type": "error", "content": str(e)}) + "\n"
async def research_topic(topic: str) -> str:
"""
Research a topic and return the complete result.
Kept for compatibility with existing code.
"""
try:
result = await researcher_executor.ainvoke(
{
"input": f"Research this topic thoroughly: {topic}",
"tools": tools_description
}
)
return result["output"]
except Exception as e:
print(f"Error in research: {str(e)}")
return "Error occurred during research." |