File size: 5,546 Bytes
fd52f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import json
import os
from typing import AsyncGenerator, List

from decouple import config
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

# Get API keys from environment (.env file, via python-decouple)
TAVILY_API_KEY = config('TAVILY_API_KEY')
OPENAI_API_KEY = config('OPENAI_API_KEY')

# Debug logging.
# SECURITY: never print API-key material (even a truncated prefix) — stdout/logs
# are a common secret-leak vector. Report only presence and expected shape.
print(f"\nOpenAI API key loaded: {bool(OPENAI_API_KEY)}")
print(f"Key starts with 'sk-': {OPENAI_API_KEY.startswith('sk-')}\n")

# Set Tavily API key in the environment; some langchain-community code paths
# read it from os.environ rather than taking it as a constructor argument.
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

# Initialize the web-search tool used by the researcher agent.
search_tool = TavilySearchResults(tavily_api_key=TAVILY_API_KEY)

# Human-readable tool list, injected into the system prompt via {tools}.
tools_description = """
Available tools:
- TavilySearchResults: A search tool that provides comprehensive web search results. Use this to gather information about topics.
"""

# Create the prompt template.
# BUGFIX: the agent scratchpad must be a MessagesPlaceholder, not an
# ("assistant", "{agent_scratchpad}") string slot — create_openai_functions_agent
# fills agent_scratchpad with a *list of messages* (formatted function calls and
# tool observations), and stringifying that list into a single assistant message
# breaks multi-step tool use.
researcher_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert researcher tasked with gathering comprehensive information on given topics.
    Your goal is to provide detailed, factual information limited to 500 words.
    Focus on key points, recent developments, and verified facts.
    Structure your response clearly with main points and supporting details.
    Keep your response concise and focused.
    
    {tools}
    
    Remember to provide accurate and up-to-date information."""),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Initialize the LLM with token streaming enabled (used by astream below).
researcher_llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.3,
    api_key=OPENAI_API_KEY,
    streaming=True
)

# Create the OpenAI-functions agent that decides when to call the search tool.
researcher_agent = create_openai_functions_agent(
    llm=researcher_llm,
    prompt=researcher_prompt,
    tools=[search_tool]
)

# Executor that runs the agent loop (LLM turns + tool calls), surfacing
# intermediate steps so callers can stream them.
researcher_executor = AgentExecutor(
    agent=researcher_agent,
    tools=[search_tool],
    verbose=True,
    handle_parsing_errors=True,
    return_intermediate_steps=True
)

def chunk_text(text: str, max_length: int = 3800) -> List[str]:
    """Split *text* into chunks of at most *max_length* characters.

    Sentences (naively delimited by '.') are packed greedily into chunks;
    a single sentence longer than *max_length* falls back to splitting on
    word boundaries. Unlike a plain character slice, sentence/word
    boundaries are never cut mid-token.

    Fixes over the previous version:
    - the length accounting now includes the ' ' separator that ``' '.join``
      inserts, so returned chunks genuinely respect *max_length*;
    - removed an unreachable ``else`` branch (it was only entered when the
      running length was 0, which implied the sentence was oversized).

    Args:
        text: The text to split. May be empty.
        max_length: Maximum length of each returned chunk.

    Returns:
        List of chunk strings; empty list for empty/whitespace-only input.
    """
    # Normalize into trimmed sentences, each re-suffixed with '.'.
    sentences = [s.strip() + '.' for s in text.split('.') if s.strip()]

    chunks: List[str] = []
    current: List[str] = []   # sentences accumulated for the chunk in progress
    current_len = 0           # exact length of ' '.join(current)

    def _flush() -> None:
        # Emit the accumulated sentences as one chunk, if any.
        if current:
            chunks.append(' '.join(current))
            current.clear()

    for sentence in sentences:
        # +1 accounts for the space join() inserts before this sentence.
        extra = len(sentence) + (1 if current else 0)
        if current_len + extra <= max_length:
            current.append(sentence)
            current_len += extra
            continue

        _flush()
        current_len = 0
        if len(sentence) <= max_length:
            # Sentence fits on its own; start a new chunk with it.
            current.append(sentence)
            current_len = len(sentence)
        else:
            # Oversized sentence: split on word boundaries instead.
            piece: List[str] = []
            piece_len = 0
            for word in sentence.split():
                word_extra = len(word) + (1 if piece else 0)
                if piece_len + word_extra > max_length:
                    chunks.append(' '.join(piece))
                    piece = [word]
                    piece_len = len(word)
                else:
                    piece.append(word)
                    piece_len += word_extra
            if piece:
                chunks.append(' '.join(piece))

    _flush()
    return chunks

async def research_topic_stream(topic: str) -> AsyncGenerator[str, None]:
    """Stream research results for *topic* as newline-delimited JSON events.

    Each yielded line is a JSON object with a "type" key:
    - "intermediate": a stringified agent step (tool call / observation),
    - "final": the agent's final answer,
    - "chunk": any other streamed payload,
    - "error": the stringified exception if the run fails.
    """
    request = {
        "input": f"Research this topic thoroughly: {topic}",
        "tools": tools_description,
    }
    try:
        async for event in researcher_executor.astream(request):
            if not isinstance(event, dict):
                # Non-dict payloads are forwarded verbatim as raw chunks.
                yield json.dumps({"type": "chunk", "content": str(event)}) + "\n"
                continue
            # Surface intermediate steps for transparency.
            for step in event.get("intermediate_steps", []):
                yield json.dumps({"type": "intermediate", "content": str(step)}) + "\n"
            # Surface the final output when present.
            if "output" in event:
                yield json.dumps({"type": "final", "content": event["output"]}) + "\n"
    except Exception as exc:
        # Best-effort boundary: report the failure as a stream event.
        yield json.dumps({"type": "error", "content": str(exc)}) + "\n"

async def research_topic(topic: str) -> str:
    """Research *topic* and return the agent's final answer as one string.

    Kept for compatibility with existing code. On any failure (including a
    missing "output" key in the agent result) the error is printed and a
    fixed fallback message is returned instead of raising.
    """
    request = {
        "input": f"Research this topic thoroughly: {topic}",
        "tools": tools_description,
    }
    try:
        # "output" lookup stays inside the try so a malformed result is
        # handled the same way as an executor failure.
        result = await researcher_executor.ainvoke(request)
        return result["output"]
    except Exception as exc:
        print(f"Error in research: {str(exc)}")
        return "Error occurred during research."