Final_Assignment_Template

Runtime error

App Files Files Community

itskavya committed on Jun 6

Commit

06a4d84

1 Parent(s): 231a8e0

update working

Browse files

Files changed (3) hide show

.gitignore +2 -0
app.py +332 -27
requirements.txt +6 -3

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ .DS_Store

app.py CHANGED Viewed

@@ -1,59 +1,338 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
-from typing import TypedDict, Optional, Annotated
 from langchain_core.messages import AnyMessage
-from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langgraph.graph.message import add_messages
 from langchain_hyperbrowser import HyperbrowserBrowserUseTool
-from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import ToolNode, tools_condition
 from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_community.tools import DuckDuckGoSearchResults
 browser_tool = HyperbrowserBrowserUseTool()
-search_tool = DuckDuckGoSearchResults()
-tools = [browser_tool, search_tool]
-llm = HuggingFaceEndpoint(repo_id="Qwen/Qwen2.5-Coder-32B-Instruct")
-chat = ChatHuggingFace(llm=llm)
-llm_with_tools = chat.bind_tools(tools)
-class State(TypedDict):
-    messages = Annotated[list[AnyMessage], add_messages]
-def assistant(state:MessagesState):
-    system_message = SystemMessage(content="You are a helpful assistant. Your job is to answer the questions asked of you as accurately as possible. You have access to search and browser tools, which you may use when needed to answer a question.")
-    print(state["messages"])
     response = llm_with_tools.invoke([system_message] + state["messages"])
     print(response)
     return {
-        "messages": response
     }
-workflow = StateGraph(state_schema=MessagesState)
 workflow.add_node("assistant", assistant)
 workflow.add_node("tools", ToolNode(tools))
 workflow.add_edge(START, "assistant")
 workflow.add_conditional_edges("assistant", tools_condition)
 workflow.add_edge("tools", "assistant")
 app = workflow.compile()
-# (Keep Constants as is)
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         # fixed_answer = "This is a default answer."
         messages = [HumanMessage(content=question)]
-        answer = app.invoke({"messages": messages})
         answer = answer["messages"][-1].content
         # print(f"Agent returning fixed answer: {fixed_answer}")
         print(f"Agent returning answer: {answer}")
@@ -116,11 +395,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -235,6 +520,26 @@ if __name__ == "__main__":
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import requests
+import base64
 import pandas as pd
+from typing import TypedDict, Annotated
 from langchain_core.messages import AnyMessage
 from langgraph.graph.message import add_messages
 from langchain_hyperbrowser import HyperbrowserBrowserUseTool
+from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
 from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_community.tools import DuckDuckGoSearchRun
+import whisper
+import yt_dlp
+import pandas as pd
+from langchain.globals import set_debug
+from langchain_community.tools.riza.command import ExecPython
+from langchain_openai import ChatOpenAI
+import cv2
+import os
+import shutil
+import uuid
+from langchain_tavily import TavilySearch
+# set_debug(True)
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+class AgentState(TypedDict):
+    """State passed between the nodes of the agent graph for one question."""
+    # Conversation so far; `add_messages` is attached as the reducer for this key.
+    messages: Annotated[list[AnyMessage], add_messages]
+    # ID of the task being answered; `assistant` interpolates it into the system prompt.
+    task_id: str
+    # Whether the task comes with a file; also interpolated into the system prompt.
+    has_file: bool
+def get_file(task_id: str):
+    """
+    Download a file locally for a given task.
+    """
+    # NOTE(review): the docstring is left verbatim because this function is passed to
+    # bind_tools, which presumably surfaces it to the model as the tool description.
+    #
+    # Returns the local file name on success, or "" when the download or the write fails.
+    files_url = f"{DEFAULT_API_URL}/files/{task_id}"
+    try:
+        response = requests.get(files_url, timeout=20)
+        response.raise_for_status()
+        # The header may be missing; treat that as "no name supplied" instead of crashing.
+        cd = response.headers.get("content-disposition") or ""
+        filename = ""
+        if "filename=" in cd:
+            # Drop any trailing parameters (e.g. `; size=...`) and surrounding quotes.
+            filename = cd.split("filename=")[-1].split(";")[0].strip().strip('"')
+        # Keep only the last path component so a server-supplied name cannot point
+        # outside the working directory; fall back to the task ID when no name is given.
+        filename = os.path.basename(filename) or str(task_id)
+        with open(filename, "wb") as file:
+            file.write(response.content)
+        return filename
+    except (requests.RequestException, OSError) as e:
+        print(f"Could not download file for task {task_id}: {e}")
+        return ""
+def interpret_image(image_name: str, question: str):
+    """
+    Interpret an image for analysis.
+    """
+    # Sends the image (as a base64 data URL) together with `question` to gpt-4o and
+    # returns the model's reply; returns "" if reading the file or the call fails.
+    import mimetypes  # stdlib; imported locally because only this function needs it
+    vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)
+    try:
+        with open(image_name, "rb") as file:
+            image_bytes = file.read()
+        base64_image = base64.b64encode(image_bytes).decode("utf-8")
+        # watch_video hands in .jpg frames, so label the data URL with the real image
+        # type instead of always claiming PNG; PNG stays the fallback for unknown names.
+        mime_type, _ = mimetypes.guess_type(image_name)
+        if not mime_type or not mime_type.startswith("image/"):
+            mime_type = "image/png"
+        messages = [HumanMessage(content=[
+            {
+                "type": "text",
+                "text": (
+                    f"{question}"
+                )
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:{mime_type};base64,{base64_image}"
+                }
+            }
+        ])]
+        response = vision_llm.invoke(messages)
+        return response.content
+    except Exception as e:
+        # Best-effort tool: report the problem and hand the model an empty result.
+        print(str(e))
+        return ""
+def transcribe_audio(file_name: str):
+    """
+    Transcribes audio file.
+    """
+    # Load Whisper's "small" checkpoint, run it on the file, and keep only the text
+    # field of the result.
+    transcription = whisper.load_model("small").transcribe(file_name)
+    return transcription["text"]
+def download_youtube_video(url: str):
+    """
+    Download a YouTube video.
+    """
+    # Returns the path of the downloaded file. The name is randomised per call so
+    # repeated downloads do not overwrite each other.
+    import glob  # stdlib; imported locally because only this function needs it
+    output_path = f"output_{uuid.uuid4()}"
+    ydl_opts = {
+        'format': 'bestvideo+bestaudio/best',
+        # Let yt-dlp append the real container extension; without it the single-format
+        # fallback is written with no extension and the returned ".mp4" path is wrong.
+        'outtmpl': f"{output_path}.%(ext)s",
+        'merge_output_format': 'mp4',  # Use mp4 as the final output format
+        'quiet': True,
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([url])
+    merged_path = output_path + ".mp4"
+    if os.path.exists(merged_path):
+        return merged_path
+    # No merge happened (single-format fallback): return whatever file was written.
+    candidates = sorted(glob.glob(glob.escape(output_path) + ".*"))
+    return candidates[0] if candidates else merged_path
+def read_excel(file_name: str):
+    """
+    Read the contents of an Excel file.
+    """
+    # Render the sheet once (without the index column), echo it, and return it.
+    table_text = pd.read_excel(file_name).to_string(index=False)
+    print(table_text)
+    return table_text
+def read_file(file_name: str):
+    """
+    Read the content of a text-based file.
+    """
+    # Decode as UTF-8 explicitly so the result does not depend on the host's locale.
+    with open(file_name, 'r', encoding="utf-8") as file:
+        return file.read()
+def watch_video(file_name: str):
+    """
+    Extract frames from a video and interpret them.
+    """
+    # Saves roughly one frame per five seconds of video into ./extracted_frames
+    # (recreated on every call) and returns one caption per saved frame.
+    if os.path.exists("extracted_frames"):
+        shutil.rmtree("extracted_frames")
+    os.makedirs("extracted_frames")
+    cap = cv2.VideoCapture(file_name)
+    try:
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        # Clamp to at least 1: a reported FPS below 0.2 (including 0) would otherwise
+        # give an interval of 0 and make the modulo below raise ZeroDivisionError.
+        frame_interval = max(1, int(fps * 5))
+        frame_count = 0
+        saved_count = 0
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            if frame_count % frame_interval == 0:
+                filename = os.path.join("extracted_frames", f"frame_{saved_count:04d}.jpg")
+                cv2.imwrite(filename, frame)
+                saved_count += 1
+            frame_count += 1
+    finally:
+        # Release the capture even if reading or writing a frame fails.
+        cap.release()
+    print(f"Saved {saved_count}")
+    captions = []
+    # Zero-padded names make the alphabetical sort match the frame order.
+    for file in sorted(os.listdir("extracted_frames")):
+        file_path = os.path.join("extracted_frames", file)
+        caption = interpret_image(file_path, "Return a one line description of the image.")
+        print(caption)
+        captions.append(caption)
+    print(captions)
+    return captions
+# --- Tool and model setup (runs at import time) ---
+# NOTE(review): browser_tool is instantiated here but is not included in `tools` below.
 browser_tool = HyperbrowserBrowserUseTool()
+# search_tool = DuckDuckGoSearchRun()
+search_tool = TavilySearch()
+code_executor_tool = ExecPython()
+# Everything the model may call: two ready-made tools plus the plain functions defined above.
+tools = [search_tool, code_executor_tool, interpret_image, get_file, transcribe_audio, download_youtube_video, read_file, watch_video, read_excel]
+llm = ChatOpenAI(model="gpt-4o", temperature=0)
+# Same model, with the tool list attached so it can emit tool calls.
+llm_with_tools = llm.bind_tools(tools)
+def assistant(state:AgentState):
+    """Graph node: ask the tool-enabled model for the next message.
+
+    Builds a system prompt that lists the available tools, the answer-format
+    rules, and the current task's ID and file flag, then invokes the model on
+    that prompt followed by the conversation so far.
+
+    Returns a state update holding the model's response plus the unchanged
+    ``task_id`` and ``has_file``.
+    """
+    task_id = state["task_id"]
+    # The descriptions below are pasted into the prompt, so their signatures must
+    # match the real functions (interpret_image takes a question; the YouTube
+    # downloader takes only a URL).
+    image_tool_description = """
+    interpret_image(image_name: str, question: str) -> str:
+        Interpret an image for analysis.
+        Args:
+            image_name: Name of the downloaded image file as string.
+            question: Question about the image as string.
+        Returns:
+            An interpretation of the image as string.
+    """
+    download_file_tool_description = """
+    get_file(task_id: str) -> str:
+        Download a file locally for a given task.
+        Args:
+            task_id: The ID of the current task as string.
+        Returns:
+            The name of the downloaded file as string.
+    """
+    audio_tool_description = """
+    transcribe_audio(file_name: str) -> str:
+        Transcribe an audio file.
+        Args:
+            file_name: The name of the audio file as string.
+        Returns:
+            The transcription of the audio as string.
+    """
+    download_youtube_video_description = """
+    download_youtube_video(url: str) -> str:
+        Downloads a YouTube video.
+        Args:
+            url: URL of the YouTube video as string.
+        Returns:
+            The output path for the file.
+    """
+    excel_tool_description = """
+    read_excel(file_name: str) -> str:
+        Read the content of an Excel file.
+        Args:
+            file_name: The name of the Excel file as string.
+        Returns:
+            A string representation of the content of the file.
+    """
+    read_file_tool_description = """
+    read_file(file_name: str) -> str:
+        Read the content of a text-based file.
+        Args:
+            file_name: The name of the file as string.
+        Returns:
+            A string containing the content of the file.
+    """
+    watch_video_tool_description = """
+    watch_video(file_name: str) -> str:
+        Extract frames from a video and interpret them.
+        Args:
+            file_name: The name of the file as string.
+        Returns:
+            A list of captions for each frame.
+    """
+    search_tool_description = search_tool.description
+    code_executor_tool_description = code_executor_tool.description
+    has_file = state["has_file"]
+    system_message = SystemMessage(content=f"""
+                                   You are a general AI assistant. I will ask you a question.
+                                   Your response should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings.
+                                   You SHOULD NOT provide explanations in your response.
+                                   If you are asked for a number, don't use a comma to write your number, neither use symbols such as $ or % unless specified otherwise.
+                                   If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities).
+                                   If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+                                   If you are including text from the question in your response, make sure to include the text exactly as it appears in the question (e.g. with adjective).
+                                   Do NOT end your response with a period.
+                                   Do NOT write numbers as text.
+				                   You have access to the following tools, which you can use as needed to answer a question:
+                                   - File downloading tool: {download_file_tool_description}
+                                   - Image interpretation tool: {image_tool_description}
+                                   - YouTube video downloader: {download_youtube_video_description}
+                                   - Audio transcription tool: {audio_tool_description}
+                                   - Text-based file reading tool: {read_file_tool_description}
+                                   - Internet search tool: {search_tool_description}
+                                   - Code execution tool: {code_executor_tool_description}
+                                   - Watch video tool: {watch_video_tool_description}
+                                   - Read Excel file tool: {excel_tool_description}
+                                   You may download a file for a given task ONLY if it has a file by using its associated task ID.
+                                   Always ensure you have downloaded a file before using a relevant tool.
+                                   You MUST use the name of a particular downloaded file in your tool call. Do NOT use a file name mentioned in the question.
+                                   When asked about a YouTube video, you can watch it and/or hear it.
+                                   When writing code, avoid excess formatting and keep it clean.
+                                   Do NOT make up answers, instead use a tool to answer the question.
+                                   The current task ID is {task_id}.
+                                   The current task has a file: {has_file}
+                                   """)
     response = llm_with_tools.invoke([system_message] + state["messages"])
     print(response)
+    print("\n\n")
     return {
+        "messages": [response],
+        "task_id": task_id,
+        "has_file": has_file
     }
+# --- Graph wiring: assistant <-> tools loop over AgentState ---
+workflow = StateGraph(AgentState)
 workflow.add_node("assistant", assistant)
 workflow.add_node("tools", ToolNode(tools))
+# Every run starts at the assistant node.
 workflow.add_edge(START, "assistant")
+# After each assistant turn, tools_condition picks the next step
+# (presumably the "tools" node when the reply contains tool calls, otherwise the end).
 workflow.add_conditional_edges("assistant", tools_condition)
+# Tool output always goes back to the assistant.
 workflow.add_edge("tools", "assistant")
 app = workflow.compile()
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
+    def __call__(self, question: str, task_id: str, has_file: bool) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         # fixed_answer = "This is a default answer."
         messages = [HumanMessage(content=question)]
+        state = {"messages": messages, "task_id": task_id, "has_file": has_file}
+        answer = app.invoke(state)
         answer = answer["messages"][-1].content
         # print(f"Agent returning fixed answer: {fixed_answer}")
         print(f"Agent returning answer: {answer}")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        has_file=False
+        if item.get("file_name"):
+            has_file=True
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(task_id)
+            submitted_answer = agent(question_text, task_id, has_file)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print("-"*(60 + len(" App Starting ")) + "\n")
+    # try:
+        # random_url = f"{DEFAULT_API_URL}/random-question"
+        # response = requests.get(random_url, timeout=20)
+        # response.raise_for_status()
+        # question = response.json()
+        # print(question)
+        # agent = BasicAgent()
+        # print(question.get("question"))
+        # print(question.get("task_id"))
+        # has_file=False
+        # if question.get("file_name"):
+        #     has_file=True
+        # print(agent(question.get("question"), question.get("task_id"), has_file))
+    # except Exception as e:
+    #     print(str(e))
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -2,7 +2,10 @@ gradio
 requests
 langchain_core
 langgraph
-langchain-huggingface
-langchain-hyperbrowser
 duckduckgo-search
-langchain-community

 requests
 langchain_core
 langgraph
 duckduckgo-search
+langchain-community
+openai-whisper
+yt-dlp
+rizaio
+langchain-openai
+langchain-tavily
+# app.py still imports langchain_hyperbrowser and cv2, so both must be installable.
+langchain-hyperbrowser
+opencv-python-headless
+# pandas.read_excel needs an Excel engine for .xlsx files.
+openpyxl