#!/usr/bin/env python
# coding: utf-8
"""Curify idea/task pipeline (exported from a Jupyter notebook).

Reads third-party credentials from ./curify_api.yaml, then uses OpenAI
(gpt-4o) plus a BART summarizer to extract project ideas from free text,
analyze a task, and generate a prioritized reasoning path. A Gradio UI
defined later in the file wires these functions together.
"""

# In[14]/In[15]: notebook-only environment setup.
# pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean difflib
# Guarded so importing this file outside IPython does not raise NameError.
if "get_ipython" in globals():
    get_ipython().system('pip show openai')
    get_ipython().system('pip uninstall newspaper3k -y')
    get_ipython().system('pip install newspaper3k')

# In[16]:
import os
import json
import difflib
from datetime import datetime, timedelta

import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule

# perspective generation
import openai
from openai import OpenAI
from newspaper import Article
import gradio as gr

# In[17]:
# Read the YAML file holding all API credentials.
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data.
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

# Exported so the OpenAI/Serper SDKs can pick them up implicitly.
os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key

SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')


# In[18]:
def convert_to_listed_json(input_string):
    """Extract and parse the first ``[...]`` JSON array embedded in a string.

    Parameters:
        input_string (str): JSON-like text (typically an LLM reply) that
            contains a JSON array somewhere inside it.

    Returns:
        list | None: The parsed JSON array, or None when no valid array
        can be located/parsed.
    """
    try:
        # Trim any prose surrounding the outermost [...] span.
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]
        return json.loads(trimmed_string)
    except ValueError:
        # ValueError covers both index()/rindex() missing a bracket and
        # json.JSONDecodeError (a ValueError subclass) on malformed JSON.
        return None


def validate_and_extract_json(json_string):
    """Extract and parse the first ``{...}`` JSON object embedded in a string.

    Parameters:
        json_string (str): JSON-like text (typically an LLM reply) that
            contains a JSON object somewhere inside it.

    Returns:
        dict | None: The parsed JSON object, or None when extraction or
        parsing fails.
    """
    try:
        # index()/rindex() must also be inside the try: a reply without
        # braces raises ValueError, not JSONDecodeError.
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        return json.loads(trimmed_string)
    except ValueError:
        return None


def json_to_pandas(dat_json, dat_schema=None):
    """Convert parsed JSON into a DataFrame, falling back to a schema row.

    Parameters:
        dat_json: JSON-compatible data (list of dicts, dict, or None).
        dat_schema (dict | None): Fallback single-row schema used when
            *dat_json* cannot be converted. Defaults to
            ``{'name': "", 'description': ""}`` (a fresh dict per call —
            never a shared mutable default).

    Returns:
        pd.DataFrame: The converted data, or a one-row placeholder frame.
    """
    if dat_schema is None:
        dat_schema = {'name': "", 'description': ""}
    try:
        return pd.DataFrame(dat_json)
    except Exception:
        # Malformed/None payloads fall back to an empty schema row.
        return pd.DataFrame([dat_schema])


# In[19]:
from transformers import pipeline

# Loaded once at import time; abstractive summarizer for article digests.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_content(text):
    """Return a 40-350 token abstractive summary of *text* (deterministic)."""
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']


# In[20]:
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)


def call_openai_api(prompt):
    """Send *prompt* to gpt-4o and return the stripped reply text."""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )
    return response.choices[0].message.content.strip()


def fn_task_analysis(project_context, task_description):
    """Ask the LLM to classify a task (project, concreteness, category).

    Returns the raw LLM reply, expected (but not guaranteed) to contain a
    JSON object with description, project_association, is_concrete and
    task_category keys.
    """
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break down into reasoning steps: {task_description}"
        "For analyzer, please analyze 1) which project this item belongs to. It's possible that the idea may be a personal reflection or random thoughts, not in an existing project."
        "2) whether this idea is concrete todo or vague."
        "3) what is the category of the task."
        "Please output in JSON with description, project_association, is_concrete, task_category as keys."
    )
    return call_openai_api(prompt)


# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    """Break a task into prioritized reasoning steps via two LLM calls.

    Returns:
        tuple(dict | None, dict | None): (parsed task analysis, parsed
        reasoning steps); either element is None when the corresponding
        LLM reply could not be parsed as JSON.
    """
    res_task_analysis = fn_task_analysis(project_context, task_description)
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break down into reasoning steps: {task_description}"
        f"Please use the results of task analysis: {res_task_analysis}. "
        "Guideline for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and Gradio-based feature demo as deliverable."
        "If the task is related to some blogs, papers, talks, I would like you to find the exact reference, generate a summary and convert it to a podcast."
        "If the message has some reflections about feelings or opinions, please translate to English, polish it and publish it onto substack."
        "If the task is tool research, reason about if it is a competitive tool or an integration tool. For competitive tools, generate a table to compare the tool and our relevant tool. For integration tools, decide on possible integration."
        "If the task is questionnaire or interview, please deliver a questionnaire design."
        "If the message is pointing to some personal or project issues, please use the framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct the Feasibility Matrix or Impact/Effort Matrix, prioritize these ideas, setup an action plan with todos, build the prototype."
        "Please output the action and priority of each step, you do not need to give explanation."
        "Please ignore the low priority steps in the output."
        "Please output the reasoning steps in JSON with reasoning_steps as key."
    )
    res_steps = call_openai_api(prompt)
    # validate_and_extract_json returns None on failure (it never raises),
    # so no try/except is needed here.
    json_task_analysis = validate_and_extract_json(res_task_analysis)
    json_steps = validate_and_extract_json(res_steps)
    return json_task_analysis, json_steps


# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key='reasoning_steps'):
    """Pull the reasoning-step list out of a parsed reasoning path.

    Parameters:
        task_description (str): Original task text (currently unused;
            kept for interface stability).
        reasoning_path (dict | None): Parsed LLM output.
        json_key (str): Key under which the steps list is expected.

    Returns:
        The value under *json_key*, or None when unavailable.
    """
    # isinstance(None, dict) is False, so no separate None check is needed.
    if isinstance(reasoning_path, dict) and json_key in reasoning_path:
        # Execution is not implemented yet; we only surface the steps.
        return reasoning_path[json_key]
    return None


# In[21]:
# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])


def extract_ideas(context, text):
    """Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
        context (str): Context of the extraction. Can be empty.
        text (str): Text to extract ideas from.

    Returns:
        str: Raw LLM reply, expected to contain a JSON list of
        {name, description} project entries.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description."
            "Please only the listed JSON as output string."
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user."
            "Please extract the ongoing projects with project name and description."
            "Please only the listed JSON as output string."
            f"Ongoing projects: {text}"
        )
    # return the raw string
    return call_openai_api(prompt)


def df_to_string(df, empty_message=''):
    """Convert a DataFrame to string, or *empty_message* when it is empty.

    Parameters:
        df (pd.DataFrame): The DataFrame to be converted.
        empty_message (str): Returned when *df* has no rows.

    Returns:
        str: String representation of the DataFrame, or *empty_message*.
    """
    if df.empty:
        return empty_message
    return df.to_string(index=False)


# In[22]:
def curify_ideas(project_description, task_description):
    """Full pipeline: extract projects, analyze the task, plan its steps.

    Returns:
        tuple: (projects DataFrame, task-steps DataFrame, raw task analysis).
    """
    # May need a task split step that semantically splits the task.
    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis


# In[23]: sample inputs used by the notebook-style run below.
project_description = 'work on a number of projects including curify (digest, ideas, careers, projects etc), and writing a book on LLM for recommendation system, educating my 3.5-year-old boy and working on a paper for LLM reasoning.'

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))
task_description = 'Build an interview bot for the curify digest project.'
# One-off notebook-style run of the pipeline on the sample task.
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)
store_and_execute_task(task_description, reasoning_path)

# In[ ]:
# A bare `reasoning_path` expression was a notebook display cell — a silent
# no-op when run as a script, so print it explicitly instead.
print(reasoning_path)

# In[ ]:
# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
    white-space: normal !important;
    word-wrap: break-word !important;
    }
    .gradio-table {
    width: 100% !important; /* Adjust to 100% to fit the container */
    table-layout: fixed !important; /* Fixed column widths */
    overflow-x: hidden !important; /* Disable horizontal scrolling */
    }
    .gradio-container {
    overflow-x: hidden !important; /* Disable horizontal scroll for entire container */
    padding: 0 !important; /* Remove any default padding */
    }
    .gradio-column {
    max-width: 100% !important; /* Ensure columns take up full width */
    overflow: hidden !important; /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
    overflow-x: hidden !important; /* Prevent horizontal scroll on rows */
    }
    """
) as demo:
    gr.Markdown("## Curify: Unified AI Tools for Productivity")

    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Column 1: project/task inputs and projects overview.
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")
                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5)
                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)")
                task_btn = gr.Button("Generating task steps...")
                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(type="pandas")

            # Column 2: task analysis and the generated execution path.
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)
                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(type="pandas")

    # Wire the button to the full pipeline; outputs map 1:1 to components.
    task_btn.click(
        curify_ideas,
        inputs=[project_input, idea_input],
        outputs=[project_list, task_steps, task_analysis_txt]
    )

# share=True exposes a public tunnel URL — intentional for demoing.
demo.launch(share=True)

# In[ ]: