"""FastHTML app that builds an event timeline for a topic.

The app fetches Wikipedia content for a user-supplied topic, asks an LLM
(OpenAI or Anthropic) to extract dated events from it, and renders the
events as a daisyUI vertical timeline.
"""

import os
import json
import traceback
from datetime import datetime
from typing import Literal

import pandas as pd
from fasthtml_hf import setup_hf_backup
from pydantic_core import from_json
from PyPDF2 import PdfReader
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from pydantic import BaseModel, Field, ValidationError
# NOTE: the wildcard import is kept after the explicit imports above and
# before the ones below, exactly as in the original file, so that name
# resolution between them is unchanged.
from fasthtml.common import *
from fasthtml.components import Svg
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun

from timelinestyle import TimelineStyle

# Set up the app, including daisyUI and Tailwind for the timeline component.
# (The original assignment ended with a stray comma, which silently turned
# `tlink` into a 1-tuple.)
tlink = Script(src="https://cdn.tailwindcss.com")
dlink = Link(rel="stylesheet", href="https://cdn.jsdelivr.net/npm/daisyui@4.11.1/dist/full.min.css")

# NOTE(review): absolute, machine-specific path; generate_timeline_html()
# reads "./assets/circle.svg" relative to the working directory instead.
# Confirm which location is intended for deployment.
assets_dir = "/Users/manaranjanp/Documents/Work/MyLearnings/fastHTML/llmtimeline/assets"

app = FastHTML(hdrs=(tlink, dlink, picolink))

# Inline check-mark icon. Not referenced by the active rendering code.
# NOTE(review): `Path` resolves to whatever `fasthtml.common` exports;
# confirm it is the SVG <path> tag and not `pathlib.Path`.
svg = Svg(
    Path(
        fill_rule='evenodd',
        d='M10 18a8 8 0 100-16 8 8 0 000 16zm3.857-9.809a.75.75 0 00-1.214-.882l-3.483 4.79-1.88-1.88a.75.75 0 10-1.06 1.061l2.5 2.5a.75.75 0 001.137-.089l4-5.5z',
        clip_rule='evenodd',
    ),
    xmlns='http://www.w3.org/2000/svg',
    viewbox='0 0 20 20',
    fill='currentColor',
    cls='h-5 w-5',
)


# Pydantic models
class Event(BaseModel):
    """A single dated event extracted by the LLM."""

    time: datetime = Field(description="When the event occurred")
    description: str = Field(description="A summary of what happened. Not more than 20 words.")
    sentiment: Literal["Positive", "Negative"] = Field(..., description="Categorization of the event sentiment")


class EventResponse(BaseModel):
    """Top-level LLM response: a list of at most 20 events."""

    # Builtin generic instead of `List`, which was never imported explicitly.
    events: list[Event] = Field(max_length=20, description="List of events extracted from the context")


# Set up the Pydantic output parser
parser = PydanticOutputParser(pydantic_object=EventResponse)

# LangChain prompt template with format instructions
event_extraction_template = """
Extract the time based informations or events from the context and return a list of events with time, event description and event sentiment type whether it was positive or negative event.
The context may contain information about people, organization or any other entity. Try to get detailed and unique list of events as possible.

{context}

The response must follow the following schema strictly. There will be penalty for not following the schema.

{format_instructions}

Output:
"""

event_prompt = PromptTemplate(
    # Only {context} is supplied at invoke time; the previously declared
    # "topic" variable does not appear in the template.
    input_variables=["context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=event_extraction_template,
)


def getModel(model, key):
    """Return a chat model for the provider selected in the form.

    Args:
        model: Provider name from the form; 'OpenAI' selects OpenAI, any
            other value selects Anthropic.
        key: API key entered by the user for that provider.

    Returns:
        A ``ChatOpenAI`` or ``ChatAnthropic`` instance.
    """
    # The key is passed to the client directly rather than written to
    # os.environ: the environment is shared by every request served by this
    # process, so one user's key would otherwise be visible to later requests.
    if model == 'OpenAI':
        return ChatOpenAI(
            temperature=0,  # Set to 0 for deterministic output
            model="gpt-4o-2024-08-06",
            max_tokens=8000,  # Limit the response length
            api_key=key,
        )
    return ChatAnthropic(model='claude-3-5-sonnet-20240620', api_key=key)


def generate_timeline_html(timeline):
    """Render the timeline DataFrame as a daisyUI vertical timeline.

    Args:
        timeline: DataFrame with at least 'TimeStr' and 'Event' columns, as
            produced by ``get_timeline_df``.

    Returns:
        A ``Ul`` element with one ``Li`` per row; rows alternate between the
        start and end side of the timeline.
    """
    # Read the marker icon once instead of once per row.
    icon = File("./assets/circle.svg")
    rows = []
    # Alternate sides by row position so the layout does not depend on the
    # DataFrame's index labels.
    for pos, (_, tline) in enumerate(timeline.iterrows()):
        side_cls = "timeline-start mb-10 md:text-end" if pos % 2 == 0 else "timeline-end mb-10"
        rows.append(
            Li(
                Div(icon, cls="timeline-middle"),
                Div(
                    Time(tline['TimeStr'], cls="font-mono italic"),
                    Div(tline['Event'], cls='text-lg font-black'),
                    cls=side_cls,
                ),
                Hr(),
            )
        )
    return Ul(*rows, cls="timeline timeline-vertical")


def get_timeline_df(result):
    """Convert a parsed ``EventResponse`` into a DataFrame sorted by time.

    Args:
        result: The ``EventResponse`` returned by the output parser.

    Returns:
        DataFrame with columns 'index' (position before sorting), 'Time',
        'Event', 'Sentiment' and 'TimeStr' (time formatted as dd/mm/YYYY).
        Empty, but with the same columns, when there are no events.

    Raises:
        ValueError: If ``result`` is not an ``EventResponse``.
    """
    if not isinstance(result, EventResponse):
        message = f"Expected an EventResponse, but got {type(result)}"
        print(f"An error occurred during analysis: {message}")
        raise ValueError(message)

    results_data = [
        {'Time': event.time, 'Event': event.description, 'Sentiment': event.sentiment}
        for event in result.events
    ]
    # Explicit columns keep sort_values("Time") from raising KeyError when
    # the model returned no events.
    df = pd.DataFrame(results_data, columns=['Time', 'Event', 'Sentiment'])
    df = df.sort_values("Time", ascending=True).reset_index()
    df['TimeStr'] = df['Time'].map(lambda x: x.strftime('%d/%m/%Y'))
    return df


def generate_timeline(topic, llm):
    """Fetch Wikipedia content for ``topic`` and extract a timeline from it.

    Args:
        topic: Search term passed to the Wikipedia tool.
        llm: Chat model used to extract events from the fetched text.

    Returns:
        The timeline DataFrame, or ``None`` if building or saving it failed.
        Errors from the Wikipedia lookup or from the chain itself (including
        output parsing) are not caught here and propagate to the caller.
    """
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wiki_content = wikipedia.run(topic)

    chain = event_prompt | llm | parser
    result = chain.invoke({"context": wiki_content})

    try:
        print(f"Results: {result}")
        df = get_timeline_df(result)
        # Also save the DataFrame to a CSV file in the working directory.
        df.to_csv("timeline.csv", index=True)
        print("Results saved to 'timeline.csv'")
    except Exception as e:
        print(f"Error parsing LLM output: {str(e)}")
        return None

    return df


def getConfigForm():
    """Build the configuration form: provider, API key and topic.

    The form posts to ``/submit`` and swaps the response into ``#result``.
    """
    return Card(Form(hx_post="/submit", hx_target="#result", hx_swap="innerHTML", hx_indicator="#indicator")(
        Div(
            Label(Strong("Model and Topic: "), style="color:#3498db; font-size:25px;")
        ),
        Div(
            Span(Strong('Model: '), cls="badge"),
            Select(Option("OpenAI"), Option("Anthropic"), id="model", cls='select w-full max-w-xs')
        ),
        Div(
            # This input collects the API key; it was previously labelled
            # as the topic field.
            Span(Strong('API key for the selected model: '), cls="badge"),
            Input(id="secret", type="password", placeholder="Key: "),
        ),
        Div(
            Span(Strong('Topic for timeline (Person/Organization/Event): '), cls="badge"),
            Input(type='text', id="topic", cls="input w-full max-w-xs", placeholder="Type here")
        ),
        Div(
            Button("Generate Timeline", cls='btn')
        ),
        Div(
            Br(),
            A("Developed by Manaranjan Pradhan", href="http://www.manaranjanp.com/",
              target="_blank", style='color: red; font-size: 16px;')
        )))


# Define the route for the homepage
@app.get('/')
def homepage():
    """Render the landing page: the form on the left, results on the right."""
    return Titled('Generate a timeline ', Grid(
        getConfigForm(),
        Div(
            Div(id="result"),
            Div(Label(Strong('Generating timeline for the topic.... take a deep breath....')),
                Progress(), id="indicator", cls="htmx-indicator")
        ),
        style="grid-template-columns: 400px 1000px; gap: 50px;"
    ))


@app.get('/assets/{fname:path}.{ext}')
async def get(fname: str, ext: str):
    """Serve a static file from ``assets_dir``.

    Raises:
        HTTPException: 404 if the file does not exist or the requested path
            resolves outside ``assets_dir``.
    """
    root = os.path.realpath(assets_dir)
    fpath = os.path.realpath(os.path.join(root, f"{fname}.{ext}"))
    # `fname` comes from the URL; refuse anything that escapes the assets
    # directory (e.g. via "..").
    inside_root = fpath.startswith(root + os.sep)
    if not inside_root or not os.path.isfile(fpath):
        print("file failed:" + fpath)
        raise HTTPException(status_code=404, detail="File not found")
    print("file sent:" + fpath)
    # Let FileResponse infer the media type from the file name; the former
    # hard-coded "image/svg" is not a registered MIME type.
    return FileResponse(fpath)


# Define the route for form submission
@app.post('/submit')
async def post(d: dict):
    """Handle the form: build the timeline and return it as HTML.

    On failure the error text is returned so it is shown in ``#result``.
    """
    try:
        # Get the appropriate language model
        model = getModel(d['model'], d['secret'])

        # Fetch the topic content and extract the events
        timeline_df = generate_timeline(d['topic'], model)
        if timeline_df is None:
            # generate_timeline() already logged the underlying error.
            return "Could not build a timeline for this topic. Check the server log for details."

        # Generate and return the HTML timeline
        return generate_timeline_html(timeline_df)

    except Exception as e:
        # Exception rather than BaseException, so task cancellation and
        # interpreter shutdown are not swallowed.
        print(traceback.format_exc())
        return str(e)


setup_hf_backup(app)

# Start the FastHTML server
serve()