# NOTE: scraped page banner ("Spaces: Runtime error / Runtime error") - page residue, not part of the program.
from fasthtml_hf import setup_hf_backup | |
from timelinestyle import TimelineStyle | |
import os | |
import json | |
import pandas as pd | |
import traceback | |
from datetime import datetime | |
from typing import Literal | |
from pydantic_core import from_json | |
from PyPDF2 import PdfReader | |
from langchain_core.prompts import PromptTemplate | |
from langchain.chains import LLMChain | |
from langchain.output_parsers import PydanticOutputParser | |
from langchain.chains.summarize import load_summarize_chain | |
from langchain_openai import ChatOpenAI | |
from langchain_anthropic import ChatAnthropic | |
from pydantic import BaseModel, Field, ValidationError | |
from langchain_openai import ChatOpenAI | |
from fasthtml.common import * | |
from fasthtml.components import Svg | |
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper | |
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun | |
# Set up the app, loading Tailwind and daisyUI for the form and timeline markup.
# No trailing comma after Script(...): with it, `tlink` would be bound to a
# 1-tuple instead of the Script element itself.
tlink = Script(src="https://cdn.tailwindcss.com")
# NOTE(review): the package/version segment of this URL had been mangled to
# "[email protected]" by e-mail obfuscation. "daisyui@4.11.1" is the reconstructed
# pin - confirm it against the deployed file.
dlink = Link(rel="stylesheet", href="https://cdn.jsdelivr.net/npm/daisyui@4.11.1/dist/full.min.css")
# Resolve the assets folder next to this file instead of a developer-machine
# absolute path, so the lookup also works where the app is deployed.
assets_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
app = FastHTML(hdrs=(tlink, dlink, picolink))
# Inline 20x20 SVG icon. It is currently referenced only by commented-out code
# in generate_timeline_html.
# NOTE(review): `Path` is whatever the star import of fasthtml.common provides;
# confirm it builds an SVG <path> element here and is not pathlib.Path.
_icon_path = Path(
    fill_rule='evenodd',
    d='M10 18a8 8 0 100-16 8 8 0 000 16zm3.857-9.809a.75.75 0 00-1.214-.882l-3.483 4.79-1.88-1.88a.75.75 0 10-1.06 1.061l2.5 2.5a.75.75 0 001.137-.089l4-5.5z',
    clip_rule='evenodd',
)
svg = Svg(
    _icon_path,
    xmlns='http://www.w3.org/2000/svg',
    viewbox='0 0 20 20',
    fill='currentColor',
    cls='h-5 w-5',
)
# Debug output: prints the concrete type of the object built above at import time.
print(type(svg))
# Pydantic models | |
class Event(BaseModel):
    # One dated event extracted from the source text.
    # Documented with comments rather than a docstring on purpose: the field
    # descriptions below are what the output parser turns into format
    # instructions, and the model definition should stay exactly as it was.

    # Point in time of the event (required).
    time: datetime = Field(
        ...,
        description="When the event occurred",
    )
    # Short free-text summary (required); the length limit is only stated in
    # the description, it is not enforced by validation.
    description: str = Field(
        ...,
        description="A summary of what happened. Not more than 20 words.",
    )
    # Closed two-value label (required).
    sentiment: Literal["Positive", "Negative"] = Field(
        ...,
        description="Categorization of the event sentiment",
    )
class EventResponse(BaseModel):
    # Top-level object the LLM response is parsed into.

    # Extracted events; validation rejects lists longer than 20 items.
    events: List[Event] = Field(
        max_length=20,
        description="List of events extracted from the context",
    )
# Output parser that turns the raw LLM text into an EventResponse and supplies
# the schema instructions embedded in the prompt below.
parser = PydanticOutputParser(pydantic_object=EventResponse)

# Prompt text. It has exactly two placeholders: {context} (filled per call) and
# {format_instructions} (pre-filled from the parser).
event_extraction_template = """
Extract the time based informations or events from the context and return a list of events with time, event description and event sentiment type whether it was positive or negative event.
The context may contain information about people, organization or any other entity. Try to get detailed and unique list of events as possible.
<context>
{context}
</context>
The response must follow the following schema strictly. There will be penalty for not following the schema.
<schema>
{format_instructions}
</schema>
Output:
"""

# Only "context" is declared as an input variable: the template has no {topic}
# placeholder and the chain is invoked with {"context": ...} only, so declaring
# "topic" would describe an input that is never supplied.
event_prompt = PromptTemplate(
    input_variables=["context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=event_extraction_template,
)
# Function to get the appropriate language model based on user selection | |
def getModel(model, key):
    """Return the chat model for the provider selected in the form.

    Args:
        model: Provider name. 'OpenAI' selects ChatOpenAI; any other value
            falls through to ChatAnthropic.
        key: API key for that provider, as typed in by the user.

    Returns:
        A ChatOpenAI or ChatAnthropic instance configured with ``key``.
    """
    # The key is handed to the client directly instead of being written to
    # os.environ: the environment is process-wide, so one request's key would
    # otherwise stay visible to (and race with) every other request.
    if model == 'OpenAI':
        return ChatOpenAI(
            api_key=key,
            temperature=0,              # 0 for the most repeatable output
            model="gpt-4o-2024-08-06",
            max_tokens=8000,            # cap on the response length
        )
    return ChatAnthropic(api_key=key, model='claude-3-5-sonnet-20240620')
# Function to generate an HTML table from the summary object | |
#def generate_timeline_html(timeline): | |
# rows = [] | |
# for idx, tline in timeline.iterrows(): | |
# if(tline['Sentiment'] == "Positive"): | |
# rows.append(Div(Div( H2(tline['Time']), P(tline['Event']), cls = 'content'), cls = "container left")) | |
# else: | |
# rows.append(Div(Div( H2(tline['Time']), P(tline['Event']), cls = 'content'), cls = "container right")) | |
# | |
# return Div(*rows, cls="timeline") | |
# Function to generate the vertical timeline markup from the timeline DataFrame
def generate_timeline_html(timeline):
    """Render the timeline DataFrame as a ``Ul`` with one ``Li`` per row.

    Args:
        timeline: DataFrame whose rows provide 'TimeStr' and 'Event'. The row
            index label decides the side: even labels get the
            "timeline-start" classes, odd labels the "timeline-end" classes.

    Returns:
        A ``Ul`` element with classes "timeline timeline-vertical".
    """
    # Read the marker icon once instead of once per row; skipped entirely for
    # an empty frame so no file access happens when there is nothing to draw.
    marker = File("./assets/circle.svg") if len(timeline) else None

    rows = []
    for idx, tline in timeline.iterrows():
        # The two sides differ only in the classes of the text container.
        side_cls = "timeline-start mb-10 md:text-end" if idx % 2 == 0 else "timeline-end mb-10"
        rows.append(
            Li(
                Div(marker, cls="timeline-middle"),
                Div(
                    Time(tline['TimeStr'], cls="font-mono italic"),
                    Div(tline['Event'], cls='text-lg font-black'),
                    cls=side_cls,
                ),
                Hr(),
            )
        )
    return Ul(*rows, cls="timeline timeline-vertical")
def get_timeline_df(result):
    """Convert a parsed EventResponse into a time-sorted DataFrame.

    Args:
        result: The EventResponse produced by the output parser.

    Returns:
        DataFrame sorted ascending by 'Time' with columns 'index' (the
        pre-sort position, left by ``reset_index()``), 'Time', 'Event',
        'Sentiment' and 'TimeStr' ('Time' formatted as dd/mm/YYYY). An empty
        event list yields an empty frame with the same columns.

    Raises:
        ValueError: If ``result`` is not an EventResponse.
    """
    if not isinstance(result, EventResponse):
        raise ValueError(f"Expected an EventResponse, but got {type(result)}")

    records = [
        {
            'Time': event.time,
            'Event': event.description,
            'Sentiment': event.sentiment,
        }
        for event in result.events
    ]
    # Naming the columns explicitly keeps them present when there are no
    # events, so the sort below does not fail with a KeyError on 'Time'.
    df = pd.DataFrame(records, columns=['Time', 'Event', 'Sentiment'])
    df = df.sort_values("Time", ascending=True).reset_index()
    df['TimeStr'] = df['Time'].map(lambda x: x.strftime('%d/%m/%Y'))
    return df
# Build the timeline DataFrame for a topic from its Wikipedia content
def generate_timeline(topic, llm):
    """Fetch Wikipedia text for ``topic`` and extract a timeline with ``llm``.

    The Wikipedia lookup and the prompt | llm | parser chain run outside the
    ``try`` block, so their exceptions propagate to the caller. Only failures
    while converting or saving the parsed result are caught here.

    Args:
        topic: Search text passed to the Wikipedia tool.
        llm: Chat model used as the middle step of the chain.

    Returns:
        The DataFrame from ``get_timeline_df`` (also written to
        'timeline.csv' in the working directory), or None if converting or
        saving the result failed.
    """
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wiki_content = wikipedia.run(topic)

    chain = event_prompt | llm | parser
    result = chain.invoke({"context": wiki_content})

    try:
        print(f"Results: {result}")
        df = get_timeline_df(result)
        # Keep a copy of the last generated timeline on disk.
        df.to_csv("timeline.csv", index=True)
        print("Results saved to 'timeline.csv'")
    except Exception as e:
        print(f"Error parsing LLM output: {str(e)}")
        return None
    return df
# Function to generate the configuration form for the web interface
def getConfigForm():
    """Build the card with the model / API key / topic form.

    The form posts to "/submit" and swaps the response into "#result" while
    "#indicator" is shown. The field ids "model", "secret" and "topic" are the
    keys the submit handler reads, so they must not be renamed.

    Returns:
        A ``Card`` wrapping the ``Form``.
    """
    # hx_swap (not hx_swap_oob) is the attribute that controls how the response
    # is placed into hx_target; hx-swap-oob is meant for response content.
    form = Form(hx_post="/submit", hx_target="#result", hx_swap="innerHTML", hx_indicator="#indicator")
    return Card(form(
        Div(
            Label(Strong("Model and Topic: "), style="color:#3498db; font-size:25px;")
        ),
        Div(
            Span(Strong('Model: '), cls="badge"),
            Select(Option("OpenAI"), Option("Anthropic"), id="model", cls='select w-full max-w-xs')
        ),
        Div(
            # This field takes the provider API key; it was previously
            # captioned with the topic text.
            Span(Strong('API key: '), cls="badge"),
            Input(id="secret", type="password", placeholder="Key: "),
        ),
        Div(
            Span(Strong('Topic for timeline (Person/Organization/Event): '), cls="badge"),
            Input(type='text',
                  id="topic",
                  cls="input w-full max-w-xs",
                  placeholder="Type here")
        ),
        Div(
            Button("Generate Timeline", cls='btn')
        ),
        Div(
            Br(),
            A("Developed by Manaranjan Pradhan", href="http://www.manaranjanp.com/",
              target="_blank",
              style='color: red; font-size: 16px;')
        )))
# Define the route for the homepage
# The handler is registered explicitly; without a route decorator the app
# would have no page at "/".
@app.get("/")
def homepage():
    """Render the landing page: the config form next to the result area.

    Returns:
        A titled two-column grid. The left column holds the form; the right
        column holds the "#result" target and the "#indicator" progress block
        that the form references.
    """
    result_panel = Div(
        Div(id="result"),
        Div(Label(Strong('Generating timeline for the topic.... take a deep breath....')),
            Progress(), id="indicator", cls="htmx-indicator")
    )
    return Titled('Generate a timeline ', Grid(
        getConfigForm(),
        result_panel,
        style="grid-template-columns: 400px 1000px; gap: 50px;"
    ))
# NOTE(review): no route registration for this handler is visible in this
# view; confirm the URL pattern it is meant to serve and register it.
async def get(fname: str, ext: str):
    """Serve ``<assets_dir>/<fname>.<ext>``.

    Args:
        fname: File name without extension (may contain sub-folders).
        ext: File extension without the dot.

    Returns:
        A FileResponse for the file. No media type is passed, so the
        response class infers it from the file name.

    Raises:
        HTTPException: 404 if the file does not exist or the resolved path
            lies outside ``assets_dir``.
    """
    root = os.path.realpath(assets_dir)
    fpath = os.path.realpath(os.path.join(root, f"{fname}.{ext}"))
    # fname/ext come from the request: refuse anything that resolves outside
    # the assets folder (e.g. via "..").
    inside_assets = fpath.startswith(root + os.sep)
    if inside_assets and os.path.isfile(fpath):
        print("file sent:" + fpath)
        return FileResponse(fpath)
    print("file failed:" + fpath)
    raise HTTPException(status_code=404, detail="File not found")
# Define the route for form submission
# Registered at the path the config form posts to (hx_post="/submit").
@app.post("/submit")
async def post(d: dict):
    """Handle the form submission and return the rendered timeline.

    Args:
        d: Submitted form fields; 'model', 'secret' and 'topic' are read.

    Returns:
        The timeline markup on success, otherwise a plain error string that
        is swapped into the result area.
    """
    try:
        # Get the appropriate language model
        model = getModel(d['model'], d['secret'])
        # Fetch the source text and extract the events
        timeline_df = generate_timeline(d['topic'], model)
        if timeline_df is None:
            # generate_timeline signals a conversion failure with None; report
            # it plainly instead of failing later with an AttributeError.
            return "Could not build a timeline for this topic. Please try again or use a different topic."
        # Generate and return the timeline markup
        return generate_timeline_html(timeline_df)
    except Exception as e:
        # Exception rather than BaseException, so task cancellation and
        # interpreter shutdown are not swallowed.
        print(traceback.format_exc())
        return str(e)
# Attach the fasthtml_hf backup hook to the app (see that package for what it
# backs up and when).
setup_hf_backup(app)

# Start the web server through FastHTML's serve() helper.
serve()