import openai import os import pdfplumber from langchain.chains.mapreduce import MapReduceChain from langchain.text_splitter import CharacterTextSplitter from langchain.chains.summarize import load_summarize_chain from langchain.chat_models import ChatOpenAI from langchain.document_loaders import UnstructuredFileLoader from langchain.prompts import PromptTemplate import logging import json from typing import List import mimetypes import validators import requests import tempfile from langchain.chains import create_extraction_chain from GoogleNews import GoogleNews import pandas as pd import requests import gradio as gr import re from langchain.document_loaders import WebBaseLoader from langchain.chains.combine_documents.stuff import StuffDocumentsChain from transformers import pipeline import plotly.express as px from langchain.document_loaders.csv_loader import CSVLoader from langchain.chains.llm import LLMChain import yfinance as yf import pandas as pd import nltk from nltk.tokenize import sent_tokenize class KeyValueExtractor: def __init__(self): """ Initialize the ContractSummarizer object. Parameters: pdf_file_path (str): The path to the input PDF file. """ self.model = "facebook/bart-large-mnli" os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY def get_url(self,keyword): return f"https://finance.yahoo.com/quote/{keyword}?p={keyword}" def get_each_link_summary(self,url): loader = WebBaseLoader(url) docs = loader.load() text_splitter = CharacterTextSplitter.from_tiktoken_encoder( chunk_size=3000, chunk_overlap=200 ) # Split the documents into chunks split_docs = text_splitter.split_documents(docs) # Prepare the prompt template for summarization prompt_template = """The give text is Finance Stock Details for one company i want to get values for Previous Close : [value] Open : [value] Bid : [value] Ask : [value] Day's Range : [value] 52 Week Range : [value] Volume : [value] Avg. Volume : [value] Market Cap : [value] Beta (5Y Monthly) : [value] PE Ratio (TTM) : [value] EPS (TTM) : [value] Earnings Date : [value] Forward Dividend & Yield : [value] Ex-Dividend Date : [value] 1y Target Est : [value] these details form that and Write a abractive summary about those details: Given Text: {text} CONCISE SUMMARY:""" prompt = PromptTemplate.from_template(prompt_template) # Prepare the template for refining the summary with additional context refine_template = ( "Your job is to produce a final summary\n" "We have provided an existing summary up to a certain point: {existing_answer}\n" "We have the opportunity to refine the existing summary" "(only if needed) with some more context below.\n" "------------\n" "{text}\n" "------------\n" "Given the new context, refine the original summary" "If the context isn't useful, return the original summary." ) refine_prompt = PromptTemplate.from_template(refine_template) # Load the summarization chain using the ChatOpenAI language model chain = load_summarize_chain( llm = ChatOpenAI(temperature=0), chain_type="refine", question_prompt=prompt, refine_prompt=refine_prompt, return_intermediate_steps=True, input_key="input_documents", output_key="output_text", ) # Generate the refined summary using the loaded summarization chain result = chain({"input_documents": split_docs}, return_only_outputs=True) print(result["output_text"]) return result["output_text"] def one_day_summary(self,content) -> None: # Use OpenAI's Completion API to analyze the text and extract key-value pairs response = openai.Completion.create( engine="text-davinci-003", # You can choose a different engine as well temperature = 0, prompt=f"i want detailed Summary from given finance details. i want information like what happen today comparing last day good or bad Bullish or Bearish like these details i want summary. content in backticks.```{content}```.", max_tokens=1000 # You can adjust the length of the response ) # Extract and return the chatbot's reply result = response['choices'][0]['text'].strip() print(result) return result def extract_key_value_pair(self,content) -> None: """ Extract key-value pairs from the refined summary. Prints the extracted key-value pairs. """ try: # Use OpenAI's Completion API to analyze the text and extract key-value pairs response = openai.Completion.create( engine="text-davinci-003", # You can choose a different engine as well temperature = 0, prompt=f"Get maximum count meaningfull key value pairs. content in backticks.```{content}```.", max_tokens=1000 # You can adjust the length of the response ) # Extract and return the chatbot's reply result = response['choices'][0]['text'].strip() return result except Exception as e: # If an error occurs during the key-value extraction process, log the error logging.error(f"Error while extracting key-value pairs: {e}") print("Error:", e) def analyze_sentiment_for_graph(self, text): pipe = pipeline("zero-shot-classification", model=self.model) label=["Positive", "Negative", "Neutral"] result = pipe(text, label) sentiment_scores = { result['labels'][0]: result['scores'][0], result['labels'][1]: result['scores'][1], result['labels'][2]: result['scores'][2] } return sentiment_scores def display_graph(self,text): sentiment_scores = self.analyze_sentiment_for_graph(text) labels = sentiment_scores.keys() scores = sentiment_scores.values() fig = px.bar(x=scores, y=labels, orientation='h', color=labels, color_discrete_map={"Negative": "red", "Positive": "green", "Neutral": "gray"}) fig.update_traces(texttemplate='%{x:.2f}%', textposition='outside') fig.update_layout(title="Sentiment Analysis",width=800) formatted_pairs = [] for key, value in sentiment_scores.items(): formatted_value = round(value, 2) # Round the value to two decimal places formatted_pairs.append(f"{key} : {formatted_value}") result_string = '\t'.join(formatted_pairs) return fig def get_finance_data(self,symbol): # Define the stock symbol and date range start_date = '2022-08-19' end_date = '2023-08-19' # Fetch historical OHLC data using yfinance data = yf.download(symbol, start=start_date, end=end_date) # Select only the OHLC columns ohlc_data = data[['Open', 'High', 'Low', 'Close']] csv_path = "ohlc_data.csv" # Save the OHLC data to a CSV file ohlc_data.to_csv(csv_path) return csv_path def csv_to_dataframe(self,csv_path): # Replace 'your_file.csv' with the actual path to your CSV file csv_file_path = csv_path # Read the CSV file into a DataFrame df = pd.read_csv(csv_file_path) # Now you can work with the 'df' DataFrame return df # Display the first few rows of the DataFrame def save_dataframe_in_text_file(self,df): output_file_path = 'output.txt' # Convert the DataFrame to a text file df.to_csv(output_file_path, sep='\t', index=False) return output_file_path def csv_loader(self,output_file_path): loader = UnstructuredFileLoader(output_file_path, strategy="fast") docs = loader.load() return docs def document_text_spilliter(self,docs): """ Split documents into chunks for efficient processing. Returns: List[str]: List of split document chunks. """ # Initialize the text splitter with specified chunk size and overlap text_splitter = CharacterTextSplitter.from_tiktoken_encoder( chunk_size=1000, chunk_overlap=200 ) # Split the documents into chunks split_docs = text_splitter.split_documents(docs) # Return the list of split document chunks return split_docs def change_bullet_points(self,text): nltk.download('punkt') # Download the sentence tokenizer data (only need to run this once) # Example passage passage = text # Tokenize the passage into sentences sentences = sent_tokenize(passage) bullet_string = "" # Print the extracted sentences for sentence in sentences: bullet_string+="* "+sentence+"\n" return bullet_string def one_year_summary(self,keyword): csv_path = self.get_finance_data(keyword) df = self.csv_to_dataframe(csv_path) output_file_path = self.save_dataframe_in_text_file(df) docs = self.csv_loader(output_file_path) split_docs = self.document_text_spilliter(docs) prompt_template = """Analyze the Financial Details and Write a abractive quick short summary how the company perform up and down,Bullish/Bearish of the following: {text} CONCISE SUMMARY:""" prompt = PromptTemplate.from_template(prompt_template) # Prepare the template for refining the summary with additional context refine_template = ( "Your job is to produce a final summary\n" "We have provided an existing summary up to a certain point: {existing_answer}\n" "We have the opportunity to refine the existing summary" "(only if needed) with some more context below.\n" "------------\n" "{text}\n" "------------\n" "Given the new context, refine the original summary" "If the context isn't useful, return the original summary." "10 line summary is enough" ) refine_prompt = PromptTemplate.from_template(refine_template) # Load the summarization chain using the ChatOpenAI language model chain = load_summarize_chain( llm = ChatOpenAI(temperature=0), chain_type="refine", question_prompt=prompt, refine_prompt=refine_prompt, return_intermediate_steps=True, input_key="input_documents", output_key="output_text", ) # Generate the refined summary using the loaded summarization chain result = chain({"input_documents": split_docs}, return_only_outputs=True) one_year_perfomance_summary = self.change_bullet_points(result["output_text"]) # Return the refined summary return one_year_perfomance_summary def main(self,keyword): clean_url = self.get_url(keyword) link_summary = self.get_each_link_summary(clean_url) clean_summary = self.one_day_summary(link_summary) key_value = self.extract_key_value_pair(clean_summary) return clean_summary, key_value def gradio_interface(self): with gr.Blocks(css="style.css",theme= 'karthikeyan-adople/hudsonhayes-gray') as app: gr.HTML("""