import requests
from bs4 import BeautifulSoup
import gradio as gr
from huggingface_hub import InferenceClient
import random
import urllib.parse
from datetime import datetime, timedelta
import re
import os
# List of user agents to rotate through
_useragent_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
]
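# Rotating the User-Agent header makes repeated scraping requests look less
# uniform; note that Google may still rate-limit or CAPTCHA-block frequent
# automated queries regardless.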

# Hugging Face Inference API endpoint; requires a valid access token in the
# HUGGINGFACE_TOKEN environment variable
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN')}"}

def query_llama(payload):
    """Send a query to the Llama model via the Hugging Face Inference API"""
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
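# Note: on success the text-generation endpoint returns a list like
# [{"generated_text": "..."}]; while the model is loading or on other errors
# it returns a dict with an "error" key instead, which
# summarize_financial_news checks for below.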

def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_verify=None, days_back=90):
    """Perform a Google search and return results"""
    print(f"Searching for term: {term}")
    
    # Calculate the date range
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days_back)
    
    # Format dates as strings
    start_date_str = start_date.strftime("%Y-%m-%d")
    end_date_str = end_date.strftime("%Y-%m-%d")
    
    # Add the date range to the search term
    search_term = f"{term} financial earnings report after:{start_date_str} before:{end_date_str}"
    
    escaped_term = urllib.parse.quote_plus(search_term)
    start = 0
    all_results = []

    with requests.Session() as session:
        while len(all_results) < num_results:
            try:
                # Choose a random user agent; use a distinct name so this does
                # not shadow the module-level Hugging Face auth headers
                user_agent = random.choice(_useragent_list)
                ua_headers = {'User-Agent': user_agent}

                resp = session.get(
                    url="https://www.google.com/search",
                    headers=ua_headers,
                    params={
                        "q": search_term,
                        "num": num_results - start,
                        "hl": lang,
                        "start": start,
                        "safe": safe,
                    },
                    timeout=timeout,
                    verify=ssl_verify,
                )
                resp.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching search results: {e}")
                break

            soup = BeautifulSoup(resp.text, "html.parser")
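            # Google's organic results currently live in div.g containers;
            # this selector is fragile and may break if Google changes its markup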
            result_block = soup.find_all("div", attrs={"class": "g"})
            if not result_block:
                print("No more results found.")
                break

            for result in result_block:
                if len(all_results) >= num_results:
                    break
                link = result.find("a", href=True)
                if link:
                    link = link["href"]
                    print(f"Found link: {link}")
                    try:
                        webpage = session.get(link, headers=ua_headers, timeout=timeout)
                        webpage.raise_for_status()
                        visible_text = extract_text_from_webpage(webpage.text)
                        
                        all_results.append({"link": link, "text": visible_text})
                    except requests.exceptions.RequestException as e:
                        print(f"Error fetching or processing {link}: {e}")
                        all_results.append({"link": link, "text": None})
                else:
                    print("No link found in result.")
                    all_results.append({"link": None, "text": None})

            start += len(result_block)

    print(f"Total results fetched: {len(all_results)}")
    return all_results
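# Example (illustrative): google_search("NVIDIA", num_results=3) returns a list
# of dicts like {"link": "https://...", "text": "..."}; "text" is None when a
# page could not be fetched, and "link" is None when a result had no anchor.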

def extract_text_from_webpage(html_content):
    """Extract visible text from HTML content"""
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Remove script and style elements
    for script in soup(["script", "style"]):
        script.decompose()
    
    # Get text
    text = soup.get_text()
    
    # Break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    
    # Break multi-headlines into a line each
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    
    # Drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)
    
    return text

def filter_relevant_content(text):
    """Filter out irrelevant content"""
    # List of keywords related to financial reports
    keywords = ['revenue', 'profit', 'earnings', 'financial', 'quarter', 'fiscal', 'growth', 'income', 'loss', 'dividend']
    
    # Split the text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    
    # Filter sentences containing at least one keyword
    relevant_sentences = [sentence for sentence in sentences if any(keyword in sentence.lower() for keyword in keywords)]
    
    # Join the relevant sentences back into a single string
    filtered_text = ' '.join(relevant_sentences)
    
    return filtered_text
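# Example: filter_relevant_content("Shares rose. Revenue grew 10%. The CEO spoke.")
# returns "Revenue grew 10%." since only that sentence contains a keyword ("revenue").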

def summarize_financial_news(query):
    """Search for financial news, extract relevant content, and summarize"""
    search_results = google_search(query, num_results=3)
    
    all_filtered_text = ""
    for result in search_results:
        if result['text']:
            filtered_text = filter_relevant_content(result['text'])
            all_filtered_text += filtered_text + "\n\n"
    
    if not all_filtered_text:
        return "No relevant financial information found."
    
    prompt = f"""You are a financial analyst. Summarize the following text from a financial perspective:

{all_filtered_text}

Provide a detailed, coherent summary focusing on financial implications and analysis."""

    # max_new_tokens caps the generated length; return_full_text=False keeps the
    # prompt itself out of the returned summary
    summary = query_llama({"inputs": prompt, "parameters": {"max_new_tokens": 500, "return_full_text": False}})

    # The API returns a dict with an "error" key (rather than a list of
    # generations) when the request fails, e.g. while the model is loading
    if isinstance(summary, list) and summary and 'generated_text' in summary[0]:
        return summary[0]['generated_text']
    return f"Error from model API: {summary}"

# Gradio Interface
iface = gr.Interface(
    fn=summarize_financial_news,
    inputs=gr.Textbox(lines=2, placeholder="Enter a company name or financial topic..."),
    outputs="text",
    title="Financial News Summarizer",
    description="Enter a company name or financial topic to get a summary of recent financial news."
)

if __name__ == "__main__":
    iface.launch()