from collections import Counter
from fastapi import APIRouter
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import os
# FastAPI router object for this module.
# NOTE(review): no route registration is visible in this part of the file;
# confirm where the endpoints are attached to `router`.
router = APIRouter()
# Hugging Face API token read from the HF_API_KEY environment variable
# (None when the variable is unset); analyzeSentiment sends it as a Bearer token.
HF_API_KEY = os.getenv("HF_API_KEY")
def getLatestStatistics(symbol):
# Purpose: scrape the history table for `symbol` from mse.mk, convert the
# scraped columns to numeric/datetime types, append them to the history stored
# in 'app/data-formatted.csv' and return the de-duplicated combined frame.
#
# NOTE(review): this function looks damaged in its current form - the
# indentation is gone, `dataframe` is never assigned in the visible lines, and
# the `except` below has no matching `try:`. Restore the missing statements
# before relying on it.
#
# `all_company_data` and `file_path` are assigned here but not referenced again
# in the visible code.
all_company_data = []
file_path = 'output_file_update.csv'
url = f'https://www.mse.mk/mk/stats/symbolhistory/{symbol}'
response = requests.get(url)
content = BeautifulSoup(response.text, 'html.parser')
# Every <tr> inside the body of the element with id "resultsTable".
table_of_data = content.select("#resultsTable tbody tr")
for row in table_of_data:
# The row's text is split on newlines, one list entry per line of text.
data = row.text.split('\n')
# NOTE(review): presumably the empty strings left by the split are meant to be
# removed here and the row collected into `all_company_data`; only the
# membership test and the insertion of `symbol` at position 0 are visible.
if data.__contains__(''):
data.insert(0, symbol)
# Helper that converts one column of `dataframe` and returns that column.
# NOTE(review): `dataframe` is a free variable with no visible definition.
def string_to_float(column):
# 'DATE' is returned untouched; it is parsed with pd.to_datetime further down.
if column == 'DATE':
return dataframe[column]
# NOTE(review): the next two lines strip every '.' and cast to float, then
# return; a guarding condition for a specific column appears to be missing.
dataframe[column] = dataframe[column].str.replace('.', '').astype(float)
return dataframe[column]
# 'COMPANY' is returned untouched.
if column == 'COMPANY':
return dataframe[column]
# Replace commas with dots, then remove the first dot only (third argument
# n=1 of str.replace) and convert to float.
dataframe[column] = dataframe[column].str.replace(',', '.').str.replace('.', '', 1)
dataframe[column] = dataframe[column].astype(float)
# NOTE(review): no `try:` is visible for this handler. As written it reports
# the failing column on stdout and execution continues to the return below.
except Exception as e:
print(f"Error processing column {column}: {e}")
return dataframe[column]
# Apply the transformation to each column
for col in dataframe.columns:
dataframe[col] = string_to_float(col)
dataframe['DATE'] = pd.to_datetime(dataframe['DATE'])
# Load the stored history; the path is relative to the process working directory.
current_data = pd.read_csv('app/data-formatted.csv')
current_data['DATE'] = pd.to_datetime(current_data['DATE'])
# Stored rows come first, so with keep='first' a stored row wins over an
# identical scraped row. Only rows equal in every column count as duplicates.
merged_df = pd.concat([current_data, dataframe], ignore_index=True)
updated_df = merged_df.drop_duplicates(keep='first')
return updated_df
def getBerzaNews(symbol, timeout=10):
    """Collect the text of the news articles linked from a symbol's MSE page.

    Fetches ``https://www.mse.mk/en/symbol/{symbol}``, follows every link whose
    ``href`` starts with ``/en/news`` and extracts the text of the element with
    ``id="content"`` from each linked page.

    Args:
        symbol: Ticker symbol used to build the symbol-page URL.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list with one text per article that could be fetched and parsed.
        The list is empty when the page has no news links or no article
        yields any text.

    Raises:
        requests.RequestException: If the symbol page itself cannot be
            fetched. Failures on individual articles are reported on stdout
            and skipped.
    """
    url = f'https://www.mse.mk/en/symbol/{symbol}'
    response = requests.get(url, timeout=timeout)
    content = BeautifulSoup(response.text, 'html.parser')

    # Find links to news. dict.fromkeys drops repeated hrefs while keeping
    # page order, so an article linked twice is not downloaded twice.
    aElements = content.find_all('a', href=True)
    newsLinks = list(dict.fromkeys(
        link['href'] for link in aElements if link['href'].startswith('/en/news')
    ))

    news = []
    for link in newsLinks:
        # Best effort per article: one broken page must not discard the rest.
        try:
            article_response = requests.get(f'https://www.mse.mk{link}', timeout=timeout)
            article = BeautifulSoup(article_response.text, 'html.parser')
            body = article.find(id='content')
            if body is None:
                continue
            text = body.text.strip()
            if text:
                news.append(text)
        except Exception as e:
            print(f"Error fetching news article {link}: {e}")
    return news
# Sentiment analysis of news articles via the Hugging Face Inference API
def analyzeSentiment(symbol, timeout=30):
    """Return the dominant sentiment label of the news found for ``symbol``.

    Each article text returned by ``getBerzaNews`` is sent to the Hugging Face
    Inference API; the highest-scoring label of every reply is collected and
    the label that occurs most often wins.

    Args:
        symbol: Ticker symbol whose news is analysed.
        timeout: Seconds to wait for each inference request.

    Returns:
        The most frequent label, ``"neutral"`` when several labels tie for
        first place, or ``'No news for ' + symbol`` when there are no articles
        or none of them could be classified.
    """
    API_URL = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}

    def query(text):
        """POST one text to the endpoint; return the parsed JSON or None on a non-200 reply."""
        payload = {"inputs": text}
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return None
        return response.json()

    def getMax(result):
        """Return the label with the highest score, or "neutral" for an unusable reply."""
        # Ensure the response has the expected structure
        if not result or not isinstance(result, list):
            return "neutral"  # Default to neutral if the response is invalid
        # The reply may be a list of candidates or a list wrapping that list.
        result = result[0] if isinstance(result[0], list) else result
        max_label = max(result, key=lambda x: x['score'])['label']
        return max_label

    # Fetch news articles
    texts = getBerzaNews(symbol)
    if not texts:
        return 'No news for ' + symbol

    # Collect one sentiment label per article; a failure on one article is
    # reported and skipped so the remaining articles still count.
    sentiment_labels = []
    for text in texts:
        try:
            result = query(text)  # Query the Hugging Face API
            if result:
                sentiment_labels.append(getMax(result))
        except Exception as e:
            print(f"Error processing text: {e}")

    if not sentiment_labels:
        return 'No news for ' + symbol

    # Count occurrences of each sentiment, most frequent first
    most_common = Counter(sentiment_labels).most_common()

    # Check for ties
    max_count = most_common[0][1]
    top_sentiments = [sentiment for sentiment, count in most_common if count == max_count]
    if len(top_sentiments) > 1:
        return "neutral"  # Return neutral in case of ties
    return top_sentiments[0]
def predict_future_price(input_data, timeout=60):
    """Request a price prediction from the remote LSTM model service.

    The ``DATE`` column is dropped and the remaining columns are sent as
    ``{"input_data": {column: [values, ...]}}`` in a JSON POST body.

    Args:
        input_data: DataFrame holding a ``DATE`` column plus the feature
            columns to send.
        timeout: Seconds to wait for the service to answer.

    Returns:
        The value stored under ``"prediction"`` in the service's JSON reply.

    Raises:
        requests.RequestException: On connection problems, a timeout or a
            non-success HTTP status.
        KeyError: If the reply contains no ``"prediction"`` entry.
    """
    features = input_data.drop(columns=['DATE'])
    data_to_dictionary = features.to_dict(orient='list')
    url = 'https://stefan155-das-lstm-model-api.hf.space/predict/'
    payload = {
        "input_data": data_to_dictionary,
    }
    response = requests.post(url, json=payload, timeout=timeout)
    # Surface HTTP errors with their status instead of failing later while
    # indexing an error body.
    response.raise_for_status()
    return response.json()['prediction']
# Function to resample data for timeframes
def resample_data(data, timeframe):
    """Resample a price history to a daily, weekly or monthly grid.

    Rows with a repeated ``DATE`` are dropped (first one kept). Numeric
    columns are then resampled:

    * ``"1D"``: one row per calendar day, gaps filled with the previous value.
    * ``"1W"``: weekly mean, empty weeks filled with 0.
    * ``"1M"``: month-end mean, empty months filled with 0.

    Non-numeric columns are re-attached with a left merge on ``DATE``, so they
    are only populated for dates that exist in the input.

    Args:
        data: DataFrame with a ``DATE`` column. It is not modified.
        timeframe: One of ``"1D"``, ``"1W"`` or ``"1M"``.

    Returns:
        A new DataFrame with ``DATE`` as a regular column.

    Raises:
        ValueError: If ``timeframe`` is not one of the supported values.
    """
    # assign() builds a new frame, so the caller's DataFrame (often a filtered
    # slice of a larger one) is left untouched.
    data = data.assign(DATE=pd.to_datetime(data["DATE"]))
    data = data.drop_duplicates(subset="DATE", keep="first").set_index("DATE")

    # Only numeric columns can be averaged or forward-filled on a time grid.
    numeric_columns = data.select_dtypes(include=["number"]).columns
    non_numeric_columns = data.select_dtypes(exclude=["number"]).columns
    numeric_data = data[numeric_columns]

    if timeframe == "1D":
        resampled_data = numeric_data.asfreq("D").ffill()
    elif timeframe == "1W":
        resampled_data = numeric_data.resample("W").mean().fillna(0)
    elif timeframe == "1M":
        # An offset object instead of the "M" string alias: the alias was
        # renamed between pandas versions, the offset works on all of them.
        resampled_data = numeric_data.resample(pd.offsets.MonthEnd()).mean().fillna(0)
    else:
        raise ValueError("Invalid timeframe. Choose '1D', '1W', or '1M'.")

    print(f"Resampled data for {timeframe} timeframe:")
    resampled_data = resampled_data.reset_index()

    # Reattach non-numeric columns (e.g., COMPANY)
    if not non_numeric_columns.empty:
        non_numeric_data = data[non_numeric_columns].reset_index().drop_duplicates(subset="DATE")
        resampled_data = resampled_data.merge(non_numeric_data, on="DATE", how="left")
    return resampled_data
# Function to calculate technical indicators
def calculate_technical_indicators(data, column="PRICE OF LAST TRANSACTION"):
    """Add technical-indicator columns to a price history and derive two meters.

    The frame is sorted by ``DATE`` and the following columns are added, all
    computed from ``column``: ``RSI`` (14-row simple averages), ``EMA12``,
    ``EMA26``, ``MACD``, ``L14``, ``H14``, ``Stochastic``, ``Williams %R``,
    ``ROC`` (12-row fractional change) and ``SMA``/``EMA`` for windows 10, 20
    and 50.

    Args:
        data: DataFrame with a ``DATE`` column and the price column. It is not
            modified; a sorted copy is returned.
        column: Name of the price column the indicators are computed from.

    Returns:
        A tuple ``(data, oscillators_meter, moving_averages_meter)``.
        ``oscillators_meter`` is ``"STRONG BUY"`` when the last RSI is above
        70, otherwise ``"NEUTRAL"``. ``moving_averages_meter`` is
        ``"STRONG BUY"`` when the last SMA10 is below the last price,
        otherwise ``"SELL"``. Both meters are ``"NEUTRAL"`` for an empty frame.
    """
    data = data.sort_values(by="DATE").reset_index(drop=True)
    prices = data[column]

    # Oscillators
    delta = prices.diff()
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)
    # np.where returns plain arrays; give them the frame's index explicitly so
    # the assignment below never depends on implicit index alignment.
    avg_gain = pd.Series(gain, index=data.index).rolling(window=14).mean()
    avg_loss = pd.Series(loss, index=data.index).rolling(window=14).mean()
    rs = avg_gain / avg_loss
    data["RSI"] = 100 - (100 / (1 + rs))

    data["EMA12"] = prices.ewm(span=12, adjust=False).mean()
    data["EMA26"] = prices.ewm(span=26, adjust=False).mean()
    data["MACD"] = data["EMA12"] - data["EMA26"]

    # Stochastic Oscillator
    data["L14"] = prices.rolling(window=14).min()
    data["H14"] = prices.rolling(window=14).max()
    price_range = data["H14"] - data["L14"]
    data["Stochastic"] = (prices - data["L14"]) / price_range * 100

    # Williams %R
    data["Williams %R"] = (data["H14"] - prices) / price_range * -100

    # Rate of Change
    data["ROC"] = prices.pct_change(periods=12)

    # Moving Averages
    for window in [10, 20, 50]:
        data[f"SMA{window}"] = prices.rolling(window=window).mean()
        data[f"EMA{window}"] = prices.ewm(span=window, adjust=False).mean()

    # Meters
    if data.empty:
        # No last row to read; report a neutral reading instead of failing
        # with an IndexError.
        return data, "NEUTRAL", "NEUTRAL"

    # NOTE(review): the thresholds and labels below are kept exactly as they
    # were (RSI above 70 -> "STRONG BUY"); confirm this is the intended mapping.
    # A NaN indicator (too few rows) compares False and takes the else branch.
    oscillators_meter = "STRONG BUY" if data["RSI"].iloc[-1] > 70 else "NEUTRAL"
    moving_averages_meter = "STRONG BUY" if data["SMA10"].iloc[-1] < prices.iloc[-1] else "SELL"
    return data, oscillators_meter, moving_averages_meter
async def get_stock_data(ticker: str):
    """Build the analysis payload for one ticker.

    Loads the price history through ``getLatestStatistics`` and, for each of
    the timeframes ``"1D"``, ``"1W"`` and ``"1M"``, resamples it, computes
    technical indicators, requests a price prediction and attaches the news
    sentiment. A failure inside one timeframe is recorded as
    ``{"error": <message>}`` for that timeframe only.

    Args:
        ticker: Symbol to look up in the ``COMPANY`` column.

    Returns:
        ``{"error": "Ticker not found"}`` when no rows match. Otherwise a dict
        with summary figures for the ticker and the per-timeframe results
        under ``"Timeframes"``.
    """
    # NOTE(review): no route decorator is visible on this coroutine; confirm
    # it is registered on `router` elsewhere.
    print(f"Fetching data for ticker: {ticker}")
    latest_data = getLatestStatistics(ticker)
    # Copy the filtered rows so the in-place clean-up further down works on an
    # independent frame instead of a slice of `latest_data`.
    stock_data = latest_data[latest_data["COMPANY"] == ticker].copy()
    if stock_data.empty:
        print("No data found for the given ticker.")
        return {"error": "Ticker not found"}

    # Process for each timeframe
    timeframes = ["1D", "1W", "1M"]
    timeframe_results = {}
    # The news sentiment does not depend on the timeframe, so it is fetched at
    # most once per successful call. It is still obtained inside the try block
    # so that a failure is reported per timeframe, as before.
    market_news_evaluation = None
    for timeframe in timeframes:
        try:
            resampled_data = resample_data(stock_data, timeframe)
            # Replace NaN/Inf/-Inf in resampled data
            resampled_data.replace([np.inf, -np.inf, np.nan], 0, inplace=True)

            indicators_data, oscillators_meter, moving_averages_meter = calculate_technical_indicators(resampled_data)
            # Replace NaN/Inf/-Inf in indicators_data
            indicators_data.replace([np.inf, -np.inf, np.nan], 0, inplace=True)

            price_prediction = predict_future_price(indicators_data)
            if market_news_evaluation is None:
                market_news_evaluation = analyzeSentiment(ticker)

            timeframe_results[timeframe] = {
                "Price Prediction": price_prediction,
                "Market News Evaluation": market_news_evaluation,
                "GraphData": indicators_data[["DATE", "PRICE OF LAST TRANSACTION"]].to_dict(orient="records"),
                "Oscillators": {
                    "RSI": indicators_data["RSI"].iloc[-1],
                    "MACD": indicators_data["MACD"].iloc[-1],
                    "Stochastic Oscillator": indicators_data["Stochastic"].iloc[-1],
                    "Williams %R": indicators_data["Williams %R"].iloc[-1],
                    "Rate of Change": indicators_data["ROC"].iloc[-1],
                    "METER": oscillators_meter,
                },
                "Moving Averages": {
                    "SMA10": indicators_data["SMA10"].iloc[-1],
                    "EMA10": indicators_data["EMA10"].iloc[-1],
                    "SMA20": indicators_data["SMA20"].iloc[-1],
                    "EMA20": indicators_data["EMA20"].iloc[-1],
                    "SMA50": indicators_data["SMA50"].iloc[-1],
                    "METER": moving_averages_meter,
                },
            }
        except Exception as e:
            print(f"Error processing timeframe {timeframe}: {e}")
            timeframe_results[timeframe] = {"error": str(e)}

    # Replace NaN/Inf/-Inf in stock_data
    stock_data.replace([np.inf, -np.inf, np.nan], 0, inplace=True)
    # reverse stock data
    stock_data = stock_data.iloc[::-1]

    # Construct response
    # NOTE(review): "Current Price" and "AVERAGE PRICE" read the last row of
    # the reversed frame; which row is the most recent depends on the row order
    # returned by getLatestStatistics - confirm.
    response = {
        "Ticker": ticker,
        "Company Name": stock_data["COMPANY"].iloc[0],
        "Current Price": stock_data["AVERAGE PRICE"].iloc[-1],
        "MAX Price": stock_data["PRICE OF LAST TRANSACTION"].max(),
        "MIN Price": stock_data["PRICE OF LAST TRANSACTION"].min(),
        "Volume": stock_data["QUANTITY"].sum() if "QUANTITY" in stock_data.columns else None,
        "REVENUE": stock_data[
            "REVENUE IN BEST DENARS"].sum() if "REVENUE IN BEST DENARS" in stock_data.columns else None,
        "AVERAGE PRICE": stock_data["AVERAGE PRICE"].iloc[-1],
        "Timeframes": timeframe_results,
    }

    # Ensure JSON compliance: turn NumPy scalars of any width into plain floats
    response = {
        key: (float(value) if isinstance(value, (np.floating, np.integer)) else value)
        for key, value in response.items()
    }
    return response