from collections import Counter
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from fastapi import APIRouter

router = APIRouter()

HF_API_KEY = os.getenv("HF_API_KEY")

def getLatestStatistics(symbol):
    all_company_data = []
    url = f'https://www.mse.mk/mk/stats/symbolhistory/{symbol}'
    response = requests.get(url)
    content = BeautifulSoup(response.text, 'html.parser')
    table_of_data = content.select("#resultsTable tbody tr")
    for row in table_of_data:
        data = row.text.split('\n')
        data.pop()
        data.remove('')
        if '' in data:
            # Rows with empty cells (no trading activity) are skipped
            continue
        data.insert(0, symbol)
        all_company_data.append(data)
    dataframe = pd.DataFrame(
        all_company_data,
        columns=['COMPANY', 'DATE', 'PRICE OF LAST TRANSACTION', 'MAX', 'MIN',
                 'AVERAGE PRICE', '% PERCENT', 'QUANTITY',
                 'REVENUE IN BEST DENARS', 'TOTAL REVENUE IN DENARS'],
    )

    def string_to_float(column):
        # COMPANY and DATE stay as strings; numeric columns arrive in the
        # Macedonian format "1.234,56" (dot thousands separator, decimal comma)
        if column in ('COMPANY', 'DATE'):
            return dataframe[column]
        if column == 'TOTAL REVENUE IN DENARS':
            dataframe[column] = dataframe[column].str.replace('.', '', regex=False).astype(float)
            return dataframe[column]
        try:
            # Drop all thousands separators first, then turn the decimal comma
            # into a dot (the reverse order breaks on values of 1,000,000+)
            dataframe[column] = (
                dataframe[column]
                .str.replace('.', '', regex=False)
                .str.replace(',', '.', regex=False)
                .astype(float)
            )
        except Exception as e:
            print(f"Error processing column {column}: {e}")
        return dataframe[column]

    # Apply the transformation to each column
    for col in dataframe.columns:
        dataframe[col] = string_to_float(col)

    dataframe['DATE'] = pd.to_datetime(dataframe['DATE'], dayfirst=True)  # MSE publishes day-first dates
    current_data = pd.read_csv('app/data-formatted.csv')
    current_data['DATE'] = pd.to_datetime(current_data['DATE'])
    merged_df = pd.concat([current_data, dataframe], ignore_index=True)
    updated_df = merged_df.drop_duplicates(keep='first')
    return updated_df
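
# A minimal sketch of how getLatestStatistics might be exercised on its own,
# assuming mse.mk is reachable; "ALK" is a hypothetical example symbol:
#
#   df = getLatestStatistics("ALK")
#   print(df.tail())  # freshly scraped rows merged after app/data-formatted.csv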

def getBerzaNews(symbol):
    url = f'https://www.mse.mk/en/symbol/{symbol}'
    response = requests.get(url)
    content = BeautifulSoup(response.text, 'html.parser')
    # Find links to news articles for this symbol
    aElements = content.find_all('a', href=True)
    newsLinks = [link['href'] for link in aElements if link['href'].startswith('/en/news')]
    news = []
    for link in newsLinks:
        response = requests.get(f'https://www.mse.mk{link}')
        content = BeautifulSoup(response.text, 'html.parser')
        try:
            news.append(content.find(id='content').text)
        except AttributeError:
            # Article page without a #content element; skip it
            continue
    return news
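
# Quick way to eyeball the news scraper, reusing the hypothetical "ALK"
# symbol from above; each element is the raw text of one article page:
#
#   for article in getBerzaNews("ALK")[:3]:
#       print(article[:200])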

# Sentiment analysis via a financial-news model on the Hugging Face Inference API
def analyzeSentiment(symbol):
    API_URL = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}

    def query(text):
        payload = {"inputs": text}
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return None
        return response.json()

    def getMax(result):
        # Ensure the response has the expected structure
        if not result or not isinstance(result, list):
            return "neutral"  # Default to neutral if the response is invalid
        result = result[0] if isinstance(result[0], list) else result
        return max(result, key=lambda x: x['score'])['label']

    # Fetch news articles
    texts = getBerzaNews(symbol)
    if not texts:
        return 'No news for ' + symbol

    # Collect sentiment labels
    sentiment_labels = []
    for text in texts:
        try:
            result = query(text)  # Query the Hugging Face API
            if result:
                sentiment_labels.append(getMax(result))
        except Exception as e:
            print(f"Error processing text: {e}")
            continue
    if not sentiment_labels:
        return 'No news for ' + symbol

    # Count occurrences of each sentiment and take the most frequent;
    # a tie between top labels is treated as "neutral"
    sentiment_counts = Counter(sentiment_labels)
    most_common = sentiment_counts.most_common()
    max_count = most_common[0][1]
    top_sentiments = [sentiment for sentiment, count in most_common if count == max_count]
    if len(top_sentiments) > 1:
        return "neutral"
    return top_sentiments[0]
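
# For reference, a successful Inference API call to this text-classification
# model typically returns a nested list of label/score dicts, which getMax
# unwraps (the scores below are illustrative, not real output):
#
#   [[{"label": "positive", "score": 0.91},
#     {"label": "neutral",  "score": 0.07},
#     {"label": "negative", "score": 0.02}]]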

def predict_future_price(input_data):
    # The hosted LSTM model expects only the numeric feature columns
    input_data = input_data.drop(columns=['DATE'])
    data_to_dictionary = input_data.to_dict(orient='list')
    url = 'https://stefan155-das-lstm-model-api.hf.space/predict/'
    payload = {"input_data": data_to_dictionary}
    response = requests.post(url, json=payload)
    response.raise_for_status()  # Fail loudly instead of KeyError-ing on an error body
    return response.json()['prediction']
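
# Illustrative shape of the payload POSTed to the prediction Space, assuming
# two rows of indicator features (column names come from getLatestStatistics
# and calculate_technical_indicators; the values here are made up):
#
#   {"input_data": {"PRICE OF LAST TRANSACTION": [21500.0, 21600.0],
#                   "RSI": [55.2, 57.8], ...}}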

# Resample data for the supported timeframes
def resample_data(data, timeframe):
    data["DATE"] = pd.to_datetime(data["DATE"])  # Ensure DATE is in datetime format
    data = data.drop_duplicates(subset="DATE", keep="first")  # Drop duplicate dates
    data = data.set_index("DATE").sort_index()  # asfreq/resample need a sorted index
    # Select only numeric columns for resampling
    numeric_columns = data.select_dtypes(include=["number"]).columns
    non_numeric_columns = data.select_dtypes(exclude=["number"]).columns
    if timeframe == "1D":
        resampled_data = data[numeric_columns].asfreq("D").ffill()
    elif timeframe == "1W":
        resampled_data = data[numeric_columns].resample("W").mean().fillna(0)
    elif timeframe == "1M":
        resampled_data = data[numeric_columns].resample("M").mean().fillna(0)
    else:
        raise ValueError("Invalid timeframe. Choose '1D', '1W', or '1M'.")
    resampled_data = resampled_data.reset_index()
    # Reattach non-numeric columns (e.g., COMPANY)
    if not non_numeric_columns.empty:
        non_numeric_data = data[non_numeric_columns].reset_index().drop_duplicates(subset="DATE")
        resampled_data = resampled_data.merge(non_numeric_data, on="DATE", how="left")
    return resampled_data
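
# Sketch of the intended behavior, assuming stock_data has a DATE column:
# "1D" forward-fills onto a daily calendar, while "1W"/"1M" average each
# bucket and zero-fill empty ones:
#
#   daily = resample_data(stock_data, "1D")
#   monthly = resample_data(stock_data, "1M")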

# Calculate technical indicators (oscillators and moving averages)
def calculate_technical_indicators(data, column="PRICE OF LAST TRANSACTION"):
    data = data.sort_values(by="DATE").reset_index(drop=True)

    # RSI (14-period, simple rolling averages)
    delta = data[column].diff()
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)
    avg_gain = pd.Series(gain).rolling(window=14).mean()
    avg_loss = pd.Series(loss).rolling(window=14).mean()
    rs = avg_gain / avg_loss
    data["RSI"] = 100 - (100 / (1 + rs))

    # MACD: 12-period EMA minus 26-period EMA
    data["EMA12"] = data[column].ewm(span=12, adjust=False).mean()
    data["EMA26"] = data[column].ewm(span=26, adjust=False).mean()
    data["MACD"] = data["EMA12"] - data["EMA26"]

    # Stochastic Oscillator over a 14-period high/low range
    data["L14"] = data[column].rolling(window=14).min()
    data["H14"] = data[column].rolling(window=14).max()
    data["Stochastic"] = (data[column] - data["L14"]) / (data["H14"] - data["L14"]) * 100

    # Williams %R
    data["Williams %R"] = (data["H14"] - data[column]) / (data["H14"] - data["L14"]) * -100

    # Rate of Change over 12 periods
    data["ROC"] = data[column].pct_change(periods=12)

    # Simple and exponential moving averages
    for window in [10, 20, 50]:
        data[f"SMA{window}"] = data[column].rolling(window=window).mean()
        data[f"EMA{window}"] = data[column].ewm(span=window, adjust=False).mean()

    # Summary meters
    oscillators_meter = "STRONG BUY" if data["RSI"].iloc[-1] > 70 else "NEUTRAL"
    moving_averages_meter = "STRONG BUY" if data["SMA10"].iloc[-1] < data[column].iloc[-1] else "SELL"
    return data, oscillators_meter, moving_averages_meter
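
# The RSI above uses the simple-rolling-mean form:
#   RS  = mean(gains over 14 periods) / mean(losses over 14 periods)
#   RSI = 100 - 100 / (1 + RS)
# Wilder's original definition smooths gains/losses exponentially; the plain
# rolling mean here is a common approximation of it.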

async def get_stock_data(ticker: str):
    print(f"Fetching data for ticker: {ticker}")
    latest_data = getLatestStatistics(ticker)
    stock_data = latest_data[latest_data["COMPANY"] == ticker].copy()
    if stock_data.empty:
        print("No data found for the given ticker.")
        return {"error": "Ticker not found"}

    # Process each timeframe independently so one failure does not break the rest
    timeframes = ["1D", "1W", "1M"]
    timeframe_results = {}
    for timeframe in timeframes:
        try:
            resampled_data = resample_data(stock_data, timeframe)
            # Replace NaN/Inf/-Inf so every value is JSON-serializable
            resampled_data.replace([np.inf, -np.inf, np.nan], 0, inplace=True)
            indicators_data, oscillators_meter, moving_averages_meter = calculate_technical_indicators(resampled_data)
            indicators_data.replace([np.inf, -np.inf, np.nan], 0, inplace=True)
            price_prediction = predict_future_price(indicators_data)
            market_news_evaluation = analyzeSentiment(ticker)
            timeframe_results[timeframe] = {
                "Price Prediction": price_prediction,
                "Market News Evaluation": market_news_evaluation,
                "GraphData": indicators_data[["DATE", "PRICE OF LAST TRANSACTION"]].to_dict(orient="records"),
                "Oscillators": {
                    "RSI": indicators_data["RSI"].iloc[-1],
                    "MACD": indicators_data["MACD"].iloc[-1],
                    "Stochastic Oscillator": indicators_data["Stochastic"].iloc[-1],
                    "Williams %R": indicators_data["Williams %R"].iloc[-1],
                    "Rate of Change": indicators_data["ROC"].iloc[-1],
                    "METER": oscillators_meter,
                },
                "Moving Averages": {
                    "SMA10": indicators_data["SMA10"].iloc[-1],
                    "EMA10": indicators_data["EMA10"].iloc[-1],
                    "SMA20": indicators_data["SMA20"].iloc[-1],
                    "EMA20": indicators_data["EMA20"].iloc[-1],
                    "SMA50": indicators_data["SMA50"].iloc[-1],
                    "METER": moving_averages_meter,
                },
            }
        except Exception as e:
            print(f"Error processing timeframe {timeframe}: {e}")
            timeframe_results[timeframe] = {"error": str(e)}

    # Replace NaN/Inf/-Inf in the raw history as well (assign rather than
    # mutate in place, to avoid touching a view of latest_data)
    stock_data = stock_data.replace([np.inf, -np.inf, np.nan], 0)
    # Reverse so the oldest rows come first
    stock_data = stock_data.iloc[::-1]

    # Construct the response
    response = {
        "Ticker": ticker,
        "Company Name": stock_data["COMPANY"].iloc[0],
        "Current Price": stock_data["AVERAGE PRICE"].iloc[-1],
        "MAX Price": stock_data["PRICE OF LAST TRANSACTION"].max(),
        "MIN Price": stock_data["PRICE OF LAST TRANSACTION"].min(),
        "Volume": stock_data["QUANTITY"].sum() if "QUANTITY" in stock_data.columns else None,
        "REVENUE": stock_data["REVENUE IN BEST DENARS"].sum() if "REVENUE IN BEST DENARS" in stock_data.columns else None,
        "AVERAGE PRICE": stock_data["AVERAGE PRICE"].iloc[-1],
        "Timeframes": timeframe_results,
    }
    # Ensure JSON compliance: convert numpy scalars to native Python types
    response = {
        key: (float(value) if isinstance(value, (np.float64, np.int64)) else value)
        for key, value in response.items()
    }
    return response
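
# Minimal manual smoke test, assuming the module is run directly and the
# external services (mse.mk and both Hugging Face endpoints) are reachable;
# "ALK" is an assumed example ticker, not one confirmed by this codebase.
if __name__ == "__main__":
    import asyncio

    print(asyncio.run(get_stock_data("ALK")))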