File size: 3,331 Bytes
dfc542c
 
 
91a458d
dfc542c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d64edc6
 
dfc542c
 
 
d64edc6
dfc542c
d64edc6
dfc542c
22b9c3e
dfc542c
22b9c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d64edc6
 
22b9c3e
dfc542c
22b9c3e
 
 
 
 
 
 
 
 
 
 
 
d64edc6
22b9c3e
 
 
 
dfc542c
d64edc6
 
 
 
dfc542c
d64edc6
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import os
import sys
# Absolute path of the "backend" directory located two levels above this
# file's directory.
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
# Extend the import search path before the `utils` import below runs;
# presumably `utils` lives inside that backend directory (TODO confirm).
sys.path.append(src_directory)
from utils import logger

# CSV read by process_data(). The path is relative, so it is resolved against
# the process's current working directory, not this file's location.
file_path =  "./world_population.csv"

def process_data():
    """Load the CSV at the module-level ``file_path`` into a DataFrame.

    Returns:
        The loaded DataFrame on success. On any failure the exception is
        not propagated; a string of the form
        ``"Unable to read the file <error>"`` is returned instead, so
        callers must check the type of the result.
    """
    try:
        logger.log("I'm going to read the csv")
        frame = pd.read_csv(file_path)
        logger.log("I'm reading the csv")
    except Exception as exc:
        logger.log("I couldn't read the file")
        return f"Unable to read the file {exc}"
    return frame
    
def display_continents(dataframe):
    """Return the distinct continents found in ``dataframe``.

    Args:
        dataframe: DataFrame that has a 'Continent' column.

    Returns:
        A new single-column DataFrame named "Continent" holding each
        distinct value once, in order of first appearance.
    """
    distinct_names = dataframe['Continent'].unique()
    logger.log("Displaying the list of continents in the data")
    return pd.DataFrame(distinct_names, columns=["Continent"])

def display_countries(dataframe):
    """Return every value of the 'Country' column as its own DataFrame.

    Args:
        dataframe: DataFrame that has a 'Country' column.

    Returns:
        A new single-column DataFrame named "Country" with one row per row
        of the input (duplicates are kept) and a fresh default index.
    """
    listing = pd.DataFrame(dataframe['Country'].values, columns=["Country"])
    logger.log("Displaying the list of countries in the data")
    return listing

def continent_stat(dataframe, attribute="Population", stat_type="highest"):
    """Find the continent whose summed ``attribute`` is the highest or lowest.

    Args:
        dataframe: DataFrame containing a 'Continent' column and the
            ``attribute`` column.
        attribute: Name of the column to total per continent.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{attribute: {continent: total}}`` on success. On any failure
        (missing columns, invalid ``stat_type``, pandas errors) the error
        is logged and ``{"error": <message>}`` is returned instead.
    """
    try:
        if 'Continent' not in dataframe.columns or attribute not in dataframe.columns:
            # Raise rather than return the exception object, so this failure
            # is reported through the same {"error": ...} dict as all others.
            raise ValueError(f"Dataframe must contain 'Continent' and '{attribute}' columns.")

        # Single-column frame ("total_attribute") indexed by continent.
        continent_stats = dataframe.groupby('Continent')[attribute].agg(total_attribute='sum')

        mode = str(stat_type).lower()
        if mode == "highest":
            continent = continent_stats.idxmax().item()
            value = continent_stats.max().item()
            logger.log(f"Displaying the continent with the highest {attribute}: {continent} with {attribute} {value}")

        elif mode == "lowest":
            continent = continent_stats.idxmin().item()
            value = continent_stats.min().item()
            logger.log(f"Displaying the continent with the lowest {attribute}: {continent} with {attribute} {value}")

        else:
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        return {attribute: {continent: value}}

    except Exception as e:
        logger.log(f"Error in continent_stat: {str(e)}")
        return {"error": str(e)}

def country_stat(dataframe, attribute: str = "Population", stat_type: str = "highest"):
    """Find the country with the highest or lowest value of ``attribute``.

    Args:
        dataframe: DataFrame containing a 'Country' column and the
            ``attribute`` column.
        attribute: Name of the column to compare.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{attribute: {country: value}}`` on success. On any failure the
        exception is not propagated; a string of the form
        ``"Unable to fetch the data. Error <message>"`` is returned.
    """
    try:
        mode = stat_type.lower()
        if mode == "highest":
            index = dataframe[attribute].idxmax()
        elif mode == "lowest":
            index = dataframe[attribute].idxmin()
        else:
            # Fail with a clear message instead of leaving `index` unbound.
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        country = dataframe.loc[index, 'Country']
        requested_attribute = dataframe.loc[index, attribute]
        # .item() converts the numpy scalar to a plain Python number.
        result = {attribute: {country: requested_attribute.item()}}
        logger.log(f"Displaying the country with {stat_type} {attribute} in the data")
        return result
    except Exception as e:
        return f"Unable to fetch the data. Error {e}"
    
def get_continent_wise_stat(data_frame, attribute):
    """Total ``attribute`` per continent.

    Args:
        data_frame: DataFrame containing a 'Continent' column and the
            ``attribute`` column.
        attribute: Name of the column to sum within each continent.

    Returns:
        The grouped result as produced by ``DataFrame.to_dict()``, i.e.
        ``{"Continent": {row: name, ...}, attribute: {row: total, ...}}``,
        or ``None`` when either required column is absent.
    """
    columns = data_frame.columns
    # Validate the column that is actually aggregated, not a fixed "Population".
    if "Continent" not in columns or attribute not in columns:
        return None

    continent_data = data_frame.groupby("Continent")[attribute].sum().reset_index()
    return continent_data.to_dict()
    
def get_country_wise_stat(data_frame, country, attribute):
    """Look up a single attribute value for one country.

    Args:
        data_frame: DataFrame containing a 'Country' column and the
            ``attribute`` column.
        country: Exact value to match in the 'Country' column.
        attribute: Name of the column whose value is returned.

    Returns:
        ``{country: {attribute: value}}``.

    Raises:
        KeyError: If 'Country' or ``attribute`` is not a column.
        ValueError: If ``country`` does not match exactly one row.
    """
    matches = data_frame[data_frame["Country"] == country][attribute]
    if len(matches) != 1:
        # Series.item() would raise ValueError here too, but with a message
        # that does not say which country was involved.
        raise ValueError(
            f"Expected exactly one row for country '{country}', found {len(matches)}."
        )
    return {country: {attribute: matches.item()}}