File size: 2,739 Bytes
10757ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""

Loads Maharashtra assembly 2019 dataset

"""
import json
import sqlite3
import pandas as pd
import csv

def load_data_from_csv(name, end=58925):
    data = []
    keys = None
    with open(name, "r", encoding="utf-8") as f:
        csv_data = csv.reader(f)
        for i, line in enumerate(csv_data):
            found = False
            if i == 0:
                keys = line
                continue
            for field in line:
                if field.strip() == "TURNOUT":
                    found = True
                    break
            if found:
                # print("TURNOUT found, skipping")
                continue
            item = {}
            # print(line)
            for key, val in zip(keys, line):
                item[key] = val
            data.append(item)
    return data

def clean_dataframe(df):
    # Strip leading and trailing spaces from column names (without changing them)
    df.columns = df.columns.str.strip()
    
    # Strip spaces and convert text columns to lowercase
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].str.strip()
        
    # Fill null values with 0
    df.fillna(0, inplace=True)
    
    return df

def load_data_from_csv_to_db(name, conn):

    # read the dataset from csv file and create a pandas dataframe
    df = pd.read_csv(open(name, "r", encoding="utf-8"))

    # clean the dataframe
    df = clean_dataframe(df)

    df.columns = [
        'state', 'constituency_number', 'constituency', 'candidate_name', 'sex', 'age', 
        'category', 'party_name', 'party_symbol', 'evm_votes', 'postal_votes', 'total_votes', 
        'vote_percentage', 'total_electors'
    ]

    # save the dataframe as a database table, name of table is: elections_2019
    result = df.to_sql("maha_2019", conn, if_exists='replace', index=False)

    return result


def query_sql(conn, query):
    cursor = conn.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
    field_names = [r[0] for r in cursor.description]
    print(field_names)
    return result


if __name__ == '__main__':
    # create a connection to sql db called elections.db
    conn = sqlite3.connect('../data/elections.db')

    filename = r"../data/maha_results_2019.csv"
    data = load_data_from_csv(filename, end=5)
    # print(data)

    res = load_data_from_csv_to_db(filename, conn)
    # print(res)

    query = "SELECT * FROM maha_2019 LIMIT 5;"
    results = query_sql(conn, query)
    print(results)

    # keys = data.keys()
    # for i, item in enumerate(data):
    #     print(data[item])
    # jdata = json.loads(data.to_json())
    # print(jdata)