Spaces:
Sleeping
Sleeping
File size: 3,241 Bytes
0502369 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import sqlite3
from sqlite3 import Error
import csv
import pandas as pd
import os
def create_connection(db_file):
""" create a database connection to a database that resides
in the memory
"""
conn = None;
try:
conn = sqlite3.connect(db_file)
return conn
except Error as e:
print(e)
return conn
def create_table(conn, create_table_sql, table_name):
""" create a table from the create_table_sql statement
:param conn: Connection object
:param create_table_sql: a CREATE TABLE statement
:return:
"""
try:
c = conn.cursor()
c.execute(f"""DROP TABLE IF EXISTS {table_name}""")
c.execute(create_table_sql)
except Error as e:
print(e)
def insert_values(conn, task, sql):
cur = conn.cursor()
cur.execute(sql, task)
conn.commit()
return cur.lastrowid
def populate(csv_file, db_file, table_insert):
conn = create_connection(db_file)
with conn:
with open(csv_file, mode ='r')as file:
csvfile = csv.reader(file)
for n,lines in enumerate(csvfile):
if n>0:
lines = tuple(i for i in lines)
insert_values(conn, lines, table_insert)
else:pass
def main():
name = "sql_pdf.xlsx"
excel_file = (pd.read_excel(name))
csv_file = f"""{name.split(".")[0]}.csv"""
excel_file.to_csv(csv_file,
index=None,
header=True)
column = [x for x in excel_file.columns]
column_type = {}
type_map = {
"<class 'str'>": "TEXT",
"<class 'int'>": "INTEGER",
"<class 'float'>": "REAL",
}
for i in range(len(column)):
datatype = {}
for j in excel_file.values:
if type(j[i]) not in list(datatype.keys()):datatype[type(j[i])] = 1
else: datatype[type(j[i])] += 1
ma_x = 0
max_type = "<class 'str'>"
for k in list(datatype.keys()):
if ma_x < datatype[k]:max_type = str(k)
try:
column_type[column[i]] = type_map[max_type]
except KeyError:
column_type[column[i]] = "TEXT"
print(column_type)
table_construct = f"""CREATE TABLE IF NOT EXISTS {name.split(".")[0]}( """
table_insert = f"""INSERT INTO {name.split(".")[0]}("""
table_values = f"""VALUES ("""
for l in list(column_type.keys()):
table_construct += f"""{l} {column_type[l]}, """
table_insert += f"""{l}, """
table_values += "?, "
table_construct = f"""{table_construct[:-2]});"""
table_values = f"""{table_values[:-2]})"""
table_insert = f"""{table_insert[:-2]})\n{table_values}"""
print(table_construct)
print("\n\n", table_insert)
database = f"""{name.split(".")[0]}.db"""
conn = create_connection(database)
# create tables
if conn is not None:
# create projects table
create_table(conn, table_construct, name.split(".")[0])
else:
print("Error! cannot create the database connection.")
populate(csv_file, database, table_insert)
if __name__ == '__main__':
main() |