Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- sql.py +128 -0
- sql_pdf.xlsx +0 -0
sql.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
from sqlite3 import Error
|
3 |
+
import csv
|
4 |
+
import pandas as pd
|
5 |
+
import os
|
6 |
+
|
7 |
+
|
8 |
+
def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    :param db_file: path of the database file; SQLite creates the file if it
        does not exist (``":memory:"`` yields an in-memory database)
    :return: an open ``sqlite3.Connection``, or ``None`` when the connection
        could not be opened (the error is printed, not raised)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as e:
        # Best-effort contract relied on by callers: report and hand back None.
        print(e)
        return None
|
21 |
+
|
22 |
+
|
23 |
+
def create_table(conn, create_table_sql, table_name):
    """Drop ``table_name`` if it exists, then run ``create_table_sql``.

    :param conn: open sqlite3 Connection object
    :param create_table_sql: a CREATE TABLE statement
    :param table_name: name of the table to drop before it is re-created
    :return: None; SQLite errors are printed rather than raised
    """
    # Identifiers cannot be bound as ``?`` parameters, so the name is quoted
    # instead (embedded double quotes doubled). This keeps names containing
    # spaces or quotes from breaking the DROP statement.
    quoted_name = '"' + str(table_name).replace('"', '""') + '"'
    try:
        c = conn.cursor()
        try:
            c.execute(f"DROP TABLE IF EXISTS {quoted_name}")
            c.execute(create_table_sql)
        finally:
            c.close()
    except Error as e:
        print(e)
|
35 |
+
|
36 |
+
|
37 |
+
def insert_values(conn, task, sql):
    """Insert a single row and commit it.

    :param conn: open sqlite3 Connection object
    :param task: sequence of values bound to the ``?`` placeholders in ``sql``
    :param sql: parameterized INSERT statement
    :return: ``lastrowid`` of the cursor after the insert
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, task)
        conn.commit()
        return cur.lastrowid
    finally:
        # Release the cursor even when execute/commit raises.
        cur.close()
|
43 |
+
|
44 |
+
|
45 |
+
def populate(csv_file, db_file, table_insert):
    """Insert every data row of ``csv_file`` into the database ``db_file``.

    The first CSV row is treated as a header and skipped. All remaining rows
    are inserted in one transaction, so a failing row leaves the table
    unchanged instead of half-filled.

    :param csv_file: path of the CSV file to read (read as UTF-8, which is
        what pandas ``to_csv`` writes by default)
    :param db_file: path of the SQLite database file
    :param table_insert: parameterized INSERT statement with one ``?`` per
        CSV column
    :raises sqlite3.Error: if the database cannot be opened or an insert fails
    """
    conn = create_connection(db_file)
    if conn is None:
        # create_connection has already printed the underlying cause.
        raise Error(f"cannot open database {db_file!r}")

    try:
        # newline="" is what the csv module requires so that newlines
        # embedded in quoted fields are parsed correctly.
        with open(csv_file, mode="r", newline="", encoding="utf-8") as file:
            rows = csv.reader(file)
            next(rows, None)  # skip the header row; harmless on an empty file
            # ``with conn`` commits on success and rolls back on error; it
            # does not close the connection, hence the outer ``finally``.
            with conn:
                conn.executemany(table_insert, rows)
    finally:
        conn.close()
|
61 |
+
|
62 |
+
def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQLite identifier."""
    return '"' + str(name).replace('"', '""') + '"'


def _infer_sqlite_type(values):
    """Return the SQLite type of the most frequent value kind in ``values``.

    Integers map to INTEGER, floats to REAL and everything else (strings,
    booleans, timestamps, ...) to TEXT. Missing cells (None / NaN) are
    ignored; a column with no usable values falls back to TEXT.
    """
    counts = {}
    for value in values:
        is_float = pd.api.types.is_float(value)
        if value is None or (is_float and value != value):
            continue  # None / NaN carries no type information
        if pd.api.types.is_integer(value):  # plain and numpy ints, not bool
            sql_type = "INTEGER"
        elif is_float:
            sql_type = "REAL"
        else:
            sql_type = "TEXT"
        counts[sql_type] = counts.get(sql_type, 0) + 1
    if not counts:
        return "TEXT"
    return max(counts, key=counts.get)


def main():
    """Convert ``sql_pdf.xlsx`` into ``sql_pdf.csv`` and ``sql_pdf.db``.

    Reads the spreadsheet, writes it out as CSV, infers one SQLite column
    type per spreadsheet column, (re)creates a table named after the file,
    and loads the CSV rows into it. The generated schema and INSERT
    statement are printed for inspection.
    """
    name = "sql_pdf.xlsx"
    base_name = os.path.splitext(name)[0]
    csv_file = f"{base_name}.csv"
    database = f"{base_name}.db"

    excel_file = pd.read_excel(name)
    excel_file.to_csv(csv_file, index=False, header=True)

    columns = list(excel_file.columns)
    if not columns:
        print("Error! the spreadsheet has no columns.")
        return

    # Select columns by position so non-string headers work too.
    column_type = {
        column: _infer_sqlite_type(excel_file.iloc[:, position])
        for position, column in enumerate(columns)
    }
    print(column_type)

    # Identifiers are quoted so headers containing spaces, punctuation or
    # SQL keywords still produce valid statements.
    table = _quote_identifier(base_name)
    column_defs = ", ".join(
        f"{_quote_identifier(column)} {sql_type}"
        for column, sql_type in column_type.items()
    )
    column_names = ", ".join(_quote_identifier(column) for column in column_type)
    placeholders = ", ".join("?" for _ in column_type)

    table_construct = f"CREATE TABLE IF NOT EXISTS {table}({column_defs});"
    table_insert = f"INSERT INTO {table}({column_names})\nVALUES ({placeholders})"

    print(table_construct)
    print("\n\n", table_insert)

    conn = create_connection(database)
    if conn is None:
        print("Error! cannot create the database connection.")
        return  # nothing to populate without a database
    try:
        create_table(conn, table_construct, base_name)
    finally:
        # populate() opens its own connection, so release this one first.
        conn.close()

    populate(csv_file, database, table_insert)
|
126 |
+
|
127 |
+
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
sql_pdf.xlsx
ADDED
Binary file (11.1 kB). View file
|
|