Joshua Sundance Bailey committed on
Commit
4442689
·
1 Parent(s): e9d1bce

maybe working

Browse files
geospatial-data-converter/kml_tricks_before.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zipfile
2
+ from io import StringIO
3
+
4
+ import bs4
5
+ import fiona
6
+ import geopandas as gpd
7
+ import lxml # nosec
8
+ import pandas as pd
9
+
10
+ fiona.drvsupport.supported_drivers["KML"] = "rw"  # register fiona's KML driver as read/write; presumably it is not enabled by default - confirm against the installed fiona version
11
+
12
+
13
def desctogdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Build a GeoDataFrame from the HTML tables embedded in feature Descriptions.

    Each value of ``gdf["Description"]`` is parsed with ``pandas.read_html``.
    The second table found is used when there is one, otherwise the first.
    The chosen table is transposed, its first row becomes the column header
    and the remaining row(s) become the attribute record(s) for that feature.

    Parameters:
        gdf: GeoDataFrame with a "Description" column of HTML strings and a
            "geometry" column.

    Returns:
        GeoDataFrame holding the parsed attributes, with the geometry column
        and CRS taken from ``gdf``.

    Raises:
        ValueError: propagated from pandas when a description contains no
            table, or when ``gdf`` has no rows to concatenate.
    """
    frames = []
    # pull chunks of data from feature descriptions
    for desc in gdf["Description"]:
        # Parse exactly once: a file-like buffer is exhausted after the first
        # read, so calling read_html a second time on it cannot succeed.
        tables = pd.read_html(StringIO(desc))
        table = (tables[1] if len(tables) > 1 else tables[0]).T
        # first row of the transposed table holds the field names
        table.columns = table.iloc[0]
        frames.append(table.iloc[1:])
    # join chunks together
    ccdf = pd.concat(frames, ignore_index=True)
    ccdf["geometry"] = gdf["geometry"]
    return gpd.GeoDataFrame(ccdf, crs=gdf.crs)
32
+
33
+
34
def readkmz(path: str) -> gpd.GeoDataFrame:
    """Read the single KML stored in a KMZ archive without parsing Descriptions.

    Parameters:
        path: path to the .kmz (zip) file.

    Returns:
        GeoDataFrame read from the archive's KML member with the KML driver.

    Raises:
        IndexError: if the archive does not contain exactly one .kml member.
    """
    # get name of kml in kmz (should be doc.kml but we don't assume);
    # match case-insensitively, like kmlcode_fromfile does
    with zipfile.ZipFile(path, "r") as kmz:
        kml_names = [f for f in kmz.namelist() if f.lower().endswith(".kml")]
    if len(kml_names) != 1:
        # this should never really happen
        raise IndexError(
            f"kmz must contain exactly one kml but contains {len(kml_names)}. "
            "Extract or convert to multiple kmls.",
        )
    # "!" separates the archive path from the member path in zip:// URIs;
    # a backslash is not a portable separator here
    return gpd.read_file(f"zip://{path}!{kml_names[0]}", driver="KML")
46
+
47
+
48
def ge_togdf(gefile: str) -> gpd.GeoDataFrame:
    """Return a GeoDataFrame after reading a kmz or kml and parsing Descriptions.

    Parameters:
        gefile: path to a Google Earth file; the extension is matched
            case-insensitively, consistent with kmlcode_fromfile.

    Returns:
        The result of desctogdf applied to the features read from the file.

    Raises:
        ValueError: if gefile does not end with .kml or .kmz.
    """
    lowered = gefile.lower()
    if lowered.endswith(".kml"):
        raw = gpd.read_file(gefile, driver="KML")
    elif lowered.endswith(".kmz"):
        raw = readkmz(gefile)
    else:
        raise ValueError("File must end with .kml or .kmz")
    return desctogdf(raw)
57
+
58
+
59
def simpledata_fromcode(kmlcode: str) -> pd.DataFrame:
    """Return a DataFrame extracted from KML source code.

    Uses simpledata tags, NOT embedded tables in feature descriptions.
    Every schemadata tag yields one row: a "Placemark_name" value followed by
    one column per simpledata child, keyed by that child's ``name`` attribute.

    Parameters:
        kmlcode: kml source code.

    Returns:
        DataFrame with one row per schemadata tag. "Placemark_name" is None
        when no name tag can be found for the row.
    """
    # get the KML source code as a BeautifulSoup object
    soup = bs4.BeautifulSoup(kmlcode, "html.parser")
    records = []
    # every schemadata tag is one row
    for row in soup.find_all("schemadata"):
        # two levels up from schemadata is expected to be the placemark
        # (schemadata -> extendeddata -> placemark)
        placemark = row.parent.parent
        name_tag = placemark.find("name") if placemark is not None else None
        # a missing name must not abort the whole extraction
        record = {"Placemark_name": name_tag.text if name_tag is not None else None}
        record.update(
            {field.get("name"): field.text for field in row.find_all("simpledata")},
        )
        records.append(record)
    return pd.DataFrame(records)
78
+
79
+
80
def kmlcode_fromfile(gefile: str) -> str:
    """Return the kml source code (str) extracted from a Google Earth file.

    Parameters:
        gefile: absolute or relative path to a Google Earth file (kmz or kml).
            The extension is matched case-insensitively.

    Returns:
        The text of the kml file, or of the single kml member of the kmz,
        decoded as UTF-8 in both cases.

    Raises:
        IndexError: if a kmz does not contain exactly one kml member.
        ValueError: if gefile does not end with .kml or .kmz.
    """
    fileextension = gefile.lower().split(".")[-1]
    if fileextension == "kml":
        # explicit encoding so the result does not depend on the platform
        # locale and matches the kmz branch below
        with open(gefile, "r", encoding="utf-8") as kml:
            return kml.read()
    if fileextension == "kmz":
        with zipfile.ZipFile(gefile) as kmz:
            # there should only be one kml file and it should be named doc.kml
            # we won't make that assumption
            kmls = [f for f in kmz.namelist() if f.lower().endswith(".kml")]
            if len(kmls) != 1:
                raise IndexError(
                    f"kmz must contain exactly one kml but contains {len(kmls)}. "
                    "Extract or convert to multiple kmls.",
                )
            with kmz.open(kmls[0]) as kml:
                # .decode() because zipfile.ZipFile.open(name).read() -> bytes
                return kml.read().decode("utf-8")
    raise ValueError("parameter gefile must end with .kml or .kmz")
105
+
106
+
107
def simpledata_fromfile(gefile: str) -> gpd.GeoDataFrame:
    """Return a GeoDataFrame extracted from a Google Earth file.

    Uses simpledata tags, NOT embedded tables in feature descriptions.
    Attributes come from simpledata_fromcode; geometry and CRS come from
    reading the same file with the KML driver.

    Parameters:
        gefile: absolute or relative path to a Google Earth file (kmz or kml).

    Returns:
        GeoDataFrame combining the simpledata attributes with the file's
        geometry column.
    """
    df = simpledata_fromcode(kmlcode_fromfile(gefile))
    # case-insensitive so ".KMZ" takes the archive path, consistent with
    # the extension handling in kmlcode_fromfile
    if gefile.lower().endswith(".kmz"):
        gefile_gdf = readkmz(gefile)
    else:
        gefile_gdf = gpd.read_file(gefile, driver="KML")
    return gpd.GeoDataFrame(df, geometry=gefile_gdf["geometry"], crs=gefile_gdf.crs)
120
+
121
+
122
def readge(gefile: str) -> pd.DataFrame:
    """Extract data from a Google Earth file.

    Will read simpledata tags OR embedded tables in feature descriptions.
    When the kml source contains a simpledata or simplefield tag, the
    simpledata reader is tried first and the description-table reader is the
    fallback; otherwise the order is reversed.

    Parameters:
        gefile: absolute or relative path to a Google Earth file (kmz or kml).

    Returns:
        The frame produced by whichever reader succeeds.

    Raises:
        Whatever the fallback reader raises if it also fails, or any error
        from the first reader that is not one of the caught parse errors.
    """
    # Importing the lxml package alone does not load its etree submodule;
    # import it explicitly so the except clause below can always be evaluated.
    import lxml.etree  # nosec

    code = kmlcode_fromfile(gefile).lower()
    if "<simpledata" in code or "<simplefield" in code:
        primary, fallback = simpledata_fromfile, ge_togdf
    else:
        primary, fallback = ge_togdf, simpledata_fromfile
    try:
        return primary(gefile)
    except (
        pd.errors.ParserError,
        lxml.etree.ParserError,
        lxml.etree.XMLSyntaxError,
        ValueError,
    ):
        return fallback(gefile)
geospatial-data-converter/utils.py CHANGED
@@ -6,7 +6,8 @@ from typing import BinaryIO
6
 
7
  import geopandas as gpd
8
 
9
- from kml_tricks import read_ge_file
 
10
 
11
  output_format_dict = {
12
  "ESRI Shapefile": ("shp", "zip", "application/zip"), # must be zipped
 
6
 
7
  import geopandas as gpd
8
 
9
+ # from kml_tricks import read_ge_file
10
+ from kml_tricks_before import readge as read_ge_file
11
 
12
  output_format_dict = {
13
  "ESRI Shapefile": ("shp", "zip", "application/zip"), # must be zipped