Spaces:
Runtime error
Runtime error
import json | |
import os | |
# Configuration
name = "chs.json"  # JSON file that is read, cleaned and written back in place
outputFolder = "database"  # directory receiving one text file per exported record
# Keys removed from every record before the JSON file is written back.
deleteKeys = [
    "images",
    "tags",
    "html",
]
# Maps a record's "type" to the key that holds the content to export.
typeScrape = {
    "article": "text",
    "event": "description",
    "list": "items",
}
# Optional fields of a list entry, appended after its summary in this order.
entryFields = (
    "fsElementContent",
    "fsElementFooterContent",
    "fsElementHeaderContent",
)


def _sameIgnoringSpaces(left, right):
    """Return True when both strings are equal once spaces are removed."""
    return left.replace(" ", "") == right.replace(" ", "")


def _entryText(pair):
    """Build the text block for one entry of a "list" record.

    The title comes first, then the summary (with the title text cut out of
    it), then the optional ``entryFields``. Every part starts on a new line,
    and a part that merely repeats the title (ignoring spaces) is skipped.
    """
    # A missing title is treated as empty so the duplicate checks below
    # cannot raise KeyError for entries that only carry a summary.
    title = pair.get("title", "")
    parts = []
    if "title" in pair:
        parts.append("\n" + title)
    if "summary" in pair and not _sameIgnoringSpaces(pair["summary"], title):
        summary = pair["summary"]
        parts.append("\n" + (summary.replace(title, "") if title else summary))
    for field in entryFields:
        if field in pair and not _sameIgnoringSpaces(pair[field], title):
            parts.append("\n" + pair[field])
    return "".join(parts)


def _recordText(item, contentKey):
    """Return the text to export for one record.

    "list" records yield their own title (when present) followed by the text
    of every entry under ``contentKey``; any other type yields the value
    stored under ``contentKey`` unchanged.

    Raises KeyError when ``contentKey`` is missing from the record.
    """
    if item["type"] != "list":
        return item[contentKey]
    parts = [item["title"]] if "title" in item else []
    parts.extend(_entryText(pair) for pair in item[contentKey])
    return "".join(parts)


with open(name, "r", encoding="utf-8") as source:
    data = json.load(source)

# An already-existing folder is fine; any other failure surfaces here instead
# of showing up later as a series of silently skipped files.
os.makedirs(outputFolder, exist_ok=True)

k = 0  # running number shared by all exported files, whatever their type
for index, item in enumerate(data):
    if not isinstance(item, dict):
        # Only mapping records can carry keys to strip or a "type" to export.
        continue

    # Records are mutated in place, so ``data`` sees the removals directly.
    for key in deleteKeys:
        item.pop(key, None)

    itemType = item.get("type")
    if not isinstance(itemType, str) or itemType not in typeScrape:
        continue

    k += 1
    path = f"{outputFolder}/chs-{itemType}-{k}.txt"
    try:
        # Append mode: running the script again adds to existing files.
        with open(path, "a", encoding="utf-8") as out:
            out.write(_recordText(item, typeScrape[itemType]))
    except (KeyError, TypeError, AttributeError, OSError) as err:
        # Best effort: one malformed record must not stop the export, but the
        # failure is reported instead of being silently dropped.
        print(f"Skipped record {index} ({path}): {err!r}")

with open(name, "w", encoding="utf-8") as target:
    json.dump(data, target, indent=6)