#!/usr/bin/env python3
import ssl
import shutil
import requests
from bs4 import BeautifulSoup

from entity import Entity
from common import selectors, defaults, mkdir

def get_page(e: Entity):
    """Fetch the entity's landing page, retrying over HTTPS if plain HTTP fails."""
    try:
        page = requests.get(e.url)
    except requests.RequestException:
        # Retry with an https:// scheme; replace only the first occurrence so an
        # already-secure URL is left untouched.
        url = e.url.replace('http://', 'https://', 1)
        page = requests.get(url)
    return page

def get_cert(e: Entity):
    """Save the PEM-encoded server certificate for the entity's host."""
    ssl_url = e.url.split("/")[2]
    mkdir.make_dirs(defaults.CERTS_PATH)
    # Build the output path before the try block so it is also available to the
    # error handler and the return statement if the TLS handshake fails.
    fn = f"{defaults.CERTS_PATH}/{e.bco}.cert"
    try:
        cert = ssl.get_server_certificate((ssl_url, 443), ca_certs=None)
        with open(fn, 'w') as f:
            f.write(cert)
    except Exception as err:
        # Record the failure next to where the certificate would have been written.
        with open(f"{fn}.error.log", 'w+') as f:
            f.write(str(err))
    return fn

def get_img_logo(src: str, fn: str):
    """Stream the image at `src` into the local file `fn` and return the path."""
    res = requests.get(src, stream=True)
    with open(fn, "wb") as f:
        shutil.copyfileobj(res.raw, f)
    return fn

def get_logos(e: Entity, page):
    """Download every logo candidate found on the page and return the local paths."""
    soup = BeautifulSoup(page.content, "html.parser")
    # Collect candidates matched by the tag, id and class selectors.
    logos = soup.select(selectors.img_logo)
    logos.extend(soup.select(selectors.id_logo))
    logos.extend(soup.select(selectors.cls_logo))

    mkdir.make_dirs(defaults.LOGOS_DATA_PATH)

    lfn = []
    for i, logo in enumerate(logos):
        if 'src' not in logo.attrs:
            continue
        src = logo.attrs['src']
        # Best-effort extension: text after the last dot, stripped of any path component.
        ext = src.split('.')[-1].split('/')[-1]
        # Resolve relative sources against the entity's URL.
        if not src.startswith('http'):
            src = e.url + src
        fn = f"{defaults.LOGOS_DATA_PATH}/{e.bco}.{i}.{ext}"
        lfn.append(get_img_logo(src, fn))
    return lfn
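

if __name__ == "__main__":
    # Minimal usage sketch; this block is an illustrative assumption, not part
    # of the original module. A stand-in object with the two attributes the
    # functions above rely on (`url` and `bco`) is used here because the real
    # entity.Entity constructor signature is not shown in this file.
    from types import SimpleNamespace

    e = SimpleNamespace(url="http://www.example.com/", bco="example")
    page = get_page(e)
    print(get_cert(e))
    print(get_logos(e, page))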