File size: 4,203 Bytes
38e70c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12e35a6
38e70c4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from datetime import datetime, timedelta
import json
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import parse_qs, urlparse

from huggingface_hub import list_datasets
from datasets import load_dataset
import numpy as np


def running_mean(x, N, total_length=-1):
    """Moving average of ``x`` over windows of ``N`` consecutive samples.

    The window sums are obtained from a cumulative sum with a leading zero,
    so entry ``i`` of the unpadded result is ``mean(x[i:i + N])`` and the
    unpadded result has ``len(x) - N + 1`` entries.

    Args:
        x: One-dimensional sequence of numbers.
        N: Window size (number of samples per average).
        total_length: When larger than ``len(x) + 1``, zeros are prepended
            to the result; the number of zeros is
            ``total_length - (len(x) + 1)``. Any smaller value (including
            the default ``-1``) adds no padding.

    Returns:
        A NumPy float array of window means, optionally left-padded with
        zeros.

    NOTE(review): the pad width is measured against the cumulative-sum
    length rather than the number of windows, so a padded result has
    ``total_length - N`` entries, not ``total_length`` -- confirm this is
    the intended contract.
    """
    # prefix_sums[k] == sum(x[:k]); the inserted 0 makes prefix_sums[0] == 0.
    prefix_sums = np.cumsum(np.insert(x, 0, 0))
    left_pad = max(total_length - len(prefix_sums), 0)

    # Difference of prefix sums N apart is the sum over each N-wide window.
    window_sums = prefix_sums[N:] - prefix_sums[:-N]
    padded_sums = np.pad(window_sums, (left_pad, 0))
    return padded_sums / float(N)


class RequestHandler(SimpleHTTPRequestHandler):
    """Static-file handler extended with three JSON endpoints.

    Routes handled by ``do_GET`` (matched with ``str.startswith``):

    * ``/`` -- serves ``index.html`` through ``SimpleHTTPRequestHandler``.
    * ``/initialize`` -- JSON list of library names derived from the
      ``open-source-metrics`` datasets whose id contains ``pip``.
    * ``/retrievePipInstalls?input=a,b`` -- JSON object with one list of
      ``num_downloads`` per requested library plus a ``day`` list.
    * ``/retrieveStars?input=a,b`` -- JSON object with one list of running
      star counts per requested library plus a ``day`` list.

    Every other path falls through to the default static-file behaviour.
    A request to one of the ``retrieve*`` routes without a usable ``input``
    query parameter is answered with HTTP 400 and a JSON error body.
    """

    def _send_json(self, payload, status=200):
        """Serialise ``payload`` to JSON and write it as the full response."""
        # Serialise before touching the socket so a non-serialisable payload
        # cannot leave a half-written response with a success status line.
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _requested_libraries(self):
        """Return the comma-separated names in ``?input=``, or ``None`` if absent."""
        values = parse_qs(urlparse(self.path).query).get("input")
        if not values:
            # parse_qs omits keys whose value is blank, so both a missing
            # parameter and ``?input=`` end up here.
            return None
        names = [name for name in values[0].split(',') if name]
        return names or None

    @staticmethod
    def _merge_series(library_names, per_day, missing):
        """Turn ``{day: {library: value}}`` into the column-oriented response.

        Days lacking a value for a library get ``missing``. Every list is
        emitted in reverse insertion order of ``per_day``.

        NOTE(review): days keep first-seen order across libraries, so when
        several libraries cover different days the ``day`` axis may not be
        strictly chronological -- confirm whether the client re-sorts.
        """
        for values in per_day.values():
            for name in library_names:
                values.setdefault(name, missing)

        output = {
            name: [values[name] for values in per_day.values()][::-1]
            for name in library_names
        }
        output['day'] = list(per_day.keys())[::-1]
        return output

    def _pip_installs(self, library_names):
        """Collect ``num_downloads`` per ``day`` for each requested library."""
        per_day = {}
        for name in library_names:
            dataset = load_dataset(f"open-source-metrics/{name}-pip-installs", use_auth_token=True)['train']
            for row in dataset:
                per_day.setdefault(row['day'], {})[name] = row['num_downloads']
        return self._merge_series(library_names, per_day, missing=0)

    def _stars(self, library_names):
        """Collect a running row count per ``dates`` value for each library."""
        per_day = {}
        dataset_dict = load_dataset(
            "open-source-metrics/stars",
            use_auth_token=True,
            revision='90cb31b2db73c8c4291bcf317d831595e4fb2a91',
        ).sort('dates')

        for name in library_names:
            # Rows are sorted by 'dates', so the 1-based row index is the
            # number of rows seen up to and including that date; when a date
            # repeats, the last (largest) count is the one kept.
            for count, row in enumerate(dataset_dict[name], start=1):
                per_day.setdefault(row['dates'], {})[name] = count
        return self._merge_series(library_names, per_day, missing=None)

    def _serve_series(self, collect):
        """Validate ``?input=`` and respond with ``collect(library_names)``."""
        library_names = self._requested_libraries()
        if library_names is None:
            self._send_json(
                {"error": "Missing required query parameter 'input'"},
                status=400,
            )
            return
        self._send_json(collect(library_names))

    def do_GET(self):
        """Dispatch a GET request to a JSON endpoint or to static serving."""
        print(self.path)
        if self.path == "/":
            self.path = "index.html"
            return SimpleHTTPRequestHandler.do_GET(self)

        if self.path.startswith("/initialize"):
            metrics_datasets = [
                d.id.split('/')[1]
                for d in list_datasets(author='open-source-metrics', use_auth_token=True)
            ]
            # NOTE(review): the library name is taken as the text before the
            # first '-', which presumably relies on library names containing
            # no hyphen -- confirm against the dataset naming scheme.
            pip_installs_datasets = [d.split('-')[0] for d in metrics_datasets if 'pip' in d]
            self._send_json(pip_installs_datasets)
            return

        if self.path.startswith("/retrievePipInstalls"):
            self._serve_series(self._pip_installs)
            return

        if self.path.startswith("/retrieveStars"):
            self._serve_series(self._stars)
            return

        return SimpleHTTPRequestHandler.do_GET(self)


# Build the threaded HTTP server at import time; the empty host string binds
# the listening socket on all available interfaces, port 7860.
server = ThreadingHTTPServer(
    server_address=("", 7860),
    RequestHandlerClass=RequestHandler,
)

print("Running on port 7860")

# Blocks the main thread, handling requests until the process is stopped.
server.serve_forever()