File size: 4,333 Bytes
ebe86df
38e70c4
 
 
 
 
ebe86df
38e70c4
 
 
ebe86df
 
 
 
38e70c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12e35a6
38e70c4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
from datetime import datetime, timedelta
import json
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import parse_qs, urlparse

from huggingface_hub import list_datasets, set_access_token, HfFolder
from datasets import load_dataset
import numpy as np

# Hugging Face access token, read from the environment at import time.
# A missing HF_TOKEN variable raises KeyError here, before the server is built.
HF_TOKEN = os.environ['HF_TOKEN']
# The token is handed to huggingface_hub twice: through set_access_token and
# through HfFolder.save_token.
# NOTE(review): presumably this is what lets the later `use_auth_token=True`
# calls authenticate - confirm against the installed huggingface_hub version.
set_access_token(HF_TOKEN)
HfFolder.save_token(HF_TOKEN)


def running_mean(x, N, total_length=-1):
    """Return the moving average of ``x`` over a sliding window of ``N`` items.

    The averages are derived from differences of a zero-prefixed cumulative
    sum, so the unpadded result holds ``len(x) - N + 1`` values.

    Args:
        x: Sequence of numbers to average.
        N: Window size.
        total_length: When larger than ``len(x) + 1``, the result is
            left-padded with ``total_length - (len(x) + 1)`` zeros. Note that
            the pad is measured against the cumulative-sum length, not the
            length of the returned array.

    Returns:
        A NumPy array of window averages, optionally left-padded with zeros.
    """
    prefix_sums = np.cumsum(np.insert(x, 0, 0))
    left_pad = max(total_length - len(prefix_sums), 0)

    # Sum of each window = prefix sum at its end minus prefix sum at its start.
    window_sums = prefix_sums[N:] - prefix_sums[:-N]
    padded_sums = np.pad(window_sums, (left_pad, 0))
    return padded_sums / float(N)


class RequestHandler(SimpleHTTPRequestHandler):
    """Serve static files plus three JSON endpoints backed by Hugging Face datasets.

    Routes handled by :meth:`do_GET` (matched by path prefix, in this order):

    * ``/`` - rewritten to ``index.html`` and served as a static file.
    * ``/initialize`` - JSON list of names derived from the datasets of the
      ``open-source-metrics`` author whose id contains ``pip``.
    * ``/retrievePipInstalls?input=a,b`` - per-``day`` ``num_downloads`` of
      each requested library (missing days are filled with ``0``).
    * ``/retrieveStars?input=a,b`` - running row count per ``dates`` value of
      each requested library (missing dates are filled with ``null``).
    * anything else - default static file handling.
    """

    def do_GET(self):
        """Dispatch a GET request to the matching endpoint.

        Returns:
            None. The response is written to ``self.wfile``.
        """
        print(self.path)
        if self.path == "/":
            self.path = "index.html"
            return SimpleHTTPRequestHandler.do_GET(self)

        if self.path.startswith("/initialize"):
            return self._handle_initialize()

        if self.path.startswith("/retrievePipInstalls"):
            return self._handle_pip_installs()

        if self.path.startswith("/retrieveStars"):
            return self._handle_stars()

        return SimpleHTTPRequestHandler.do_GET(self)

    def _send_json(self, payload, status=200):
        """Write ``payload`` as a JSON response with the given HTTP status."""
        # Serialise first so a non-serialisable payload fails before any
        # status line has been emitted.
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(body)

    def _requested_libraries(self):
        """Return the library names from the ``input`` query parameter.

        The parameter is a comma-separated list; empty entries are dropped.
        When no name is left, a JSON 400 response is sent and ``None`` is
        returned so the caller can stop.
        """
        query = parse_qs(urlparse(self.path).query)
        values = query.get("input")
        names = [name for name in values[0].split(",") if name] if values else []
        if not names:
            self._send_json(
                {"error": "missing required query parameter 'input'"}, status=400
            )
            return None
        return names

    @staticmethod
    def _pivot(rows_by_key, library_names, fill):
        """Turn ``{key: {library: value}}`` into one reversed list per library.

        Libraries absent for a key receive ``fill``. The keys themselves are
        returned, reversed as well, under ``'day'``.
        """
        for row in rows_by_key.values():
            for name in library_names:
                row.setdefault(name, fill)

        output = {
            name: [row[name] for row in rows_by_key.values()][::-1]
            for name in library_names
        }
        output["day"] = list(rows_by_key)[::-1]
        return output

    def _handle_initialize(self):
        """Reply with the library names that have a pip-installs dataset."""
        dataset_names = [
            info.id.split("/")[1]
            for info in list_datasets(author="open-source-metrics", use_auth_token=True)
        ]
        # NOTE(review): split('-')[0] truncates library names that themselves
        # contain a hyphen - confirm the dataset naming scheme.
        libraries = [name.split("-")[0] for name in dataset_names if "pip" in name]
        self._send_json(libraries)

    def _handle_pip_installs(self):
        """Reply with per-day download counts for the requested libraries."""
        library_names = self._requested_libraries()
        if library_names is None:
            return

        downloads_by_day = {}
        for library_name in library_names:
            # The name comes straight from the query string and is
            # interpolated into the dataset repository id.
            dataset = load_dataset(
                f"open-source-metrics/{library_name}-pip-installs",
                use_auth_token=True,
            )["train"]
            for row in dataset:
                downloads_by_day.setdefault(row["day"], {})[library_name] = row["num_downloads"]

        self._send_json(self._pivot(downloads_by_day, library_names, fill=0))

    def _handle_stars(self):
        """Reply with the running star count per date for the requested libraries."""
        library_names = self._requested_libraries()
        if library_names is None:
            return

        dataset_dict = load_dataset(
            "open-source-metrics/stars",
            use_auth_token=True,
            revision='90cb31b2db73c8c4291bcf317d831595e4fb2a91',
        ).sort('dates')

        unknown = [name for name in library_names if name not in dataset_dict]
        if unknown:
            self._send_json({"error": f"unknown libraries: {', '.join(unknown)}"}, status=404)
            return

        stars_by_date = {}
        for library_name in library_names:
            # Rows are sorted by date, so the 1-based row index is the
            # cumulative count up to and including that row.
            for count, row in enumerate(dataset_dict[library_name], start=1):
                stars_by_date.setdefault(row["dates"], {})[library_name] = count

        self._send_json(self._pivot(stars_by_date, library_names, fill=None))


# Bind to every interface (empty host) on port 7860; ThreadingHTTPServer
# dispatches requests to RequestHandler.
server = ThreadingHTTPServer(
    server_address=("", 7860),
    RequestHandlerClass=RequestHandler,
)

print("Running on port 7860")

# Blocks here, handling requests until the process is stopped.
server.serve_forever()