rhoitjadhav committed on
Commit
0c01de4
·
1 Parent(s): 73b81af

update dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -0
  2. load_data.py +32 -0
  3. start.sh +6 -1
Dockerfile CHANGED
@@ -28,6 +28,7 @@ RUN apt -y install elasticsearch
28
  # Copy users db file along with execution script
29
  COPY users.yml /app
30
  COPY start.sh /app
 
31
  RUN chmod +x /app/start.sh
32
 
33
  RUN useradd -ms /bin/bash user -p "$(openssl passwd -1 ubuntu)"
 
28
  # Copy users db file along with execution script
29
  COPY users.yml /app
30
  COPY start.sh /app
31
+ COPY load_data.py /app
32
  RUN chmod +x /app/start.sh
33
 
34
  RUN useradd -ms /bin/bash user -p "$(openssl passwd -1 ubuntu)"
load_data.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+
4
+
5
def load_data():
    """Fetch the Gutenberg spaCy NER dataset from the hub and log it to Argilla.

    The train split of ``argilla/gutenberg_spacy-ner`` is loaded, read as a
    token-classification dataset, and logged under the name
    ``gutenberg_spacy-ner``.
    """
    # Imported lazily so the module can be imported without these packages;
    # `datasets` must be installed separately (pip install datasets).
    import argilla as rg
    from datasets import load_dataset

    # Pull the train split from the hub.
    hub_dataset = load_dataset("argilla/gutenberg_spacy-ner", split="train")

    # Read it in, assuming it's a token classification dataset.
    records = rg.read_datasets(hub_dataset, task="TokenClassification")

    # Log the records under the dataset name.
    rg.log(records, "gutenberg_spacy-ner")
18
+
19
+
20
if __name__ == '__main__':
    # Poll the server root until it answers with HTTP 200, then load the
    # dataset once and stop.
    #
    # Every unsuccessful attempt (a non-200 status, a refused or timed-out
    # connection, or an error raised by load_data()) waits before retrying.
    # Without the wait on the exception path, the loop would spin at full
    # speed and flood the log while the server is still starting up.
    while True:
        try:
            # The timeout keeps one stalled connection from blocking the
            # retry loop indefinitely.
            response = requests.get("http://0.0.0.0:6900/", timeout=10)
            if response.status_code == 200:
                load_data()
                print("Data Loaded!")
                break
        except Exception as e:
            # Best-effort startup task: report the problem and keep retrying
            # rather than crash the background loader.
            print(e)
        time.sleep(5)
start.sh CHANGED
@@ -4,8 +4,13 @@ set -e
4
 
5
  echo "ubuntu" | sudo -S su user
6
 
7
- # Starting elasticsearch & argilla
8
  sudo /etc/init.d/elasticsearch start
9
  echo "Waiting for elasticsearch to start"
10
  sleep 15
 
 
 
 
 
11
  uvicorn argilla:app --host "0.0.0.0"
 
4
 
5
  echo "ubuntu" | sudo -S su user
6
 
7
+ # Start elasticsearch
8
  sudo /etc/init.d/elasticsearch start
9
  echo "Waiting for elasticsearch to start"
10
  sleep 15
11
+
12
+ # Load data
13
+ python3 /app/load_data.py &
14
+
15
+ # Start argilla
16
  uvicorn argilla:app --host "0.0.0.0"