Th3BossC committed on
Commit
34f203c
·
1 Parent(s): a740165

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **/__pycache__/
2
+ .venv/
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Hugging Face Docker Space.
# Reference: https://huggingface.co/docs/hub/spaces-sdks-docker
# (that page also has guidance on how best to write a Dockerfile)

FROM python:3.9

# Install the Python dependencies first, from /code, so this layer only
# depends on requirements.txt and not on the rest of the sources.
WORKDIR /code
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Create an unprivileged user (UID 1000) and switch to it for everything
# that follows.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Copy the application into the user's home, owned by that user.
WORKDIR $HOME/app
COPY --chown=user . $HOME/app

# Start the server through the script's __main__ guard.
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from qnabackend import create_app


app = create_app()


@app.route('/')
def home():
    """Return a fixed greeting so the root URL responds."""
    return "hello world"


if __name__ == '__main__':
    # The Dockerfile launches the server with `python app.py`, so these
    # defaults must work inside the container: listen on every interface
    # (not only loopback) on port 7860, and keep the Werkzeug debugger off
    # because it allows arbitrary code execution when reachable.
    # For local development override via environment, e.g.
    #   FLASK_DEBUG=1 HOST=127.0.0.1 PORT=5000 python app.py
    app.run(
        debug=os.environ.get("FLASK_DEBUG", "0") == "1",
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
qnabackend/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask
2
+ from qnabackend.config import Config
3
+ from flask_cors import CORS
4
+
5
def create_app(config_class = Config):
    """Application factory: build and return the configured Flask app.

    Args:
        config_class: Object whose upper-case attributes are loaded into
            ``app.config``. Defaults to ``Config``.

    Returns:
        The Flask application with CORS enabled and the ``resources``
        blueprint registered.
    """
    # Imported inside the factory rather than at module top. The routes
    # module imports qnabackend.common.utils, which loads the model when
    # it is first imported.
    from qnabackend.resources.routes import resources as resources_blueprint

    flask_app = Flask(__name__)

    # CORS is attached before the config object is applied, as before.
    CORS(flask_app)
    flask_app.config.from_object(config_class)

    flask_app.register_blueprint(resources_blueprint)
    return flask_app
qnabackend/common/__init__.py ADDED
File without changes
qnabackend/common/utils.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import requests
3
+
4
def getText(url : str, timeout : float = 10, max_chars : int = 20000):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to scrape.
            NOTE(review): callers pass this straight from the request body,
            so the server will fetch any address a client supplies; consider
            restricting schemes/hosts if that is a concern.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.
        max_chars: Upper bound on the length of the scraped text.

    Returns:
        The paragraph texts joined by single spaces, or ``None`` when the
        page could not be fetched, did not answer with HTTP 200, or the
        text is longer than ``max_chars``.
    """
    try:
        response = requests.get(url, timeout = timeout)
    except requests.RequestException:
        # Invalid URL, DNS failure, refused connection, timeout, ...:
        # report it the same way as a non-200 answer instead of crashing.
        print("[INFO] couldn't access website data, try again")
        return None

    if response.status_code != 200:
        print("[INFO] couldn't access website data, try again")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    text_elements = soup.find_all(['p'])
    scraped_text = ' '.join(element.get_text() for element in text_elements)

    if len(scraped_text) > max_chars:
        print("[ERROR] page too large to perform qna")
        return None

    return scraped_text
22
+
23
+
24
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
25
+
26
# Checkpoint name shared by the model and its tokenizer.
_CHECKPOINT = 'google/flan-t5-large'

# Both objects are created once, when this module is imported, and are then
# reused by every getAnswer() call.
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
28
+
29
def getAnswer(url : str, question : str):
    """Answer ``question`` using the paragraph text scraped from ``url``.

    Args:
        url: Page whose text is used as the context for the model.
        question: Question to answer from that context.

    Returns:
        The decoded model output as a string. When no text could be
        obtained from ``url`` a fixed explanatory message is returned
        instead of querying the model.
    """
    context = getText(url)

    # getText() returns None on any failure; formatting that into the prompt
    # would make the model answer from the literal word "None".
    if not context:
        return "Unable to retrieve readable text from the given URL."

    inputs = tokenizer(f"context : {context}, question : {question}", return_tensors = 'pt').input_ids

    # NOTE(review): temperature / top_k / top_p are sampling settings; per
    # the transformers generation docs they only apply when do_sample=True,
    # which is not set here, so decoding is plain beam search. They are kept
    # so the call stays identical - confirm whether sampling was intended.
    outputs = model.generate(
        inputs,
        min_length = 10,
        max_new_tokens = 600,
        length_penalty = 1,
        num_beams = 3,
        no_repeat_ngram_size = 3,
        temperature = 0.7,
        top_k = 110,
        top_p = 0.8,
        repetition_penalty = 2.1
    )

    # Only the first returned sequence is decoded.
    answer = tokenizer.decode(outputs[0], skip_special_tokens = True)

    return answer
qnabackend/config.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
import os
import secrets


class Config:
    """Settings object loaded by ``create_app`` via ``app.config.from_object``."""

    # Taken from the SECRET_KEY environment variable so the key is not stored
    # in the repository. When it is unset, a random 32-hex-character key is
    # generated at import time; that key changes on every start and differs
    # between processes, so set the variable for any deployment that relies
    # on signed data surviving a restart.
    SECRET_KEY = os.environ.get('SECRET_KEY') or secrets.token_hex(16)
qnabackend/resources/__init__.py ADDED
File without changes
qnabackend/resources/routes.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, request
2
+ from flask_restful import Api, Resource
3
+ from qnabackend.common.utils import getAnswer
4
+
5
resources = Blueprint('resources', __name__)
api = Api(resources)


class Backend(Resource):
    """REST resource that answers a question about the page at a URL."""

    def post(self):
        """Handle ``POST /question``.

        Expects a JSON object with string fields ``url`` and ``question``.

        Returns:
            ``{'question': ..., 'answer': ...}`` on success, or a
            ``{'message': ...}`` body with status 400 when the request body
            is not a JSON object or a field is missing/empty.
        """
        # silent=True yields None instead of raising when the body is not
        # valid JSON, so malformed requests get the 400 below.
        payload = request.get_json(silent = True)
        if not isinstance(payload, dict):
            return {'message' : "request body must be a JSON object"}, 400

        url = payload.get('url')
        question = payload.get('question')

        # Previously a missing key raised KeyError and surfaced as a 500.
        for field_name, value in (('url', url), ('question', question)):
            if not isinstance(value, str) or not value.strip():
                return {'message' : f"'{field_name}' must be a non-empty string"}, 400

        answer = getAnswer(url, question)

        return {'question' : question, 'answer' : answer}


api.add_resource(Backend, '/question')
requirements.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aniso8601==9.0.1
2
+ beautifulsoup4==4.12.2
3
+ blinker==1.6.2
4
+ certifi==2023.7.22
5
+ charset-normalizer==3.2.0
6
+ click==8.1.6
7
+ colorama==0.4.6
8
+ filelock==3.12.2
9
+ Flask==2.3.2
10
+ Flask-Cors==4.0.0
11
+ Flask-RESTful==0.3.10
12
+ fsspec==2023.6.0
13
+ huggingface-hub==0.16.4
14
+ idna==3.4
15
+ itsdangerous==2.1.2
16
+ Jinja2==3.1.2
17
+ MarkupSafe==2.1.3
18
+ mpmath==1.3.0
19
+ networkx==3.1
20
+ numpy==1.25.2
21
+ packaging==23.1
22
+ pytz==2023.3
23
+ PyYAML==6.0.1
24
+ regex==2023.8.8
25
+ requests==2.31.0
26
+ safetensors==0.3.2
27
+ six==1.16.0
28
+ soupsieve==2.4.1
29
+ sympy==1.12
30
+ tokenizers==0.13.3
31
+ torch==2.0.1
32
+ tqdm==4.66.1
33
+ transformers==4.31.0
34
+ typing_extensions==4.7.1
35
+ urllib3==2.0.4
36
+ Werkzeug==2.3.6