vincentclaes committed on
Commit
4161807
·
1 Parent(s): a59370d

format code

Browse files
Files changed (2) hide show
  1. app.py +22 -7
  2. scrape_website.py +4 -14
app.py CHANGED
@@ -1,8 +1,14 @@
 
1
  import torch
2
- from peft import PeftModel
3
  import transformers
4
- import gradio as gr
 
 
 
 
 
5
  from scrape_website import process_webpage
 
6
  assert (
7
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
8
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -19,6 +25,7 @@ else:
19
  device = "cpu"
20
 
21
  try:
 
22
  if torch.backends.mps.is_available():
23
  device = "mps"
24
  except:
@@ -71,6 +78,7 @@ def generate_prompt(instruction, input=None):
71
  {instruction}
72
  ### Response:"""
73
 
 
74
  if device != "cpu":
75
  model.half()
76
  model.eval()
@@ -122,7 +130,9 @@ g = gr.Interface(
122
  gr.components.Textbox(
123
  lines=2, label="FAQ", placeholder="Ask me anything about this website?"
124
  ),
125
- gr.components.Textbox(lines=1, label="Website URL", placeholder="https://www.meet-drift.ai/"),
 
 
126
  # gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
127
  # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
128
  # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
@@ -139,11 +149,16 @@ g = gr.Interface(
139
  ],
140
  title="FAQ A Website",
141
  examples=[
142
- ["Can you list the capabilities this company has in bullet points?", "https://www.meet-drift.ai/"],
 
 
 
143
  ["What's the name of the founder?", "https://www.meet-drift.ai/about"],
144
- ["in 1 word what's the service the company is providing?", "https://www.meet-drift.ai/"],
145
- ]
146
- # description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 
 
147
  )
148
  g.queue(concurrency_count=1)
149
  g.launch()
 
1
+ import gradio as gr
2
  import torch
 
3
  import transformers
4
+
5
+ # https://github.com/huggingface/peft
6
+ # Parameter-Efficient Fine-Tuning (PEFT) methods enable efficient adaptation of pre-trained language models (PLMs)
7
+ # to various downstream applications without fine-tuning all the model's parameters.
8
+ from peft import PeftModel
9
+
10
  from scrape_website import process_webpage
11
+
12
  assert (
13
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
14
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
 
25
  device = "cpu"
26
 
27
  try:
28
+ # The "mps" device enables high-performance GPU execution on macOS devices via the Metal programming framework.
29
  if torch.backends.mps.is_available():
30
  device = "mps"
31
  except:
 
78
  {instruction}
79
  ### Response:"""
80
 
81
+
82
  if device != "cpu":
83
  model.half()
84
  model.eval()
 
130
  gr.components.Textbox(
131
  lines=2, label="FAQ", placeholder="Ask me anything about this website?"
132
  ),
133
+ gr.components.Textbox(
134
+ lines=1, label="Website URL", placeholder="https://www.meet-drift.ai/"
135
+ ),
136
  # gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
137
  # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
138
  # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
 
149
  ],
150
  title="FAQ A Website",
151
  examples=[
152
+ [
153
+ "Can you list the capabilities this company has in bullet points?",
154
+ "https://www.meet-drift.ai/",
155
+ ],
156
  ["What's the name of the founder?", "https://www.meet-drift.ai/about"],
157
+ [
158
+ "in 1 word what's the service the company is providing?",
159
+ "https://www.meet-drift.ai/",
160
+ ],
161
+ ],
162
  )
163
  g.queue(concurrency_count=1)
164
  g.launch()
scrape_website.py CHANGED
@@ -1,9 +1,8 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
 
4
- TOKEN_CUT_OFF = 2500
5
 
6
- def process_webpage(url:str):
7
  # A set to keep track of visited pages
8
  visited_pages = set()
9
 
@@ -36,9 +35,6 @@ def process_webpage(url:str):
36
 
37
  text_list.append(text_content)
38
 
39
- # Get the text content of the landing page
40
- # get_child_pages(url)
41
-
42
  # Make a GET request to the page and get the HTML content
43
  response = requests.get(url)
44
  html_content = response.content
@@ -52,15 +48,9 @@ def process_webpage(url:str):
52
  for element in soup.find_all(tag):
53
  text_content += element.get_text() + " "
54
 
55
- # # make main page as first item
56
- # text_list.reverse()
57
- # text_list_cut_off = text_list[:TOKEN_CUT_OFF]
58
- # page_content = "\n".join(text_list_cut_off)
59
- # # Print the text content of the landing page and all child pages
60
- # print(page_content)
61
- # return page_content
62
  print(text_content)
63
  return text_content
64
 
65
- if __name__ == '__main__':
66
- process_webpage(url="https://www.meet-drift.ai/")
 
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
 
 
4
 
5
+ def process_webpage(url: str):
6
  # A set to keep track of visited pages
7
  visited_pages = set()
8
 
 
35
 
36
  text_list.append(text_content)
37
 
 
 
 
38
  # Make a GET request to the page and get the HTML content
39
  response = requests.get(url)
40
  html_content = response.content
 
48
  for element in soup.find_all(tag):
49
  text_content += element.get_text() + " "
50
 
 
 
 
 
 
 
 
51
  print(text_content)
52
  return text_content
53
 
54
+
55
+ if __name__ == "__main__":
56
+ process_webpage(url="https://www.meet-drift.ai/")