File size: 1,920 Bytes
4a318e0
a0bfec6
4a318e0
 
a0bfec6
 
 
 
 
 
 
 
 
 
 
4a318e0
 
a0bfec6
 
 
 
 
 
 
 
 
 
4a318e0
a0bfec6
 
 
 
 
4a318e0
 
 
 
 
 
 
 
 
a0bfec6
 
4a318e0
a0bfec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from ctransformers import AutoModelForCausalLM

# Load the Llama-2 7B chat GGML model identified by the repo id below; the
# keyword arguments are the generation settings handed to from_pretrained.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-chat-GGML",
    repetition_penalty=1.15,
    top_p=0.95,
    temperature=0.6,
    max_new_tokens=512,
)

# System prompt that generate_prompt() places between the <<SYS>> and <</SYS>>
# markers. The newline right after the opening quotes and the one before the
# closing quotes are part of the string.
system_message = """
You are a helpful, respectful and honest assistant. Your job is to answer the users query as best as possible given the Web Page Content. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. If you DO NOT KNOW THE ANSWER DO NOT SHARE FALSE INFORMATION.
You have been given scraped text content of a webpage under the section called "Web Page Content". Using this information answer the users query. However, if the webpage DOES NOT contain the answer to the query, you CAN answer based on your existing knowledge IF you are sure of the answer, but ALWAYS let the user know when doing so.
"""

def generate_prompt(system_message, context, prompt):
  """Assemble the instruction prompt sent to the chat model.

  Args:
    system_message: Text placed between the ``<<SYS>>`` and ``<</SYS>>`` tags.
    context: Web page text, wrapped in a triple-backtick fence under the
      "Web Page Content:" heading.
    prompt: The user's question, placed just before the closing ``[/INST]``.

  Returns:
    The complete prompt as one string.
  """
  sections = (
      f"[INST] <<SYS>>\n{system_message}\n<</SYS>>",
      "",  # blank line between the system block and the page content
      f"Web Page Content:\n```\n{context}\n```",
      "",  # blank line between the page content and the question
      f"{prompt} [/INST]",
  )
  return "\n".join(sections)

import requests
from bs4 import BeautifulSoup
import re

def scraper(url, timeout=10):
  """Download a web page and return its text with whitespace normalised.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds to wait for the server before giving up; forwarded to
      ``requests.get``.

  Returns:
    The text extracted from the page, with every run of whitespace collapsed
    to a single space and leading/trailing whitespace stripped.

  Raises:
    requests.exceptions.RequestException: If the request fails or the
      timeout expires.
  """
  # Without an explicit timeout, requests waits indefinitely on a server that
  # never answers, which would block the caller forever.
  response = requests.get(url, timeout=timeout)
  soup = BeautifulSoup(response.content, "html.parser")
  # Extract the text a single time and collapse newlines/tabs/space runs.
  return re.sub(r'\s+', ' ', soup.get_text()).strip()

def run(url, input):
  """Answer a question about a web page.

  Scrapes the page at ``url``, builds a prompt from the module-level system
  message, the scraped text and the question, and passes it to the model.

  Args:
    url: Address of the page to scrape.
    input: The user's question.

  Returns:
    Whatever the model call returns for the assembled prompt.
  """
  page_text = scraper(url)
  llm_prompt = generate_prompt(
      system_message=system_message,
      context=page_text,
      prompt=input,
  )
  return model(llm_prompt)

import gradio as gr

# Web UI with two text inputs, passed in order to run(url, input), and one
# text output showing the value run() returns.
iface = gr.Interface(
    title="Web Query App",
    description="Enter the webpage url and your query\nIMPORTANT: Larger webpages are likely to cause error due to lack of computational resources",
    fn=run,
    inputs=["text", "text"],
    outputs="text",
)

# Start serving the app, with inline display turned off.
iface.launch(inline=False)