Find-it-Auto

Sleeping

File size: 14,760 Bytes

d6afb45
 
7ee1b98
d6afb45
 
2a7d1fa
 
 
 
 
46d853f
2a7d1fa
 
 
 
 
57c61e4
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
5039147
 
2a7d1fa
 
5039147
2a7d1fa
5039147
 
2a7d1fa
5039147
 
 
 
2a7d1fa
 
 
 
d513153
2a7d1fa
20cc82f
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5039147
2a7d1fa
455d65b
2a7d1fa
 
 
455d65b
2a7d1fa
 
 
 
 
 
d6afb45
2a7d1fa
 
 
 
 
 
 
 
 
29fe941
 
2a7d1fa
 
 
 
 
 
 
 
 
 
 
f7222e9
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267d21f
6fc27a6
4a7dea6
2a7d1fa
 
 
 
 
 
 
 
 
 
 
6a02a37
 
2a7d1fa
6a02a37
2a7d1fa
6a02a37
2a7d1fa
 
 
 
d160b4b
2a7d1fa
d513153
 
 
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8832566
2a7d1fa
df4b728
2a7d1fa
fc6479b
20cc82f
 
df4b728
 
9f5fe72
2a7d1fa
 
 
 
 
 
 
 
 
 
20cc82f
 
 
 
 
2a7d1fa
20cc82f
 
 
 
 
 
 
07bc596
2a7d1fa
20cc82f
2a7d1fa
fc6479b
2a7d1fa
b97439a
2a7d1fa
455d65b
8812113
2a7d1fa
 
455d65b
8812113
2a7d1fa
 
 
 
 
f586a70
 
 
 
4551e44
9fe62de
f586a70
df0618d
07cfa54
f586a70
3381f0b
9fe62de
3381f0b
c69bac0
817d95e
7313962
c69bac0
 
 
43954cf
9fe62de
ba81343
 
 
 
8eb0cc4
7313962
21a312e
f586a70
 
 
289044f
4551e44
6005136
d6afb45
 
 
 
 
 
 
 
 
 
2a7d1fa
 
e1bf925
 
 
 
dc21b34
e1bf925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d28c4c
e1bf925
 
fa8ddec
e1bf925
 
 
 
 
 
 
 
fa8ddec
e1bf925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a7d1fa
 
 
 
 
 
d6afb45
2a7d1fa
 
 
 
 
 
 
d6afb45
2a7d1fa
d6afb45
 
f6ad611
 
 
 
 
 
f586a70
 
 
56e3a34
26f9624
792d4ad
4551e44
f586a70
 
4551e44
f586a70
176890c
d6afb45
2a7d1fa

import gradio as gr
import urllib.request
import requests
import bs4
import lxml
import os
#import subprocess
from huggingface_hub import InferenceClient,HfApi
import random
import json
import datetime
#from query import tasks
from prompts import (
    FINDER,
    COMPRESS_HISTORY_PROMPT,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
    PREFIX,
    TASK_PROMPT,
)
# Hugging Face Hub API handle; call_search uses it to list models.
api=HfApi()



# Inference client bound to the Mixtral instruct model; run_gpt streams
# text generations through it.
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

def parse_action(string: str):
    """Split an agent ``action:`` line into a tool name and an optional input.

    Expected shapes are ``action: NAME`` and ``action: NAME action_input=VALUE``.
    The name is whatever sits between the ``action:`` prefix and the
    ``action_input=`` marker, with surrounding whitespace removed, so the
    result no longer depends on there being exactly one space after the colon.
    The value has surrounding whitespace and then surrounding single/double
    quotes removed.

    Args:
        string: One line of model output beginning with ``action:``.

    Returns:
        ``(name, value)``; ``value`` is ``None`` when the line carries no
        ``action_input=`` marker.

    Raises:
        ValueError: If ``string`` does not start with ``action:``.
    """
    prefix = "action:"
    marker = "action_input="

    print("PARSING:")
    print(string)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not string.startswith(prefix):
        raise ValueError(
            "expected a line starting with {!r}, got {!r}".format(prefix, string)
        )

    body = string[len(prefix):]
    idx = body.find(marker)
    print(idx)
    if idx == -1:
        name = body.strip()
        print("idx == -1")
        print(name)
        return name, None

    name = body[:idx].strip()
    # Strip whitespace first so a trailing space cannot shield a closing quote.
    value = body[idx + len(marker):].strip().strip("'").strip('"')
    print("last return:")
    print(name)
    print(value)
    return name, value



# When true, run_gpt prints the formatted prompt/response and run_action
# announces history compression.
VERBOSE = True
# run_action compresses the history once it has more than this many
# newline-separated lines.
MAX_HISTORY = 100
# Size threshold: the search/scrape tools compress results longer than this,
# and compress_data divides its work by this value.
MAX_DATA = 1000

def format_prompt(message, history):
    """Render prior turns plus a new message using ``[INST]`` markup.

    Args:
        message: The new user message to place in the final ``[INST]`` block.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        A string starting with ``<s>``, one ``[INST] … [/INST] …</s> ``
        segment per history pair, and ending with the new message's
        ``[INST] … [/INST]`` block.
    """
    past_turns = [
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    ]
    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

def call_search(purpose, task, history, action_input):
    """Search the Hugging Face Hub model listing and report the results.

    The query is ``action_input`` with surrounding whitespace and double
    quotes removed. Matching models are listed through ``api.list_models`` and
    summarised as a list of dicts; when more than ``MAX_DATA`` models come
    back, the list is condensed with ``compress_data``.

    Args:
        purpose: Overall goal, forwarded to ``compress_data``.
        task: Current task; returned unchanged.
        history: Incoming history. NOTE(review): as in the original code, the
            returned history contains only the new observation and does not
            carry this value over — confirm that is intended.
        action_input: Filter string for the model search.

    Returns:
        ``(next_action, None, history, task)``. ``next_action`` is ``"MAIN"``
        after a search (including one with zero hits) and ``"UPDATE-TASK"``
        when the query is missing or the search raised.
    """
    retry_hint = "observation: I need to trigger a search using the following syntax:\naction: SEARCH action_input=URL\n"

    print (action_input)
    print ("trying")

    # str.strip returns a new string; the cleaned value has to be kept and used.
    query = action_input.strip().strip('"') if isinstance(action_input, str) else ""
    if not query:
        return "UPDATE-TASK", None, retry_hint, task

    try:
        models = list(api.list_models(filter=f"{query}"))
        # An empty result is a valid answer, not a failure: only peek at the
        # first entry when there is one.
        if models:
            print(f'THIS_OBJ :: {models[0]}')
        return_list = [
            {
                "id": model.id,
                "author": model.author,
                "created_at": model.created_at,
                "last_modified": model.last_modified,
                "private": model.private,
                "gated": model.gated,
                "disabled": model.disabled,
                "downloads": model.downloads,
                "likes": model.likes,
                "library_name": model.library_name,
                "tags": model.tags,
                "pipeline_tag": model.pipeline_tag,
            }
            for model in models
        ]
        rl = len(return_list)
        print(rl)
        # Diagnostic only: number of spaces/commas in the rendered result.
        print(sum(1 for ch in str(return_list) if ch in (" ", ",")))
        if rl > MAX_DATA:
            print("compressing...")
            return_list = compress_data(rl, purpose, task, return_list)
        history = "observation: the search results are:\n {}\n".format(return_list)
        return "MAIN", None, history, task
    except Exception as e:
        # Best-effort tool call inside the agent loop: report and let the
        # agent re-plan instead of crashing the session.
        print (e)
        return "UPDATE-TASK", None, retry_hint, task


def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Format a prompt, stream a completion from ``client`` and return its text.

    The prompt is ``PREFIX`` (filled with the current timestamp and
    ``purpose``) followed by ``prompt_template`` filled with
    ``prompt_kwargs``. Streamed token texts are concatenated in order.

    Args:
        prompt_template: Format string for the task-specific prompt part.
        stop_tokens: Accepted for interface compatibility; not used here.
        max_tokens: Passed as ``max_new_tokens``.
        seed: Sampling seed; also printed.
        purpose: Value substituted into ``PREFIX``.
        **prompt_kwargs: Values substituted into ``prompt_template``.

    Returns:
        The full generated text as one string.
    """
    timestamp = datetime.datetime.now()

    print(seed)

    header = PREFIX.format(timestamp=timestamp, purpose=purpose)
    content = header + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(
        content,
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Each streamed item exposes the newly generated token's text.
    resp = "".join(piece.token.text for piece in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp

def compress_data(c,purpose, task, history):
    """Condense ``history`` chunk by chunk with the model.

    ``history`` is sliced into windows of ``MAX_DATA`` elements. Each window
    is sent to ``run_gpt`` with ``COMPRESS_DATA_PROMPT_SMALL`` together with
    the previous response (as ``knowledge``), so every call refines the
    running summary. The last response becomes the result.

    Args:
        c: Size of the data in the same units used to slice ``history``; it
            determines the maximum number of windows (``ceil(c / MAX_DATA)``).
        purpose: Overall goal, forwarded to the prompt.
        task: Current task; embedded in the compile instruction given to the
            model.
        history: Sliceable data (string or list) to condense.

    Returns:
        ``"observation: <summary>\\n"``; the summary is empty when there was
        nothing to process.
    """
    seed=random.randint(1,1000000000)

    print (c)
    total = int(c)
    # The window size is MAX_DATA itself; deriving it through a float division
    # round-trip could come out one short.
    chunk = MAX_DATA
    # Integer ceiling division; zero windows for an empty/zero-sized input
    # instead of a ZeroDivisionError.
    divi = (total + chunk - 1) // chunk if total > 0 else 0
    print(f'chunk:: {chunk}')
    print (f'divi:: {divi}')

    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    new_history = ""
    resp = ""
    for z in range(divi):
        s = z * chunk
        e = s + chunk
        print(f's:e :: {s}:{e}')

        hist = history[s:e]
        if not hist:
            # `c` over-estimated the data: stop rather than spend model calls
            # on empty windows and let an empty-window reply win.
            break

        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=hist,
        )
        # Feed the running summary into the next window.
        new_history = resp
        print (resp)

    print ("final" + resp)
    history = "observation: {}\n".format(resp)
    return history




def compress_history(purpose, task, history):
    """Ask the model to summarise ``history`` and return it as one observation.

    Args:
        purpose: Overall goal, substituted into the prompt.
        task: Current task, substituted into the prompt.
        history: History text to summarise.

    Returns:
        ``"observation: <summary>\\n"`` built from the model response.
    """
    request = dict(
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=512,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    summary = run_gpt(COMPRESS_HISTORY_PROMPT, **request)
    return "observation: {}\n".format(summary)


def call_main(purpose, task, history, action_input):
    """Ask the model for its next step and turn the reply into an action.

    The reply from ``run_gpt`` (``FINDER`` prompt) is scanned line by line:

    * ``action: COMPLETE`` ends the run; the line is not added to history.
    * any other ``action:`` line is parsed with ``parse_action``, appended to
      history, and returned as the next action.
    * every other non-empty line (``thought:`` or free text) is appended to
      history exactly once.

    Args:
        purpose: Overall goal, substituted into the prompt.
        task: Current task; returned unchanged.
        history: History text so far; extended with the processed lines.
        action_input: Unused on entry; present for the common tool signature.

    Returns:
        ``(action_name, action_input, history, task)``. When the reply holds
        no action line, ``("MAIN", None, history, task)``.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:", "action:"],
        max_tokens=512,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    for line in resp.strip().split("\n"):
        if line == "":
            continue
        if line.startswith("action: COMPLETE"):
            print("COMPLETE called")
            return "COMPLETE", None, history, task
        if line.startswith("action:"):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            history += "{}\n".format(line)
            return action_name, action_input, history, task
        # Thoughts and any other text are recorded once; previously a
        # "thought: " line was appended here and again in a trailing else.
        history += "{}\n".format(line)
    # Honour the module flag; the literal string "VERBOSE" was always truthy.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task


def call_set_task(purpose, task, history, action_input):
    """Have the model write a new task and record the change in history.

    Args:
        purpose: Overall goal, substituted into the prompt.
        task: Current task, substituted into the prompt and then replaced.
        history: History text; an observation about the new task is appended.
        action_input: Unused; present for the common tool signature.

    Returns:
        ``("MAIN", None, history, task)`` with the updated history and task.
    """
    generated = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    # Only leading/trailing newlines are trimmed from the model's answer.
    task = generated.strip("\n")
    history += f"observation: task has been updated to: {task}\n"
    return "MAIN", None, history, task



###########################################################
def search_all(url):
    """Placeholder: ignores ``url`` and always returns an empty string."""
    return ""



def find_all(purpose,task,history, url):
    """Fetch ``url``, extract its text and report it as an observation.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup
    (``lxml``). The page text becomes the observation. In addition, one record
    is collected per ``a``/``p``/``span``/``content``/``article`` element;
    when the number of collected items exceeds ``MAX_DATA`` they are condensed
    with ``compress_data`` and the condensed text is reported instead.

    Args:
        purpose: Overall goal, forwarded to ``compress_data``.
        task: Current task; replaced by ``"complete?"`` after a successful
            fetch, returned unchanged otherwise.
        history: History text; the observation is appended to it.
        url: Address of the page to scrape.

    Returns:
        ``("MAIN", None, history, task)`` in every case. A missing URL or any
        error during fetch/parse appends a usage hint instead of results.
    """
    retry_hint = "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"

    print (url)
    print (f"trying URL:: {url}")

    if url is None or url == "":
        history += retry_hint
        return "MAIN", None, history, task

    try:
        # A timeout keeps a stalled server from hanging the agent loop forever.
        source = requests.get(url, timeout=30)
        soup = bs4.BeautifulSoup(source.content,'lxml')

        # Pages without a <title> are valid; only log title details if present.
        title = soup.title
        if title is not None:
            print(title)
            print(title.name)
            print(title.string)
            print(title.parent.name)
        print([tag.name for tag in soup.find_all()])

        rawp=(f'RAW TEXT RETURNED:\n*********\n{soup.text}\n*********\n')
        out = [rawp]

        # Pass the names as a list so each one is matched; formatting the
        # whole tuple into a single string matched no element at all.
        q = ["a","p","span","content","article"]
        for p in soup.find_all(q):
            prev = p.previous
            out.append([{
                p.name: p.string,
                "additional": "",
                "parent": p.parent.name,
                "previous": list(prev) if prev is not None else [],
                "first-child": [b.name for b in p.children],
                "content": p,
            }])

        rl = len(out)
        print(f'rl:: {rl}')
        # Diagnostic only: number of spaces/commas across the collected items.
        c = sum(1 for ea in out for ch in str(ea) if ch in (" ", ","))
        print (f'c:: {c}')
        if rl > MAX_DATA:
            print("compressing...")
            # compress_data slices `out` by item, so size it by item count.
            rawp = compress_data(rl,purpose,task,out)
        print (rawp)
        history += "observation: the search results are:\n {}\n".format(rawp)
        task = "complete?"
        return "MAIN", None, history, task
    except Exception as e:
        # Best-effort tool call inside the agent loop: report and carry on.
        print (e)
        history += retry_hint
        return "MAIN", None, history, task


def find_it(url,q=None,num=None):
    """Download ``url`` and describe every element whose tag name is ``q``.

    Args:
        url: Address of the page to fetch.
        q: Tag name to look for. NOTE(review): the value is formatted into a
            string before the lookup, so the default ``None`` searches for a
            tag literally named ``None`` — kept as in the original; confirm
            the intended default.
        num: Optional attribute name to read from each matched element. When
            empty or ``None`` no attribute is read.

    Returns:
        ``(out, out_l)``. ``out`` holds one single-item list per matched
        element with a dict describing it (text under key ``q``, the requested
        attribute under ``"additional"``, parent tag name, the preceding
        element's items, child tag names, and the element itself). ``out_l``
        holds, per element, its text or — when it has none — the attribute
        value.
    """
    out = []
    out_l = []

    # Close the HTTP response deterministically once the body has been read.
    with urllib.request.urlopen(url) as page:
        source = page.read()
    soup = bs4.BeautifulSoup(source,'lxml')

    for p in soup.find_all(f'{q}'):
        # Only read an attribute when one was actually requested; the default
        # num=None previously looked up an attribute named "None".
        z = p.get(f'{num}') if num else ""
        prev = p.previous
        out.append([{
            q: p.string,
            "additional": z,
            "parent": p.parent.name,
            "previous": list(prev) if prev is not None else [],
            "first-child": [b.name for b in p.children],
            "content": p,
        }])
        out_l.append(p.string if p.string is not None else z)
        print(p.parent.name)

    # Log every hyperlink target on the page.
    for link in soup.find_all('a'):
        print(link.get('href'))

    return out,out_l
    
def find_it2(url):
    """Fetch ``url`` and return the text of all its ``<a>`` elements.

    Args:
        url: Address of the page to fetch.

    Returns:
        On success, a string starting with ``"URL Links:\\n"`` followed by one
        link text per line. On any failure (request error, HTTP error status,
        parse error) the caught exception object is printed and returned.
    """
    try:
        # requests.get rejects unknown keyword arguments, so only supported
        # ones are passed; the timeout bounds a stalled connection.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # The parser class lives in the imported `bs4` module.
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        link_texts = [p.text for p in soup.find_all('a')]
        # Header once, then one link per line (the header is not a separator).
        return 'URL Links:\n' + '\n'.join(link_texts)
    except Exception as e:
        print (e)
        return e
#################################

# Dispatch table used by run_action: maps an action name produced by the agent
# to the function that handles it. Every handler is called as
# handler(purpose, task, history, action_input).
# Both SEARCH_ENGINE and SCRAPE_WEBSITE currently route to find_all;
# call_search is not registered here.
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}


def run_action(purpose, task, history, action_name, action_input):
    """Execute one agent step by dispatching ``action_name`` to its handler.

    ``"COMPLETE"`` short-circuits without touching the history. Otherwise the
    history is first compressed when it has more than ``MAX_HISTORY`` lines,
    then the handler registered in ``NAME_TO_FUNC`` is called. An unknown
    action name appends an observation listing the selectable tools.

    Args:
        purpose: Overall goal, forwarded to the handler.
        task: Current task, forwarded to the handler.
        history: History text so far.
        action_name: Name of the action to run.
        action_input: Argument for the action, forwarded to the handler.

    Returns:
        ``(next_action_name, next_action_input, history, task)``.
    """
    if action_name == "COMPLETE":
        print("Complete - Exiting")
        return "COMPLETE", None, history, task

    # compress the history when it is long
    if len(history.split("\n")) > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    if action_name not in NAME_TO_FUNC:
        # The tool names listed here must match the NAME_TO_FUNC keys, or the
        # agent is steered toward a tool that does not exist.
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task

    print(f"RUN: {action_name}  ACTION_INPUT: {action_input}")
    return NAME_TO_FUNC[action_name](purpose, task, history, action_input)

def run(purpose,history):
    """Drive the agent loop, yielding the history after every step.

    Used as the ``fn`` of the chat interface. The incoming ``history``
    argument is discarded: each run starts from an empty history string and no
    task. Steps are executed through ``run_action`` until an action named
    ``"COMPLETE"`` comes back.

    Args:
        purpose: The user's request driving the run.
        history: Ignored; replaced by an empty string.

    Yields:
        The history text after each executed step.

    Returns:
        The final history text (as the generator's return value).
    """
    task = None
    history = ""
    action_input = None
    # With no task yet, the first step goes to the search tool.
    action_name = "MAIN" if task is not None else "SEARCH_ENGINE"
    divider = "---"

    while action_name != "COMPLETE":
        print("")
        print("")
        print(divider)
        print("task:", task)
        print(divider)
        print(divider)

        step = run_action(purpose, task, history, action_name, action_input)
        action_name, action_input, history, task = step
        yield history

    return history



# Sample prompts offered in the chat UI.
examples =[
    "find the most popular model that I can use to generate an image by providing a text prompt",
    "return the top 10 models that I can use to identify objects in images",
    "which models have the most likes from each category?"
]


# Build the chat UI around the `run` generator and start serving it at import
# time (this module has no `__main__` guard).
gr.ChatInterface(
    fn=run,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    title="Mixtral 46.7B Powered <br> Search",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)

# Disabled alternative UI, kept as an unused string literal so it never runs.
# NOTE(review): it wires find_all to three inputs (inp, q, num), which does not
# match find_all's current (purpose, task, history, url) signature — update the
# wiring before re-enabling.
'''
with gr.Blocks() as app:
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Textbox()
        with gr.Column(scale=2):
            q = gr.Textbox(value="p")
        with gr.Column(scale=2):
            num = gr.Textbox()
    with gr.Row():
        all_btn = gr.Button("Load")
        find_btn = gr.Button("Find")
    with gr.Row():
        rawp = gr.JSON()
        outp = gr.JSON()
        outl = gr.Textbox()
    
    all_btn.click(find_all,[inp,q,num],[rawp])
    find_btn.click(find_it,[inp,q,num],[outp,outl])
    
app.launch()

'''