"""Gradio app that loads the RSS feeds listed in ``feeds.json``.

Each entry in ``feeds.json`` is expected to provide a ``link`` key (and
optionally ``source``).  Every feed is downloaded, parsed, and reduced to a
list of ``{"title", "description", "link"}`` dicts shown in a JSON component.
"""
import gradio as gr
import requests
import bs4
import lxml
import os
from huggingface_hub import InferenceClient, HfApi
import random
import json
import datetime
import xmltodict

# NOTE: everything inside the following triple-quoted string is disabled
# legacy code (an LLM-driven RSS agent).  It is a bare string expression and
# is never executed; it also references names (VERBOSE, LOG_PROMPT,
# LOG_RESPONSE) that are not defined in the visible code.
"""
from prompts import (
    COMPRESS_HISTORY_PROMPT,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    PREFIX,
    TASK_PROMPT,
)
api=HfApi()

client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

def parse_action(string: str):
    print("PARSING:")
    print(string)
    assert string.startswith("action:")
    idx = string.find("action_input=")
    print(idx)
    if idx == -1:
        print ("idx == -1")
        print (string[8:])
        return string[8:], None

    print ("last return:")
    print (string[8 : idx - 1])
    print (string[idx + 13 :].strip("'").strip('"'))
    return string[8 : idx - 1], string[idx + 13 :].strip("'").strip('"')

MAX_HISTORY = 100
MAX_DATA = 20000

def format_prompt(message, history):
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response} "
    prompt += f"[INST] {message} [/INST]"
    return prompt

def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    timestamp=datetime.datetime.now()

    print(seed)
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose=purpose,
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    #formatted_prompt = format_prompt(f'{content}', **prompt_kwargs['history'])

    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    resp = ""
    for response in stream:
        resp += response.token.text
        #yield resp

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp

def compress_data(c,purpose, task, history, result):
    seed=random.randint(1,1000000000)
    print (c)
    divr=int(c)/MAX_DATA
    divi=int(divr)+1 if divr != int(divr) else int(divr)
    chunk = int(int(c)/divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')
    out = []
    #out=""
    s=0
    e=chunk
    print(f'e:: {e}')
    new_history=""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=hist,
        )
        new_history = resp
        print (resp)
        out+=resp
        e=e+chunk
        s=s+chunk
    '''
    resp = run_gpt(
        COMPRESS_DATA_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=2048,
        seed=seed,
        purpose=purpose,
        task=task,
        knowledge=new_history,
        history=result,
    )
    '''
    print ("final" + resp)
    history = "result: {}\n".format(resp)
    return history

def find_all(purpose,task,history, rss_url, result):
    return_list=[]
    #if action_input in query.tasks:
    print (f"trying URL:: {rss_url}")
    lod=""
    try:
        if rss_url != "" and rss_url != None:
            #rawp = []
            out = []
            r = requests.get(f'{rss_url}')
            if ".json" in rss_url:
                lod = json.loads(r.text)
            if ".xml" in rss_url:
                lod = xmltodict.parse(r.content)
            if ".rss" in rss_url:
                lod = xmltodict.parse(r.content)
            else:
                try:
                    lod = xmltodict.parse(r.content)
                except Exception as e:
                    history+=f"observation: could not complete RSS Search due to this error:\n{e}"
                    return "MAIN", None, history, task, result
            rawp = lod
            print(f'RAWP::\n{rawp}')
            cnt=0
            cnt+=len(rawp)
            out.append(rawp)
            out = str(out)
            rl = len(out)
            print(f'rl:: {rl}')
            c=0
            for i in str(out):
                c +=1
            print (f'c:: {c}')
            if c > MAX_HISTORY:
                print("compressing...")
                rawp = compress_data(c,purpose,task,out,result)
            else:
                rawp = out
            result += rawp

            print (rawp)
            print (f'out:: {out}')
            history = "observation: the search results are:\n {}\n".format(rawp)
            task = "compile report and return action: COMPLETE"
            return "MAIN", None, history, task, result
        else:
            history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: READ-RSS action_input=URL\n"
            return "UPDATE-TASK", None, history, task, result
    except Exception as e:
        print (e)
        history += "observation: I need to trigger a search using the following syntax:\naction: READ-RSS action_input=URL\n"
        return "UPDATE-TASK", None, history, task, result
    return "MAIN", None, history, task, result
"""

# Upper bound (seconds) for a single feed download so that one unresponsive
# host cannot stall the whole load.
_REQUEST_TIMEOUT = 15


def _fetch_feed(rss_url):
    """Download and parse one feed.

    Returns:
        A ``(data, error)`` pair.  On success ``data`` is the parsed document
        and ``error`` is ``None``; on failure ``data`` is ``None`` and
        ``error`` is a human-readable ``"<url> ::ERROR:: ..."`` string.
    """
    try:
        response = requests.get(f'{rss_url}', timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Network failures are reported per feed instead of aborting the run.
        return None, f'{rss_url} ::ERROR:: {e}'

    if response.status_code != 200:
        return None, f'{rss_url} ::ERROR::COULD NOT CONNECT:: {response.status_code}'

    try:
        # JSON URLs go to the JSON parser only; everything else (.xml, .rss,
        # or no recognisable extension) is treated as XML.
        if ".json" in rss_url:
            return json.loads(response.text), None
        return xmltodict.parse(response.content), None
    except Exception as e:  # best-effort: any parser failure becomes an error entry
        return None, f'{rss_url} ::ERROR:: {e}'


def _extract_links(parsed):
    """Reduce a parsed RSS document to ``{channel title: [item dicts]}``.

    Raises:
        KeyError / TypeError: when ``parsed`` does not have the
        ``rss -> channel -> item`` layout or an item lacks
        ``title``/``description``/``link``.
    """
    channel = parsed['rss']['channel']
    items = channel['item']
    if isinstance(items, dict):
        # xmltodict returns a single dict (not a list) when the channel
        # contains exactly one <item>.
        items = [items]
    link_box = [
        {
            "title": item['title'],
            "description": item['description'],
            "link": item['link'],
        }
        for item in items
    ]
    return {channel['title']: link_box}


def find_rss():
    """Load every feed listed in ``feeds.json`` and stream results to the UI.

    This is a generator used as a Gradio event handler with two outputs
    (a JSON component and a chatbot).

    Yields:
        First ``([], [(None, "loading sources")])`` as a progress message,
        then ``(out_box, [(None, "loading sources")])`` where ``out_box`` has
        one entry per feed: either ``{channel title: [item dicts]}`` or an
        ``"<url> ::ERROR:: ..."`` string describing why that feed failed.
    """
    out_box = []
    yield [], [(None, "loading sources")]

    with open('feeds.json', 'r', encoding='utf-8') as j:
        cont = json.load(j)

    for feed in cont:
        rss_url = feed['link']
        print(rss_url)

        parsed, error = _fetch_feed(rss_url)
        if error is not None:
            # Keep the original fetch/parse error instead of masking it with
            # a secondary failure from indexing into an error string.
            out_box.append(error)
            continue

        try:
            entry = _extract_links(parsed)
        except (KeyError, TypeError) as e:
            print(f'{feed.get("source")}')
            print(f'{rss_url}')
            print(f'Exception::{e}')
            entry = f'{rss_url} ::ERROR:: {e}'
        out_box.append(entry)

    # A generator's ``return`` value is not delivered to the outputs, so the
    # final result must be yielded.
    yield out_box, [(None, "loading sources")]


with gr.Blocks() as app:
    cb = gr.Chatbot()
    with gr.Row():
        rss_search = gr.Textbox(label="Instructions")
        search_btn = gr.Button("Send")
    with gr.Row():
        rss = gr.Textbox(label="rss")
        btn = gr.Button("load rss")
        r_btn = gr.Button("read")
    with gr.Row():
        out_json = gr.JSON()
        fil = gr.Textbox()
    # The chatbot component is bound to ``cb``.
    r_btn.click(find_rss, None, [out_json, cb])
    # Handlers below are not wired up: read_rss / get_rss are not defined in
    # the visible code.
    #r_btn.click(read_rss,None,[out_json,fil])
    #search_btn.click(find_rss,rss_search,out_json)
    #btn.click(get_rss,rss,out_json)
app.launch()