Spaces:

warhawkmonk
/

mutimodal

Running

App Files Files Community

warhawkmonk committed 28 days ago

Commit

e941113

verified ·

1 Parent(s): 9b65b91

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -92

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import numpy as np
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from sentence_transformers import SentenceTransformer,util
 from streamlit_image_select import image_select
 import os
 import fitz
@@ -66,34 +67,92 @@ def encoding_model():
     model = SentenceTransformer(model_name)
     return model
-# def consume_llm_api(prompt):
-#     client = Groq(
-#         api_key="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"
-#     )
-#     completion = client.chat.completions.create(
-#         model="llama-3.3-70b-versatile",
-#         messages=[
-#             {
-#                 "role": "system",
-#                 "content": prompt
-#             },
-#         ],
-#         temperature=1,
-#         # max_completion_tokens=1024,
-#         top_p=1,
-#         stream=True,
-#         stop=None,
-#     )
-#     for chunk in completion:
-#         if chunk.choices[0].delta.content:
-#             yield chunk.choices[0].delta.content
 def send_prompt():
     """Return the fixed instruction sentence placed between the retrieved context and the user's prompt."""
     instruction = "please respond according to the prompt asked below from the above context"
     return instruction
@@ -271,8 +330,14 @@ with column2:
                     if prompts_[-1]=="@working":
                         if index==0:
-                            st.write(prompts_[0].split(send_prompt())[-1].upper() if send_prompt() in prompts_[0] else prompts_[0].upper())
-                            data_need=st.write_stream(consume_llm_api(prompts_[0]))
                             dictionary['every_prompt_with_val'][-1]=(prompts_[0],str(data_need))
                     elif isinstance(prompts_[-1],str):
@@ -529,68 +594,82 @@ with st.spinner('Wait for it...'):
     with column1:
     # Create a canvas component
         changes,implementation,current=st.columns([0.01,0.9,0.01])
         model = encoding_model()
         with implementation:
                     st.write("<br>"*3,unsafe_allow_html=True)
                     if bg_doc:
                         canvas_result=None
-                        with open("temp.pdf", "wb") as f:
-                            f.write(bg_doc.getbuffer())
-                        # Process the uploaded PDF file
-                        data = process_pdf("temp.pdf")
-                        if str(data) not in dictionary['text_embeddings']:
-                            dictionary['text_embeddings']={}
-                            text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
-                            chunks = text_splitter.split_documents(data)
-                            dictionary['text_embeddings'][str(data)]={str(chunk.page_content):np.array(model.encode(str(chunk.page_content))) for chunk in chunks}
-                            embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
-                            vector_store = []
-                            for i in dictionary['text_embeddings'][str(data)]:
-                                vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
-                        else:
-                            embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
-                            vector_store = []
-                            for i in dictionary['text_embeddings'][str(data)]:
-                                vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
-                        # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-                        # chunks = text_splitter.split_documents(data)
-                        # # chunk_texts = [str(chunk.page_content) for chunk in chunks]
-                        # # print("testing",chunk_texts)
-                        # model = encoding_model()
-                        # embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
-                        # vector_store = []
-                        # for chunk, embedding in zip(chunks, embeddings):
-                        #     vector_store.append((embedding, chunk.page_content) )
-                    else:
-                        if screen_width<=485:
-                            screen_width = screen_width//1.05
                         else:
-                            screen_width = int(screen_width//2.3)  if screen_width!=820 else int(screen_width//2)
-                        screen_height = int(screen_height//2.16) if screen_height!=1180 else int(screen_height//2)
                         canvas_result = st_canvas(
                             fill_color="rgba(0, 0, 0, 0.3)",  # Fixed fill color with some opacity
                             stroke_width=stroke_width,
                             stroke_color=stroke_color,
                             background_color=bg_color,
-                            background_image=gen_image if gen_image else Image.open("/home/user/app/ALL_image_formation/image_gen.png"),
                             update_streamlit=True,
-                            height=screen_height,
-                            width=screen_width,
                             drawing_mode=drawing_mode,
                             point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
                             key="canvas",
                         )
@@ -603,46 +682,50 @@ with column1:
     # run=st.button("run_experiment")
 if bg_doc:
-    with st.spinner('Wait for it...'):
-        if len(dictionary['every_prompt_with_val'])==0:
-            query_embedding = model.encode(["something"])
-        else:
-            query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
         retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for  match in vector_store])[-1]
         with implementation:
-            with st.spinner('Wait for it...'):
                 text_lookup=retrieved_chunks
                 pages=[]
                 buffer = bg_doc.getbuffer()
                 byte_data = bytes(buffer)
-                with fitz.open("temp.pdf") as doc:
                     for page_no in range(doc.page_count):
                         pages.append(doc.load_page(page_no - 1))
-                    # areas = pages[page_number-1].search_for(text_lookup)
                     with st.container(height=int(screen_height//1.8)):
                         for pg_no in pages[::-1]:
                             areas = pg_no.search_for(text_lookup)
                             for area in areas:
                                 pg_no.add_rect_annot(area)
                             pix = pg_no.get_pixmap(dpi=100).tobytes()
-                            st.image(pix,use_container_width=True)
 if bg_doc and prompt:
     query_embedding = model.encode([prompt])
-    retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for  match in vector_store])[-1]
-    print(retrieved_chunks)
-    prompt = "Context: "+ retrieved_chunks +"\n"+send_prompt()+ "\n"+prompt
-    modifiedValue="@working"
-    dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
-    st.rerun()
 elif not bg_doc and canvas_result.image_data is not None:
     if prompt:

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from sentence_transformers import SentenceTransformer,util
+from code_editor import code_editor
 from streamlit_image_select import image_select
 import os
 import fitz
     model = SentenceTransformer(model_name)
     return model
+def executer(query):
+    try:
+        output = io.StringIO()
+        sys.stdout = output
+        exec(query)
+        sys.stdout = sys.__stdout__
+        print(output.getvalue())
+        return False
+    except Exception as e:
+        return f"Error: {str(e)}"
+def dataframe_info(data):
+    value= data[:5]
+    return str(value)
+def extract_python_code(text):
+    """
+    Extracts code blocks from a given text.
+    Supports triple-backtick blocks, indented code, and inline code.
+    """
+    code_snippets = []
+    # Extract triple-backtick code blocks
+    triple_backtick_blocks = re.findall(r"```(?:[\w+\-]*)\n(.*?)```", text, re.DOTALL)
+    code_snippets.extend(triple_backtick_blocks)
+    # Extract indented code blocks (4 spaces or tab)
+    indented_blocks = re.findall(r"(?:^|\n)((?:    |\t).+(\n(?:    |\t).+)*)", text)
+    code_snippets.extend([block[0] for block in indented_blocks])
+    # Extract inline code snippets with single backticks
+    inline_code = re.findall(r"`([^`\n]+)`", text)
+    code_snippets.extend(inline_code)
+    return [snippet.strip() for snippet in code_snippets if snippet.strip()]
+# @st.cache_resource
+def run_code_blocks(code_blocks,df):
+    """
+    Prepare a code string and an output buffer for `code_blocks`.
+
+    As shown here the function only builds its locals: it imports `io` and
+    `redirect_stdout`, creates a StringIO buffer, and converts `code_blocks`
+    to a string. `df` is not referenced and nothing is returned.
+    NOTE(review): the executing part of the body is presumably missing from
+    this view -- confirm against the full file.
+    """
+    import io
+    from contextlib import redirect_stdout
+    buffer = io.StringIO()
+    # coder="".join(code_blocks)
+    coder = str(code_blocks)
+    # print(coder)
+@st.cache_resource
+def file_handler(file):
+    """
+    Handles file upload and returns the file path.
+    """
+    file_name = file.name
+    if file_name.split(".")[-1] in ["csv"]:
+        value = pd.read_csv(file)
+        return value
+    elif file_name.split(".")[-1] in ["xlsx"]:
+        value = pd.read_excel(file)
+        return value
+    else:
+        return None
+def run_agent(prompt,df):
+    # progress = True
+    # return_val = ""
+    # while isinstance(progress,list):
+    llm = OllamaLLM(model="llama3:latest")
+    intermediate_steps = prompt+"\n"
+    intermediate_steps += "\nAbove is the user request that has to be completed. \n"
+    intermediate_steps += "Below is the dataframe sample provided . \n"
+    intermediate_steps += "The dataframe is as follows:\n"
+    intermediate_steps +=  dataframe_info(df)+"\n"
+    intermediate_steps += "You are a senior pandas dataframe developer and you have to write code to complete the user request.\n"
+    intermediate_steps += "Below are the instructions\n"
+    intermediate_steps += "There is a variable name 'df' which is a dataframe and have values.\n"
+    intermediate_steps += "write code using df to manipulate it and give result according to user instruction.\n"
+    intermediate_steps += "No need to load the data 'df' is the required variable.\n"
+    intermediate_steps += "Whole team told you that you no need to use pd.read data is already there in df.\n"
+    intermediate_steps += "Since we are showing code output in streamlit not in terminal so code it properly. \n"
+    intermediate_steps += "This is last warning as a ceo of the company, you have to return only required code as per user request.\n"
+    intermediate_steps += "Example\n"
+    intermediate_steps += "User request: 'show me the rows which has highest electricity_kwh_per_month\n"
+    intermediate_steps += "```\nst.write(df[df['electricity_kwh_per_month'] == df['electricity_kwh_per_month'].max()])\n```\n"
+    intermediate_steps += "You can see that above is the required code(in quotes) for user query(only required) and below is the next request.\n"
+    intermediate_steps += "User request: {prompt}\n"
+    intermediate_steps += "Generate code for the above request only but write some code to full fill user query.\n"
+    return intermediate_steps
 def send_prompt():
     """Return the fixed instruction sentence placed between the retrieved context and the user's prompt."""
     instruction = "please respond according to the prompt asked below from the above context"
     return instruction
                     if prompts_[-1]=="@working":
                         if index==0:
+                            data_need=""
+                            while(len(data_need)==0):
+                                if len(prompts_)==3:
+                                    data_need = st.write_stream(consume_llm_api(prompts_[1]))
+                                else:
+                                    data_need=st.write_stream(consume_llm_api(prompts_[0]))
                             dictionary['every_prompt_with_val'][-1]=(prompts_[0],str(data_need))
                     elif isinstance(prompts_[-1],str):
     with column1:
     # Create a canvas component
         changes,implementation,current=st.columns([0.01,0.9,0.01])
         model = encoding_model()
         with implementation:
+            with st.spinner('Wait for it...'):
+                    # pdf_file = st.file_uploader("Upload PDF file", type=('pdf'))
                     st.write("<br>"*3,unsafe_allow_html=True)
                     if bg_doc:
                         canvas_result=None
+                        # st.write(bg_doc.name)
+                        file_type = file_handler(bg_doc)
+                        if isinstance(file_type,type(None)) :
+                            with open(bg_doc.name, "wb") as f_work:
+                                f_work.write(bg_doc.getbuffer())
+                            data = process_pdf(bg_doc.name)
+                            if str(data) not in dictionary['text_embeddings']:
+                                dictionary['text_embeddings']={}
+                                text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
+                                chunks = text_splitter.split_documents(data)
+                                dictionary['text_embeddings'][str(data)]={str(chunk.page_content):model.encode(str(chunk.page_content)) for chunk in chunks}
+                                embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
+                                vector_store = []
+                                for i in dictionary['text_embeddings'][str(data)]:
+                                    vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
+                            else:
+                                embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
+                                vector_store = []
+                                for i in dictionary['text_embeddings'][str(data)]:
+                                    vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
                         else:
+                            code_runner,code_check,data_frame = st.tabs(["🗃 code runner", "code","📈 Chart"])
+                            with data_frame:
+                                file_type = st.data_editor(file_type,hide_index=True,use_container_width=True,num_rows='dynamic')
+                            with code_check:
+                                if len(dictionary['every_prompt_with_val'])!=0:
+                                    with st.form("code_form"):
+                                        code_new=extract_python_code(dictionary['every_prompt_with_val'][-1][-1])
+                                        code_new = "".join(code_new)
+                                        response = code_editor(code_new, lang="python", key="editor1",height=screen_height/4,allow_reset=True,response_mode="blur",focus=True)
+                                        print(response)
+                                        submitted = st.form_submit_button("Submit Code")
+                            with code_runner:
+                                if len(dictionary['every_prompt_with_val'])!=0 and submitted:
+                                    code_new = response.get('text')
+                                    print(code_new,response)
+                                    run_code_blocks(code_new,file_type)
+                                elif len(dictionary['every_prompt_with_val'])!=0 :
+                                    code_new=extract_python_code(dictionary['every_prompt_with_val'][-1][-1])
+                                    code_new = "".join(code_new)
+                                    run_code_blocks(code_new,file_type)
+                                else:
+                                    st.header("Please ask your query from data")
+                    else:
                         canvas_result = st_canvas(
                             fill_color="rgba(0, 0, 0, 0.3)",  # Fixed fill color with some opacity
                             stroke_width=stroke_width,
                             stroke_color=stroke_color,
                             background_color=bg_color,
+                            background_image=gen_image if gen_image else Image.open("ALL_image_formation\image_gen.png"),
                             update_streamlit=True,
+                            height=int(screen_height//2.16) if screen_height!=1180 else screen_height//2,
+                            width=int(screen_width//2.3)  if screen_width!=820 else screen_width//2,
                             drawing_mode=drawing_mode,
                             point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
                             key="canvas",
                         )
+        # st.rerun()
     # run=st.button("run_experiment")
 if bg_doc:
+    if len(dictionary['every_prompt_with_val'])==0:
+        query_embedding = model.encode(["something"])
+    else:
+        query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
+    if isinstance(file_type,type(None)) :
         retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for  match in vector_store])[-1]
         with implementation:
+            with st.spinner('Wait for it...'):
                 text_lookup=retrieved_chunks
                 pages=[]
                 buffer = bg_doc.getbuffer()
                 byte_data = bytes(buffer)
+                with fitz.open(stream=byte_data, filetype="pdf") as doc:
                     for page_no in range(doc.page_count):
                         pages.append(doc.load_page(page_no - 1))
                     with st.container(height=int(screen_height//1.8)):
                         for pg_no in pages[::-1]:
                             areas = pg_no.search_for(text_lookup)
                             for area in areas:
                                 pg_no.add_rect_annot(area)
                             pix = pg_no.get_pixmap(dpi=100).tobytes()
+                            st.image(pix,use_column_width=True)
 if bg_doc and prompt:
     query_embedding = model.encode([prompt])
+    if isinstance(file_type,type(None)) :
+        retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1]) for  match in vector_store])[-1]
+        prompt = "Context: "+ retrieved_chunks +"\n"+send_prompt()+ "\n"+prompt
+        modifiedValue="@working"
+        dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
+        st.rerun()
+    else:
+        modifiedValue="@working"
+        new_prompt = run_agent(prompt,file_type)
+        dictionary['every_prompt_with_val'].append((prompt,new_prompt,modifiedValue))
+        st.rerun()
 elif not bg_doc and canvas_result.image_data is not None:
     if prompt: