warhawkmonk committed on
Commit
e941113
Β·
verified Β·
1 Parent(s): 9b65b91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -92
app.py CHANGED
@@ -10,6 +10,7 @@ import numpy as np
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.schema import Document
12
  from sentence_transformers import SentenceTransformer,util
 
13
  from streamlit_image_select import image_select
14
  import os
15
  import fitz
@@ -66,34 +67,92 @@ def encoding_model():
66
  model = SentenceTransformer(model_name)
67
  return model
68
 
69
- # def consume_llm_api(prompt):
70
-
71
- # client = Groq(
72
- # api_key="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"
73
- # )
74
-
75
- # completion = client.chat.completions.create(
76
 
77
- # model="llama-3.3-70b-versatile",
78
- # messages=[
79
-
80
- # {
81
- # "role": "system",
82
- # "content": prompt
83
- # },
84
- # ],
85
-
86
- # temperature=1,
87
- # # max_completion_tokens=1024,
88
- # top_p=1,
89
- # stream=True,
90
- # stop=None,
91
- # )
92
-
93
- # for chunk in completion:
94
- # if chunk.choices[0].delta.content:
95
- # yield chunk.choices[0].delta.content
 
 
 
 
 
 
 
 
 
 
 
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def send_prompt():
99
  return "please respond according to the prompt asked below from the above context"
@@ -271,8 +330,14 @@ with column2:
271
  if prompts_[-1]=="@working":
272
  if index==0:
273
 
274
- st.write(prompts_[0].split(send_prompt())[-1].upper() if send_prompt() in prompts_[0] else prompts_[0].upper())
275
- data_need=st.write_stream(consume_llm_api(prompts_[0]))
 
 
 
 
 
 
276
  dictionary['every_prompt_with_val'][-1]=(prompts_[0],str(data_need))
277
 
278
  elif isinstance(prompts_[-1],str):
@@ -529,68 +594,82 @@ with st.spinner('Wait for it...'):
529
  with column1:
530
  # Create a canvas component
531
  changes,implementation,current=st.columns([0.01,0.9,0.01])
 
532
  model = encoding_model()
533
  with implementation:
 
 
534
  st.write("<br>"*3,unsafe_allow_html=True)
535
  if bg_doc:
536
-
537
  canvas_result=None
538
-
539
- with open("temp.pdf", "wb") as f:
540
- f.write(bg_doc.getbuffer())
541
 
542
- # Process the uploaded PDF file
543
- data = process_pdf("temp.pdf")
544
- if str(data) not in dictionary['text_embeddings']:
545
- dictionary['text_embeddings']={}
546
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
547
- chunks = text_splitter.split_documents(data)
548
-
549
- dictionary['text_embeddings'][str(data)]={str(chunk.page_content):np.array(model.encode(str(chunk.page_content))) for chunk in chunks}
550
-
551
- embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
552
- vector_store = []
553
- for i in dictionary['text_embeddings'][str(data)]:
554
- vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
555
- else:
556
- embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
557
- vector_store = []
558
- for i in dictionary['text_embeddings'][str(data)]:
559
- vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
560
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
561
- # chunks = text_splitter.split_documents(data)
562
- # # chunk_texts = [str(chunk.page_content) for chunk in chunks]
563
- # # print("testing",chunk_texts)
564
-
565
- # model = encoding_model()
566
- # embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
567
-
568
- # vector_store = []
569
- # for chunk, embedding in zip(chunks, embeddings):
570
- # vector_store.append((embedding, chunk.page_content) )
571
 
572
- else:
573
- if screen_width<=485:
574
- screen_width = screen_width//1.05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
  else:
576
- screen_width = int(screen_width//2.3) if screen_width!=820 else int(screen_width//2)
577
- screen_height = int(screen_height//2.16) if screen_height!=1180 else int(screen_height//2)
578
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  canvas_result = st_canvas(
581
  fill_color="rgba(0, 0, 0, 0.3)", # Fixed fill color with some opacity
582
  stroke_width=stroke_width,
583
  stroke_color=stroke_color,
584
  background_color=bg_color,
585
- background_image=gen_image if gen_image else Image.open("/home/user/app/ALL_image_formation/image_gen.png"),
586
  update_streamlit=True,
587
- height=screen_height,
588
- width=screen_width,
589
  drawing_mode=drawing_mode,
590
  point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
591
  key="canvas",
592
  )
593
-
594
 
595
 
596
 
@@ -603,46 +682,50 @@ with column1:
603
 
604
  # run=st.button("run_experiment")
605
  if bg_doc:
606
- with st.spinner('Wait for it...'):
607
- if len(dictionary['every_prompt_with_val'])==0:
608
- query_embedding = model.encode(["something"])
609
- else:
610
-
611
- query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
612
  retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for match in vector_store])[-1]
613
-
614
-
615
-
616
  with implementation:
617
- with st.spinner('Wait for it...'):
618
  text_lookup=retrieved_chunks
619
  pages=[]
620
  buffer = bg_doc.getbuffer()
621
  byte_data = bytes(buffer)
622
- with fitz.open("temp.pdf") as doc:
623
-
624
  for page_no in range(doc.page_count):
625
  pages.append(doc.load_page(page_no - 1))
626
 
627
- # areas = pages[page_number-1].search_for(text_lookup)
628
  with st.container(height=int(screen_height//1.8)):
629
  for pg_no in pages[::-1]:
630
  areas = pg_no.search_for(text_lookup)
631
  for area in areas:
632
  pg_no.add_rect_annot(area)
633
-
634
  pix = pg_no.get_pixmap(dpi=100).tobytes()
635
- st.image(pix,use_container_width=True)
636
 
637
  if bg_doc and prompt:
 
638
  query_embedding = model.encode([prompt])
639
- retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for match in vector_store])[-1]
640
- print(retrieved_chunks)
641
- prompt = "Context: "+ retrieved_chunks +"\n"+send_prompt()+ "\n"+prompt
642
 
643
- modifiedValue="@working"
644
- dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
645
- st.rerun()
 
 
 
 
 
 
 
 
646
  elif not bg_doc and canvas_result.image_data is not None:
647
  if prompt:
648
 
 
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.schema import Document
12
  from sentence_transformers import SentenceTransformer,util
13
+ from code_editor import code_editor
14
  from streamlit_image_select import image_select
15
  import os
16
  import fitz
 
67
  model = SentenceTransformer(model_name)
68
  return model
69
 
70
+ def executer(query):
71
+ try:
72
+ output = io.StringIO()
73
+ sys.stdout = output
 
 
 
74
 
75
+ exec(query)
76
+ sys.stdout = sys.__stdout__
77
+ print(output.getvalue())
78
+ return False
79
+ except Exception as e:
80
+ return f"Error: {str(e)}"
81
def dataframe_info(data):
    """Return a string preview of the first five rows/items of *data*."""
    return str(data[:5])
84
+
85
def extract_python_code(text):
    """
    Pull code snippets out of free-form text.

    Recognises three shapes, collected in this order: fenced
    triple-backtick blocks, whitespace-indented blocks, and inline
    single-backtick spans. Returns the non-empty snippets, stripped.
    """
    fenced = re.findall(r"```(?:[\w+\-]*)\n(.*?)```", text, re.DOTALL)
    # findall returns tuples here (two groups); the snippet is group 0
    indented = [m[0] for m in re.findall(r"(?:^|\n)((?: |\t).+(\n(?: |\t).+)*)", text)]
    inline = re.findall(r"`([^`\n]+)`", text)

    collected = fenced + indented + inline
    return [piece.strip() for piece in collected if piece.strip()]
105
+
106
 
107
# @st.cache_resource
def run_code_blocks(code_blocks, df):
    """Normalise LLM-generated code blocks to a single source string.

    NOTE(review): as committed, this only builds the string — the
    StringIO/redirect_stdout machinery is prepared but nothing is ever
    executed and nothing is returned. Confirm whether the exec step was
    dropped in this commit; `df` is also currently unused.
    """
    import io
    from contextlib import redirect_stdout

    capture = io.StringIO()
    source_text = str(code_blocks)
116
@st.cache_resource
def file_handler(file):
    """
    Load an uploaded tabular file into a pandas DataFrame.

    BUG FIX (docs): the previous docstring claimed this "returns the
    file path" — it never did.

    Args:
        file: a Streamlit uploaded-file object (has ``.name`` and is
            readable by pandas).

    Returns:
        A DataFrame for ``.csv``/``.xlsx`` uploads, otherwise None
        (callers use None to mean "not tabular — treat as a PDF").
    """
    # .lower() so uppercase extensions like "DATA.CSV" are also accepted
    extension = file.name.split(".")[-1].lower()
    if extension == "csv":
        return pd.read_csv(file)
    if extension == "xlsx":
        return pd.read_excel(file)
    return None
131
def run_agent(prompt, df):
    """
    Assemble the instruction prompt sent to the LLM for dataframe code generation.

    Combines the user's request, a small sample of *df* (via
    ``dataframe_info``), and strict instructions so the model returns only
    runnable pandas/Streamlit code operating on a variable named ``df``.

    Args:
        prompt: the user's natural-language request.
        df: the dataframe whose preview is embedded in the prompt.

    Returns:
        The assembled prompt string (no model call happens here).
    """
    # NOTE(review): the previous version constructed an OllamaLLM client
    # here but never used it — the model call happens elsewhere; removed
    # as dead code.
    intermediate_steps = prompt + "\n"
    intermediate_steps += "\nAbove is the user request that has to be completed. \n"
    intermediate_steps += "Below is the dataframe sample provided . \n"
    intermediate_steps += "The dataframe is as follows:\n"
    intermediate_steps += dataframe_info(df) + "\n"
    intermediate_steps += "You are a senior pandas dataframe developer and you have to write code to complete the user request.\n"
    intermediate_steps += "Below are the instructions\n"
    intermediate_steps += "There is a variable name 'df' which is a dataframe and have values.\n"
    intermediate_steps += "write code using df to manipulate it and give result according to user instruction.\n"
    intermediate_steps += "No need to load the data 'df' is the required variable.\n"
    intermediate_steps += "Whole team told you that you no need to use pd.read data is already there in df.\n"
    intermediate_steps += "Since we are showing code output in streamlit not in terminal so code it properly. \n"
    intermediate_steps += "This is last warning as a ceo of the company, you have to return only required code as per user request.\n"
    intermediate_steps += "Example\n"
    intermediate_steps += "User request: 'show me the rows which has highest electricity_kwh_per_month\n"
    intermediate_steps += "```\nst.write(df[df['electricity_kwh_per_month'] == df['electricity_kwh_per_month'].max()])\n```\n"
    intermediate_steps += "You can see that above is the required code(in quotes) for user query(only required) and below is the next request.\n"
    # BUG FIX: this line was a plain string, so the literal text "{prompt}"
    # was sent to the model instead of the user's actual request.
    intermediate_steps += f"User request: {prompt}\n"
    intermediate_steps += "Generate code for the above request only but write some code to full fill user query.\n"
    return intermediate_steps
156
 
157
def send_prompt():
    """Return the boilerplate instruction placed between the retrieved context and the user's question."""
    return "please respond according to the prompt asked below from the above context"
 
330
  if prompts_[-1]=="@working":
331
  if index==0:
332
 
333
+ data_need=""
334
+ while(len(data_need)==0):
335
+ if len(prompts_)==3:
336
+ data_need = st.write_stream(consume_llm_api(prompts_[1]))
337
+ else:
338
+ data_need=st.write_stream(consume_llm_api(prompts_[0]))
339
+
340
+
341
  dictionary['every_prompt_with_val'][-1]=(prompts_[0],str(data_need))
342
 
343
  elif isinstance(prompts_[-1],str):
 
594
  with column1:
595
  # Create a canvas component
596
  changes,implementation,current=st.columns([0.01,0.9,0.01])
597
+
598
  model = encoding_model()
599
  with implementation:
600
+ with st.spinner('Wait for it...'):
601
+ # pdf_file = st.file_uploader("Upload PDF file", type=('pdf'))
602
  st.write("<br>"*3,unsafe_allow_html=True)
603
  if bg_doc:
 
604
  canvas_result=None
605
+ # st.write(bg_doc.name)
606
+ file_type = file_handler(bg_doc)
 
607
 
608
+ if isinstance(file_type,type(None)) :
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
 
610
+ with open(bg_doc.name, "wb") as f_work:
611
+ f_work.write(bg_doc.getbuffer())
612
+
613
+ data = process_pdf(bg_doc.name)
614
+ if str(data) not in dictionary['text_embeddings']:
615
+ dictionary['text_embeddings']={}
616
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
617
+ chunks = text_splitter.split_documents(data)
618
+
619
+ dictionary['text_embeddings'][str(data)]={str(chunk.page_content):model.encode(str(chunk.page_content)) for chunk in chunks}
620
+
621
+ embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
622
+ vector_store = []
623
+ for i in dictionary['text_embeddings'][str(data)]:
624
+ vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
625
+ else:
626
+ embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
627
+ vector_store = []
628
+ for i in dictionary['text_embeddings'][str(data)]:
629
+ vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
630
  else:
631
+
632
+ code_runner,code_check,data_frame = st.tabs(["πŸ—ƒ code runner", "code","πŸ“ˆ Chart"])
633
+ with data_frame:
634
+ file_type = st.data_editor(file_type,hide_index=True,use_container_width=True,num_rows='dynamic')
635
+ with code_check:
636
+ if len(dictionary['every_prompt_with_val'])!=0:
637
+ with st.form("code_form"):
638
+ code_new=extract_python_code(dictionary['every_prompt_with_val'][-1][-1])
639
+ code_new = "".join(code_new)
640
+ response = code_editor(code_new, lang="python", key="editor1",height=screen_height/4,allow_reset=True,response_mode="blur",focus=True)
641
+ print(response)
642
+ submitted = st.form_submit_button("Submit Code")
643
+
644
+ with code_runner:
645
+ if len(dictionary['every_prompt_with_val'])!=0 and submitted:
646
+ code_new = response.get('text')
647
+ print(code_new,response)
648
+ run_code_blocks(code_new,file_type)
649
+ elif len(dictionary['every_prompt_with_val'])!=0 :
650
+ code_new=extract_python_code(dictionary['every_prompt_with_val'][-1][-1])
651
+ code_new = "".join(code_new)
652
+ run_code_blocks(code_new,file_type)
653
+ else:
654
+ st.header("Please ask your query from data")
655
+
656
+
657
+ else:
658
 
659
  canvas_result = st_canvas(
660
  fill_color="rgba(0, 0, 0, 0.3)", # Fixed fill color with some opacity
661
  stroke_width=stroke_width,
662
  stroke_color=stroke_color,
663
  background_color=bg_color,
664
+ background_image=gen_image if gen_image else Image.open("ALL_image_formation\image_gen.png"),
665
  update_streamlit=True,
666
+ height=int(screen_height//2.16) if screen_height!=1180 else screen_height//2,
667
+ width=int(screen_width//2.3) if screen_width!=820 else screen_width//2,
668
  drawing_mode=drawing_mode,
669
  point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
670
  key="canvas",
671
  )
672
+ # st.rerun()
673
 
674
 
675
 
 
682
 
683
  # run=st.button("run_experiment")
684
  if bg_doc:
685
+ if len(dictionary['every_prompt_with_val'])==0:
686
+ query_embedding = model.encode(["something"])
687
+ else:
688
+
689
+ query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
690
+ if isinstance(file_type,type(None)) :
691
  retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for match in vector_store])[-1]
 
 
 
692
  with implementation:
693
+ with st.spinner('Wait for it...'):
694
  text_lookup=retrieved_chunks
695
  pages=[]
696
  buffer = bg_doc.getbuffer()
697
  byte_data = bytes(buffer)
698
+ with fitz.open(stream=byte_data, filetype="pdf") as doc:
699
+
700
  for page_no in range(doc.page_count):
701
  pages.append(doc.load_page(page_no - 1))
702
 
 
703
  with st.container(height=int(screen_height//1.8)):
704
  for pg_no in pages[::-1]:
705
  areas = pg_no.search_for(text_lookup)
706
  for area in areas:
707
  pg_no.add_rect_annot(area)
708
+
709
  pix = pg_no.get_pixmap(dpi=100).tobytes()
710
+ st.image(pix,use_column_width=True)
711
 
712
  if bg_doc and prompt:
713
+
714
  query_embedding = model.encode([prompt])
715
+ if isinstance(file_type,type(None)) :
716
+ retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1]) for match in vector_store])[-1]
 
717
 
718
+
719
+ prompt = "Context: "+ retrieved_chunks +"\n"+send_prompt()+ "\n"+prompt
720
+
721
+ modifiedValue="@working"
722
+ dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
723
+ st.rerun()
724
+ else:
725
+ modifiedValue="@working"
726
+ new_prompt = run_agent(prompt,file_type)
727
+ dictionary['every_prompt_with_val'].append((prompt,new_prompt,modifiedValue))
728
+ st.rerun()
729
  elif not bg_doc and canvas_result.image_data is not None:
730
  if prompt:
731