Hemasagar committed on
Commit
70111e8
·
verified ·
1 Parent(s): a4cc4d0

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +32 -32
utils.py CHANGED
@@ -48,38 +48,38 @@ def create_docs(user_pdf_list):
48
 
49
 
50
 
51
- # for filename in user_pdf_list:
52
 
53
- # print(filename)
54
- raw_data=get_pdf_text(user_pdf_list)
55
- print("pdf_Data",raw_data)
56
- # print("extracted raw data")
57
-
58
- llm_extracted_data=extracted_data(raw_data)
59
- print("llm_extracted_data",llm_extracted_data)
60
- #print(llm_extracted_data)
61
- #print("llm extracted data")
62
- #Adding items to our list - Adding data & its metadata
63
-
64
- pattern = r'{(.+)}'
65
- match = re.search(pattern, llm_extracted_data, re.DOTALL)
66
-
67
-
68
-
69
- if match:
70
- extracted_text = match.group(1)
71
- # Converting the extracted text to a dictionary
72
- data_dict = eval('{' + extracted_text + '}')
73
- print(data_dict)
74
- else:
75
- print("No match found.")
76
- # Initialize data_dict
77
- data_dict = {}
78
-
79
-
80
- # df=df.append([data_dict], ignore_index=True)
81
- print("********************DONE***************")
82
- # df=df.append(save_to_dataframe(llm_extracted_data), ignore_index=True)
83
-
84
  llm_extracted_data
85
  return llm_extracted_data
 
48
 
49
 
50
 
51
+ for filename in user_pdf_list:
52
 
53
+ # print(filename)
54
+ raw_data=get_pdf_text(filename)
55
+ print("pdf_Data",raw_data)
56
+ # print("extracted raw data")
57
+
58
+ llm_extracted_data=extracted_data(raw_data)
59
+ print("llm_extracted_data",llm_extracted_data)
60
+ #print(llm_extracted_data)
61
+ #print("llm extracted data")
62
+ #Adding items to our list - Adding data & its metadata
63
+
64
+ pattern = r'{(.+)}'
65
+ match = re.search(pattern, llm_extracted_data, re.DOTALL)
66
+
67
+
68
+
69
+ if match:
70
+ extracted_text = match.group(1)
71
+ # Converting the extracted text to a dictionary
72
+ data_dict = eval('{' + extracted_text + '}')
73
+ print(data_dict)
74
+ else:
75
+ print("No match found.")
76
+ # Initialize data_dict
77
+ data_dict = {}
78
+
79
+
80
+ df=df._append([data_dict], ignore_index=True)
81
+ print("********************DONE***************")
82
+ # df=df.append(save_to_dataframe(llm_extracted_data), ignore_index=True)
83
+
84
  llm_extracted_data
85
  return llm_extracted_data