vsrinivas committed on
Commit
326f756
·
verified ·
1 Parent(s): 9e35974

Create app

Browse files
Files changed (1) hide show
  1. app +157 -0
app ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import ast
3
+ from nbformat.v4 import (new_notebook, new_markdown_cell, new_code_cell)
4
+
5
+
6
def _strip_code_fence(raw):
    """Return *raw* without a surrounding Markdown code fence, if it has one."""
    import re

    # Greedy match up to the LAST fence so backticks inside the payload survive.
    fenced = re.search(r"```[A-Za-z]*[ \t]*\n(.*)```", raw, re.DOTALL)
    payload = fenced.group(1) if fenced else raw
    return payload.strip()


def _parse_blocks(raw, fallback_text):
    """Turn the model reply into a list of ``(kind, text)`` string pairs.

    If the reply is not a Python literal of that shape, the whole page is
    returned as a single "Text" block so the caller does not lose content.
    """
    try:
        parsed = ast.literal_eval(_strip_code_fence(raw))
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        print(f"Could not parse model response ({err!r}); keeping page as one Text block.")
        return [("Text", fallback_text)]

    # A lone ("Code"|"Text", "...") pair instead of a sequence of pairs.
    if isinstance(parsed, tuple) and len(parsed) == 2 and isinstance(parsed[0], str):
        parsed = [parsed]

    blocks = []
    if isinstance(parsed, (list, tuple)):
        for item in parsed:
            is_pair = isinstance(item, (list, tuple)) and len(item) == 2
            if is_pair and all(isinstance(part, str) for part in item):
                blocks.append((item[0], item[1]))
    return blocks or [("Text", fallback_text)]


def classify_page(statement):
    """Group the text of one page into "Code" and "Text" blocks using Gemini.

    Args:
        statement: The page text to classify. ``None`` or whitespace-only
            text yields an empty list without contacting the API.

    Returns:
        A list of ``(kind, text)`` tuples in reading order, where ``kind`` is
        the label the model assigned (the prompt asks for "Code" or "Text").
        When the reply cannot be parsed, ``[("Text", statement)]`` is returned.

    Raises:
        RuntimeError: If neither ``GOOGLE_API_KEY`` nor ``GEMINI_API_KEY`` is
            set in the environment.
    """
    import os

    if not statement or not statement.strip():
        return []

    # Imported lazily: the module header never binds `genai`.
    import google.generativeai as genai

    # The key must come from the environment; never commit it to source.
    api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the GOOGLE_API_KEY (or GEMINI_API_KEY) environment variable "
            "to call the Gemini API."
        )
    genai.configure(api_key=api_key)

    # Create the model
    generation_config = {
        "temperature": 0,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash-002",
        generation_config=generation_config,
    )

    chat_session = model.start_chat(history=[])

    # NOTE(review): the example answer in this prompt shows unquoted strings,
    # so the model has to infer the quoting of the tuples it returns; consider
    # showing a valid Python literal there. Wording is kept as-is.
    prompt = f"""
Group the following "Input" strings as substring blocks of "Code" or "Text".
The response content shall be strictly just a sequence of Python touples where the first element of each touple either "Code" or "Text" and the second elemnt is the coressponding grouped substring block.

Input:
# Summary Report - Visual Exploration of Data on Covid-19 Outbreak, Worldwide.

The data was scraped from 'worldometer' web site at https://www.worldometers.info/coronavirus/ and the analysis was carried out using 'Python' programming language and various related libraries.

The worldometer web site provides the data more on cumulative basis and therefore, this report and effort also include the process of gathering daily data.

# First, we start with the loading the required packages.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import requests

Then we access the website link, read the web page content and do some pre-processing.

fig, ax = plt.subplots()
ax.get_yaxis().get_major_formatter().set_scientific(False)

# Create a twin Axes object that shares the x-axis
ax2 = ax.twinx()

# Plot the new cumulative cases time-series in green
plot_timeseries(ax, daily_data2.index, daily_data2['NewCases']+MissedOut_NewCases,
"green", "Date" , "Cumulative no. confirmed of cases")

# Plot the new cumulative deaths data in green
plot_timeseries(ax2, daily_data2.index, daily_data2['NewDeaths']+MissedOut_NewDeaths,
"orange", "Date" , "Cumulative no. of deaths")

# Plot the new daily cases time-series in blue
plot_timeseries(ax, daily_data1.index, daily_data1['NewCases'], "blue", "Date" , "Confirmed cases")

response_content:
[("Text", # Summary Report - Visual Exploration of Data on Covid-19 Outbreak, Worldwide.

The data was scraped from 'worldometer' web site at https://www.worldometers.info/coronavirus/ and the analysis was carried out using 'Python' programming language and various related libraries.

The worldometer web site provides the data more on cumulative basis and therefore, this report and effort also include the process of gathering daily data.),
("Code", # First, we start with the loading the required packages.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import requests),
("Text", Then we access the website link, read the web page content and do some pre-processing.),
("Code", fig, ax = plt.subplots()
ax.get_yaxis().get_major_formatter().set_scientific(False)

# Create a twin Axes object that shares the x-axis
ax2 = ax.twinx()

# Plot the new cumulative cases time-series in green
plot_timeseries(ax, daily_data2.index, daily_data2['NewCases']+MissedOut_NewCases,
"green", "Date" , "Cumulative no. confirmed of cases")

# Plot the new cumulative deaths data in green
plot_timeseries(ax2, daily_data2.index, daily_data2['NewDeaths']+MissedOut_NewDeaths,
"orange", "Date" , "Cumulative no. of deaths")

# Plot the new daily cases time-series in blue
plot_timeseries(ax, daily_data1.index, daily_data1['NewCases'], "blue", "Date" , "Confirmed cases")]

Now, classify this string:
Input: {statement}
"""
    response = chat_session.send_message(prompt)
    return _parse_blocks(response.text, statement)
99
+
100
def create_notebook(file, tc, bc, output_stem='your_ipynb_nOtebook_file'):
    """Convert a PDF into a Jupyter notebook of code and markdown cells.

    Each page is cropped to drop a header band of height ``tc`` and a footer
    band of height ``bc``, its text is extracted and passed to
    ``classify_page``, and every returned block becomes a code cell ("Code")
    or a markdown cell ("Text"). Blocks with any other label are ignored.

    Args:
        file: The PDF to read: a path, or an object whose ``name`` attribute
            is a path string, or a binary stream.
        tc: Height to exclude from the top of every page.
        bc: Height to exclude from the bottom of every page.
        output_stem: Output path without the ``.ipynb`` extension.

    Returns:
        The path of the written notebook (``output_stem`` + ``.ipynb``).

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    # Imported here because the module header never binds these two names
    # (`from nbformat.v4 import ...` does not define `nbformat`).
    import os

    import nbformat
    import pdfplumber

    # NOTE(review): depending on the Gradio version the upload may arrive as a
    # path string or as a temp-file wrapper exposing `.name` — confirm. Path
    # objects are passed through untouched (their `.name` is only a basename).
    pdf_source = file
    if not isinstance(file, (str, os.PathLike)):
        candidate = getattr(file, "name", None)
        if isinstance(candidate, str):
            pdf_source = candidate

    notebook = new_notebook()
    with pdfplumber.open(pdf_source) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            # Keep only the band between the header and the footer.
            crop_box = (0, tc, page.width, page.height - bc)
            text = page.within_bbox(crop_box).extract_text() or ""

            # Pages with no extractable text have nothing to classify.
            if text.strip():
                for kind, content in classify_page(text):
                    if kind == "Code":
                        notebook.cells.append(new_code_cell(content))
                    elif kind == "Text":
                        # Single newlines do not break lines in markdown.
                        spaced = content.replace("\n", "\n\n")
                        # NOTE(review): strips '[[' but every single ']' —
                        # possibly meant ']]'; kept as in the original.
                        notebook.cells.append(
                            new_markdown_cell(spaced.replace('[[', '').replace(']', ''))
                        )

            print(f"Page No.{page_number} completed")

    notebook_path = f'{output_stem}.ipynb'

    # Write the notebook in UTF-8 encoding
    with open(notebook_path, 'w', encoding="utf-8") as f:
        nbformat.write(notebook, f)

    print(f'{notebook_path} notebook created successfully.')
    return notebook_path
138
+
139
+
140
# Gradio front end: upload a PDF, pick the header/footer crop heights, and
# download the notebook produced by create_notebook.
with gr.Blocks() as app:
    gr.Markdown("## File Processor App")

    # Inputs, in the order create_notebook expects them.
    pdf_upload = gr.File(label="Upload a PDF file")
    top_crop_slider = gr.Slider(label='Top Crop in Pixels', value=25)
    bottom_crop_slider = gr.Slider(label='Bottom Crop in pixels', value=25)
    converter_inputs = [pdf_upload, top_crop_slider, bottom_crop_slider]

    # Output slot that receives the generated .ipynb path.
    notebook_download = gr.File(label="Download processed file")

    run_button = gr.Button("Process File")
    run_button.click(
        fn=create_notebook,
        inputs=converter_inputs,
        outputs=notebook_download,
    )

app.launch(debug=True)