Update app.py
Browse files
app.py
CHANGED
@@ -1,85 +1,85 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
from PIL import Image
|
4 |
-
import json
|
5 |
-
from streamlit_lottie import st_lottie
|
6 |
-
|
7 |
-
##### BUET Logo ###########
|
8 |
-
image = Image.open("buet.png")
|
9 |
-
new_image = image.resize((100, 100))
|
10 |
-
#st.image(new_image)
|
11 |
-
st.title("Automated LLM and Web Scrapping based Road Accident Dataset creation from Newspapers")
|
12 |
-
|
13 |
-
|
14 |
-
######### Animation ##########
|
15 |
-
def load_lottiefile(filepath:str):
|
16 |
-
with open(filepath,"r") as f:
|
17 |
-
return json.load(f)
|
18 |
-
lottie_coding=load_lottiefile("animate.json")
|
19 |
-
st_lottie(
|
20 |
-
lottie_coding,
|
21 |
-
height=200,
|
22 |
-
|
23 |
-
)
|
24 |
-
|
25 |
-
|
26 |
-
radio_btn1=st.radio("**Choose the newspaper you want to collect news from**",options=("Prothom Alo","
|
27 |
-
radio_btn2=st.radio("Choose an LLM model",options=("GPT-3.5 (Medium Cost)","GPT-4 (High Cost)","Llama3 (Free)"))
|
28 |
-
|
29 |
-
number = st.number_input("**Enter the number of accident news you want the LLM to go through**",min_value=0,max_value=
|
30 |
-
|
31 |
-
if st.button("Generate Dataset"):
|
32 |
-
st.write("**Please wait until the datasest is finished generating. It takes almost 8 sec to process each entry for GPT-4 and 30 sec for GPT-3.5 and Llama3. So, for example, if you entered 15 as input, it will take almost 2 minutes for GPT-4 and 7.5 min for GPT-3.5 and Llama3. The dataset will appear below.**")
|
33 |
-
|
34 |
-
if radio_btn1=="Prothom Alo":
|
35 |
-
import Prothom_alo_fully_scraped
|
36 |
-
df=Prothom_alo_fully_scraped.get_data(number)
|
37 |
-
elif radio_btn1=="Dhaka Tribune":
|
38 |
-
import Dhaka_Tribune_Fully_Scraped
|
39 |
-
df=Dhaka_Tribune_Fully_Scraped.get_data(number)
|
40 |
-
elif radio_btn1== "The Daily Star":
|
41 |
-
import Daily_Star_fully_scraped
|
42 |
-
df=Daily_Star_fully_scraped.get_data(number)
|
43 |
-
if radio_btn2=="GPT-4 (High Cost)":
|
44 |
-
import LLM_automation_GPT
|
45 |
-
df2=LLM_automation_GPT.create_data(df)
|
46 |
-
elif radio_btn2=="Llama3 (Free)":
|
47 |
-
import LLM_automation_Groq
|
48 |
-
df2=LLM_automation_Groq.create_data(df)
|
49 |
-
elif radio_btn2=="GPT-3.5 (Medium Cost)":
|
50 |
-
import LLM_automation_GPT35
|
51 |
-
df2=LLM_automation_GPT35.create_data(df)
|
52 |
-
st.dataframe(df2)
|
53 |
-
print(len(df))
|
54 |
-
|
55 |
-
|
56 |
-
#st.write("""
|
57 |
-
# **Developed by:**\n
|
58 |
-
|
59 |
-
# *MD Thamed Bin Zaman Chowdhury, Student ID: 1904184,*\n
|
60 |
-
# *Department of Civil Engineering, BUET*\n
|
61 |
-
# *E-mail: [email protected]*
|
62 |
-
# """)
|
63 |
-
|
64 |
-
|
65 |
-
st.write("--------")
|
66 |
-
st.write("**Modules and packages used to develop the program:**")
|
67 |
-
|
68 |
-
######## Other Logos ################
|
69 |
-
p=125
|
70 |
-
image2 = Image.open("pandas.png")
|
71 |
-
new_image2 = image2.resize((p, p))
|
72 |
-
image3 = Image.open("numpy.png")
|
73 |
-
new_image3 = image3.resize((p, p))
|
74 |
-
image4 = Image.open("selenium_webdriver.jpeg")
|
75 |
-
new_image4 = image4.resize((p, p))
|
76 |
-
image5 = Image.open("streamlit.png")
|
77 |
-
new_image5 = image5.resize((p, p))
|
78 |
-
image6 = Image.open("openai.png")
|
79 |
-
new_image6 = image6.resize((p, p))
|
80 |
-
image7 = Image.open("llama3.jpeg")
|
81 |
-
new_image7 = image7.resize((p, p))
|
82 |
-
image8 = Image.open("langchain.png")
|
83 |
-
new_image8 = image8.resize((p, p))
|
84 |
-
|
85 |
st.image([new_image2, new_image3,new_image4,new_image5,new_image6,new_image7,new_image8])
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from PIL import Image
|
4 |
+
import json
|
5 |
+
from streamlit_lottie import st_lottie
|
6 |
+
|
7 |
+
##### BUET Logo ###########
# Load and downscale the university logo; on-page rendering is currently
# disabled (the st.image call is commented out), but the resized copy is
# kept so it can be re-enabled easily.
image = Image.open("buet.png")
new_image = image.resize((100, 100))
#st.image(new_image)
# Page title. Typo fix: "Scrapping" -> "Scraping".
st.title("Automated LLM and Web Scraping based Road Accident Dataset creation from Newspapers")
|
12 |
+
|
13 |
+
|
14 |
+
######### Animation ##########
def load_lottiefile(filepath: str) -> dict:
    """Load a Lottie animation definition from a local JSON file.

    Args:
        filepath: Path to a Lottie ``.json`` file.

    Returns:
        The parsed JSON content (a dict for Lottie files).

    Raises:
        FileNotFoundError: If *filepath* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by spec; pin the encoding rather than relying on the
    # platform default (which breaks on Windows for non-ASCII content).
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
|
18 |
+
# Load the local Lottie animation and render it near the top of the page.
lottie_coding = load_lottiefile("animate.json")
st_lottie(lottie_coding, height=200)
|
24 |
+
|
25 |
+
|
26 |
+
# --- User inputs -----------------------------------------------------------
# Newspaper source to scrape. NOTE(review): only two options are offered
# here, but the branch logic below still checks for "Dhaka Tribune" — that
# branch is unreachable from this widget; confirm whether the option was
# removed on purpose.
radio_btn1=st.radio("**Choose the newspaper you want to collect news from**",options=("Prothom Alo","The Daily Star"))
# LLM backend used to structure the scraped articles into a dataset.
radio_btn2=st.radio("Choose an LLM model",options=("GPT-3.5 (Medium Cost)","GPT-4 (High Cost)","Llama3 (Free)"))

# How many accident articles to process (capped at 50 by the widget).
number = st.number_input("**Enter the number of accident news you want the LLM to go through (Maximum 50)**",min_value=0,max_value=50)

if st.button("Generate Dataset"):
    st.write("**Please wait until the datasest is finished generating. It takes almost 8 sec to process each entry for GPT-4 and 30 sec for GPT-3.5 and Llama3. So, for example, if you entered 15 as input, it will take almost 2 minutes for GPT-4 and 7.5 min for GPT-3.5 and Llama3. The dataset will appear below.**")

    # Scrape `number` articles from the selected newspaper. The scraper
    # modules are imported lazily so only the chosen one is loaded.
    # NOTE(review): if none of these branches matches, `df` is never bound
    # and the LLM step below raises NameError — with the current widget
    # options this cannot happen, but it is fragile; verify.
    if radio_btn1=="Prothom Alo":
        import Prothom_alo_fully_scraped
        df=Prothom_alo_fully_scraped.get_data(number)
    elif radio_btn1=="Dhaka Tribune":
        # Unreachable with the current radio options (see note above).
        import Dhaka_Tribune_Fully_Scraped
        df=Dhaka_Tribune_Fully_Scraped.get_data(number)
    elif radio_btn1== "The Daily Star":
        import Daily_Star_fully_scraped
        df=Daily_Star_fully_scraped.get_data(number)
    # Feed the scraped articles to the selected LLM backend; each module is
    # expected to return a structured dataframe-like object.
    if radio_btn2=="GPT-4 (High Cost)":
        import LLM_automation_GPT
        df2=LLM_automation_GPT.create_data(df)
    elif radio_btn2=="Llama3 (Free)":
        import LLM_automation_Groq
        df2=LLM_automation_Groq.create_data(df)
    elif radio_btn2=="GPT-3.5 (Medium Cost)":
        import LLM_automation_GPT35
        df2=LLM_automation_GPT35.create_data(df)
    # Show the generated dataset in the app.
    st.dataframe(df2)
    # Debug leftover: prints the scraped row count to the server console,
    # not to the Streamlit page.
    print(len(df))
|
54 |
+
|
55 |
+
|
56 |
+
#st.write("""
|
57 |
+
# **Developed by:**\n
|
58 |
+
|
59 |
+
# *MD Thamed Bin Zaman Chowdhury, Student ID: 1904184,*\n
|
60 |
+
# *Department of Civil Engineering, BUET*\n
|
61 |
+
# *E-mail: [email protected]*
|
62 |
+
# """)
|
63 |
+
|
64 |
+
|
65 |
+
st.write("--------")
st.write("**Modules and packages used to develop the program:**")

######## Other Logos ################
# Render one resized logo per tool used in the project. The seven
# copy-pasted open/resize pairs are collapsed into a single comprehension;
# order and the 125x125 size are preserved exactly.
LOGO_SIZE = 125
logo_files = [
    "pandas.png",
    "numpy.png",
    "selenium_webdriver.jpeg",
    "streamlit.png",
    "openai.png",
    "llama3.jpeg",
    "langchain.png",
]
logos = [Image.open(name).resize((LOGO_SIZE, LOGO_SIZE)) for name in logo_files]
st.image(logos)
|