File size: 3,368 Bytes
a5fd426
 
 
 
 
 
 
 
 
 
2e5199f
a5fd426
 
 
 
 
 
 
 
 
 
 
 
 
ffddafd
7cfb0d5
70638ef
a5fd426
70638ef
48855de
a5fd426
 
d1d7ee9
a5fd426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
703ccd2
a5fd426
 
 
 
 
 
48855de
 
a5fd426
48855de
 
 
 
a5fd426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11a3e50
 
a5fd426
11a3e50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
import pandas as pd
from PIL import Image
import json
from streamlit_lottie import st_lottie

##### BUET Logo ###########
# The logo is opened and scaled to 100x100, but the call that would display it
# is currently disabled (kept below for reference).
new_image = Image.open("buet.png").resize((100, 100))
#st.image(new_image)

# Page heading.
st.title(
    "Durghotona GPT: A Web Scraping and Large Language Model Based Framework to Generate Accident Dataset Automatically in Bangladesh"
)


######### Animation ##########
def load_lottiefile(filepath: str):
    """Load a Lottie animation definition from a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which would mis-read non-ASCII text on some systems.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
# Read the animation definition from disk and render it at a fixed height.
lottie_coding = load_lottiefile("animate.json")
st_lottie(lottie_coding, height=200)

# Up-front notice about expected processing time.
st.write(
    "**WARNING: Please enter a small number (5-10) for testing purpose. Rule of thumb: It takes almost 20 seconds to process each entry.**"
)

# Choices offered to the user: news source and language model.
newspaper_options = ("Prothom Alo", "The Daily Star")
model_options = ("GPT-4 (High Cost)", "Llama3 (Free)")
radio_btn1 = st.radio(
    "**Choose the newspaper you want to collect news from**",
    options=newspaper_options,
)
radio_btn2 = st.radio(
    "**Choose an LLM model**",
    options=model_options,
)

# Number of news items to process, bounded to the 0-20 range by the widget.
number = st.number_input(
    "**Enter the number of accident news you want the LLM to go through (Maximum 20)**",
    min_value=0,
    max_value=20,
)

# Link to the paper referenced in the "please wait" message.
url = "https://drive.google.com/file/d/1WvEgAhNHds_Mn2j8SQzdv-9HBXM_BSMD/view?usp=sharing"

# Run the scrape -> LLM pipeline when the user presses the button.
if st.button("Generate Dataset"):
    st.write(
        "**Please wait while the dataset is being generated. Note that, 'General' accident news reports will be excluded from the dataset. To know more, please read this paper:** "
        f"[link]({url})"
    )

    # Pick the scraper module for the chosen newspaper. Modules are imported
    # lazily so only the selected backend is loaded. The "Dhaka Tribune"
    # branch is not offered by the newspaper radio options in this file; it is
    # kept so the option can be re-enabled without touching this handler.
    scraper = None
    if radio_btn1 == "Prothom Alo":
        import Prothom_alo_fully_scraped as scraper
    elif radio_btn1 == "Dhaka Tribune":
        import Dhaka_Tribune_Fully_Scraped as scraper
    elif radio_btn1 == "The Daily Star":
        import Daily_Star_fully_scraped as scraper

    # Pick the LLM automation module for the chosen model. The GPT-3.5 branch
    # is likewise not offered by the model radio options in this file.
    llm = None
    if radio_btn2 == "GPT-4 (High Cost)":
        import LLM_automation_GPT as llm
    elif radio_btn2 == "Llama3 (Free)":
        import LLM_automation_Groq as llm
    elif radio_btn2 == "GPT-3.5 (Medium Cost + Long Waiting Time)":
        import LLM_automation_GPT35 as llm

    if scraper is None or llm is None:
        # Without this guard an unmatched selection would surface as a
        # NameError on an unassigned dataframe variable further down.
        st.error(
            f"Unsupported selection: newspaper={radio_btn1!r}, model={radio_btn2!r}."
        )
    else:
        # Scrape the requested number of reports, then hand them to the LLM
        # step, which returns the table that is displayed.
        df = scraper.get_data(number)
        df2 = llm.create_data(df)
        st.dataframe(df2)
        # Server-side log of how many scraped rows went into the LLM step.
        print(len(df))
    
    
    
    
# Developer credit rendered as markdown below the generator controls.
# NOTE(review): the e-mail text in this literal looks like an obfuscation
# placeholder rather than a real address - confirm the intended value.
developer_credit = """
                   **Developed by:**\n
        
                   *MD Thamed Bin Zaman Chowdhury, Student ID: 1904184,*\n
                   *Department of Civil Engineering, BUET*\n
                   *E-mail: [email protected]*
        """
st.write(developer_credit)


# Horizontal rule followed by the heading for the logo strip.
st.write("--------")
st.write("**Modules and packages used to develop the program:**")

######## Other Logos ################
# Edge length (pixels) applied to both dimensions of every logo.
p = 125

# Logo files, in the order they are displayed.
logo_files = (
    "pandas.png",
    "numpy.png",
    "selenium_webdriver.jpeg",
    "streamlit.png",
    "openai.png",
    "llama3.jpeg",
    "langchain.png",
    "deep_translator.png",
)

# Open each logo and scale it to p x p, then show them all in one call.
resized_logos = [Image.open(logo_file).resize((p, p)) for logo_file in logo_files]
st.image(resized_logos)