KushwanthK committed on
Commit
a5d883c
·
verified ·
1 Parent(s): 510f31f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -167
app.py CHANGED
@@ -525,52 +525,52 @@ display_highlighted_pdf()
525
  # print(f"An error occurred: {e}")
526
 
527
 
528
- import streamlit as st
529
- import requests
530
- from bs4 import BeautifulSoup
531
 
532
- def fetch_html(url):
533
- # Fetch the webpage content
534
- response = requests.get(url)
535
- if response.status_code == 200:
536
- return response.content
537
- else:
538
- st.error(f"Failed to fetch webpage. Status code: {response.status_code}")
539
 
540
- def scrape_data(html_content):
541
- # Parse HTML content
542
- soup = BeautifulSoup(html_content, "html.parser")
543
 
544
- # Scrape data (replace this with your specific scraping logic)
545
- data = soup.find_all(class_="container first-container")
546
 
547
- return data
548
 
549
- def main(url):
550
- st.title("Webpage Scraper")
551
 
552
- # User input for webpage URL
553
- url = st.text_input("Enter the URL of the webpage:", value=url)
554
 
555
- # Convert webpage to HTML and scrape data
556
- if st.button("Scrape Data"):
557
- if url:
558
- html_content = fetch_html(url)
559
- str_content = """ """
560
- if html_content:
561
- data = scrape_data(html_content)
562
- # st.title("HTML Page Display")
563
- # st.components.v1.html(html_content, height=1600, width=800)
564
- # Display scraped data in a new tab
565
- with st.expander("Scraped Data", expanded=True):
566
- for item in data:
567
- # Convert item to string and display
568
- str_content += str(item)
569
- # st.write(str(item))
570
- # st.title("HTML Page Display")
571
- # st.components.v1.html(data, height=1600, width=800)
572
- # st.title("HTML Page Display")
573
- st.components.v1.html(str_content, height=1600, width=680)
574
  # main()
575
 
576
 
@@ -602,180 +602,180 @@ def main(url):
602
 
603
 
604
 
605
- import requests
606
- from bs4 import BeautifulSoup
607
 
608
- # URL of the webpage
609
- baseurl = "https://vedabase.io/en/library/bg/"
610
 
611
- # Fetch the webpage content
612
- response = requests.get(baseurl)
613
- if response.status_code == 200:
614
- html_content = response.content
615
 
616
- # Parse HTML content
617
- soup = BeautifulSoup(html_content, "html.parser")
618
 
619
- # Find all direct child div elements with class="r-chapter"
620
- direct_child_div_elements = soup.select("div.col-12 > div.r-chapter")
621
 
622
- # List to store the extracted text
623
- output = []
624
 
625
- # Iterate over each direct child div element
626
- for div in direct_child_div_elements:
627
- # Find the <a> tag within the div
628
- a_tag = div.find("a")
629
- if a_tag:
630
- # Extract the text from the <a> tag and append it to the output list
631
- output.append(a_tag.text.strip())
632
 
633
- # Print the output list
634
- # print(output)
635
- # print(len(output))
636
 
637
- ### Link to all chapters
638
- import re
639
 
640
- # Sample text
641
- chapter = output[4]
642
 
643
- text_to_number = {
644
- "One": "1",
645
- "Two": "2",
646
- "Three": "3",
647
- "Four": "4",
648
- "Five": "5",
649
- "Six": "6",
650
- "Seven": "7",
651
- "Eight": "8",
652
- "Nine": "9",
653
- "Ten": "10",
654
- # Add more numbers if needed
655
- }
656
 
657
- # Split the text by spaces
658
- words = chapter.split()
659
 
660
- # Find the text representing the number
661
- number_text = words[1].strip(":") # Assuming the number text is the second word
662
 
663
- # Extract the numeric part
664
- chapter_number = text_to_number[number_text]
665
 
666
- # Print the chapter number
667
- # print(chapter_number)
668
 
669
- url = baseurl + chapter_number
670
 
671
- # print(url)
672
 
673
- ### all Texts in each chapter
674
 
675
- response = requests.get(url)
676
- if response.status_code == 200:
677
- html_content = response.content
678
 
679
- # Parse HTML content
680
- text = BeautifulSoup(html_content, "html.parser")
681
 
682
- # print(text)
683
 
684
- # Find all direct child div elements with class="r-chapter"
685
- direct_child_div_elements = text.select("div.col-12 > dl.r.r-verse")
686
 
687
- # print(direct_child_div_elements)
688
 
689
- # List to store the extracted text
690
- text_number = []
691
 
692
- # Iterate over each direct child div element
693
- for div in direct_child_div_elements:
694
- # Find the <a> tag within the div
695
- a_tag = div.find("a")
696
- if a_tag:
697
- # Extract the text from the <a> tag and append it to the output list
698
- text_number.append(a_tag.text.strip())
699
 
700
- # Print the output list
701
- # print(text_number)
702
- # print(len(text_number))
703
 
704
- ### link to each Text in each chapter
705
- text_page = text_number[0]
706
 
707
- # Split the text by spaces
708
- words = text_page.split()
709
 
710
- # Find the text representing the number
711
- text_number = words[1].strip(":") # Assuming the number text is the second word
712
 
713
- # print(f"chapter_number - {chapter_number} : text_number - {text_number}")
714
- texturl = baseurl + chapter_number + "/" + text_number
715
 
716
- # print(texturl)
717
 
718
- main(url=texturl)
719
 
720
- st.title("Display HTML File in Streamlit")
721
 
722
- # Path to the HTML file
723
- html_file_path = "../Transformers/Bg. 1.1.html"
724
 
725
- try:
726
- # Read the HTML file
727
- with open(html_file_path, "r", encoding="utf-8") as file:
728
- html_content = file.read()
729
 
730
- # Display the HTML content using the 'st.components.v1.html' component
731
- expanded = st.checkbox("Expand HTML page")
732
- if expanded:
733
- st.components.v1.html(html_content, height=1600, width=680)
734
- else:
735
- st.components.v1.html(html_content, height=600)
736
- except FileNotFoundError:
737
- st.error(f"HTML file '{html_file_path}' not found!")
738
 
739
- import streamlit as st
740
 
741
 
742
- st.title("Streamlit Tabs Example")
743
 
744
- # Add tabs to the sidebar
745
- tabs = st.sidebar.radio("Navigation", ["Home", "About", "Settings"])
746
 
747
- # Display different content based on the selected tab
748
- if tabs == "Home":
749
- st.header("Home Page")
750
- st.write("Welcome to the Home page!")
751
 
752
- elif tabs == "About":
753
- st.header("About Page")
754
- st.write("This is the About page.")
755
 
756
- elif tabs == "Settings":
757
- st.header("Settings Page")
758
- st.write("Here you can configure your settings.")
759
 
760
- st.title("Netflix-like Grid Display")
761
 
762
 
763
 
764
- import streamlit as st
765
- import os
766
- import streamlit.components.v1 as components
767
-
768
- # Define movie data
769
- movies = [
770
- {"title": "Movie 1", "poster_path": "../Transformers/Bg. 1.1.html"},
771
- {"title": "Movie 2", "poster_url": "https://via.placeholder.com/150"},
772
- {"title": "Movie 3", "poster_url": "https://via.placeholder.com/150"},
773
- {"title": "Movie 4", "poster_url": "https://via.placeholder.com/150"},
774
- {"title": "Movie 5", "poster_url": "https://via.placeholder.com/150"},
775
- {"title": "Movie 6", "poster_url": "https://via.placeholder.com/150"},
776
- {"title": "Movie 7", "poster_url": "https://via.placeholder.com/150"},
777
- {"title": "Movie 8", "poster_url": "https://via.placeholder.com/150"},
778
- ]
779
 
780
  # Display movies in a grid
781
  # num_columns = 4
 
525
  # print(f"An error occurred: {e}")
526
 
527
 
528
+ # import streamlit as st
529
+ # import requests
530
+ # from bs4 import BeautifulSoup
531
 
532
+ # def fetch_html(url):
533
+ # # Fetch the webpage content
534
+ # response = requests.get(url)
535
+ # if response.status_code == 200:
536
+ # return response.content
537
+ # else:
538
+ # st.error(f"Failed to fetch webpage. Status code: {response.status_code}")
539
 
540
+ # def scrape_data(html_content):
541
+ # # Parse HTML content
542
+ # soup = BeautifulSoup(html_content, "html.parser")
543
 
544
+ # # Scrape data (replace this with your specific scraping logic)
545
+ # data = soup.find_all(class_="container first-container")
546
 
547
+ # return data
548
 
549
+ # def main(url):
550
+ # st.title("Webpage Scraper")
551
 
552
+ # # User input for webpage URL
553
+ # url = st.text_input("Enter the URL of the webpage:", value=url)
554
 
555
+ # # Convert webpage to HTML and scrape data
556
+ # if st.button("Scrape Data"):
557
+ # if url:
558
+ # html_content = fetch_html(url)
559
+ # str_content = """ """
560
+ # if html_content:
561
+ # data = scrape_data(html_content)
562
+ # # st.title("HTML Page Display")
563
+ # # st.components.v1.html(html_content, height=1600, width=800)
564
+ # # Display scraped data in a new tab
565
+ # with st.expander("Scraped Data", expanded=True):
566
+ # for item in data:
567
+ # # Convert item to string and display
568
+ # str_content += str(item)
569
+ # # st.write(str(item))
570
+ # # st.title("HTML Page Display")
571
+ # # st.components.v1.html(data, height=1600, width=800)
572
+ # # st.title("HTML Page Display")
573
+ # st.components.v1.html(str_content, height=1600, width=680)
574
  # main()
575
 
576
 
 
602
 
603
 
604
 
605
+ # import requests
606
+ # from bs4 import BeautifulSoup
607
 
608
+ # # URL of the webpage
609
+ # baseurl = "https://vedabase.io/en/library/bg/"
610
 
611
+ # # Fetch the webpage content
612
+ # response = requests.get(baseurl)
613
+ # if response.status_code == 200:
614
+ # html_content = response.content
615
 
616
+ # # Parse HTML content
617
+ # soup = BeautifulSoup(html_content, "html.parser")
618
 
619
+ # # Find all direct child div elements with class="r-chapter"
620
+ # direct_child_div_elements = soup.select("div.col-12 > div.r-chapter")
621
 
622
+ # # List to store the extracted text
623
+ # output = []
624
 
625
+ # # Iterate over each direct child div element
626
+ # for div in direct_child_div_elements:
627
+ # # Find the <a> tag within the div
628
+ # a_tag = div.find("a")
629
+ # if a_tag:
630
+ # # Extract the text from the <a> tag and append it to the output list
631
+ # output.append(a_tag.text.strip())
632
 
633
+ # # Print the output list
634
+ # # print(output)
635
+ # # print(len(output))
636
 
637
+ # ### Link to all chapters
638
+ # import re
639
 
640
+ # # Sample text
641
+ # chapter = output[4]
642
 
643
+ # text_to_number = {
644
+ # "One": "1",
645
+ # "Two": "2",
646
+ # "Three": "3",
647
+ # "Four": "4",
648
+ # "Five": "5",
649
+ # "Six": "6",
650
+ # "Seven": "7",
651
+ # "Eight": "8",
652
+ # "Nine": "9",
653
+ # "Ten": "10",
654
+ # # Add more numbers if needed
655
+ # }
656
 
657
+ # # Split the text by spaces
658
+ # words = chapter.split()
659
 
660
+ # # Find the text representing the number
661
+ # number_text = words[1].strip(":") # Assuming the number text is the second word
662
 
663
+ # # Extract the numeric part
664
+ # chapter_number = text_to_number[number_text]
665
 
666
+ # # Print the chapter number
667
+ # # print(chapter_number)
668
 
669
+ # url = baseurl + chapter_number
670
 
671
+ # # print(url)
672
 
673
+ # ### all Texts in each chapter
674
 
675
+ # response = requests.get(url)
676
+ # if response.status_code == 200:
677
+ # html_content = response.content
678
 
679
+ # # Parse HTML content
680
+ # text = BeautifulSoup(html_content, "html.parser")
681
 
682
+ # # print(text)
683
 
684
+ # # Find all direct child div elements with class="r-chapter"
685
+ # direct_child_div_elements = text.select("div.col-12 > dl.r.r-verse")
686
 
687
+ # # print(direct_child_div_elements)
688
 
689
+ # # List to store the extracted text
690
+ # text_number = []
691
 
692
+ # # Iterate over each direct child div element
693
+ # for div in direct_child_div_elements:
694
+ # # Find the <a> tag within the div
695
+ # a_tag = div.find("a")
696
+ # if a_tag:
697
+ # # Extract the text from the <a> tag and append it to the output list
698
+ # text_number.append(a_tag.text.strip())
699
 
700
+ # # Print the output list
701
+ # # print(text_number)
702
+ # # print(len(text_number))
703
 
704
+ # ### link to each Text in each chapter
705
+ # text_page = text_number[0]
706
 
707
+ # # Split the text by spaces
708
+ # words = text_page.split()
709
 
710
+ # # Find the text representing the number
711
+ # text_number = words[1].strip(":") # Assuming the number text is the second word
712
 
713
+ # # print(f"chapter_number - {chapter_number} : text_number - {text_number}")
714
+ # texturl = baseurl + chapter_number + "/" + text_number
715
 
716
+ # # print(texturl)
717
 
718
+ # main(url=texturl)
719
 
720
+ # st.title("Display HTML File in Streamlit")
721
 
722
+ # # Path to the HTML file
723
+ # html_file_path = "../Transformers/Bg. 1.1.html"
724
 
725
+ # try:
726
+ # # Read the HTML file
727
+ # with open(html_file_path, "r", encoding="utf-8") as file:
728
+ # html_content = file.read()
729
 
730
+ # # Display the HTML content using the 'st.components.v1.html' component
731
+ # expanded = st.checkbox("Expand HTML page")
732
+ # if expanded:
733
+ # st.components.v1.html(html_content, height=1600, width=680)
734
+ # else:
735
+ # st.components.v1.html(html_content, height=600)
736
+ # except FileNotFoundError:
737
+ # st.error(f"HTML file '{html_file_path}' not found!")
738
 
739
+ # import streamlit as st
740
 
741
 
742
+ # st.title("Streamlit Tabs Example")
743
 
744
+ # # Add tabs to the sidebar
745
+ # tabs = st.sidebar.radio("Navigation", ["Home", "About", "Settings"])
746
 
747
+ # # Display different content based on the selected tab
748
+ # if tabs == "Home":
749
+ # st.header("Home Page")
750
+ # st.write("Welcome to the Home page!")
751
 
752
+ # elif tabs == "About":
753
+ # st.header("About Page")
754
+ # st.write("This is the About page.")
755
 
756
+ # elif tabs == "Settings":
757
+ # st.header("Settings Page")
758
+ # st.write("Here you can configure your settings.")
759
 
760
+ # st.title("Netflix-like Grid Display")
761
 
762
 
763
 
764
+ # import streamlit as st
765
+ # import os
766
+ # import streamlit.components.v1 as components
767
+
768
+ # # Define movie data
769
+ # movies = [
770
+ # {"title": "Movie 1", "poster_path": "../Transformers/Bg. 1.1.html"},
771
+ # {"title": "Movie 2", "poster_url": "https://via.placeholder.com/150"},
772
+ # {"title": "Movie 3", "poster_url": "https://via.placeholder.com/150"},
773
+ # {"title": "Movie 4", "poster_url": "https://via.placeholder.com/150"},
774
+ # {"title": "Movie 5", "poster_url": "https://via.placeholder.com/150"},
775
+ # {"title": "Movie 6", "poster_url": "https://via.placeholder.com/150"},
776
+ # {"title": "Movie 7", "poster_url": "https://via.placeholder.com/150"},
777
+ # {"title": "Movie 8", "poster_url": "https://via.placeholder.com/150"},
778
+ # ]
779
 
780
  # Display movies in a grid
781
  # num_columns = 4