KrishanRao committed on
Commit
9f0b6cd
·
verified ·
1 Parent(s): 5c6ed8c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -121
app.py DELETED
@@ -1,121 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
5
-
6
-
7
- import gradio as gr
8
- from selenium import webdriver
9
- from selenium.webdriver.chrome.options import Options
10
- from bs4 import BeautifulSoup
11
- from transformers import pipeline
12
- import time
13
-
14
# Set up Selenium with headless Chrome
def setup_driver():
    """Create and return a headless Chrome WebDriver.

    Requires a compatible ``chromedriver`` binary on PATH.

    Returns:
        selenium.webdriver.Chrome: a running headless browser instance.
        The caller owns it and must call ``driver.quit()``.
    """
    options = Options()
    # `Options.headless = True` was deprecated in Selenium 4.x and the
    # attribute was later removed entirely; the supported way to request
    # headless mode is the Chrome CLI flag.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)  # Make sure you have 'chromedriver' installed
    return driver
20
-
21
# Function to extract text from the URL using Selenium
def extract_text(url):
    """Fetch ``url`` in headless Chrome and return its visible text.

    Args:
        url: page address to load.

    Returns:
        str: all visible (stripped) strings of the rendered page joined by
        spaces, or an error string starting with
        ``"Error extracting text from URL:"`` on any failure.
    """
    try:
        driver = setup_driver()
        try:
            driver.get(url)
            time.sleep(3)  # crude wait for JS-rendered content to load
            page_source = driver.page_source
        finally:
            # Always release the browser process, even when the fetch
            # fails; otherwise every failed request leaks a Chrome instance.
            driver.quit()

        soup = BeautifulSoup(page_source, "html.parser")
        text = ' '.join(soup.stripped_strings)
        return text
    except Exception as e:
        return f"Error extracting text from URL: {str(e)}"
35
-
36
# Load Hugging Face model (for extracting named entities or QA)
# Loaded once at import time; downloading/initializing the model can fail
# (no network, missing weights), in which case `ner_model` is left as None
# and extract_info_with_model() degrades to a "Model loading failed." reply.
try:
    ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
except Exception as e:
    ner_model = None
    print(f"Error loading model: {str(e)}")
42
-
43
# Function to extract information using Hugging Face model
def extract_info_with_model(text):
    """Run NER over ``text`` and bucket entities into real-estate fields.

    Args:
        text: plain text extracted from a listing page.

    Returns:
        dict[str, str] with keys ``Keytags``, ``Amenities``, ``Facilities``,
        ``Seller Name`` and ``Location Details``. Each value is either the
        extracted string, a "not found" placeholder, or an error message.
    """
    if not ner_model:
        return {
            "Keytags": "Model loading failed.",
            "Amenities": "Model loading failed.",
            "Facilities": "Model loading failed.",
            "Seller Name": "Model loading failed.",
            "Location Details": "Model loading failed."
        }

    try:
        # Apply named entity recognition (NER) to extract entities from the text
        ner_results = ner_model(text)

        # Initialize variables
        keytags = []
        seller_name = ""
        location_details = ""

        # Search for relevant named entities.
        # BUGFIX: the transformers token-classification pipeline returns dicts
        # keyed by 'entity' (or 'entity_group' with an aggregation strategy),
        # NOT 'label', and this CoNLL-03 model emits IOB tags such as
        # 'B-ORG', 'I-PER', 'B-LOC' — not spaCy-style 'ORG'/'PERSON'/'GPE'.
        # The original comparisons could never match.
        for entity in ner_results:
            label = entity.get('entity') or entity.get('entity_group') or ''
            if label.endswith('ORG'):
                keytags.append(entity['word'])  # Example: Company or key term (this can be changed)
            elif label.endswith('PER'):
                seller_name = entity['word']  # If a person is mentioned, consider it the seller name
            elif label.endswith('LOC'):
                location_details = entity['word']  # Location entity as location details

        # For amenities and facilities, you can modify the logic or use additional models (e.g., question-answering models)
        amenities = "No amenities found"  # Placeholder for the amenities
        facilities = "No facilities found"  # Placeholder for the facilities

        return {
            "Keytags": ", ".join(keytags) if keytags else "No keytags found",
            "Amenities": amenities,
            "Facilities": facilities,
            "Seller Name": seller_name if seller_name else "No seller name found",
            "Location Details": location_details if location_details else "No location details found"
        }
    except Exception as e:
        return {
            "Keytags": f"Error processing text: {str(e)}",
            "Amenities": f"Error processing text: {str(e)}",
            "Facilities": f"Error processing text: {str(e)}",
            "Seller Name": f"Error processing text: {str(e)}",
            "Location Details": f"Error processing text: {str(e)}"
        }
93
-
94
# Function to combine the extraction process (from URL + model processing)
def get_info(url):
    """Fetch ``url``, run entity extraction, and return the five UI fields.

    Args:
        url: listing page address entered by the user.

    Returns:
        tuple[str, str, str, str, str]: (Keytags, Amenities, Facilities,
        Seller Name, Location Details). On a fetch failure the same error
        message is returned in all five slots.
    """
    text = extract_text(url)
    # BUGFIX: match the exact sentinel prefix produced by extract_text();
    # the original `"Error" in text` treated any page whose visible text
    # merely contains the word "Error" as a failed fetch.
    if text.startswith("Error extracting text from URL:"):
        return text, text, text, text, text  # Return the error message for all outputs

    extracted_info = extract_info_with_model(text)

    return (
        extracted_info["Keytags"],
        extracted_info["Amenities"],
        extracted_info["Facilities"],
        extracted_info["Seller Name"],
        extracted_info["Location Details"],
    )
109
-
110
# Gradio Interface to allow user input and display output.
# One text input (the listing URL) fans out to five text outputs, one per
# extracted field, in the same order get_info() returns them.
demo = gr.Interface(
    fn=get_info,
    inputs="text",  # Input is a URL
    outputs=["text", "text", "text", "text", "text"],  # Outputs for each field (Keytags, Amenities, etc.)
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
)

# Launch only when run as a script (not when imported); the API page is
# disabled since this app is meant to be used through the web UI.
if __name__ == "__main__":
    demo.launch(show_api=False)
121
-