KrishanRao committed on
Commit 5c6ed8c · verified · 1 Parent(s): ff12a50

Upload app.py

Files changed (1)
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ # In[ ]:
+
+
+ import gradio as gr
+ from selenium import webdriver
+ from selenium.webdriver.chrome.options import Options
+ from bs4 import BeautifulSoup
+ from transformers import pipeline
+ import time
+
+ # Set up Selenium with headless Chrome
+ def setup_driver():
+     options = Options()
+     options.add_argument("--headless")  # The options.headless attribute was removed in newer Selenium releases
+     driver = webdriver.Chrome(options=options)  # Make sure you have 'chromedriver' installed
+     return driver
+
+ # Function to extract text from the URL using Selenium
+ def extract_text(url):
+     try:
+         driver = setup_driver()
+         try:
+             driver.get(url)
+             time.sleep(3)  # Wait for the page to load completely
+             page_source = driver.page_source
+         finally:
+             driver.quit()  # Always close the browser, even if loading fails
+
+         soup = BeautifulSoup(page_source, "html.parser")
+         text = ' '.join(soup.stripped_strings)
+         return text
+     except Exception as e:
+         return f"Error extracting text from URL: {str(e)}"
+
+ # Load Hugging Face NER model (aggregation_strategy="simple" merges word pieces
+ # and labels each entity with an 'entity_group': PER, ORG, LOC, or MISC)
+ try:
+     ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
+ except Exception as e:
+     ner_model = None
+     print(f"Error loading model: {str(e)}")
+
+ # Function to extract information using the Hugging Face model
+ def extract_info_with_model(text):
+     if not ner_model:
+         return {
+             "Keytags": "Model loading failed.",
+             "Amenities": "Model loading failed.",
+             "Facilities": "Model loading failed.",
+             "Seller Name": "Model loading failed.",
+             "Location Details": "Model loading failed."
+         }
+
+     try:
+         # Apply named entity recognition (NER) to extract entities from the text.
+         # BERT accepts at most 512 tokens, so roughly cap the input to keep long pages from erroring out.
+         ner_results = ner_model(text[:2000])
+
+         # Initialize variables
+         keytags = []
+         seller_name = ""
+         location_details = ""
+         amenities = ""
+         facilities = ""
+
+         # Search for relevant named entities (CoNLL-03 labels: PER, ORG, LOC, MISC)
+         for entity in ner_results:
+             if entity['entity_group'] == 'ORG':
+                 keytags.append(entity['word'])  # Example: company or key term (this can be changed)
+             elif entity['entity_group'] == 'PER':
+                 seller_name = entity['word']  # If a person is mentioned, treat it as the seller name
+             elif entity['entity_group'] == 'LOC':
+                 location_details = entity['word']  # Location entity as location details
+
+         # For amenities and facilities, modify this logic or use additional models (e.g., question-answering models)
+         amenities = "No amenities found"  # Placeholder for the amenities
+         facilities = "No facilities found"  # Placeholder for the facilities
+
+         return {
+             "Keytags": ", ".join(keytags) if keytags else "No keytags found",
+             "Amenities": amenities,
+             "Facilities": facilities,
+             "Seller Name": seller_name if seller_name else "No seller name found",
+             "Location Details": location_details if location_details else "No location details found"
+         }
+     except Exception as e:
+         return {
+             "Keytags": f"Error processing text: {str(e)}",
+             "Amenities": f"Error processing text: {str(e)}",
+             "Facilities": f"Error processing text: {str(e)}",
+             "Seller Name": f"Error processing text: {str(e)}",
+             "Location Details": f"Error processing text: {str(e)}"
+         }
+
+ # Function to combine the extraction process (from URL + model processing)
+ def get_info(url):
+     text = extract_text(url)
+     if text.startswith("Error"):
+         return text, text, text, text, text  # Return the error message for all outputs
+
+     extracted_info = extract_info_with_model(text)
+
+     return (
+         extracted_info["Keytags"],
+         extracted_info["Amenities"],
+         extracted_info["Facilities"],
+         extracted_info["Seller Name"],
+         extracted_info["Location Details"]
+     )
+
+ # Gradio Interface to allow user input and display output
+ demo = gr.Interface(
+     fn=get_info,
+     inputs="text",  # Input is a URL
+     outputs=["text", "text", "text", "text", "text"],  # Outputs for each field (Keytags, Amenities, etc.)
+     title="Real Estate Info Extractor",
+     description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
+ )
+
+ if __name__ == "__main__":
+     demo.launch(show_api=False)
+