KrishanRao committed
Commit 83fd0a6 · verified · 1 Parent(s): 9f0b6cd

Upload app.py

Files changed (1):
  app.py  +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
#!/usr/bin/env python
# coding: utf-8

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline


# Function to extract the visible text from a URL using requests + BeautifulSoup
def extract_text(url):
    try:
        # Browser-like headers to reduce the chance of the request being blocked
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept-Encoding': 'gzip, deflate',  # 'br' omitted: requests only decodes Brotli if the brotli package is installed
            'Connection': 'keep-alive',
            'Referer': 'https://www.mansionglobal.com/',  # A referer can help bypass some restrictions
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'max-age=0'
        }

        # Send the GET request with headers and a timeout
        response = requests.get(url, headers=headers, timeout=30)

        # Raise an error for bad status codes
        response.raise_for_status()

        # Parse the HTML and join all visible text fragments
        soup = BeautifulSoup(response.text, "html.parser")
        text = ' '.join(soup.stripped_strings)
        return text
    except requests.exceptions.RequestException as e:
        return f"Error extracting text from URL: {str(e)}"


# Load the Hugging Face NER model; aggregation merges word pieces into whole entities
try:
    ner_model = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple"
    )
except Exception as e:
    ner_model = None
    print(f"Error loading model: {str(e)}")


# Function to extract structured information from the text using the NER model
def extract_info_with_model(text):
    if not ner_model:
        return {
            "Keytags": "Model loading failed.",
            "Amenities": "Model loading failed.",
            "Facilities": "Model loading failed.",
            "Seller Name": "Model loading failed.",
            "Location Details": "Model loading failed."
        }

    try:
        # Run NER on a truncated slice of the text to stay within the model's 512-token input limit
        ner_results = ner_model(text[:2000])

        # Initialize result fields
        keytags = []
        seller_name = ""
        location_details = ""

        # Map the CoNLL-03 entity groups (ORG, PER, LOC) onto the output fields
        for entity in ner_results:
            if entity['entity_group'] == 'ORG':
                keytags.append(entity['word'])      # Organizations as key terms
            elif entity['entity_group'] == 'PER':
                seller_name = entity['word']        # If a person is mentioned, treat it as the seller name
            elif entity['entity_group'] == 'LOC':
                location_details = entity['word']   # Locations as the location details

        # Amenities and facilities are not covered by NER; additional models (e.g., question answering) could fill these in
        amenities = "No amenities found"
        facilities = "No facilities found"

        return {
            "Keytags": ", ".join(keytags) if keytags else "No keytags found",
            "Amenities": amenities,
            "Facilities": facilities,
            "Seller Name": seller_name if seller_name else "No seller name found",
            "Location Details": location_details if location_details else "No location details found"
        }
    except Exception as e:
        error = f"Error processing text: {str(e)}"
        return {
            "Keytags": error,
            "Amenities": error,
            "Facilities": error,
            "Seller Name": error,
            "Location Details": error
        }


# Combine the two steps: fetch the page text, then run the model over it
def get_info(url):
    text = extract_text(url)
    if text.startswith("Error extracting text"):
        return text, text, text, text, text  # Return the error message for all outputs

    extracted_info = extract_info_with_model(text)

    return (
        extracted_info["Keytags"],
        extracted_info["Amenities"],
        extracted_info["Facilities"],
        extracted_info["Seller Name"],
        extracted_info["Location Details"]
    )


# Gradio interface: a single URL input and one text output per field
demo = gr.Interface(
    fn=get_info,
    inputs="text",  # Input is a URL
    outputs=["text", "text", "text", "text", "text"],  # Keytags, Amenities, Facilities, Seller Name, Location Details
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
)

if __name__ == "__main__":
    demo.launch(show_api=False)