# File size: 4,719 Bytes
# 83fd0a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
import os

# Function to extract text from the URL using requests
def extract_text(url, timeout=15):
    """Download *url* and return the page's text content as a single string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            caller on an unresponsive host.

    Returns:
        All stripped text fragments of the parsed HTML joined by single
        spaces. If the request fails (connection problem, timeout, invalid
        URL, or an HTTP error status), a message starting with
        ``"Error extracting text from URL: "`` is returned instead of raising.
    """
    # Browser-like headers; some sites reject the default requests user agent.
    # 'Accept-Encoding' is deliberately NOT overridden: requests advertises
    # only the encodings it can actually decode, whereas a hard-coded 'br'
    # can yield an undecodable body when no Brotli library is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://www.mansionglobal.com/',  # Adding referer might help bypass restrictions
        'Upgrade-Insecure-Requests': '1',  # Can sometimes help
        'Cache-Control': 'max-age=0',
    }

    # Only the network call can raise RequestException, so keep the try
    # body limited to it.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Raise an error for bad status codes
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error extracting text from URL: {str(e)}"

    # Parse HTML and extract text
    soup = BeautifulSoup(response.text, "html.parser")
    return ' '.join(soup.stripped_strings)

# Load Hugging Face model (for extracting named entities or QA)
# Start from "no model" so the rest of the module can check for None; the
# name is only rebound when the pipeline is constructed successfully.
ner_model = None
try:
    ner_model = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
    )
except Exception as load_error:
    # Loading is best-effort: report the problem and keep running without
    # a model instead of aborting at import time.
    print(f"Error loading model: {str(load_error)}")

# Helpers for extract_info_with_model
_RESULT_FIELDS = (
    "Keytags",
    "Amenities",
    "Facilities",
    "Seller Name",
    "Location Details",
)


def _uniform_result(message):
    """Return the five-field result dict with *message* in every field."""
    return dict.fromkeys(_RESULT_FIELDS, message)


def _group_entities(ner_results):
    """Collapse raw NER pipeline output into ``(label, text)`` pairs.

    Handles both aggregated items (label under ``entity_group``) and
    token-level items (label under ``entity``, e.g. ``"I-PER"``, with a
    token ``index``). A legacy ``label`` key is accepted as a fallback.
    Token-level items that are adjacent and share a label are merged, and
    word-piece continuations (``"##..."``) are glued onto the previous token.
    """
    grouped = []  # each item: [label, text, index of last merged token]
    for entity in ner_results:
        raw_label = (
            entity.get('entity_group')
            or entity.get('entity')
            or entity.get('label')
            or ''
        )
        word = entity.get('word') or ''
        if not raw_label or not word:
            continue

        # "B-PER" -> prefix "B", label "PER"; "PER" -> prefix "", label "PER".
        prefix, _, bare_label = raw_label.rpartition('-')
        label = bare_label.upper()
        index = entity.get('index')
        is_subword = word.startswith('##')

        previous = grouped[-1] if grouped else None
        adjacent = (
            previous is not None
            and previous[0] == label
            and index is not None
            and previous[2] is not None
            and index == previous[2] + 1
        )
        # A "B-" tag explicitly starts a new entity, unless the token is a
        # word-piece continuation of the previous one.
        if adjacent and (is_subword or prefix != 'B'):
            previous[1] += word[2:] if is_subword else ' ' + word
            previous[2] = index
        else:
            grouped.append([label, word[2:] if is_subword else word, index])
    return [(label, text) for label, text, _ in grouped]


# Function to extract information using Hugging Face model
def extract_info_with_model(text):
    """Run NER over *text* and map the entities onto the listing fields.

    Args:
        text: Plain text of the article.

    Returns:
        A dict with the keys ``"Keytags"``, ``"Amenities"``,
        ``"Facilities"``, ``"Seller Name"`` and ``"Location Details"``.
        Organisations become keytags (unique, in order of appearance); the
        last person entity becomes the seller name; the last location entity
        becomes the location details. Amenities and facilities are
        placeholders. If the model is unavailable every field is
        ``"Model loading failed."``; if processing raises, every field
        carries ``"Error processing text: <reason>"``.
    """
    if ner_model is None:
        return _uniform_result("Model loading failed.")

    try:
        # Nothing to analyse for empty/blank input, so skip the model call.
        ner_results = ner_model(text) if text and text.strip() else []

        keytags = []
        seller_name = ""
        location_details = ""

        # Accept both CoNLL-style tags (PER/LOC) and the PERSON/GPE names
        # the original code looked for.
        for label, span in _group_entities(ner_results):
            if label == 'ORG':
                if span not in keytags:
                    keytags.append(span)  # Example: Company or key term (this can be changed)
            elif label in ('PER', 'PERSON'):
                seller_name = span  # If a person is mentioned, consider it the seller name
            elif label in ('LOC', 'GPE'):
                location_details = span  # Location / geopolitical entity

        # For amenities and facilities, you can modify the logic or use additional models (e.g., question-answering models)
        return {
            "Keytags": ", ".join(keytags) if keytags else "No keytags found",
            "Amenities": "No amenities found",  # Placeholder for the amenities
            "Facilities": "No facilities found",  # Placeholder for the facilities
            "Seller Name": seller_name if seller_name else "No seller name found",
            "Location Details": location_details if location_details else "No location details found",
        }
    except Exception as e:
        # UI boundary: surface the failure in every field rather than crash.
        return _uniform_result(f"Error processing text: {str(e)}")

# Function to combine the extraction process (from URL + model processing)
def get_info(url):
    """Fetch *url*, run entity extraction and return the five display fields.

    Args:
        url: Address of the article, as typed by the user.

    Returns:
        A 5-tuple ``(keytags, amenities, facilities, seller_name,
        location_details)`` of strings. If the page could not be fetched,
        the fetch error message is returned in all five positions.
    """
    # Users often paste URLs with stray whitespace.
    text = extract_text((url or "").strip())

    # Match the exact prefix extract_text uses for failures. A plain
    # `"Error" in text` would misreport any successfully fetched page that
    # merely contains the word "Error".
    if text.startswith("Error extracting text from URL:"):
        return (text,) * 5  # Return the error message for all outputs

    extracted_info = extract_info_with_model(text)

    return (
        extracted_info["Keytags"],
        extracted_info["Amenities"],
        extracted_info["Facilities"],
        extracted_info["Seller Name"],
        extracted_info["Location Details"],
    )

# Gradio Interface to allow user input and display output.
# Components are labelled explicitly so each of the five result boxes
# shows which field it holds; the order matches get_info's return tuple.
demo = gr.Interface(
    fn=get_info,
    inputs=gr.Textbox(label="Article URL"),  # Input is a URL
    outputs=[
        gr.Textbox(label="Keytags"),
        gr.Textbox(label="Amenities"),
        gr.Textbox(label="Facilities"),
        gr.Textbox(label="Seller Name"),
        gr.Textbox(label="Location Details"),
    ],
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(show_api=False)