Spaces:

KrishanRao
/

URL

Sleeping

App Files Files Community

KrishanRao committed on Jan 14

Commit

83fd0a6

verified ·

1 Parent(s): 9f0b6cd

Upload app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[ ]:
+import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import os
+# Function to extract text from the URL using requests
+def extract_text(url):
+    try:
+        # Enhanced headers to simulate a real browser request
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive',
+            'Referer': 'https://www.mansionglobal.com/',  # Adding referer might help bypass restrictions
+            'Upgrade-Insecure-Requests': '1',  # Can sometimes help
+            'Cache-Control': 'max-age=0'
+        }
+        # Sending GET request with headers
+        response = requests.get(url, headers=headers)
+        # Raise an error for bad status codes
+        response.raise_for_status()
+        # Parse HTML and extract text
+        soup = BeautifulSoup(response.text, "html.parser")
+        text = ' '.join(soup.stripped_strings)
+        return text
+    except requests.exceptions.RequestException as e:
+        return f"Error extracting text from URL: {str(e)}"
+# Load Hugging Face model (for extracting named entities or QA)
+try:
+    ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
+except Exception as e:
+    ner_model = None
+    print(f"Error loading model: {str(e)}")
+# Function to extract information using Hugging Face model
+def extract_info_with_model(text):
+    if not ner_model:
+        return {
+            "Keytags": "Model loading failed.",
+            "Amenities": "Model loading failed.",
+            "Facilities": "Model loading failed.",
+            "Seller Name": "Model loading failed.",
+            "Location Details": "Model loading failed."
+        }
+    try:
+        # Apply named entity recognition (NER) to extract entities from the text
+        ner_results = ner_model(text)
+        # Initialize variables
+        keytags = []
+        seller_name = ""
+        location_details = ""
+        amenities = ""
+        facilities = ""
+        # Search for relevant named entities
+        for entity in ner_results:
+            if entity['label'] == 'ORG':
+                keytags.append(entity['word'])  # Example: Company or key term (this can be changed)
+            elif entity['label'] == 'PERSON':
+                seller_name = entity['word']  # If a person is mentioned, consider it the seller name
+            elif entity['label'] == 'GPE':
+                location_details = entity['word']  # Geopolitical entity as location
+        # For amenities and facilities, you can modify the logic or use additional models (e.g., question-answering models)
+        amenities = "No amenities found"  # Placeholder for the amenities
+        facilities = "No facilities found"  # Placeholder for the facilities
+        return {
+            "Keytags": ", ".join(keytags) if keytags else "No keytags found",
+            "Amenities": amenities,
+            "Facilities": facilities,
+            "Seller Name": seller_name if seller_name else "No seller name found",
+            "Location Details": location_details if location_details else "No location details found"
+        }
+    except Exception as e:
+        return {
+            "Keytags": f"Error processing text: {str(e)}",
+            "Amenities": f"Error processing text: {str(e)}",
+            "Facilities": f"Error processing text: {str(e)}",
+            "Seller Name": f"Error processing text: {str(e)}",
+            "Location Details": f"Error processing text: {str(e)}"
+        }
+# Function to combine the extraction process (from URL + model processing)
+def get_info(url):
+    text = extract_text(url)
+    if "Error" in text:
+        return text, text, text, text, text  # Return the error message for all outputs
+    extracted_info = extract_info_with_model(text)
+    return (
+        extracted_info["Keytags"],
+        extracted_info["Amenities"],
+        extracted_info["Facilities"],
+        extracted_info["Seller Name"],
+        extracted_info["Location Details"]
+    )
+# Gradio Interface to allow user input and display output
+demo = gr.Interface(
+    fn=get_info,
+    inputs="text",  # Input is a URL
+    outputs=["text", "text", "text", "text", "text"],  # Outputs for each field (Keytags, Amenities, etc.)
+    title="Real Estate Info Extractor",
+    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
+)
+if __name__ == "__main__":
+    demo.launch(show_api=False)