KrishanRao committed on
Commit 5c6ed8c · verified · 1 Parent(s): ff12a50

Upload app.py

Files changed (1)
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ # In[ ]:
+
+
+ import gradio as gr
+ from selenium import webdriver
+ from selenium.webdriver.chrome.options import Options
+ from bs4 import BeautifulSoup
+ from transformers import pipeline
+ import time
+
+ # Set up Selenium with headless Chrome
+ def setup_driver():
+     options = Options()
+     options.add_argument("--headless")  # The options.headless attribute was removed in newer Selenium releases
+     driver = webdriver.Chrome(options=options)  # Make sure you have 'chromedriver' installed
+     return driver
+
+ # Function to extract text from the URL using Selenium
+ def extract_text(url):
+     try:
+         driver = setup_driver()
+         try:
+             driver.get(url)
+             time.sleep(3)  # Wait for the page to load completely
+             page_source = driver.page_source
+         finally:
+             driver.quit()  # Always close the browser, even if loading fails
+
+         soup = BeautifulSoup(page_source, "html.parser")
+         text = ' '.join(soup.stripped_strings)
+         return text
+     except Exception as e:
+         return f"Error extracting text from URL: {str(e)}"
+
+ # Load Hugging Face NER model (aggregation_strategy="simple" merges word pieces
+ # and labels each entity with an 'entity_group': PER, ORG, LOC, or MISC)
+ try:
+     ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
+ except Exception as e:
+     ner_model = None
+     print(f"Error loading model: {str(e)}")
+
+ # Function to extract information using the Hugging Face model
+ def extract_info_with_model(text):
+     if not ner_model:
+         return {
+             "Keytags": "Model loading failed.",
+             "Amenities": "Model loading failed.",
+             "Facilities": "Model loading failed.",
+             "Seller Name": "Model loading failed.",
+             "Location Details": "Model loading failed."
+         }
+
+     try:
+         # Apply named entity recognition (NER) to extract entities from the text.
+         # BERT accepts at most 512 tokens, so roughly cap the input to keep long pages from erroring out.
+         ner_results = ner_model(text[:2000])
+
+         # Initialize variables
+         keytags = []
+         seller_name = ""
+         location_details = ""
+         amenities = ""
+         facilities = ""
+
+         # Search for relevant named entities (CoNLL-03 labels: PER, ORG, LOC, MISC)
+         for entity in ner_results:
+             if entity['entity_group'] == 'ORG':
+                 keytags.append(entity['word'])  # Example: company or key term (this can be changed)
+             elif entity['entity_group'] == 'PER':
+                 seller_name = entity['word']  # If a person is mentioned, treat it as the seller name
+             elif entity['entity_group'] == 'LOC':
+                 location_details = entity['word']  # Location entity as location details
+
+         # For amenities and facilities, modify this logic or use additional models (e.g., question-answering models)
+         amenities = "No amenities found"  # Placeholder for the amenities
+         facilities = "No facilities found"  # Placeholder for the facilities
+
+         return {
+             "Keytags": ", ".join(keytags) if keytags else "No keytags found",
+             "Amenities": amenities,
+             "Facilities": facilities,
+             "Seller Name": seller_name if seller_name else "No seller name found",
+             "Location Details": location_details if location_details else "No location details found"
+         }
+     except Exception as e:
+         return {
+             "Keytags": f"Error processing text: {str(e)}",
+             "Amenities": f"Error processing text: {str(e)}",
+             "Facilities": f"Error processing text: {str(e)}",
+             "Seller Name": f"Error processing text: {str(e)}",
+             "Location Details": f"Error processing text: {str(e)}"
+         }
+
+ # Function to combine the extraction process (from URL + model processing)
+ def get_info(url):
+     text = extract_text(url)
+     if text.startswith("Error"):
+         return text, text, text, text, text  # Return the error message for all outputs
+
+     extracted_info = extract_info_with_model(text)
+
+     return (
+         extracted_info["Keytags"],
+         extracted_info["Amenities"],
+         extracted_info["Facilities"],
+         extracted_info["Seller Name"],
+         extracted_info["Location Details"]
+     )
+
+ # Gradio Interface to allow user input and display output
+ demo = gr.Interface(
+     fn=get_info,
+     inputs="text",  # Input is a URL
+     outputs=["text", "text", "text", "text", "text"],  # Outputs for each field (Keytags, Amenities, etc.)
+     title="Real Estate Info Extractor",
+     description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
+ )
+
+ if __name__ == "__main__":
+     demo.launch(show_api=False)
+