Spaces:

KrishanRao
/

URL

Sleeping

App Files Files Community

KrishanRao committed on Jan 14

Commit

59f361f

verified ·

1 Parent(s): f6a3a56

Upload app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[ ]:
+import gradio as gr
+from urllib.request import urlopen, Request
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import os
+# Function to extract text from the URL
+def extract_text(url):
+    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+    html = urlopen(req).read()
+    text = ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
+    return text
+# Load Hugging Face model (for extracting named entities or QA)
+ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
+# Function to extract information using Hugging Face model
+def extract_info_with_model(text):
+    # Apply named entity recognition (NER) to extract entities from the text
+    ner_results = ner_model(text)
+    # Initialize variables
+    keytags = []
+    seller_name = ""
+    location_details = ""
+    amenities = ""
+    facilities = ""
+    # Search for relevant named entities
+    for entity in ner_results:
+        if entity['label'] == 'ORG':
+            keytags.append(entity['word'])  # Example: Company or key term (this can be changed)
+        elif entity['label'] == 'PERSON':
+            seller_name = entity['word']  # If a person is mentioned, consider it the seller name
+        elif entity['label'] == 'GPE':
+            location_details = entity['word']  # Geopolitical entity as location
+    # For amenities and facilities, you can modify the logic or use additional models (e.g., question-answering models)
+    amenities = "No amenities found"  # Placeholder for the amenities
+    facilities = "No facilities found"  # Placeholder for the facilities
+    return {
+        "Keytags": ", ".join(keytags) if keytags else "No keytags found",
+        "Amenities": amenities,
+        "Facilities": facilities,
+        "Seller Name": seller_name if seller_name else "No seller name found",
+        "Location Details": location_details if location_details else "No location details found"
+    }
+# Function to combine the extraction process (from URL + model processing)
+def get_info(url):
+    text = extract_text(url)
+    extracted_info = extract_info_with_model(text)
+    # Print debug to understand what's being returned
+    print(extracted_info)
+    # Ensure the information is returned in the expected format
+    return (
+        extracted_info["Keytags"],
+        extracted_info["Amenities"],
+        extracted_info["Facilities"],
+        extracted_info["Seller Name"],
+        extracted_info["Location Details"]
+    )
+# Gradio Interface to allow user input and display output
+demo = gr.Interface(
+    fn=get_info,
+    inputs="text",  # Input is a URL
+    outputs=["text", "text", "text", "text", "text"],  # Outputs for each field (Keytags, Amenities, etc.)
+    title="Real Estate Info Extractor",
+    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
+)
+if __name__ == "__main__":
+    demo.launch(show_api=False)