# Captured from a Hugging Face Spaces page (Space status at capture time: Sleeping).
import os
from urllib.parse import urlsplit
from urllib.request import urlopen, Request

import gradio as gr
from bs4 import BeautifulSoup
from transformers import pipeline
# Function to extract text from the URL
def extract_text(url, timeout=30):
    """Download a web page and return its visible text as a single string.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
            Only ``http`` and ``https`` URLs are accepted.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        Every text fragment of the parsed HTML, stripped of surrounding
        whitespace and joined with single spaces.

    Raises:
        ValueError: If ``url`` does not use the ``http`` or ``https`` scheme.
        urllib.error.URLError: If the page cannot be retrieved.
    """
    url = url.strip()

    # urlopen() also understands schemes such as file:// and ftp://. Refuse
    # those so a caller-supplied URL cannot be used to read local files.
    scheme = urlsplit(url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(
            f"Unsupported URL scheme {scheme!r}; expected http or https"
        )

    # A browser-like User-Agent is sent instead of urllib's default
    # (presumably because some sites reject the default one).
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    # The context manager closes the HTTP response even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
# Load Hugging Face model (for extracting named entities or QA)
# Here we use a named entity recognition model, but you can use a question answering model if needed.
# The pipeline is created once at import time and reused for every request.
# aggregation_strategy="simple" asks the pipeline to merge word-piece tokens
# into whole entity spans and to report each span's type under the
# 'entity_group' key instead of per-token IOB tags under 'entity'.
ner_model = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
# Function to extract information using Hugging Face model
def extract_info_with_model(text):
    """Run named entity recognition on ``text`` and summarize the entities.

    Organizations become key tags, the last person found becomes the seller
    name and the last location found becomes the location details. Amenities
    and facilities are not extracted yet and always carry placeholder text.

    Args:
        text: Plain text to analyze. Empty or whitespace-only text skips the
            model call entirely.

    Returns:
        A dict with the keys "Keytags", "Amenities", "Facilities",
        "Seller Name" and "Location Details"; every value is a string, with
        a "No ... found" message where nothing was detected.
    """
    # Nothing to analyze: avoid running inference on an empty document.
    ner_results = ner_model(text) if text and text.strip() else []

    keytags = []
    seller_name = ""
    location_details = ""

    # Search for relevant named entities
    for entity in ner_results:
        kind = _entity_kind(entity)
        if kind == 'ORG':
            keytags.append(entity['word'])  # Example: company or key term
        elif kind == 'PER':
            seller_name = entity['word']  # A person is taken to be the seller
        elif kind == 'LOC':
            location_details = entity['word']  # A location entity

    # For amenities and facilities, you can modify the logic or use additional
    # models (e.g., question-answering models). For now these are placeholders.
    amenities = "No amenities found"
    facilities = "No facilities found"

    return {
        "Keytags": ", ".join(keytags) if keytags else "No keytags found",
        "Amenities": amenities,
        "Facilities": facilities,
        "Seller Name": seller_name if seller_name else "No seller name found",
        "Location Details": location_details if location_details else "No location details found",
    }


# Alternative tag names (as used by other NER tag sets) mapped onto the
# CoNLL-style names compared against in extract_info_with_model.
_LABEL_ALIASES = {'PERSON': 'PER', 'GPE': 'LOC'}


def _entity_kind(entity):
    """Return the normalized entity type ('ORG', 'PER', 'LOC', ...) of one NER result."""
    # The type lives under 'entity_group' for aggregated pipeline output and
    # under 'entity' for per-token output; 'label' is accepted as a fallback.
    raw = entity.get('entity_group') or entity.get('entity') or entity.get('label') or ""
    # Per-token output carries IOB prefixes such as "B-PER" / "I-PER".
    if raw[:2] in ("B-", "I-"):
        raw = raw[2:]
    return _LABEL_ALIASES.get(raw, raw)
# Function to combine the extraction process (from URL + model processing)
def get_info(url):
    """Fetch the page at ``url`` and return the information extracted from it.

    The page text comes from ``extract_text`` and is passed unchanged to
    ``extract_info_with_model``, whose result is returned as is.
    """
    return extract_info_with_model(extract_text(url))
# Gradio Interface to allow user input and display output

# Result fields in the order they are displayed, one output box per field.
_OUTPUT_FIELDS = ("Keytags", "Amenities", "Facilities", "Seller Name", "Location Details")


def _get_info_as_row(url):
    """Return the result of ``get_info(url)`` as a tuple ordered like ``_OUTPUT_FIELDS``.

    ``get_info`` returns a dict keyed by field name, while an Interface with
    several output components needs one positional value per component.
    """
    info = get_info(url)
    return tuple(info[field] for field in _OUTPUT_FIELDS)


demo = gr.Interface(
    fn=_get_info_as_row,
    inputs="text",  # Input is a URL
    # One labelled text box per extracted field (Keytags, Amenities, etc.)
    outputs=[gr.Textbox(label=field) for field in _OUTPUT_FIELDS],
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL."
)
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    # show_api=False is forwarded to launch(); presumably it hides the
    # API documentation link in the page footer - confirm against Gradio docs.
    demo.launch(show_api=False)