#!/usr/bin/env python
# coding: utf-8

"""Gradio app: extract real-estate details from an article URL.

Pipeline: fetch URL -> strip HTML to plain text -> run a Hugging Face NER
model -> map recognized entities to five output fields (Keytags, Amenities,
Facilities, Seller Name, Location Details).
"""

import gradio as gr
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from transformers import pipeline
import os

# BERT-style models accept at most 512 tokens; feeding a whole web page
# raises a sequence-length error, so the input is truncated to this many
# characters before NER (a safe under-estimate of 512 tokens).
_MAX_NER_CHARS = 2000

# Exact prefix extract_text() uses to signal failure; get_info() matches on
# this instead of the fragile substring test `"Error" in text`, which would
# false-positive on any page whose visible text contains the word "Error".
_FETCH_ERROR_PREFIX = "Error extracting text from URL:"


def extract_text(url):
    """Fetch *url* and return its visible text, or an error string.

    A browser-like User-Agent is sent because many sites reject the
    default urllib agent with HTTP 403. On any failure the returned
    string starts with ``_FETCH_ERROR_PREFIX`` (callers rely on this).
    """
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        html = urlopen(req).read()
        # stripped_strings yields every whitespace-trimmed text node,
        # effectively discarding all markup.
        return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
    except Exception as e:
        return f"Error extracting text from URL: {str(e)}"


# Load the Hugging Face NER model once at startup.
# aggregation_strategy="simple" merges word-piece fragments ("##son") back
# into whole words and exposes a single "entity_group" label per span
# (without it, results are raw sub-tokens keyed by "entity").
try:
    ner_model = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
except Exception as e:
    ner_model = None
    print(f"Error loading model: {str(e)}")


def extract_info_with_model(text):
    """Run NER over *text* and map entities to the five output fields.

    Returns a dict with string values under the keys: "Keytags",
    "Amenities", "Facilities", "Seller Name", "Location Details".
    Never raises: model failures produce error-message values instead.
    """
    if not ner_model:
        msg = "Model loading failed."
        return {
            "Keytags": msg,
            "Amenities": msg,
            "Facilities": msg,
            "Seller Name": msg,
            "Location Details": msg,
        }
    try:
        # Truncate to respect the model's 512-token input limit.
        ner_results = ner_model(text[:_MAX_NER_CHARS])

        keytags = []
        seller_name = ""
        location_details = ""

        # BUGFIX: pipeline results carry "entity_group" (with aggregation)
        # rather than "label", and this CoNLL-03 model emits the groups
        # ORG / PER / LOC / MISC — not spaCy-style PERSON / GPE. The
        # original lookup raised KeyError on every entity, so the broad
        # except below always returned the error dict.
        for entity in ner_results:
            group = entity.get("entity_group", "")
            word = entity["word"]
            if group == "ORG":
                keytags.append(word)          # organizations / key terms
            elif group == "PER" and not seller_name:
                seller_name = word            # first person mentioned
            elif group == "LOC" and not location_details:
                location_details = word       # first location mentioned

        # Placeholders: amenities/facilities need a QA model, not NER.
        amenities = "No amenities found"
        facilities = "No facilities found"

        return {
            "Keytags": ", ".join(keytags) if keytags else "No keytags found",
            "Amenities": amenities,
            "Facilities": facilities,
            "Seller Name": seller_name if seller_name else "No seller name found",
            "Location Details": location_details if location_details else "No location details found",
        }
    except Exception as e:
        err = f"Error processing text: {str(e)}"
        return {
            "Keytags": err,
            "Amenities": err,
            "Facilities": err,
            "Seller Name": err,
            "Location Details": err,
        }


def get_info(url):
    """Gradio handler: fetch *url* and return the five extracted fields."""
    text = extract_text(url)
    # extract_text signals failure with a well-known prefix; propagate the
    # message to every output rather than feeding it to the model.
    if text.startswith(_FETCH_ERROR_PREFIX):
        return text, text, text, text, text
    extracted_info = extract_info_with_model(text)
    return (
        extracted_info["Keytags"],
        extracted_info["Amenities"],
        extracted_info["Facilities"],
        extracted_info["Seller Name"],
        extracted_info["Location Details"],
    )


# Gradio UI: one URL text input, five text outputs.
demo = gr.Interface(
    fn=get_info,
    inputs="text",  # input is a URL
    outputs=["text", "text", "text", "text", "text"],
    title="Real Estate Info Extractor",
    description=(
        "Extract Keytags, Amenities, Facilities, Seller Name, and "
        "Location Details from a real estate article URL."
    ),
)

if __name__ == "__main__":
    demo.launch(show_api=False)