KrishanRao committed on
Commit
3a65d75
·
verified ·
1 Parent(s): 0780207

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
+ import gradio as gr
8
+ from urllib.request import urlopen, Request
9
+ from bs4 import BeautifulSoup
10
+ from transformers import pipeline
11
+ import os
12
+
13
# Function to extract text from the URL
def extract_text(url, timeout=15):
    """Download a web page and return its visible text as a single string.

    Args:
        url: Address of the page to fetch. Only ``http://`` and ``https://``
            URLs are accepted: the value comes straight from the user, and
            ``urlopen`` would otherwise also open other schemes such as
            ``file://``.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The page's text fragments joined by single spaces. On any failure
        the function does not raise; it returns a message that starts with
        ``"Error extracting text from URL: "``.
    """
    try:
        url = url.strip()
        if not url.lower().startswith(("http://", "https://")):
            raise ValueError("only http:// and https:// URLs are supported")

        # Send a browser-like User-Agent header with the request.
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

        # The context manager closes the connection even if read() fails,
        # and the timeout keeps a stalled server from hanging the request.
        with urlopen(req, timeout=timeout) as response:
            html = response.read()

        return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
    except Exception as e:
        # Errors are reported in-band so the caller can show them in the UI.
        return f"Error extracting text from URL: {str(e)}"
22
+
23
# Load the Hugging Face NER pipeline once at import time.
# aggregation_strategy="simple" makes the pipeline merge word-pieces into
# whole entity spans and report the tag under "entity_group".
# If loading fails the app still starts; ner_model stays None and every
# extraction call reports the failure instead of crashing.
try:
    ner_model = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
except Exception as e:
    ner_model = None
    print(f"Error loading model: {str(e)}")

# Keys of the dictionary returned by extract_info_with_model, in display order.
_RESULT_FIELDS = (
    "Keytags",
    "Amenities",
    "Facilities",
    "Seller Name",
    "Location Details",
)


def _uniform_result(message):
    """Return a result dict in which every field carries the same message."""
    return dict.fromkeys(_RESULT_FIELDS, message)


def _entity_label(entity):
    """Return the bare, upper-case tag of one NER result (e.g. ``"ORG"``).

    Aggregated pipeline output stores the tag under ``"entity_group"``;
    un-aggregated output stores it under ``"entity"`` with a ``B-``/``I-``
    prefix. Both shapes are accepted and the prefix is stripped.
    """
    raw = entity.get("entity_group") or entity.get("entity") or ""
    return raw.split("-", 1)[-1].upper()


# Function to extract information using Hugging Face model
def extract_info_with_model(text):
    """Run NER over ``text`` and map the entities onto listing fields.

    Args:
        text: Plain text of the article to analyse.

    Returns:
        A dict with the keys ``"Keytags"``, ``"Amenities"``,
        ``"Facilities"``, ``"Seller Name"`` and ``"Location Details"``.
        Organisations become keytags, the last person found becomes the
        seller name and the last location found becomes the location.
        Amenities and facilities are placeholders. If the model is not
        loaded or processing fails, every field holds the error message.
    """
    if not ner_model:
        return _uniform_result("Model loading failed.")

    try:
        # NOTE(review): very long pages may exceed the model's input limit;
        # confirm whether the pipeline truncates or the text needs chunking.
        ner_results = ner_model(text)

        keytags = []
        seller_name = ""
        location_details = ""

        for entity in ner_results:
            label = _entity_label(entity)
            word = entity["word"]
            if label == "ORG":
                keytags.append(word)
            # CoNLL-03 models tag people as PER and places as LOC; the
            # PERSON / GPE spellings are kept for other label sets.
            elif label in ("PER", "PERSON"):
                seller_name = word
            elif label in ("LOC", "GPE"):
                location_details = word

        # Drop repeated organisations while keeping first-seen order.
        unique_keytags = list(dict.fromkeys(keytags))

        # For amenities and facilities, you can modify the logic or use
        # additional models (e.g., question-answering models).
        return {
            "Keytags": ", ".join(unique_keytags) if unique_keytags else "No keytags found",
            "Amenities": "No amenities found",
            "Facilities": "No facilities found",
            "Seller Name": seller_name if seller_name else "No seller name found",
            "Location Details": location_details if location_details else "No location details found",
        }
    except Exception as e:
        return _uniform_result(f"Error processing text: {str(e)}")
80
+
81
# Function to combine the extraction process (from URL + model processing)
def get_info(url):
    """Fetch ``url`` and return the five extracted fields for the UI.

    Args:
        url: Address of the article to analyse.

    Returns:
        A 5-tuple of strings: keytags, amenities, facilities, seller name
        and location details. If the page could not be fetched, the fetch
        error message is returned in all five positions.
    """
    text = extract_text(url)

    # extract_text reports failures in-band with this exact prefix. Checking
    # for the prefix, not for the word "Error" anywhere, keeps pages that
    # merely mention "Error" from being treated as failed fetches.
    if text.startswith("Error extracting text from URL:"):
        return (text,) * 5

    extracted_info = extract_info_with_model(text)

    return (
        extracted_info["Keytags"],
        extracted_info["Amenities"],
        extracted_info["Facilities"],
        extracted_info["Seller Name"],
        extracted_info["Location Details"],
    )
96
+
97
# Gradio Interface to allow user input and display output.
# Each output box is labelled so the five values returned by get_info can be
# told apart in the UI; the order must match get_info's return tuple.
demo = gr.Interface(
    fn=get_info,
    inputs=gr.Textbox(label="Article URL"),
    outputs=[
        gr.Textbox(label="Keytags"),
        gr.Textbox(label="Amenities"),
        gr.Textbox(label="Facilities"),
        gr.Textbox(label="Seller Name"),
        gr.Textbox(label="Location Details"),
    ],
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)
108
+