KrishanRao committed on
Commit
59f361f
·
verified ·
1 Parent(s): f6a3a56

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
+ import gradio as gr
8
+ from urllib.request import urlopen, Request
9
+ from bs4 import BeautifulSoup
10
+ from transformers import pipeline
11
+ import os
12
+
13
# Function to extract text from the URL
def extract_text(url, timeout=30):
    """Download a web page and return its visible text as a single string.

    Args:
        url: Address of the page to fetch. Only ``http`` and ``https`` URLs
            are accepted: the value comes straight from the UI, and
            ``urlopen`` would otherwise also follow ``file://`` and other
            schemes.
        timeout: Seconds to wait on the network before giving up, so that a
            stalled server cannot block the caller indefinitely.

    Returns:
        The page's text fragments, each stripped of surrounding whitespace
        and joined with single spaces.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
        urllib.error.URLError: If the request fails (a timeout may also
            surface as ``TimeoutError``).
    """
    # Imported here so this function stays self-contained with respect to the
    # file's import section.
    from urllib.parse import urlparse

    url = url.strip()
    if urlparse(url).scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL (expected http or https): {url!r}")

    # Some sites reject urllib's default User-Agent, hence the browser-like one.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response deterministically instead of leaving it to the GC.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()
    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
19
+
20
# Named-entity-recognition pipeline, built once at import time and shared by
# every request.
_NER_MODEL_ID = "dbmdz/bert-large-cased-finetuned-conll03-english"
ner_model = pipeline(task="ner", model=_NER_MODEL_ID)
22
+
23
# Function to extract information using Hugging Face model
def extract_info_with_model(text):
    """Run named-entity recognition over ``text`` and map entities to fields.

    Organisations become key tags, the last person found is used as the
    seller name, and the last location found is used as the location.
    Amenities and facilities are not extracted yet and always carry a
    placeholder message.

    Args:
        text: Plain text of the listing page. Blank input skips the model
            and yields the "not found" message for every field.

    Returns:
        A dict with the keys ``"Keytags"``, ``"Amenities"``,
        ``"Facilities"``, ``"Seller Name"`` and ``"Location Details"``,
        each mapped to a display string.
    """
    keytags = []
    seller_name = ""
    location_details = ""

    if text and text.strip():
        # Ask the pipeline to merge word pieces into whole entities; without
        # aggregation each result is a single sub-token such as "##son".
        # NOTE(review): very long pages may exceed the model's maximum input
        # length -- confirm whether the text needs to be chunked first.
        ner_results = ner_model(text, aggregation_strategy="simple")
    else:
        ner_results = []

    # Search for relevant named entities
    for entity in ner_results:
        # Grouped results expose the tag as "entity_group", ungrouped ones as
        # "entity"; neither form has a "label" key.
        raw_label = entity.get("entity_group") or entity.get("entity") or ""
        # Ungrouped tags carry a B-/I- prefix (e.g. "B-ORG"); keep the type.
        label = raw_label.split("-", 1)[-1]
        word = entity.get("word", "")
        if not word:
            continue

        if label == "ORG":
            # Example: Company or key term (this can be changed)
            if word not in keytags:
                keytags.append(word)
        elif label in ("PER", "PERSON"):
            # CoNLL-03 style models tag people as PER; PERSON is accepted too
            # in case a different model is plugged in.
            seller_name = word
        elif label in ("LOC", "GPE"):
            # Likewise LOC is the CoNLL-03 location tag; GPE covers other
            # label sets.
            location_details = word

    # For amenities and facilities, you can modify the logic or use additional
    # models (e.g., question-answering models)
    amenities = "No amenities found"  # Placeholder for the amenities
    facilities = "No facilities found"  # Placeholder for the facilities

    return {
        "Keytags": ", ".join(keytags) if keytags else "No keytags found",
        "Amenities": amenities,
        "Facilities": facilities,
        "Seller Name": seller_name if seller_name else "No seller name found",
        "Location Details": location_details if location_details else "No location details found",
    }
55
+
56
# Function to combine the extraction process (from URL + model processing)
def get_info(url):
    """Fetch the page at ``url`` and return its extracted fields for the UI.

    Returns:
        A 5-tuple ordered as Keytags, Amenities, Facilities, Seller Name,
        Location Details -- the same order as the interface's output boxes.
    """
    fields = extract_info_with_model(extract_text(url))

    # Debug aid: echo the extracted mapping to stdout.
    print(fields)

    output_order = (
        "Keytags",
        "Amenities",
        "Facilities",
        "Seller Name",
        "Location Details",
    )
    return tuple(fields[key] for key in output_order)
72
+
73
# Gradio front end: one text box for the listing URL in, five text boxes out
# (Keytags, Amenities, Facilities, Seller Name, Location Details).
demo = gr.Interface(
    fn=get_info,
    inputs="text",
    outputs=["text"] * 5,
    title="Real Estate Info Extractor",
    description=(
        "Extract Keytags, Amenities, Facilities, Seller Name, and Location "
        "Details from a real estate article URL."
    ),
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(show_api=False)
84
+