KrishanRao committed on
Commit
c312535
·
verified ·
1 Parent(s): 975cde5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from urllib.request import urlopen, Request
3
+ from bs4 import BeautifulSoup
4
+ from transformers import pipeline
5
+ import os
6
+
7
+ # Function to extract text from the URL
8
def extract_text(url, timeout=30):
    """Download a web page and return its visible text as one string.

    Args:
        url: Address of the page to fetch. Only ``http`` and ``https`` URLs
            are accepted; surrounding whitespace is ignored.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        Every text node of the parsed HTML, stripped of surrounding
        whitespace and joined with single spaces.

    Raises:
        ValueError: If ``url`` is empty or does not use http/https.
        urllib.error.URLError: If the page cannot be fetched.
    """
    from urllib.parse import urlparse

    url = (url or "").strip()
    # The URL comes straight from the user. urlopen() would also follow
    # file:// and ftp:// URLs, which would expose files on the host.
    if urlparse(url).scheme not in ("http", "https"):
        raise ValueError(
            f"unsupported URL (expected http:// or https://): {url!r}"
        )

    # Some sites reject urllib's default User-Agent, so send a browser-like one.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
13
+
14
# Load the Hugging Face named-entity-recognition pipeline once, at import time.
# aggregation_strategy="simple" makes the pipeline merge word pieces and
# B-/I- tags into whole entities (reported under the "entity_group" key);
# without it each result is a single token such as "##son".
# NOTE(review): a question-answering model could be swapped in here if
# free-form fields (amenities, facilities) need to be extracted.
ner_model = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
17
+
18
+ # Function to extract information using Hugging Face model
19
def _entity_type(entity):
    """Return an entity's type with any ``B-``/``I-`` tagging prefix removed."""
    # The pipeline reports the type under "entity_group" when aggregation is
    # enabled and under "entity" otherwise; "label" is accepted as a fallback.
    tag = entity.get("entity_group") or entity.get("entity") or entity.get("label") or ""
    if tag.startswith(("B-", "I-")):
        tag = tag[2:]
    return tag.upper()


def extract_info_with_model(text):
    """Run NER over ``text`` and sort the entities into listing fields.

    Organisations become key tags, the last person found becomes the seller
    name, and the last location found becomes the location details.
    Amenities and facilities are not extracted yet and always carry a
    placeholder.

    Args:
        text: Plain text of the article to analyse.

    Returns:
        A dict with the keys ``"Keytags"``, ``"Amenities"``, ``"Facilities"``,
        ``"Seller Name"`` and ``"Location Details"``; every value is a string,
        with a "No ... found" message where nothing was detected.
    """
    # Nothing to analyse on an empty page, so skip the model call.
    ner_results = ner_model(text) if text and text.strip() else []

    keytags = []
    seller_name = ""
    location_details = ""

    for entity in ner_results:
        word = entity.get("word", "")
        if not word:
            continue
        kind = _entity_type(entity)
        if kind == "ORG":
            keytags.append(word)  # Organisations double as key terms
        elif kind in ("PER", "PERSON"):
            # CoNLL-03 models tag people as PER; PERSON is kept for other models.
            seller_name = word
        elif kind in ("LOC", "GPE"):
            # CoNLL-03 models tag places as LOC; GPE is kept for other models.
            location_details = word

    # Amenities and facilities need different logic or an additional model
    # (e.g. question answering); placeholders are returned for now.
    return {
        "Keytags": ", ".join(keytags) if keytags else "No keytags found",
        "Amenities": "No amenities found",
        "Facilities": "No facilities found",
        "Seller Name": seller_name if seller_name else "No seller name found",
        "Location Details": location_details if location_details else "No location details found",
    }
51
+
52
+ # Function to combine the extraction process (from URL + model processing)
53
def get_info(url):
    """Fetch the page at ``url`` and return the fields extracted from its text.

    Chains ``extract_text`` and ``extract_info_with_model``; the result is
    whatever dict the latter returns.
    """
    return extract_info_with_model(extract_text(url))
57
+
58
# Gradio interface: one URL in, one labelled text box per extracted field out.
# The order of this list fixes the order of the output boxes; the names must
# match the keys of the dict returned by get_info.
OUTPUT_FIELDS = ["Keytags", "Amenities", "Facilities", "Seller Name", "Location Details"]


def _get_info_fields(url):
    """Return get_info's results as a list ordered like OUTPUT_FIELDS.

    Gradio expects one positional value per output component, so the dict
    returned by get_info cannot be handed to the interface directly.
    """
    info = get_info(url)
    return [info[field] for field in OUTPUT_FIELDS]


demo = gr.Interface(
    fn=_get_info_fields,
    inputs=gr.Textbox(label="Article URL"),
    outputs=[gr.Textbox(label=field) for field in OUTPUT_FIELDS],
    title="Real Estate Info Extractor",
    description="Extract Keytags, Amenities, Facilities, Seller Name, and Location Details from a real estate article URL.",
)

if __name__ == "__main__":
    demo.launch(show_api=False)