"""Gradio app that builds a PDF poster of a French deputy's votes.

Voting records are scraped from datan.fr, a subset chosen by the user is
laid out as a ReportLab table, and the resulting PDF is offered for download.
"""

import re
from io import BytesIO
from pathlib import Path

import gradio as gr
import mechanicalsoup
import pandas as pd
import requests
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
    Image,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)
from unidecode import unidecode

# Default poster messages, shared by the PDF builder, the Gradio callback and
# the input placeholders so the three copies cannot drift apart.
DEFAULT_MESSAGE_1 = "Les votes de vos députés sont souvent différents de ce que les responsables de partis annoncent dans les médias. Les données de votes sont ouvertes!"
DEFAULT_MESSAGE_2 = "Les 30 juin, et 7 juillet, renseignez vous, et votez en connaissance de cause !"

# Seconds to wait for the portrait download before giving up.
REQUEST_TIMEOUT_SECONDS = 10


class PDFPoster:
    """Scrape a deputy's votes from datan.fr and render them as a PDF poster.

    Call :meth:`retrieve_deputy_data` before :meth:`generate_poster`; the
    latter reads the attributes populated by the former.
    """

    # Keyword searched in a cell's text -> background colour of that cell.
    # Order matters: the first matching keyword wins.
    _VOTE_COLORS = (
        ("POUR", colors.green),
        ("CONTRE", colors.red),
        ("ABSTENTION", colors.beige),
    )

    def __init__(self, deputy_name: str):
        self.deputy_name = deputy_name
        # Populated by retrieve_deputy_data().
        self.deputy_data = None
        self.votes = None
        self.img_url = None
        self.party = None

    def retrieve_deputy_data(self):
        """Fetch and cache the deputy's page, votes, portrait URL and party.

        Returns:
            pd.DataFrame: The deputy's voting records (also stored in
            ``self.votes``).

        Raises:
            ValueError: If the deputy, their portrait or their party cannot
                be found on the scraped pages.
        """
        self.deputy_data = self.get_deputy_votes_page()
        self.votes = self.get_votes_from_politic_page()
        self.img_url = self.get_politic_image()
        self.party = self.get_politic_party()
        return self.votes

    def generate_poster(
        self,
        vote_list,
        message_1: str = DEFAULT_MESSAGE_1,
        message_2: str = DEFAULT_MESSAGE_2,
    ):
        """Build the PDF poster for the selected votes.

        Args:
            vote_list: Vote topics (values of the ``vote_topic`` column) to
                show on the poster. ``None`` is treated as an empty selection.
            message_1: Text printed under the table in the subtitle style.
            message_2: Call-to-action text printed in red at the bottom.

        Returns:
            BytesIO: Buffer holding the rendered PDF, rewound to position 0.

        Raises:
            requests.RequestException: If the portrait cannot be downloaded.
        """
        selected_topics = list(vote_list or [])
        df_subset = self.votes[self.votes["vote_topic"].isin(selected_topics)]

        buffer = BytesIO()
        document = SimpleDocTemplate(buffer, pagesize=A4)

        # Set up the styles
        styles = getSampleStyleSheet()
        title_style = styles["Title"]
        title_style.alignment = TA_CENTER
        subtitle_style = styles["Heading2"]
        subtitle_style.alignment = TA_CENTER
        subtitle_style.fontName = "Helvetica-Bold"
        normal_style = styles["Normal"]
        normal_style.alignment = TA_CENTER
        red_style = ParagraphStyle(
            "red", parent=subtitle_style, textColor=colors.red, fontSize=20
        )

        # Text blocks
        title = Paragraph(
            f"Les votes de votre député sortant : {self.deputy_name}", title_style
        )
        subtitle = Paragraph(f"Parti : {self.party} ", subtitle_style)
        source = Paragraph(f"Source : {self.deputy_data['url']}", normal_style)
        after_text = Paragraph(message_1, subtitle_style)
        vote_text = Paragraph(message_2, red_style)

        # Portrait: download into memory and hand the bytes to ReportLab.
        # A timeout keeps the UI from hanging on a stalled download, and
        # raise_for_status() avoids feeding an HTML error page to Image().
        image_response = requests.get(self.img_url, timeout=REQUEST_TIMEOUT_SECONDS)
        image_response.raise_for_status()
        image = Image(BytesIO(image_response.content))
        image.drawHeight = 6 * cm
        image.drawWidth = 5 * cm

        # Table: one header row, then one (topic, vote) row per selected vote.
        topics = df_subset["vote_topic"].tolist()
        votes = df_subset["for_or_against"].tolist()
        table_data = [["Sujet", "Vote"]]
        table_data.extend(
            [Paragraph(topic, normal_style), Paragraph(vote, normal_style)]
            for topic, vote in zip(topics, votes)
        )

        table = Table(table_data)
        table.setStyle(
            TableStyle(
                [
                    ("BACKGROUND", (0, 0), (-1, 0), colors.white),
                    ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
                    ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                    # Header row only, like the other header commands here.
                    ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                    ("FONTSIZE", (0, 0), (-1, 0), 14),
                    ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
                    ("ALIGN", (0, 1), (-1, -1), "CENTER"),
                    ("BACKGROUND", (0, 1), (-1, -1), colors.white),
                    ("GRID", (0, 0), (-1, -1), 1, colors.black),
                ]
            )
        )
        # Applied after the base style so the vote colours override the white
        # background set above.
        table.setStyle(self._vote_background_style(table_data))

        # Build the PDF
        elements = [
            title,
            Spacer(1, 6),
            subtitle,
            Spacer(1, 12),
            image,
            Spacer(1, 12),
            table,
            source,
            Spacer(1, 8),
            after_text,
            Spacer(1, 8),
            vote_text,
        ]
        document.build(elements)

        # Rewind the buffer that was actually written to; replacing it with a
        # fresh BytesIO here would hand an empty PDF back to the caller.
        buffer.seek(0)
        return buffer

    @staticmethod
    def _vote_background_style(table_data):
        """Return a TableStyle colouring each Paragraph cell by vote keyword."""
        style = TableStyle()
        for row_idx, row in enumerate(table_data):
            for col_idx, cell in enumerate(row):
                if not isinstance(cell, Paragraph):
                    continue  # header cells are plain strings
                for keyword, color in PDFPoster._VOTE_COLORS:
                    if keyword in cell.text:
                        position = (col_idx, row_idx)
                        style.add("BACKGROUND", position, position, color)
                        break
        return style

    def get_deputy_votes_page(self):
        """Fetch the datan.fr votes page of ``self.deputy_name``.

        The name is lower-cased, stripped of accents and hyphenated, then
        searched for in the ``href`` of the links on the deputies index page.

        Returns:
            dict: ``html_page`` (parsed votes page), ``url`` (deputy page URL
            taken from the matching link) and ``name`` (normalised name).

        Raises:
            ValueError: If no link on the index page matches the name.
        """
        politic_name = unidecode(self.deputy_name.lower()).replace(" ", "-")
        browser = mechanicalsoup.StatefulBrowser()
        try:
            index_html = browser.open("https://datan.fr/deputes").soup
            politic_card = index_html.select(f'a[href*="{politic_name}"]')
            if not politic_card:
                raise ValueError(f"Politic {politic_name} not found")

            url_politic = politic_card[0]["href"]
            politic_html = browser.open(url_politic + "/votes").soup
        finally:
            # Release the underlying HTTP session; the parsed soups stay usable.
            browser.close()

        return {
            "html_page": politic_html,
            "url": url_politic,
            "name": politic_name,
        }

    def get_votes_from_politic_page(self):
        """Extract the voting records from the cached votes page.

        Reads ``self.deputy_data`` as produced by
        :meth:`get_deputy_votes_page`.

        Returns:
            pd.DataFrame: One row per vote card with columns ``vote_id``,
            ``for_or_against``, ``vote_topic``, ``vote_date``,
            ``politic_name`` and ``vote_category``.
        """
        politic_html = self.deputy_data["html_page"]
        politic_name = self.deputy_data["name"]

        vote_elements = politic_html.find_all("div", class_="card card-vote")
        # NOTE(review): cards and category wrappers are paired by position;
        # this assumes the page yields them in the same order and number.
        vote_categories = politic_html.find_all(
            class_=re.compile("col-md-6 sorting-item*")
        )

        votes = []
        for i, vote_element in enumerate(vote_elements):
            for_or_against = (
                vote_element.find("div", class_="d-flex align-items-center")
                .text.replace("\n", "")
                .strip()
            )
            # The same link carries both the topic (text) and the id (href).
            topic_link = vote_element.find(
                "a", class_="stretched-link underline no-decoration"
            )
            vote_topic = topic_link.text.replace("\n", "").strip()
            vote_id = topic_link["href"].split("/")[-1].replace("\n", "").strip()
            vote_date = (
                vote_element.find("span", class_="date").text.replace("\n", "").strip()
            )
            # The last CSS class of the wrapper is used as the category.
            vote_category = vote_categories[i]["class"][-1]
            votes.append(
                [
                    vote_id,
                    for_or_against,
                    vote_topic,
                    vote_date,
                    politic_name,
                    vote_category,
                ]
            )

        return pd.DataFrame(
            votes,
            columns=[
                "vote_id",
                "for_or_against",
                "vote_topic",
                "vote_date",
                "politic_name",
                "vote_category",
            ],
        )

    def get_politic_image(self):
        """Return the ``src`` of the deputy's portrait on the cached page.

        The portrait is the ``<img>`` whose ``alt`` equals
        ``self.deputy_name``.

        Raises:
            ValueError: If no such image is present on the page.
        """
        image = self.deputy_data["html_page"].find("img", alt=self.deputy_name)
        if image is None:
            raise ValueError(f"Image not found for {self.deputy_name}")
        return image.get("src")

    def get_politic_party(self):
        """Return the deputy's party label scraped from the cached page.

        Raises:
            ValueError: If the party element is missing from the page.
        """
        party_block = self.deputy_data["html_page"].find(
            "div", class_="link-group text-center mt-1"
        )
        if party_block is None:
            raise ValueError(f"Party not found for {self.deputy_name}")
        return party_block.text.replace("\n", "").strip()


css = """
#col-container {
    margin: 0 auto;
    max-width: 800px;
}
"""


def convert_pdf_to_image(pdf_buffer):
    """Return a placeholder preview image for the generated poster.

    NOTE: ``pdf_buffer`` is not rasterised; the preview is a small drawing
    holding a fixed caption. Rendering the real first page would need a PDF
    rasteriser, which this module does not import.

    Args:
        pdf_buffer: Buffer holding the generated PDF (currently unused).

    Returns:
        PIL.Image.Image: The rendered placeholder.
    """
    from reportlab.graphics.renderPM import drawToPIL
    from reportlab.graphics.shapes import Drawing, String

    drawing = Drawing(200, 100)
    drawing.add(String(10, 45, "Preview of the PDF"))
    return drawToPIL(drawing)


def fetch_votes(deputy_name):
    """Gradio callback: list the deputy's vote topics as checkbox choices."""
    pdfposter = PDFPoster(deputy_name)
    votes = pdfposter.retrieve_deputy_data()
    return gr.update(choices=votes["vote_topic"].tolist())


def generate_poster(deputy_name, message_1, message_2, vote_list):
    """Gradio callback: build the poster and save it under ``./static``.

    Args:
        deputy_name: Name of the deputy as typed by the user.
        message_1: First poster message; the default is used when empty.
        message_2: Second poster message; the default is used when empty.
        vote_list: Vote topics ticked in the checkbox group.

    Returns:
        tuple: ``(preview_image, pdf_path)`` for the image and file outputs.
    """
    # Set default messages if not provided
    message_1 = message_1 or DEFAULT_MESSAGE_1
    message_2 = message_2 or DEFAULT_MESSAGE_2

    pdfposter = PDFPoster(deputy_name)
    pdfposter.retrieve_deputy_data()
    pdf_buffer = pdfposter.generate_poster(vote_list, message_1, message_2)

    # The output directory may not exist on a fresh checkout.
    Path("./static").mkdir(parents=True, exist_ok=True)
    # Keep only the final path component of the user-supplied name so the
    # file cannot be written outside ./static.
    pdf_path = f"./static/{Path(deputy_name).name}.pdf"
    with open(pdf_path, "wb") as f:
        f.write(pdf_buffer.getvalue())

    # Generate a preview image
    pdf_buffer.seek(0)
    preview_image = convert_pdf_to_image(pdf_buffer)

    return preview_image, pdf_path


# NOTE(review): the widget nesting below was reconstructed from a source whose
# indentation was lost; confirm the Row/Column grouping against the intended
# layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Réalise une affiche des votes de ton député !
        """)

        with gr.Row():
            deputy_name = gr.Text(
                label="deputy_name",
                show_label=False,
                max_lines=1,
                placeholder="Nom du député, si tu ne le connais pas RDV sur www.datan.fr ou www.nosdeputes.fr",
                container=False,
            )
            fetch_button = gr.Button("Récupère ses votes importants", scale=0)

        vote_list = gr.CheckboxGroup(label="Select Votes", choices=[])

        with gr.Row():
            message_1 = gr.Text(
                label="message_1",
                max_lines=1,
                placeholder=DEFAULT_MESSAGE_1,
                visible=True,
            )
            message_2 = gr.Text(
                label="message_2",
                max_lines=1,
                placeholder=DEFAULT_MESSAGE_2,
                visible=True,
            )
            generate_button = gr.Button("Générer l'affiche ! ", scale=0)

        image_preview = gr.Image(label="Image")
        pdf_output = gr.File(label="Télécharger le PDF")

    fetch_button.click(fn=fetch_votes, inputs=deputy_name, outputs=vote_list)
    generate_button.click(
        fn=generate_poster,
        inputs=[deputy_name, message_1, message_2, vote_list],
        outputs=[image_preview, pdf_output],
    )

demo.queue().launch()