import re
from io import BytesIO

import mechanicalsoup
import pandas as pd
import requests
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
    Image,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)
from unidecode import unidecode

import gradio as gr


class PDFPoster:
    """Build a PDF poster summarising the votes of one deputy.

    The deputy is looked up on https://datan.fr/deputes; the poster is
    rendered in memory with reportlab.
    """

    # Seconds to wait for the portrait download before giving up, so a
    # stalled server cannot hang poster generation indefinitely.
    _IMAGE_TIMEOUT_SECONDS = 30

    # Keyword searched in a cell's text -> background colour for that cell.
    # Order matters: the first keyword found in the text wins.
    _VOTE_COLOURS = (
        ("POUR", colors.green),
        ("CONTRE", colors.red),
        ("ABSTENTION", colors.beige),
    )

    def __init__(self, deputy_name: str):
        """Store the deputy's display name; no network access happens here."""
        self.deputy_name = deputy_name

    def retrieve_deputy_data(self):
        """Fetch and cache everything needed to build the poster.

        Populates ``self.deputy_data``, ``self.votes``, ``self.img_url`` and
        ``self.party`` by calling the corresponding ``get_*`` methods of this
        class (some of which are defined further down the file).

        Returns:
            The value stored in ``self.votes``.
        """
        self.deputy_data = self.get_deputy_votes_page()
        self.votes = self.get_votes_from_politic_page()
        self.img_url = self.get_politic_image()
        self.party = self.get_politic_party()
        return self.votes

    def generate_poster(
        self,
        vote_list,
        message_1: str = "Les votes de vos députés sont souvent différents de ce que les responsables de partis annoncent dans les médias. Les données de votes sont ouvertes!",
        # NOTE(review): "30 juin, et 7 juin" looks like a typo for a later
        # second date -- confirm with the author before changing the default.
        message_2: str = "Les 30 juin, et 7 juin, renseignez vous, et votez en connaissance de cause !",
    ):
        """Render the poster as a PDF held in memory.

        ``retrieve_deputy_data`` must have been called first: this method
        reads ``self.votes``, ``self.party``, ``self.deputy_data`` and
        ``self.img_url``.

        Args:
            vote_list: Vote topics to show; rows of ``self.votes`` whose
                ``vote_topic`` is in this collection are kept.
            message_1: Text printed below the table in the subtitle style.
            message_2: Call-to-action printed last, in large red type.

        Returns:
            A ``BytesIO`` containing the generated PDF, rewound to offset 0.

        Raises:
            requests.HTTPError: If the portrait download returns an error
                status.
        """
        df_subset = self.votes[self.votes["vote_topic"].isin(vote_list)]

        buffer = BytesIO()
        document = SimpleDocTemplate(buffer, pagesize=A4)

        # Set up the styles. getSampleStyleSheet() is called per poster, and
        # the tweaks below are applied to the objects it returns.
        styles = getSampleStyleSheet()
        title_style = styles["Title"]
        title_style.alignment = TA_CENTER
        subtitle_style = styles["Heading2"]
        subtitle_style.alignment = TA_CENTER
        subtitle_style.fontName = "Helvetica-Bold"
        normal_style = styles["Normal"]
        normal_style.alignment = TA_CENTER
        red_style = ParagraphStyle(
            "red", parent=subtitle_style, textColor=colors.red, fontSize=20
        )

        # Text elements
        title = Paragraph(
            f"Les votes de votre député sortant : {self.deputy_name}", title_style
        )
        subtitle = Paragraph(f"Parti : {self.party} ", subtitle_style)
        source = Paragraph(f"Source : {self.deputy_data['url']}", normal_style)
        after_text = Paragraph(message_1, subtitle_style)
        vote_text = Paragraph(message_2, red_style)

        # Portrait: download the bytes and hand them to reportlab in memory.
        # Fail loudly on an HTTP error instead of feeding an error page to
        # the image decoder.
        image_response = requests.get(
            self.img_url, timeout=self._IMAGE_TIMEOUT_SECONDS
        )
        image_response.raise_for_status()
        image = Image(BytesIO(image_response.content))
        image.drawHeight = 6 * cm
        image.drawWidth = 5 * cm

        # Table body: one row per selected vote, topic first, position second.
        topics = df_subset["vote_topic"].tolist()
        positions = df_subset["for_or_against"].tolist()
        table_data = [["Sujet", "Vote"]]
        table_data.extend(
            [Paragraph(topic, normal_style), Paragraph(position, normal_style)]
            for topic, position in zip(topics, positions)
        )

        table = Table(table_data)
        table.setStyle(
            TableStyle(
                [
                    ("BACKGROUND", (0, 0), (-1, 0), colors.white),
                    ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
                    ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                    # Header row only, like the other header commands.
                    ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                    ("FONTSIZE", (0, 0), (-1, 0), 14),
                    ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
                    ("ALIGN", (0, 1), (-1, -1), "CENTER"),
                    ("BACKGROUND", (0, 1), (-1, -1), colors.white),
                    ("GRID", (0, 0), (-1, -1), 1, colors.black),
                ]
            )
        )
        # Colour individual cells according to the vote they contain.
        table.setStyle(self._vote_colour_style(table_data))

        # Build the PDF
        elements = [
            title,
            Spacer(1, 6),
            subtitle,
            Spacer(1, 12),
            image,
            Spacer(1, 12),
            table,
            source,
            Spacer(1, 8),
            after_text,
            Spacer(1, 8),
            vote_text,
        ]
        document.build(elements)

        # Rewind the SAME buffer the document was written to; replacing it
        # with a fresh BytesIO here would hand the caller an empty file.
        buffer.seek(0)
        return buffer

    @staticmethod
    def _vote_colour_style(data):
        """Return a TableStyle that colours Paragraph cells by vote keyword.

        Every ``Paragraph`` cell in ``data`` is inspected; the first keyword
        of ``_VOTE_COLOURS`` found in its text decides the background.
        Non-Paragraph cells (the header strings) are left untouched.
        """
        style = TableStyle()
        for row_idx, row in enumerate(data):
            for col_idx, cell in enumerate(row):
                if not isinstance(cell, Paragraph):
                    continue
                for keyword, colour in PDFPoster._VOTE_COLOURS:
                    if keyword in cell.text:
                        position = (col_idx, row_idx)
                        style.add("BACKGROUND", position, position, colour)
                        break
        return style

    def get_deputy_votes_page(self):
        """Fetch the web page listing the votes of ``self.deputy_name``.

        The name is lower-cased, stripped of accents and hyphenated, then
        searched for as a substring of the link targets on the deputies
        index page. The first matching link is followed with ``/votes``
        appended.

        Returns:
            dict: ``"html_page"`` (parsed votes page), ``"url"`` (the
            deputy's page URL, without ``/votes``) and ``"name"`` (the
            normalised name used for the lookup).

        Raises:
            ValueError: If no link on the index page matches the name.
        """
        politic_name = unidecode(self.deputy_name.lower()).replace(" ", "-")
        browser = mechanicalsoup.StatefulBrowser()
        url = "https://datan.fr/deputes"
        research_page = browser.open(url)
        research_html = research_page.soup

        politic_card = research_html.select(f'a[href*="{politic_name}"]')
        if not politic_card:
            raise ValueError(f"Politic {politic_name} not found")

        url_politic = politic_card[0]["href"]
        politic_page = browser.open(url_politic + "/votes")
        return {
            "html_page": politic_page.soup,
            "url": url_politic,
            "name": politic_name,
        }

    def get_votes_from_politic_page(self):
        """Extracts the voting records from the html page of a deputy.

        Args:
            politic_dict (dict): Dictionary containing the html page, the url and the name of the deputy.

        Returns:
            df (pd.DataFrame): DataFrame containing the voting records of the deputy."""
        #