File size: 7,705 Bytes
5b4a293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1cde73
5b4a293
 
 
 
 
 
 
 
 
 
 
 
 
c1cde73
5b4a293
 
 
 
 
 
 
 
 
 
 
 
 
c1cde73
5b4a293
 
 
 
 
 
 
c1cde73
5b4a293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from crewai import Agent, Task, Crew, Process, LLM

from stealth_scrape_tool import StealthScrapeTool

class FragranticaCrew:
    """Two-agent CrewAI pipeline that analyses a perfume review page.

    A research agent scrapes the page with ``StealthScrapeTool`` and extracts
    structured facts; a reporter agent then turns those facts into a
    human-friendly report. The two tasks run sequentially, and the research
    task is passed to the report task as context.
    """

    # Sentinel the prompts instruct the agents to emit when scraping fails.
    SCRAPING_FAILED = "SCRAPING_FAILED"

    def __init__(self, openai_api_key: str, openai_base_url: str, openai_model_name: str):
        """Create the scraping tool, the shared LLM and both agents.

        Args:
            openai_api_key: API key passed to the ``LLM`` as ``api_key``.
            openai_base_url: Endpoint passed to the ``LLM`` as ``base_url``.
            openai_model_name: Model identifier passed to the ``LLM`` as ``model``.
        """
        self.openai_api_key = openai_api_key
        self.openai_base_url = openai_base_url
        self.openai_model_name = openai_model_name
        self.scrape_tool = StealthScrapeTool()

        # Both agents share one LLM configuration.
        llm = LLM(
            api_key=self.openai_api_key,
            model=self.openai_model_name,
            base_url=self.openai_base_url,
        )

        self.research_agent = Agent(
            role='Expert Perfume Analyst and Web Data Extractor',
            goal="Analyze the content of the provided URL, which leads to a perfume review page. Based on the page's content, including official descriptions and user reviews, you must extract the specified information and format it as a user friendly text.",
            backstory=("As an expert in the world of fragrances and olfactory evaluator, you have a gift for dissecting complex perfume pages. You can read through hundreds of user reviews and technical details on a webpage, synthesizing them into a clear, structured summary. Your expertise allows you to adeptly identify olfactory notes, longevity, sillage and similar fragrances, providing a comprehensive analysis for any fragrance enthusiast."),
            verbose=True,
            tools=[self.scrape_tool],
            allow_delegation=False,
            llm=llm,
            # CrewAI's documented retry setting on Agent is `max_retry_limit`;
            # the previously used `max_retries` keyword is not an Agent field.
            max_retry_limit=3,
        )

        # The reporter gets no tools: it only rewrites the research output.
        self.reporter_agent = Agent(
            role='Fragrance Expert Woman and Perfume Analysis Reporter',
            goal='Produce a "Human Friendly" analysis containing specific graded evaluations and personalized recommendations based on the extracted perfume information.',
            backstory=("You are a seasoned reporter with a passion for fragrances. You excel at transforming raw data about perfumes into engaging, well-structured, and informative reports. Your reports highlight key characteristics, unique selling points, and provide a holistic view of the fragrance, making it easy for enthusiasts to understand and appreciate. You are also an extraordinary woman, capable of providing insightful and personalized recommendations."),
            verbose=True,
            allow_delegation=False,
            llm=llm,
            max_retry_limit=3,
        )

    def _build_research_task(self, url: str) -> "Task":
        """Build the scraping/extraction task for ``url`` (research agent)."""
        return Task(
            description=(
                f"""1. Scrape the content of the URL: {url} using the 'Stealth Web Scraper' tool with `website_url` as {url} and `css_element` as "#main-content". If the scraping tool fails or returns empty content ONCE, try the `css_element` as "body", If they also fail when you pass `css_element` as "body", then you MUST return the exact string "SCRAPING_FAILED".

                2. If scraping is successful, carefully analyze the entire page content to extract the following information:

                   - Resumo: Look for a general summary of the perfume, often found near the top or in introductory paragraphs, synthesizing user opinions if available.

                   - Acordes principais: Find the section listing 'Main Accords' or similar, and extract the list of accords (e.g., 'amadeirado', 'cítrico', 'floral').

                   - Pirâmide Olfativa: Identify sections for 'Top Notes', 'Middle Notes', and 'Base Notes'. Extract the notes for 'topo' (top), 'coracao' (heart), and 'fundo' (base) into a dictionary format.

                   - Longevidade: Locate user polls or reviews discussing longevity. Choose one of the following exact string values based on the overall sentiment: 'Fraca', 'Moderada', 'Longa', 'Eterna'.
                   - Projeção: Locate user polls or reviews discussing sillage/projection. Choose one of the following exact string values based on the overall sentiment: 'Íntima', 'Moderada', 'Forte', 'Enorme'.

                   - Este Perfume me Lembra do: Find the section titled "Este perfume me lembra do", and list the perfume names mentioned there.

                   - Opinião dos usuários: Look for a section containing detailed user reviews, such as "Todas as Resenhas por Data" or similar, and synthesize a detailed summary from these reviews.

                3. Present the extracted information in a clear, structured format, ready for reporting. If any specific piece of information cannot be found, check again to make sure they are not found, after check again, if you truly do not find the info, state 'N/A' for that field. If the entire scraping process fails, return "SCRAPING_FAILED".
"""
            ),
            agent=self.research_agent,
            expected_output=(
                """A structured text containing all the extracted information: 
                    Resumo, 
                    Acordes principais, 
                    Pirâmide Olfativa, 
                    Longevidade, 
                    Projeção, 
                    Este Perfume me Lembra do,
                    Opinião dos usuários.
                Ensure Longevidade and Projeção use the exact specified string values.
                If any information is not found, state 'N/A' for that specific field. If the scraping process fails entirely, return the exact string "SCRAPING_FAILED"."""
            ),
        )

    def _build_report_task(self, research_task: "Task") -> "Task":
        """Build the reporting task, fed by ``research_task`` as context."""
        return Task(
            # The failure guard comes first so it is read before the list of
            # report fields. The field labels themselves are left unchanged
            # because they define the layout of the emitted report.
            description=(
                """If the input you receive from the research agent is "SCRAPING_FAILED", you MUST stop and output only that same message.
                Otherwise, with the extracted information, as a Fragrance Expert woman, your next step is to produce a "Human Friendly" analysis containing:
                   - Nível de "doçura": Ranging from 1 to 5
                   - Intensidade: Ranging from 1 to 5
                   - Fixação na minha pele: Ranging from 1 to 5
                   - Projeção: Ranging from 1 to 5
                   - Segue o estilo do perfurme: Select the perfume that most match this one, based on "Este Perfume me Lembra do" and "Opinião dos usuários" extracted earlier
                   - Como ele é, na minha percepção: Based on your analyses, write a concise summary about "How do I see it". Where you give your opinion using info about the perfume grades, and etc.
                   - Eu indico para quem: Give two opinions about who would like it. Something like "gostam de frangâncias cítricas e amedeirado", "Querem um perfurme forte para usar no inverno"
                Your output must be a text containing the "Extraction" values and the "Process" values, in user friendly format."""
            ),
            agent=self.reporter_agent,
            expected_output=(
                """A comprehensive perfume analysis report in markdown format.
                The report must include all extracted information (Resumo, Acordes principais, Pirâmide Olfativa, Longevidade, Projeção, Este Perfume me Lembra, Opinião dos usuários) 
                and the "Human Friendly" analysis (Nível de "doçura", Intensidade, Fixação na minha pele, Projeção, Segue o estilo do perfurme, Como ele é, na minha percepção, Eu indico para quem)."""
            ),
            context=[research_task],
        )

    @staticmethod
    def _finalize_output(result: object) -> str:
        """Convert a crew result to text, normalising the failure sentinel.

        Args:
            result: Whatever ``Crew.kickoff()`` returned; only ``str(result)``
                is used.

        Returns:
            Exactly ``"SCRAPING_FAILED"`` when the text is the sentinel,
            optionally surrounded by whitespace, quotes, backticks, asterisks
            or periods; otherwise ``str(result)`` unchanged.
        """
        text = str(result)
        # The sentinel is produced by an LLM, so tolerate surrounding
        # whitespace, quotes, backticks, markdown emphasis and a final period.
        if text.strip(" \t\r\n\"'`*.") == FragranticaCrew.SCRAPING_FAILED:
            return FragranticaCrew.SCRAPING_FAILED
        return text

    def kickoff(self, url: str) -> str:
        """Run the research and report tasks sequentially for ``url``.

        Args:
            url: Address of the perfume page to scrape and analyse.

        Returns:
            The final report as text, or the exact string
            ``"SCRAPING_FAILED"`` when the crew's output is that sentinel.
        """
        research_task = self._build_research_task(url)
        report_task = self._build_report_task(research_task)

        crew = Crew(
            agents=[self.research_agent, self.reporter_agent],
            tasks=[research_task, report_task],
            process=Process.sequential,
        )

        print(f"Fragrantica Crew is kicking off for URL: {url}")
        # Comparing the raw kickoff result to a string never matched, so the
        # result is converted to text before the sentinel check.
        return self._finalize_output(crew.kickoff())