Spaces:

amoldwalunj
/

listing_app

Runtime error

File size: 73,364 Bytes

import streamlit as st
import re
import gspread
from gspread_dataframe import get_as_dataframe, set_with_dataframe

import pandas as pd
import os
import openai
import time
import json

# Copy the OpenAI key from Streamlit secrets into the environment so the
# os.getenv lookup below can find it. This has to be an assignment: a `==`
# comparison stores nothing and raises KeyError when "api_key" is not already
# set in the environment.
os.environ["api_key"] = st.secrets["api_key"]

openai.api_key = os.getenv("api_key")

# The Google service-account credentials are kept in secrets as a JSON string;
# mirror them into the environment and keep a local copy for parsing.
json_credentials = st.secrets["GSPREAD_CREDENTIALS"]
os.environ["GSPREAD_CREDENTIALS"] = json_credentials

# Parse the JSON content (string) into a Python dictionary
credentials_dict = json.loads(json_credentials)

# Initialize session state so pages can read the key before anything is generated
if 'df_final_output' not in st.session_state:
    st.session_state['df_final_output'] = None

# def get_chatgpt_response(messages):
#    response = openai.ChatCompletion.create(
#    model="gpt-3.5-turbo",
#    messages=messages
#  )
#    return  response['choices'][0]['message']['content']

def get_chatgpt_response(messages, selected_model):
    """Send a chat conversation to OpenAI and return the reply text.

    Args:
        messages: The list of chat message dicts passed through as the
            ``messages`` argument of ``openai.ChatCompletion.create``.
        selected_model: The model name passed through as ``model``.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = openai.ChatCompletion.create(
        messages=messages,
        model=selected_model,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Sidebar navigation: the selected label decides which page block below runs
_page_choices = (
    'Review Analysis',
    'review summary',
    'Feature Benefits',
    'combined features and benefits',
    'feature_mapping',
    'benefit_mapping',
    'Identify Avatars',
    'Tone of Voice Manual',
    'question_answers',
)
page = st.sidebar.radio('Select a page:', _page_choices)


def _review_analysis_sheet_id(sheet_url):
    """Return the spreadsheet ID from a Google Sheets URL, or None if absent.

    The ID is the sixth '/'-separated segment of the URL, e.g.
    ``https://docs.google.com/spreadsheets/d/<ID>/edit``.
    """
    segments = sheet_url.strip().split('/')
    if len(segments) > 5 and segments[5]:
        return segments[5]
    return None


def _review_analysis_chunks(texts, char_limit):
    """Group consecutive strings into chunks of at most ``char_limit`` characters.

    Only the lengths of the strings are counted (separators are not). A single
    string longer than ``char_limit`` becomes a chunk of its own, so the scan
    always makes progress instead of producing empty chunks forever.
    """
    chunks = []
    current = []
    current_length = 0
    for text in texts:
        if current and current_length + len(text) > char_limit:
            chunks.append(current)
            current = []
            current_length = 0
        current.append(text)
        current_length += len(text)
    if current:
        chunks.append(current)
    return chunks


def _review_analysis_ask_model(messages, selected_model, retry_delay=30):
    """Call ``get_chatgpt_response``, retrying once after a rate-limit error.

    The exception is matched by class name so that no extra import is needed.
    Any other exception, or a second failure, propagates to the caller.
    """
    try:
        return get_chatgpt_response(messages, selected_model)
    except Exception as e:
        if type(e).__name__ != "RateLimitError":
            raise
        print("here")
        time.sleep(retry_delay)
        return get_chatgpt_response(messages, selected_model)


if page == 'Review Analysis':
    st.title('Review Analysis Page')

    # Authenticate Google Sheets API using the credentials dictionary
    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Model used for every chat completion call on this page
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    user_char_limit = st.number_input("Enter character limit for chunk:", value=9000, step=500)

    # User input for prompts
    phase1_prompt = st.text_area("Enter your prompt for Phase 1- wherever you see '{}', we are passing reviews at that place", value="Analyze a set of customer reviews and identify the unique pros and cons and their themes mentioned by customers. for each pros and cons also mention unique theme associated for that pro/cons. Your response should have format like this.\n\nPros:\n\n1. Theme1 (please replace theme identified here): explanaition\n2. Theme2 (please replace theme identified here): explaination\n\ncons:\n1. Theme1: explanaition\n2. Theme2: explaination\n\nIn your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the reviews\n {}")
    phase2_prompt = st.text_area("Enter your prompt for Phase 2- wherever you see '{}', we are passing reviews at that place", value="Analyze a set of customer reviews pros/cons and identify the only unique pros and cons and their themes mentioned by customers. for each pros and cons also mention unique theme associated for that pro/cons. Your response should have format like this.\n\nPros:\n\n1. Theme1 (please replace theme identified here): explanaition\n2. Theme2 (please replace theme identified here): explaination\n\ncons:\n1. Theme1: explanaition\n2. Theme2: explaination\n\nIn your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the reviews\n {}")
    final_prompt = st.text_area("Enter your final prompt", value="please analyze below reviews and give me unique best 20(twenty) pros and cons mentioned. Your response should be in same format in 2 sections. one for pros and other for cons. e.g. Pros:\n1. Quality: explaination, 2. 2nd theme: explaination. \n\n {}\n\nPlease note, response should have 2 sections, one for pros and one for cons. All the unique pros should be in pros section and same should be done for cons. Response format: Pros:\n1. Quality: explaination, 2. 2nd theme: explaination. Make sure your response is long and cover all points (at least top 20 pros and cons)")

    # Fetch the reviews and run the map/reduce style summarisation on demand
    if st.button('generate'):
        sheet_id = _review_analysis_sheet_id(sheet_url)
        if sheet_id is None:
            st.error('Please enter a valid Google Sheet URL before generating.')
        else:
            # Open the Google spreadsheet and read the sheet named 'Reviews'
            sheet = gc.open_by_key(sheet_id)
            worksheet = sheet.worksheet('Reviews')
            df = get_as_dataframe(worksheet)

            # Drop empty rows BEFORE casting to str; casting first would turn
            # every missing cell into the literal review text 'nan'.
            df = df.dropna(subset=['Body'])
            df['Body'] = df['Body'].astype(str)

            # Honour the limit chosen in the UI for every chunking pass
            char_limit = int(user_char_limit)
            summary_separator = "\n" + "-" * 80 + "\n"

            #### first chunking: summarise the raw reviews chunk by chunk
            top_reviews = []
            review_chunks = _review_analysis_chunks(df['Body'], char_limit)
            for chunk_count, chunk_reviews in enumerate(review_chunks, 1):
                reviews_string = '\n'.join(chunk_reviews)

                print(f'Chunk {chunk_count}:')
                print(f'Number of reviews: {len(chunk_reviews)}')
                print(f'Character length: {len(reviews_string)}')

                messages = [
                    {"role": "system", "content": "You are helpful assistant"},
                    {"role": "user", "content": phase1_prompt.format(reviews_string)}
                ]
                top_reviews.append(_review_analysis_ask_model(messages, selected_model))

            reviews_string_1 = summary_separator.join(top_reviews)
            print(len(reviews_string_1))

            # Second phase: keep condensing the summaries until the combined
            # text is short enough for the final prompt.
            while len(reviews_string_1) > 11000:
                previous_length = len(reviews_string_1)
                summaries, top_reviews = top_reviews, []

                summary_chunks = _review_analysis_chunks(summaries, char_limit)
                for chunk_count, chunk_reviews in enumerate(summary_chunks, 1):
                    reviews_string = '\n'.join(chunk_reviews)

                    print(f'Chunk {chunk_count}:')
                    print(f'Number of reviews: {len(chunk_reviews)}')
                    print(f'Character length: {len(reviews_string)}')

                    messages = [
                        {"role": "system", "content": "You are helpful assistant"},
                        {"role": "user", "content": phase2_prompt.format(reviews_string)}
                    ]

                    # Best effort: a failing chunk is skipped, but visibly so
                    try:
                        model_response = _review_analysis_ask_model(messages, selected_model)
                    except Exception as e:
                        st.warning(f"Chunk {chunk_count} could not be summarised and was skipped: {e}")
                    else:
                        top_reviews.append(model_response)

                reviews_string_1 = summary_separator.join(top_reviews)

                # Stop instead of looping (and calling the API) forever when a
                # pass fails to make the combined summaries any shorter.
                if len(reviews_string_1) >= previous_length:
                    st.warning("The summaries could not be condensed any further; continuing with the current text.")
                    break

            # Final pass over the condensed summaries
            messages = [
                {"role": "system", "content": "You are helpful assistant who analyzes reviews and gives top 20 pros and cons from them"},
                {"role": "user", "content": final_prompt.format(reviews_string_1)}
            ]

            final_output = _review_analysis_ask_model(messages, selected_model)

            # Display the output
            st.text('Here are the top pros and cons:')
            st.write(final_output)

            # Keep the result across reruns so the write button can use it
            st.session_state.df_final_output = final_output

    # Create a button that will trigger writing to Google Sheets
    if st.button('Write output to sheet'):
        sheet_id = _review_analysis_sheet_id(sheet_url)
        final_output = st.session_state.df_final_output

        if sheet_id is None:
            st.error('Please enter a valid Google Sheet URL before writing.')
        elif final_output is None:
            st.warning("There is no analysis to write yet. Click 'generate' first.")
        else:
            # Open the Google spreadsheet
            sheet = gc.open_by_key(sheet_id)

            st.write("Button clicked, processing data...")

            # Split each "theme: description" line, tagging it with the most
            # recent section header. Lines seen before any header get an empty
            # type instead of raising NameError.
            type_ = ''
            split_lines = []
            for line in final_output.split('\n'):
                if "Pros:" in line:
                    type_ = "Pros"
                elif "Cons:" in line:
                    type_ = "Cons"
                elif ": " in line:
                    theme, description = line.split(': ', 1)
                    split_lines.append([type_, theme, description])

            # Convert the list of lists into a DataFrame
            df_final_output = pd.DataFrame(split_lines, columns=['Type', 'Theme', 'Description'])

            st.write(df_final_output)

            # Create a new worksheet and write the final output to it
            try:
                worksheet_output = sheet.add_worksheet(title="top_pros_cons", rows="100", cols="20")
                set_with_dataframe(worksheet_output, df_final_output)
                st.write("Data written successfully to Google Sheets.")
            except Exception as e:
                st.error(f"An error occurred while writing to Google Sheets: {e}")

    # # Wrap your button in a form
    #     with st.form(key='output_form'):
    #         # Change your button click event to set a session state variable
    #         if st.form_submit_button('Write output to sheet'):
    #             # Rest of the code to populate df_final_output
    #             st.session_state.df_final_output = df_final_output

    #     # Then, outside of your form, check if df_final_output exists in the session state
    #     if st.session_state.df_final_output is not None:
    #         try:
    #             worksheet_output = sheet.add_worksheet(title="top_pros_cons", rows="100", cols="20")
    #             set_with_dataframe(worksheet_output, st.session_state.df_final_output)
    #             st.write("Data written successfully to Google Sheets.")
    #         except Exception as e:
    #             st.error(f"An error occurred while writing to Google Sheets: {e}")


def _feature_benefits_listing_value(df, row_mask):
    """Return the third-column value of the first row selected by ``row_mask``.

    Returns None when the mask selects no row, so a missing listing field
    shows up as ``None`` in the prompt instead of raising IndexError.
    """
    matches = df[row_mask]
    if len(matches) == 0:
        return None
    return matches.iloc[0, 2]


if page == 'Feature Benefits':
    st.title('Feature Benefits Page')

    # Authenticate Google Sheets API
    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Model used for every chat completion call on this page
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # The spreadsheet ID is the sixth '/'-separated segment of the URL. The
    # script runs before the user has typed anything, so the URL must be
    # validated rather than indexed blindly.
    url_segments = sheet_url.strip().split('/')
    sheet_id = url_segments[5] if len(url_segments) > 5 and url_segments[5] else None

    if sheet_id is None:
        st.info('Enter the URL of your Google Sheet to load the listing.')
    else:
        # Open the Google spreadsheet and read the sheet named 'Listing'
        sheet = gc.open_by_key(sheet_id)
        worksheet = sheet.worksheet('Listing')
        df = get_as_dataframe(worksheet)

        # String view of the sheet, used for the substring row searches below
        df_str = df.astype(str)

        # Title and bullets: rows containing a cell equal to the label
        title = _feature_benefits_listing_value(df, df.eq('Title').any(axis=1))
        bullets = [
            _feature_benefits_listing_value(df, df.eq(f"Bullet #{i}").any(axis=1))
            for i in range(1, 6)
        ]

        # Optional fields: rows whose joined text mentions the label
        backend_mask = df_str.apply(lambda x: 'Backend Search Terms' in ' '.join(x), axis=1)
        backend_search_terms = _feature_benefits_listing_value(df, backend_mask)

        legacy_mask = df_str.apply(lambda x: 'Legacy Product Description' in ' '.join(x), axis=1)
        legacy_desc = _feature_benefits_listing_value(df, legacy_mask)

        # Image text: every non-empty cell of the 'Image Text' row, as strings
        image_text_row = df[df.eq('Image Text').any(axis=1)]
        if len(image_text_row) > 0:
            image_text = [str(cell) for cell in image_text_row.dropna(axis=1).iloc[0, :]]
        else:
            image_text = []
        image_text_string = ' '.join(image_text)

        # A+ description: everything from the first matching row to the end of
        # the sheet, cast to str so numeric cells cannot break the join.
        a_plus_desc_mask = df_str.apply(lambda x: 'A+ Description' in ' '.join(x), axis=1)
        if a_plus_desc_mask.any():
            a_plus_desc_row = df[a_plus_desc_mask].index[0]
            a_plus_desc = df.iloc[a_plus_desc_row:, :].fillna('').astype(str).values.flatten()
            a_plus_desc = ' '.join(a_plus_desc).strip()
        else:
            a_plus_desc = None

        bullet_str = "".join(
            f"Bullet #{i}: {bullet}\n" for i, bullet in enumerate(bullets, 1)
        )

        # Display default prompt to user
        default_prompt = """Using below information of the listing, please extract the top 30 Features and Benefits:
    ------------------------------------
    Title: {}\n 
    bullet_str: {}\n
    Legacy Product Description: {}\n
    A+ Description: {}\n
    Image_text: {}\n
    backend_search_terms: {}\n
    -------------------------------------    

        Please note, you need to create comprehensive top 30 Features and top 30 Benefits for listing using this information. Your response should have 2 sections, one for features and other for benefits
            """.format(title, bullet_str, legacy_desc, a_plus_desc, image_text_string, backend_search_terms)

        prompt = st.text_area("Edit the prompt:", value=default_prompt, height=200)

        if st.button('generate'):
            # First call: extract the raw features and benefits
            messages = [
                {"role": "system", "content": "You are helpful assistant who help create features and benefits for amazon listings"},
                {"role": "user", "content": prompt}
            ]

            model_response = get_chatgpt_response(messages, selected_model)

            # Second call: rewrite the first answer with a short theme per item
            messages1 = [
                {"role": "system", "content": "You are helpful assistant who help create features and benefits for amazon listings"},
                {"role": "user", "content": """{}

            Please re-write above features and benefits in same way in 2 sections.You need to add short heading/theme of 1-3 words for each feature benefit. 
            Your response should have 2 sections. one for features and other for benefits
            e.g features: 1. 100% cotton (this is theme that you will add if not available): 100% cotton cold press paper ensures exceptional quality and strength (this is exisitng description) 2. .....
            Benefits:
            1.Superior Watercolor Surface (this is theme that you will add if not available): Provides a high-quality surface for watercolor painting, with a unique texture that allows for even spreading and vivid colors.(this is exisitng description) 2. ...
            please note, you should not miss any feature benefits. all 30 should be covered with short heading.""".format(model_response)}
            ]

            feature_benefits = get_chatgpt_response(messages1, selected_model)

            # Display the output
            st.text('Here are the top features and benefits:')
            st.write(feature_benefits)

            # Store the result in the session state
            st.session_state.feature_benefits = feature_benefits

        # Create a button that will trigger writing to Google Sheets
        if st.button('Write features and benefits to sheet'):
            if 'feature_benefits' not in st.session_state:
                st.warning("There are no features and benefits to write yet. Click 'generate' first.")
            else:
                # Retrieve the result from the session state
                feature_benefits = st.session_state.feature_benefits

                st.write("Button clicked, processing data...")

                # Split each line into theme and description, tagged with the
                # most recent section header. Lines seen before any header get
                # an empty type instead of raising NameError.
                type_ = ''
                split_lines = []
                for line in feature_benefits.split('\n'):
                    if "Features:" in line:
                        type_ = "Features"
                    elif "Benefits:" in line:
                        type_ = "Benefits"
                    elif line.strip() == '':
                        continue  # skip empty lines
                    else:
                        if ": " in line:  # If theme is present
                            theme, description = line.split(': ', 1)
                        else:  # If theme is not present
                            theme = ''
                            description = line.strip()

                        split_lines.append([type_, theme, description])

                # Convert the list of lists into a DataFrame
                df_feature_benefits = pd.DataFrame(split_lines, columns=['Type', 'Theme', 'Description'])

                st.write(df_feature_benefits)

                # Create a new worksheet and write the final output to it
                try:
                    worksheet_output = sheet.add_worksheet(title="top_features_benefits", rows="100", cols="20")
                    set_with_dataframe(worksheet_output, df_feature_benefits)
                    st.write("Data written successfully to Google Sheets.")
                except Exception as e:
                    st.error(f"An error occurred while writing to Google Sheets: {e}")


if page == 'Identify Avatars':
    st.title('Identify Avatars Page')

    # Authenticate Google Sheets API
    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Model used for the chat completion call on this page
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # Let user choose between the review summary and the raw reviews
    user_choice = st.selectbox("Select input for avatars:", ('review summary', 'Reviews'))

    # The spreadsheet ID is the sixth '/'-separated segment of the URL. The
    # script runs before the user has typed anything, so the URL must be
    # validated rather than indexed blindly.
    url_segments = sheet_url.strip().split('/')
    sheet_id = url_segments[5] if len(url_segments) > 5 and url_segments[5] else None

    if sheet_id is None:
        st.info('Enter the URL of your Google Sheet to load the reviews.')
    else:
        # Open the Google spreadsheet
        sheet = gc.open_by_key(sheet_id)

        # Each choice maps to (worksheet title, column holding the text)
        source_by_choice = {
            'review summary': ('reviews summary', 'reviews summary'),
            'Reviews': ('Reviews', 'Body'),
        }
        worksheet_name, column_name = source_by_choice[user_choice]

        worksheet = sheet.worksheet(worksheet_name)
        # Get all records of the data
        df = get_as_dataframe(worksheet)

        # Keep the non-empty cells of the text column; the cast to str keeps
        # the .str accessor usable when a cell was parsed as a number.
        body_series = df[column_name].dropna().astype(str)

        # Join all entries into one blank-line separated text block
        reviews_ = body_series.str.cat(sep='\n\n')
        st.session_state.reviews_ = reviews_

        # Prepare the prompt
        prompt = """Below are the customer reviews/ review summary for our product:

    -----------------------------------------------
    {}

    -----------------------------------------------


    You need to use above information and Write a list of the top 5 Avatars (persona) that you identify from the product reviews of the listing.
    For each avatar pls specify the following:

    - Name
    - Gender
    - Age range
    - Income range
    - Demographics
    - General description of the avatar
    - Personality Traits
    - Interests and Hobbies
    - Pains (Psychological, Spiritual, Physical)
    - Top 10 insecurities
    - Top 10 books that they read with title and author
    - Top 5 movies
    - Top 5 TV shows
    - List their top 10 desires
    - List the social media they use and channels they follow
    - List top 10 lead magnets used for marketing purpose

    And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(reviews_)

        # Editable prompt
        prompt = st.text_area("Edit the prompt:", value=prompt, height=200)

        if st.button('Generate avatars'):
            messages = [
                {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
                {"role": "user", "content": prompt}
            ]

            # Get model response
            model_response = get_chatgpt_response(messages, selected_model)

            # Display the output
            st.text('Here are the top 5 Avatars:')
            st.write(model_response)

            # Store the result in the session state
            st.session_state.avatars = model_response

        # Create a button that will trigger writing to Google Sheets
        if st.button('Write avatars to sheet'):
            if 'avatars' not in st.session_state:
                st.warning("There are no avatars to write yet. Click 'Generate avatars' first.")
            else:
                # Retrieve the result from the session state
                avatars = st.session_state.avatars

                st.write("Button clicked, processing data...")

                try:
                    # Create a new worksheet named "avatars"
                    worksheet_output = sheet.add_worksheet(title="avatars", rows="100", cols="20")

                    # The whole model answer goes into a single cell
                    df_avatars = pd.DataFrame([avatars], columns=['Avatar Info'])

                    # Write the DataFrame to the new "avatars" worksheet
                    set_with_dataframe(worksheet_output, df_avatars)
                    st.write("Data written successfully to Google Sheets.")
                except Exception as e:
                    st.error(f"An error occurred while writing to Google Sheets: {e}")

       


# Tone of Voice Manual page: shows an editable prompt, sends it to the selected
# model and displays the generated manual. The reply is kept in session state.
if page == 'Tone of Voice Manual':
    st.title('Tone of Voice Manual Page')
    
    # Authenticate Google Sheets API
    #gc = gspread.service_account(filename='arctic-rite-381810-e8bee8664772.json')

    gc = gspread.service_account_from_dict(credentials_dict)
    
    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')
    
    # Extract sheet ID from URL (sixth '/'-separated segment)
    # NOTE(review): this raises IndexError whenever the URL has fewer than six
    # segments, e.g. an empty input — consider validating before indexing.
    sheet_id = sheet_url.split('/')[5]
    # Open the Google spreadsheet
    # NOTE(review): `sheet` is not used by the active code visible on this page
    # (only by the commented-out write step) — confirm whether it is still needed.
    sheet = gc.open_by_key(sheet_id)

    # Add this in the part of your Streamlit app where the user can select the model
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # Add the prompt
    # The space between the second and third dashed lines is empty; presumably
    # the user pastes the avatar details there in the text area — TODO confirm.
    prompt = """------------------------------------------------------------------------------------------------------
using reviews pros and cons we identified below avatar and related info of avatar for our amazon listing

------------------------------------------------------------------------------------------------------



------------------------------------------------------------------------------------------------------
Please generate a tone of voice manual to address the above Avatar.

can you also address below in tone of voice manual.

1. Make a list of all your communications channels
2. Highlight the tonal values that apply to each channel
3. Write best-practice tone of voice examples

Please make this manual very exhaustive and detailed. explain each of the point with long content

"""
    
    # Editable prompt: the text area content replaces the default prompt
    prompt = st.text_area("Edit the prompt:", value=prompt, height=200)
    
    if st.button('Generate Tone of Voice Manual'):
        # Single-turn conversation: fixed system role plus the edited prompt
        messages = [
            {"role": "system", "content": "You are a helpful assistant who helps create a tone of voice manual for avatars"},
            {"role": "user", "content": prompt}
        ]

        # Get model response
        model_response = get_chatgpt_response(messages, selected_model)

        # Display the output
        st.text('Generated Tone of Voice Manual:')
        st.write(model_response)

        # Store the result in the session state
        st.session_state.tone_of_voice_manual = model_response

    # Create a button that will trigger writing to Google Sheets
    # if st.button('Write Tone of Voice Manual to sheet'):
    #     sheet_id = sheet_url.split('/')[5]  # Open the Google spreadsheet
    #     sheet = gc.open_by_key(sheet_id)

    #     # Retrieve the result from the session state
    #     tone_of_voice_manual = st.session_state.tone_of_voice_manual

    #     st.write("Button clicked, processing data...")

    #     # Create a new worksheet and write the final output to it
    #     try:
    #         # Create a new worksheet named "tone_of_voice_manual"
    #         worksheet_output = sheet.add_worksheet(title="tone_of_voice_manual", rows="100", cols="20")
            
    #         # Prepare a list to store the tone of voice manual
    #         tone_of_voice_manual_data = [tone_of_voice_manual]

    #         # Convert the list of tone of voice manual data into a DataFrame
    #         df_tone_of_voice_manual = pd.DataFrame(tone_of_voice_manual_data, columns=['Tone of Voice Manual'])

    #         # Write the DataFrame to the new "tone_of_voice_manual" worksheet
    #         set_with_dataframe(worksheet_output, df_tone_of_voice_manual)
    #         st.write("Data written successfully to Google Sheets.")
    #     except Exception as e:
    #         st.error(f"An error occurred while writing to Google Sheets: {e}")

    
    # Write Tone of Voice Manual to sheet
    if st.button('Write Tone of Voice Manual to sheet'):
        sheet_id = sheet_url.split('/')[5]  # Open the Google spreadsheet
        sheet = gc.open_by_key(sheet_id)

        # Retrieve the result from the session state
        tone_of_voice_manual = st.session_state.tone_of_voice_manual

        st.write("Button clicked, processing data...")

        try:
            # Check if the "tone_of_voice_manual" worksheet already exists
            worksheet_output = None
            for wks in sheet.worksheets():
                if wks.title == 'tone_of_voice_manual':
                    worksheet_output = wks
                    break

            # If the worksheet does not exist, create a new one
            if worksheet_output is None:
                worksheet_output = sheet.add_worksheet(title="tone_of_voice_manual", rows="100", cols="20")

            # Read the existing data from the worksheet
            existing_data = get_as_dataframe(worksheet_output)

            # Remove empty columns from the existing data
            existing_data = existing_data.dropna(how='all', axis=1)

            # Prepare a list to store the tone of voice manual data
            tone_of_voice_manual_data = [tone_of_voice_manual]

            # Convert the list of tone of voice manual data into a DataFrame
            new_data = pd.DataFrame(tone_of_voice_manual_data, columns=['Tone of Voice Manual'])

            # Append the new data to the existing data
            updated_data = pd.concat([existing_data, new_data], axis=1)

            # Clear the worksheet before writing the updated data
            worksheet_output.clear()

            # Write the updated data to the "tone_of_voice_manual" worksheet
            set_with_dataframe(worksheet_output, updated_data)
            st.write("Data written successfully to Google Sheets.")
        except Exception as e:
            st.error(f"An error occurred while writing to Google Sheets: {e}")




# ---------------------------------------------------------------------------
# "review summary" page: reads the 'Body' column of the 'Reviews' worksheet,
# summarises the reviews chunk by chunk with the selected chat model, reduces
# the chunk summaries again while they are too long, and optionally writes
# the result to the "reviews summary" worksheet.
# ---------------------------------------------------------------------------
if page == 'review summary':
    st.title('Review summary Page')

    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Model selection
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # Default prompts; "{}" is replaced with the text to summarise
    phase1_prompt = "Analyze a set of customer reviews and create a summary of reviews. don't miss any point in your summary\n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the reviews\n {}"
    phase2_prompt = "Analyze a set of customer reviews summary and create a summary covering all points. don't create too small summary. it should be exhaustive. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the review summary \n {}"
    final_prompt = "please analyze below reviews summary and create final review summary in less than 2500 words. don't miss any point in your summary. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience.\n {}"

    editable_phase1_prompt = st.text_area("Edit the first prompt:", value=phase1_prompt, height=200)
    user_char_limit = st.number_input("Enter character limit:", value=9000, step=500)

    def _group_by_char_limit(texts, char_limit):
        # Greedily pack consecutive texts into groups whose summed length
        # stays within char_limit. A single text longer than the limit gets
        # a group of its own; previously such a text produced an empty chunk
        # without advancing, i.e. an endless loop of model calls.
        groups = []
        current, current_length = [], 0
        for text in texts:
            if current and current_length + len(text) > char_limit:
                groups.append(current)
                current, current_length = [], 0
            current.append(text)
            current_length += len(text)
        if current:
            groups.append(current)
        return groups

    def _ask_model(messages):
        # Call the model, retrying once after 30 seconds on a rate limit.
        # The exception is matched by class name so the check does not
        # depend on where the installed openai version exposes the class.
        # Any other error is re-raised for the caller to report.
        try:
            return get_chatgpt_response(messages, selected_model)
        except Exception as exc:
            if type(exc).__name__ != "RateLimitError":
                raise
            time.sleep(30)
            return get_chatgpt_response(messages, selected_model)

    def _summarize_in_chunks(texts, prompt_template, char_limit):
        # Summarise texts group by group and return one summary per group.
        summaries = []
        for chunk_count, chunk in enumerate(_group_by_char_limit(texts, char_limit), 1):
            reviews_string = '\n'.join(chunk)

            print(f'Chunk {chunk_count}:')
            print(f'Number of reviews: {len(chunk)}')
            print(f'Character length: {len(reviews_string)}')

            messages = [
                {"role": "system", "content": "You are helpful assistant"},
                {"role": "user", "content": prompt_template.format(reviews_string)}
            ]
            summaries.append(_ask_model(messages))
        return summaries

    # Fetch the reviews and build the summary when the user asks for it
    if st.button('generate'):
        # The spreadsheet key is the 6th '/'-separated part of the URL
        url_parts = sheet_url.split('/')

        if len(url_parts) < 6 or not url_parts[5]:
            st.error("Please enter a valid Google Sheet URL before generating.")
        else:
            try:
                sheet = gc.open_by_key(url_parts[5])
                # Select sheet named 'Reviews'
                worksheet = sheet.worksheet('Reviews')
                df = get_as_dataframe(worksheet)

                # Drop empty cells BEFORE converting to str: converting first
                # turns NaN into the literal text 'nan', which dropna can no
                # longer remove and which was then sent to the model.
                reviews = df['Body'].dropna().astype(str).tolist()

                if not reviews:
                    st.warning("No reviews found in the 'Body' column of the 'Reviews' sheet.")
                else:
                    #### first chunking phase (user-editable prompt and limit)
                    top_reviews = _summarize_in_chunks(reviews, editable_phase1_prompt, user_char_limit)

                    separator = "\n" + "-" * 80 + "\n"
                    reviews_string_1 = separator.join(top_reviews)

                    print(len(reviews_string_1))

                    if len(reviews_string_1) > 14000:
                        # Second phase: keep re-summarising the summaries
                        # until the combined text is short enough
                        while len(reviews_string_1) > 11000:
                            previous_length = len(reviews_string_1)
                            top_reviews = _summarize_in_chunks(top_reviews, phase2_prompt, 9000)
                            reviews_string_1 = separator.join(top_reviews)

                            # Stop when a round no longer shrinks the text,
                            # otherwise this loop would call the model forever
                            if len(reviews_string_1) >= previous_length:
                                break

                        if len(reviews_string_1) > 14000:
                            # Only reachable when the reduction above stalled:
                            # make one last attempt with the final prompt
                            messages = [
                                {"role": "system", "content": "You are helpful assistant who analyzes reviews"},
                                {"role": "user", "content": final_prompt.format(reviews_string_1)}
                            ]
                            final_output = _ask_model(messages)
                            st.text('Here are the top pros and cons:')
                        else:
                            final_output = reviews_string_1
                    else:
                        final_output = reviews_string_1
                        print('we are here')

                    # Display the output and keep it for the write button
                    st.markdown(final_output)
                    st.session_state.df_final_output = final_output
            except Exception as e:
                st.error(f"An error occurred while generating the review summary: {e}")

    # Create a button that will trigger writing to Google Sheets
    if st.button('Write output to sheet'):
        final_output = st.session_state.df_final_output
        url_parts = sheet_url.split('/')

        if len(url_parts) < 6 or not url_parts[5]:
            st.error("Please enter a valid Google Sheet URL before writing.")
        elif final_output is None:
            # df_final_output starts out as None until something is generated
            st.error("No review summary found - please generate it before writing.")
        else:
            st.write("Button clicked, processing data...")

            try:
                sheet = gc.open_by_key(url_parts[5])

                # Reuse the "reviews summary" worksheet if it exists,
                # otherwise create it
                try:
                    worksheet_output = sheet.worksheet('reviews summary')
                except gspread.exceptions.WorksheetNotFound:
                    worksheet_output = sheet.add_worksheet(title="reviews summary", rows="100", cols="20")

                # Read the existing data and drop columns that are entirely empty
                existing_data = get_as_dataframe(worksheet_output).dropna(how='all', axis=1)

                # The summary becomes a single-cell column
                new_data = pd.DataFrame([final_output], columns=['reviews summary'])

                # Add the new column next to whatever was already stored
                updated_data = pd.concat([existing_data, new_data], axis=1)

                # Clear the worksheet before writing the updated data
                worksheet_output.clear()

                set_with_dataframe(worksheet_output, updated_data)
                st.write("Data written successfully to Google Sheets.")
            except Exception as e:
                st.error(f"An error occurred while writing to Google Sheets: {e}")


# if page == 'question_answers':
#     st.title('question answers')

#     #gc = gspread.service_account(filename='arctic-rite-381810-e8bee8664772.json')
#     gc = gspread.service_account_from_dict(credentials_dict)

#     # Ask user for Google Sheet URL
#     sheet_url = st.text_input('Enter the URL of your Google Sheet')

#     # Add this in the part of your Streamlit app where the user can select the model
#     models = ["gpt-3.5-turbo", "gpt-4"]
#     selected_model = st.selectbox("Choose a model:", models)

#     # Extract sheet ID from URL
#     sheet_id = sheet_url.split('/')[5]
#     # Open the Google spreadsheet
#     sheet = gc.open_by_key(sheet_id)

#     # Select sheet named 'Listing'
#     worksheet = sheet.worksheet('Listing')
#     # Get all records of the data
#     df = get_as_dataframe(worksheet)

#     # Convert DataFrame to strings
#     df_str = df.astype(str)

#     title = df[df.eq('Title').any(axis=1)].iloc[0, 2]
#     bullets = [df[df.eq(f"Bullet #{i}").any(axis=1)].iloc[0, 2] for i in range(1, 6)]

#     backend_search_terms = df[df.astype(str).apply(lambda x: 'Backend Search Terms' in ' '.join(x), axis=1)].iloc[0, 2] if len(df[df.astype(str).apply(lambda x: 'Backend Search Terms' in ' '.join(x), axis=1)]) > 0 else None

#     image_text_row = df[df.eq('Image Text').any(axis=1)]
#     image_text = list(image_text_row.dropna(axis=1).iloc[0, :])

#     a_plus_desc_mask = df.astype(str).apply(lambda x: 'A+ Description' in ' '.join(x), axis=1)
#     if a_plus_desc_mask.any():
#         a_plus_desc_row = df[a_plus_desc_mask].index[0]
#         a_plus_desc = df.iloc[a_plus_desc_row:, :].fillna('').values.flatten()
#         a_plus_desc = ' '.join(a_plus_desc).strip()
#     else:
#         a_plus_desc = None

#     legacy_desc = df[df.astype(str).apply(lambda x: 'Legacy Product Description' in ' '.join(x), axis=1)].iloc[0, 2] if len(df[df.astype(str).apply(lambda x: 'Legacy Product Description' in ' '.join(x), axis=1)]) > 0 else None

#     image_text_string = ' '.join(image_text)

#     bullet_str = ""
#     for i, bullet in enumerate(bullets, 1):
#         bullet_str += f"Bullet #{i}: {bullet}\n"

#     # Display default prompt to user
#     default_prompt = """Below is data of our products for amazon listing:
#     ------------------------------------
#     Title: {}\n 
#     bullet_str: {}\n
#     Legacy Product Description: {}\n
#     A+ Description: {}\n
#     Image_text: {}\n
#     backend_search_terms: {}\n
#     -------------------------------------    

#     Please create question-answers pairs for amazon listing using this data. 
#     Please segregate them theme wise. themes names should not be features or benefits and create 1 or more questions answer pairs for each theme.
#     questions generated should be potential questions in customer mind before buying product.
#     please create at least 20 question-answers pairs.
#             """.format(title, bullet_str, legacy_desc, a_plus_desc, image_text_string, backend_search_terms)

#     prompt = st.text_area("Edit the prompt:", value=default_prompt, height=200)



#     # If the user has input a URL, fetch the data from Google Sheets
#     if st.button('generate'):

#         messages = [
#                     {"role": "system", "content": "You are helpful assistant"},
#                     {"role": "user", "content": prompt}
#                 ]

        

#         model_response = get_chatgpt_response(messages, selected_model)

#         question_answers= model_response

#         st.session_state.question_answers = question_answers

#         st.write(question_answers)

#     def parse_qa(text):
#         try:
#             # Split the text into themes
#             themes = re.split(r'\nTheme: ', text)[1:]
            
#             output = []
#             for theme in themes:
#                 theme_lines = theme.split('\n')
#                 theme_name = theme_lines[0]
#                 qa_pairs = theme_lines[1:]
                
#                 # Extract question and answer pairs
#                 for qa in qa_pairs:
#                     if qa.startswith('Q'):
#                         question = qa.split(': ', 1)[1]
#                     elif qa.startswith('A'):
#                         answer = qa.split(': ', 1)[1]
#                         output.append((theme_name, question, answer))
#             return output, True
#         except Exception as e:
#             return text, False  # Return the full text and a flag indicating the parsing failed

        

#     # # Create a button that will trigger writing to Google Sheets
#     # if st.button('Write output to sheet'):

#     #     final_output= st.session_state.question_answers

#     #     st.write("Button clicked, processing data...")

#     #     sheet_id = sheet_url.split('/')[5]  # Open the Google spreadsheet
#     #     sheet = gc.open_by_key(sheet_id)



       

#     #     try:
#     #         # Check if the "tone_of_voice_manual" worksheet already exists
#     #         worksheet_output = None
#     #         for wks in sheet.worksheets():
#     #             if wks.title == 'question_answers':
#     #                 worksheet_output = wks
#     #                 break

#     #         # If the worksheet does not exist, create a new one
#     #         if worksheet_output is None:
#     #             worksheet_output = sheet.add_worksheet(title="question_answers", rows="100", cols="20")

#     #         # Read the existing data from the worksheet
#     #         existing_data = get_as_dataframe(worksheet_output)

#     #         # Remove empty columns from the existing data
#     #         existing_data = existing_data.dropna(how='all', axis=1)

#     #         # Prepare a list to store the tone of voice manual data
#     #         question_answers_data = [final_output]

#     #         # Convert the list of tone of voice manual data into a DataFrame
#     #         new_data = pd.DataFrame(question_answers_data, columns=['question_answers'])

#     #         # Append the new data to the existing data
#     #         updated_data = pd.concat([existing_data, new_data], axis=1)

#     #         # Clear the worksheet before writing the updated data
#     #         worksheet_output.clear()

#     #         # Write the updated data to the "tone_of_voice_manual" worksheet
#     #         set_with_dataframe(worksheet_output, updated_data)
#     #         st.write("Data written successfully to Google Sheets.")
#     #     except Exception as e:
#     #         st.error(f"An error occurred while writing to Google Sheets: {e}")

#     if st.button('Write output to sheet'):
#         final_output, parsed_successfully = parse_qa(st.session_state.question_answers)

#         st.write("Button clicked, processing data...")

#         sheet_id = sheet_url.split('/')[5]  # Open the Google spreadsheet
#         sheet = gc.open_by_key(sheet_id)

#         try:
#             worksheet_output = None
#             for wks in sheet.worksheets():
#                 if wks.title == 'question_answers':
#                     worksheet_output = wks
#                     break

#             if worksheet_output is None:
#                 worksheet_output = sheet.add_worksheet(title="question_answers", rows="100", cols="20")

#             if parsed_successfully:
#                 new_data = pd.DataFrame(final_output, columns=['Theme', 'Question', 'Answer'])
#             else:
#                 new_data = pd.DataFrame([final_output], columns=['Output'])

#             worksheet_output.clear()
#             set_with_dataframe(worksheet_output, new_data)
#             st.write("Data written successfully to Google Sheets.")
#         except Exception as e:
#             st.error(f"An error occurred while writing to Google Sheets: {e}")

# ---------------------------------------------------------------------------
# "question_answers" page: reads the 'Question' / 'Answer' columns of the
# 'Q&A' worksheet, asks the selected chat model to group the pairs by theme
# and optionally appends the answer to the "QnA_analyzed" worksheet.
# ---------------------------------------------------------------------------
if page == 'question_answers':
    st.title('question answers')

    gc = gspread.service_account_from_dict(credentials_dict)

    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # The default prompt is built from the sheet, so the sheet has to be read
    # on every run. Halt this run cleanly while the URL box is still empty
    # (or malformed) instead of crashing with an IndexError.
    url_parts = sheet_url.split('/')
    if len(url_parts) < 6 or not url_parts[5]:
        st.info("Enter the URL of your Google Sheet to continue.")
        st.stop()

    # The spreadsheet key is the 6th '/'-separated part of the URL
    sheet_id = url_parts[5]
    # Open the Google spreadsheet
    sheet = gc.open_by_key(sheet_id)

    # Select sheet named 'Q&A'
    worksheet = sheet.worksheet('Q&A')
    df = get_as_dataframe(worksheet)

    # Keep only rows that have both a question and an answer
    df = df.dropna(subset=['Question', 'Answer'])

    question_answer = "\n".join(
        f"Question: {q}\nAnswer: {a}" for q, a in zip(df['Question'], df['Answer'])
    )

    # Show the size of the text that will be embedded in the prompt
    st.write(len(question_answer))

    # Display default prompt to user
    default_prompt = """Below is question-answer pairs from amazon listing:
    ------------------------------------
    {}
    -------------------------------------    
    please analyze these question-answer pairs and group them theme-wise. you have find themes and then group question-answer pairs as per theme. Don't miss any question pair given above. give numbering to questions.
            """.format(question_answer)

    prompt = st.text_area("Edit the prompt:", value=default_prompt, height=200)

    # Send the (possibly edited) prompt to the model
    if st.button('generate'):
        messages = [
            {"role": "system", "content": "You are helpful assistant"},
            {"role": "user", "content": prompt}
        ]

        question_answers = get_chatgpt_response(messages, selected_model)

        # Keep the answer for the write button, which triggers a rerun
        st.session_state.question_answers = question_answers

        st.write(question_answers)

    if st.button('Write output to sheet'):
        if 'question_answers' not in st.session_state:
            # Nothing has been generated in this session yet
            st.error("No analysis found - please generate it before writing.")
        else:
            question_answers = st.session_state.question_answers

            # Reuse the "QnA_analyzed" worksheet if it exists, otherwise create it
            try:
                new_worksheet = sheet.worksheet('QnA_analyzed')
            except gspread.exceptions.WorksheetNotFound:
                new_worksheet = sheet.add_worksheet(title="QnA_analyzed", rows="100", cols="20")

            # Append the result as one row: a label cell plus the model output
            new_worksheet.append_row(['question_answers', question_answers])
            st.write('Done!')


# ---------------------------------------------------------------------------
# "feature_mapping" page: pulls the listing fields (title, bullets,
# descriptions, image text, backend search terms) out of the 'Listing'
# worksheet, embeds them in a feature-presence prompt and shows the answer of
# the selected chat model.
# ---------------------------------------------------------------------------
if page == 'feature_mapping':
    st.title('feature_mapping')

    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Model selection
    models = ["gpt-3.5-turbo","gpt-4-0125-preview", "gpt-3.5-turbo-0125" ]
    selected_model = st.selectbox("Choose a model:", models)

    # The default prompt is built from the sheet, so the sheet has to be read
    # on every run. Halt this run cleanly while the URL box is still empty
    # (or malformed) instead of crashing with an IndexError.
    url_parts = sheet_url.split('/')
    if len(url_parts) < 6 or not url_parts[5]:
        st.info("Enter the URL of your Google Sheet to continue.")
        st.stop()

    # The spreadsheet key is the 6th '/'-separated part of the URL
    sheet_id = url_parts[5]
    # Open the Google spreadsheet
    sheet = gc.open_by_key(sheet_id)

    # Select sheet named 'Listing'
    worksheet = sheet.worksheet('Listing')
    df = get_as_dataframe(worksheet)

    # String view of the sheet, computed once and used for substring searches
    df_str = df.astype(str)

    def _rows_mentioning(label):
        # Boolean mask of the rows whose space-joined cell text contains label
        return df_str.apply(lambda row: label in ' '.join(row), axis=1)

    def _value_next_to(label):
        # Third-column value of the first row mentioning label, or None
        matching_rows = df[_rows_mentioning(label)]
        return matching_rows.iloc[0, 2] if len(matching_rows) > 0 else None

    # Title and bullets are looked up by exact cell match; the value is
    # taken from the third column of the matching row
    title = df[df.eq('Title').any(axis=1)].iloc[0, 2]
    bullets = [df[df.eq(f"Bullet #{i}").any(axis=1)].iloc[0, 2] for i in range(1, 6)]

    backend_search_terms = _value_next_to('Backend Search Terms')

    # All non-empty cells of the 'Image Text' row
    image_text_row = df[df.eq('Image Text').any(axis=1)]
    image_text = list(image_text_row.dropna(axis=1).iloc[0, :])

    a_plus_desc_mask = _rows_mentioning('A+ Description')
    if a_plus_desc_mask.any():
        # Everything from the 'A+ Description' row to the end of the sheet.
        # NOTE(review): the index label is used as a row position, which
        # assumes the default 0..n-1 index - confirm if the frame is ever
        # filtered before this point.
        a_plus_desc_row = df[a_plus_desc_mask].index[0]
        a_plus_desc = df.iloc[a_plus_desc_row:, :].fillna('').values.flatten()
        # str() each cell: join raises TypeError on non-string (e.g. numeric) cells
        a_plus_desc = ' '.join(map(str, a_plus_desc)).strip()
    else:
        a_plus_desc = None

    legacy_desc = _value_next_to('Legacy Product Description')

    # str() each cell for the same reason as above
    image_text_string = ' '.join(map(str, image_text))

    bullet_str = "".join(f"Bullet #{i}: {bullet}\n" for i, bullet in enumerate(bullets, 1))

    # Display default prompt to user
    default_prompt = """Features: 
1) Professional Artist Testing: Developed and tested by professional artists to ensure exceptional product quality and results for all types of artistic expression.
2) 1:1 Aspect Ratio: Square aspect ratio of 1:1 facilitates an easy design process and balanced compositions based on famous artworks.
3) 100% Cotton Fibers: Uses 100% cotton fibers to provide natural and lasting beauty and exceptional resistance to scratching and erasing.
4) Gelatin-sized Paper: Offers astounding absorbency resulting in harmonious, natural grain texture and no optical brightening agents that maintain the original brightness of the artwork.
5) Cold & Hot Presses: Available in both cold press and hot press finishes, resulting in slightly textured, toothed surfaces (cold press) for depth and almost no tooth (hot press) surfaces for seamless blending, overlay, or paint removal.
6) European Mill Crafting: Crafted by European mill masters with over 400 years of paper-making experience, ensuring that you produce your finest works.
7) Easy-Block Format: Offers a unique "Easy-Block" format that eliminates the need to stretch watercolor paper sheets, saves time and effort, and ensures that paint dries to the desired texture and prevent buckling or creasing.
8) 2 Glued, 2 Open Edges: Glued on 2 edges and open on 2 edges for easy removal of finished painting, saving time and effort while maintaining paper integrity and preservation.
9) Travel-friendly: Suitable for travel, providing increased convenience to artists.
10) Various Paper Weights: Available in a range of paper weights, ensuring maximum versatility for different artistic styles and techniques.
11) 100 Series Youth Quality: Offers youth quality in the 100 series, providing a traditional watercolor feel for artistic expression.
12) Cotton & Wood Pulp Choices: Offers two different choices of watercolor paper: 100% cotton pulp and 100% wood pulp.
13) Indoor & Outdoor Use: Designed for use indoors and outdoors, providing versatility for artists on the go.
14) 14 Cold-pressed Sheets: Offers 14 sheets of cold-pressed, textured watercolor paper, providing ample surface area for larger projects.
15) Glue Binding: Offers glue binding for easy tearing out of pages, allowing for experimentation and perfecting techniques.
16) Drawing, Sketching, Painting: Versatile for drawing, sketching, and painting.
17) Mixed Media Suitable: Top choice for mixed media projects.
18) Artist-approved: Tested and approved by artists.
19) Chlorine-free & pH-neutral: Chlorine-free, pH-neutral, and archival, ensuring the longevity of artwork.
20) Natural White Paper: Natural white, providing a textured surface for extraordinary vibrant colors.
21) Rough Watercolor Paper: Rough, providing both smooth and textured surfaces for experimentation.
22) Dual-Sided Surfaces: Dual-sided with smooth and textured surfaces.
23) Expert Quality: Provides expert quality for perfect combination of durability and ease of use.
24) Art-N-Fly Compatibility: Compatible with Art-N-Fly Watercolor Pens.
25) Historic Paper Quality: Offers exceptional papers since the 1700s.
26) Calcium Carbonate Buffer: Buffered with calcium carbonate for added protection.
27) Random Texture: Provides a distinctive random texture.
28) Internally Sized: Internally sized to maintain paper integrity and prevent paper deterioration.


--------------------------------------------------------------------------------------------

You have to search for presence of above features in below information of listing:

--------------------------------------------------------------------------------------------
    ------------------------------------
    Title: {}\n 
    bullet_str: {}\n
    Legacy Product Description: {}\n
    A+ Description: {}\n
    Image_text: {}\n
    backend_search_terms: {}\n
    ------------------------------------- 



    Please carefully read the given information of listing and create feature present matrix. if feature is present then you have to mark it using 1 and if absent leave empty. please create in table format. please include feature name in the table. note, only present features should be covered. You have to explain at the end where did you find the present features. also once table is ready create one json that I can easily load in pandas df
            """.format(title, bullet_str, legacy_desc, a_plus_desc, image_text_string, backend_search_terms)

    prompt = st.text_area("Edit the prompt:", value=default_prompt, height=200)

    # Send the (possibly edited) prompt to the model
    if st.button('generate'):
        messages = [
            {"role": "system", "content": "You are helpful assistant"},
            {"role": "user", "content": prompt}
        ]

        feature_mapping = get_chatgpt_response(messages, selected_model)

        # Keep the answer in the session state across reruns
        st.session_state.feature_mapping = feature_mapping

        st.write(feature_mapping)


if page == 'benefit_mapping':
    # Page purpose: read the listing content (title, bullets, descriptions,
    # image text, backend search terms) from the 'Listing' worksheet of a
    # Google Sheet and ask the selected chat model which of a fixed list of
    # benefits appear in that content.
    st.title('benefit_mapping')

    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Let the user pick which chat model answers the prompt
    models = ["gpt-3.5-turbo", "gpt-4-0125-preview", "gpt-3.5-turbo-0125"]
    selected_model = st.selectbox("Choose a model:", models)

    # The text input is empty on the first render, so the URL must be
    # validated before indexing into its '/'-separated segments; indexing
    # unconditionally raised IndexError before the user could type anything.
    cleaned_url = sheet_url.strip()
    url_parts = cleaned_url.split('/')
    if not cleaned_url:
        st.info('Enter a Google Sheet URL to continue.')
    elif len(url_parts) < 6 or not url_parts[5]:
        st.error('Could not find a sheet ID in that URL.')
    else:
        # The sheet ID is the sixth '/'-separated segment of the URL
        sheet_id = url_parts[5]
        sheet = gc.open_by_key(sheet_id)

        # Select sheet named 'Listing' and load it as a DataFrame
        worksheet = sheet.worksheet('Listing')
        df = get_as_dataframe(worksheet)

        # Exact-match label rows: the value is read from the third column
        title = df[df.eq('Title').any(axis=1)].iloc[0, 2]
        bullets = [df[df.eq(f"Bullet #{i}").any(axis=1)].iloc[0, 2] for i in range(1, 6)]

        # Each row joined into one string, computed once and reused for the
        # substring label searches below.
        row_text = df.astype(str).apply(lambda row: ' '.join(row), axis=1)

        backend_rows = df[row_text.str.contains('Backend Search Terms', regex=False)]
        backend_search_terms = backend_rows.iloc[0, 2] if len(backend_rows) > 0 else None

        image_text_row = df[df.eq('Image Text').any(axis=1)]
        image_text = list(image_text_row.dropna(axis=1).iloc[0, :])

        # regex=False matters here: '+' must be matched literally
        a_plus_desc_mask = row_text.str.contains('A+ Description', regex=False)
        if a_plus_desc_mask.any():
            a_plus_desc_row = df[a_plus_desc_mask].index[0]
            a_plus_cells = df.iloc[a_plus_desc_row:, :].fillna('').values.flatten()
            # Cells may hold non-string values; coerce so join cannot raise
            a_plus_desc = ' '.join(map(str, a_plus_cells)).strip()
        else:
            a_plus_desc = None

        legacy_rows = df[row_text.str.contains('Legacy Product Description', regex=False)]
        legacy_desc = legacy_rows.iloc[0, 2] if len(legacy_rows) > 0 else None

        image_text_string = ' '.join(map(str, image_text))

        bullet_str = "".join(
            f"Bullet #{i}: {bullet}\n" for i, bullet in enumerate(bullets, 1)
        )

        # Display default prompt to user
        default_prompt = """Benefits:
1) Long-lasting Artwork: Exceptional product quality ensures long-lasting, beautiful artwork and exceptional results for various artistic expressions.
2) Easy Composition: A square aspect ratio of 1:1 on the paper facilitates an easy design process and balanced compositions based on famous artworks.
3) Natural Beauty: 100% cotton fibers provide natural and lasting beauty and exceptional resistance to scratching and erasing.
4) Superior Absorbency: Gelatin-sized paper offers astounding absorbency resulting in harmonious, natural grain texture and no optical brightening agents that maintain the original brightness of the artwork.
5) Versatile Finishes: Available in both cold press and hot press finishes result in slightly textured, toothed surfaces (cold press) for depth and almost no tooth (hot press) surfaces for seamless blending, overlay, or paint removal and provide vibrantly versatile paper for customization based on the artist's preference.
6) Expert Craftsmanship: Crafted by experienced mill masters guarantees that artists produce their finest works.
7) No Stretching Needed: Unique "Easy-Block" format eliminates the need to stretch watercolor paper sheets, saves time and effort, and ensures that paint dries to the desired texture and prevents buckling or creasing.
8) Easy Painting Removal: Glued on 2 edges and open on 2 edges for easy removal of finished painting saves time and effort while maintaining paper integrity and preservation.
9) Travel Convenience: Suitable for travel, providing increased convenience to artists.
10) Versatility: Available in a range of paper weights ensures maximum versatility for different artistic styles and techniques.
11) Time & Money Savings: Saves time and money with ready-made sizes.
12) Durable Preservation: Durable material for long-lasting preservation.
13) Traditional Feel: Traditional watercolor feel for artistic expression.
14) Ample Surface Area: Provides ample surface area for larger projects.
15) Superior Value: Superior value compared to other brands.
16) Artistic Versatility: Approved by professional artists for various artistic endeavors.
17) Mixed Media Ideal: Suitable for creating mixed media artwork.
18) Professional Quality: Ensures the longevity of artwork with high-quality material.
19) Artwork Longevity: Provides a textured surface for extraordinary vibrant colors, contributing to artwork longevity.
20) Beautiful Paintings: Ideal for creating beautiful artwork for weddings, gifts, and more.
21) Vibrant Colors: Provides a textured surface that results in extraordinary vibrant colors.
22) Technique Experimentation: Offers both smooth and textured surfaces for experimentation with different techniques.
23) Creative Possibilities: Versatile enough for painting, sketching, printmaking, or drawing.
24) Durable & Easy: Smooth edges and easy tear sheets ensure no damage to artwork.
25) Professional Look: Provides a professional appearance for watercolor paintings.
26) Unique Artisan Touch: Innovative artisan paper adds a unique touch to artwork.
27) Added Protection: High-quality material ensures artwork longevity and protection.
28) Budget-friendly: Cost-effective paper for budget-conscious artists.
29) Quality Assurance: Highest grade of watercolor paper available, providing quality assurance and contributing to artistic confidence.


--------------------------------------------------------------------------------------------

You have to search for presence of above Benefits in below information of listing:

--------------------------------------------------------------------------------------------
    ------------------------------------
    Title: {}\n 
    bullet_str: {}\n
    Legacy Product Description: {}\n
    A+ Description: {}\n
    Image_text: {}\n
    backend_search_terms: {}\n
    ------------------------------------- 



    Please carefully read the given information of listing and create Benefits present matrix. if Benefit is present then you have to mark it using 1 and if absent leave empty. please create in table format. note, only present Benefits should be covered. please include benefit name in the table. You have to explain at the end where did you find the present Benefits. also once table is ready create one json that I can easily load in pandas df
            """.format(title, bullet_str, legacy_desc, a_plus_desc, image_text_string, backend_search_terms)

        prompt = st.text_area("Edit the prompt:", value=default_prompt, height=200)

        # Send the (possibly edited) prompt to the model on request
        if st.button('generate'):
            messages = [
                {"role": "system", "content": "You are helpful assistant"},
                {"role": "user", "content": prompt}
            ]

            benefit_mapping = get_chatgpt_response(messages, selected_model)

            # Store under its own key; writing to `feature_mapping` here
            # overwrote the result produced by the feature_mapping page.
            st.session_state.benefit_mapping = benefit_mapping

            st.write(benefit_mapping)



import textwrap
from gspread_dataframe import get_as_dataframe

# def page_combined_features_and_benefits():
#     st.title('combined features and benefits')
#     gc = gspread.service_account_from_dict(credentials_dict)

#     # Use a text area for URLs input
#     sheet_urls_input = st.text_area('Enter the URLs of your Google Sheets, each on a new line')
#     sheet_urls = sheet_urls_input.split('\n')  # split input into a list of URLs

#     if sheet_urls:
#         st.write('You entered these URLs:')
#         for url in sheet_urls:
#             st.write(url)

#     models = ["gpt-3.5-turbo", "gpt-4", "gpt-3.5-turbo-16k"]
#     selected_model = st.selectbox("Choose a model:", models)

#     user_char_limit = st.number_input("Enter character limit for chunk:", value=9000, step=500)

#     all_feature_benefit_strings = []

#     if st.button('generate'):
#         # Read all features and benefits from all sheets separately
#         for sheet_url in sheet_urls:
#             feature_benefit_strings = []
#             sheet_id = sheet_url.split('/')[5]
#             sheet = gc.open_by_key(sheet_id)
#             worksheet_name = 'top_features_benefits'
#             worksheet = sheet.worksheet(worksheet_name)
#             df = get_as_dataframe(worksheet).dropna(how='all')

#             for _, row in df.iterrows():
#                 feature_benefit_strings.append(f"{row['Type']} {row['Theme']}: {row['Description']}")

#             # Process the strings for each sheet separately
#             chunk_string = "\n".join(feature_benefit_strings)
#             if len(chunk_string) <= user_char_limit:
#                 messages = [
#                     {"role": "system", "content": "You are helpful assistant."},
#                     {"role": "user", "content": f"from below features and benefits, please get top 25 features and benefits. top features or benefits are those which are more prominenent/repeated. please create 2 sections each for features and benefits in your response\n{chunk_string}"}
#                 ]
#                 model_response = get_chatgpt_response(messages, selected_model)
#                 processed_strings = model_response
#                 all_feature_benefit_strings.append(processed_strings)

#         # Check if the length of the final string is less than the user-defined character limit
#         final_string = "\n".join(all_feature_benefit_strings)

#         #st.write(final_string)

#         while len(final_string) > user_char_limit:
#             chunks = textwrap.wrap(final_string, user_char_limit)
#             all_processed_strings = []

#             for chunk in chunks:
#                 messages = [
#                     {"role": "system", "content": "You are helpful assistant."},
#                     {"role": "user", "content": f"from below features and benefits, please get top 25 features and benefits. top features or benefits are those which are more prominenent/repeated. please create 2 sections each for features and benefits in your response\n{chunk}"}
#                 ]
#                 model_response = get_chatgpt_response(messages, selected_model)
#                 processed_strings = model_response
#                 all_processed_strings.append(processed_strings)

#             final_string = "\n".join(all_processed_strings)

#         # Run the final prompt
#         messages = [
#             {"role": "system", "content": "You are helpful assistant."},
#             {"role": "user", "content": f"from below features and benefits, please get top 30 features and top 30 benefits. top features or benefits are those which are more prominenent/repeated. please create 2 sections each for features and benefits in your response\n{final_string}"}
#         ]
#         final_model_response = get_chatgpt_response(messages, selected_model)
#         st.write('final response')
#         st.write(final_model_response)

    

# if page == 'combined features and benefits':
#     page_combined_features_and_benefits()

# import textwrap
# from gspread_dataframe import get_as_dataframe



def _parse_sheet_urls(raw_text):
    """Return the non-blank lines of *raw_text*, stripped of whitespace."""
    return [line.strip() for line in raw_text.splitlines() if line.strip()]


def _sheet_id_from_url(sheet_url):
    """Return the sixth '/'-separated segment of *sheet_url*, or None if absent."""
    parts = sheet_url.split('/')
    if len(parts) > 5 and parts[5]:
        return parts[5]
    return None


def page_combined_features_and_benefits():
    """Render the 'combined features and benefits' page.

    Collects Google Sheet URLs (one per line), a model choice, a per-sheet
    character limit and an editable final prompt. When 'generate' is pressed,
    the 'top_features_benefits' worksheet of each sheet is summarised by the
    model, and the per-sheet summaries are combined into one final request
    whose response is written to the page.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title('combined features and benefits')

    gc = gspread.service_account_from_dict(credentials_dict)

    # Use a text area for URLs input; blank lines are ignored so an empty
    # box yields an empty list rather than [''].
    sheet_urls_input = st.text_area('Enter the URLs of your Google Sheets, each on a new line')
    sheet_urls = _parse_sheet_urls(sheet_urls_input)

    if sheet_urls:
        st.write('You entered these URLs:')
        for url in sheet_urls:
            st.write(url)

    models = ["gpt-3.5-turbo", "gpt-4-0125-preview", "gpt-3.5-turbo-0125"]
    selected_model = st.selectbox("Choose a model:", models)

    user_char_limit = st.number_input("Enter character limit for chunk:", value=9000, step=500)

    # Provide a default message for user to edit for final prompt
    default_message = "From below features and benefits, please get top 30 features and top 30 benefits. Top features or benefits are those which are more prominent/repeated. Please create 2 sections each for features and benefits in your response\n{}"
    user_message = st.text_area("Edit the final message to be sent:", default_message)

    if not st.button('generate'):
        return

    if not sheet_urls:
        st.warning('Enter at least one Google Sheet URL before generating.')
        return

    all_feature_benefit_strings = []

    # Read and summarise the features and benefits of each sheet separately
    for sheet_url in sheet_urls:
        sheet_id = _sheet_id_from_url(sheet_url)
        if sheet_id is None:
            st.error(f"Skipping URL without a sheet ID: {sheet_url}")
            continue

        sheet = gc.open_by_key(sheet_id)
        worksheet = sheet.worksheet('top_features_benefits')
        df = get_as_dataframe(worksheet).dropna(how='all')

        feature_benefit_strings = [
            f"{row['Type']} {row['Theme']}: {row['Description']}"
            for _, row in df.iterrows()
        ]
        chunk_string = "\n".join(feature_benefit_strings)

        # Sheets over the limit are not sent to the model; say so instead of
        # dropping them silently.
        if len(chunk_string) > user_char_limit:
            st.warning(
                f"Skipping sheet (content is {len(chunk_string)} characters, "
                f"over the limit of {user_char_limit}): {sheet_url}"
            )
            continue

        messages = [
            {"role": "system", "content": "You are helpful assistant."},
            {"role": "user", "content": f"from below features and benefits, please get top 25 features and benefits. top features or benefits are those which are more prominenent/repeated. please create 2 sections each for features and benefits in your response\n{chunk_string}"}
        ]
        all_feature_benefit_strings.append(get_chatgpt_response(messages, selected_model))

    if not all_feature_benefit_strings:
        st.warning('No sheet content was summarised, so there is nothing to combine.')
        return

    final_string = "\n".join(all_feature_benefit_strings)

    # The template is user-edited, so stray braces can make str.format raise;
    # fall back to appending the data after the message in that case.
    try:
        final_prompt = user_message.format(final_string)
    except (IndexError, KeyError, ValueError):
        final_prompt = f"{user_message}\n{final_string}"

    # Run the final prompt
    messages = [
        {"role": "system", "content": "You are helpful assistant."},
        {"role": "user", "content": final_prompt}
    ]
    final_model_response = get_chatgpt_response(messages, selected_model)
    st.write('final response')
    st.write(final_model_response)

# Render the combined features/benefits page when it is the page selected.
if page == 'combined features and benefits':
    page_combined_features_and_benefits()