Spaces:

amoldwalunj
/

listing_app

Runtime error

App Files Files Community

amoldwalunj committed on May 8, 2023

Commit

19ae87d

1 Parent(s): 39fe2fe

Update app.py

Browse files

Files changed (1) hide show

app.py +258 -136

app.py CHANGED Viewed

@@ -472,6 +472,15 @@ if page == 'Identify Avatars':
     # Ask user for Google Sheet URL
     sheet_url = st.text_input('Enter the URL of your Google Sheet')
@@ -486,69 +495,37 @@ if page == 'Identify Avatars':
     # Let user choose between pros/cons and reviews
-    user_choice = st.selectbox("Select input for avatars:", ('Top Pros/Cons', 'Reviews'))
-    if user_choice == 'Top Pros/Cons':
-        worksheet = sheet.worksheet('top_pros_cons')
-        #st.write("Worksheet:", worksheet)
-        # df_pros_cons = get_as_dataframe(worksheet).dropna()
-        data = worksheet.get_all_records()
-        df_pros_cons = pd.DataFrame(data)
-        df_pros_cons.dropna(how='all', inplace=True)
-        #st.write(df_pros_cons)
-        # Filter pros and cons separately
-        pros_df = df_pros_cons[df_pros_cons['Type'] == 'Pros']
-        cons_df = df_pros_cons[df_pros_cons['Type'] == 'Cons']
-        # Initialize pros and cons strings
-        pros_str = "Pros:\n"
-        cons_str = "Cons:\n"
-        # Iterate through pros DataFrame and append Theme and Description
-        for _, row in pros_df.iterrows():
-            theme = row['Theme']
-            description = row['Description']
-            pros_str += f"{theme}: {description}\n"
-        # Iterate through cons DataFrame and append Theme and Description
-        for _, row in cons_df.iterrows():
-            theme = row['Theme']
-            description = row['Description']
-            cons_str += f"{theme}: {description}\n"
-        # Combine pros and cons into a single string
-        pros_and_cons = pros_str + "\n" + cons_str
-        #st.write(pros_and_cons)
     elif user_choice == 'Reviews':
         worksheet = sheet.worksheet('Reviews')
         # Get all records of the data
         df = get_as_dataframe(worksheet)
-        #st.write(df)
-        # df['Body']= df['Body'].astype(str)
-        # # drop rows where 'Body' column has NaN
-        # df = df.dropna(subset=['Body'])
-        # ####first chunking
-        # separator = '\n\n'
-        # pros_and_cons= separator.join(df['Body'])
-        # # df['Body']= df['Body'].astype(str)
-        # # # drop rows where 'Body' column has NaN
-        # # df = df.dropna(subset=['Body'])
-        # # Clean up data
-        # pros_and_cons = df['Body'].str.cat(sep='\n\n')
         # Select only the 'Body' column and drop rows with NaN values
         body_series = df['Body'].dropna()
@@ -558,23 +535,12 @@ if page == 'Identify Avatars':
         pros_and_cons = separator.join(body_series)
         # Clean up data
-        pros_and_cons = body_series.str.cat(sep='\n\n')
-    # Get all records of the data
-    #df = get_as_dataframe(worksheet)
-    # Display pros and cons or reviews to the user
-    #st.text('Selected pros and cons or reviews:')
-    #st.write(pros_and_cons)
     # Prepare the prompt
-    prompt = """Below are the pros and cons we have identified from customer reviews for our product:
     -----------------------------------------------
     {}
@@ -602,12 +568,15 @@ if page == 'Identify Avatars':
     - List the social media they use and channels they follow
     - List top 10 lead magnets used for marketing purpose
-    And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(pros_and_cons)
     # Editable prompt
     prompt = st.text_area("Edit the prompt:", value=prompt, height=200)
     if st.button('Generate avatars'):
         messages = [
             {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
             {"role": "user", "content": prompt}
@@ -623,78 +592,6 @@ if page == 'Identify Avatars':
         # Store the result in the session state
         st.session_state.avatars = model_response
-        # if "generate_more" not in st.session_state:
-        #     st.session_state.generate_more = False
-        # if st.button('Generate more avatars'):
-        #     st.session_state.generate_more = True
-        # if st.session_state.generate_more:
-        #     messages1 = [
-        #         {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
-        #         {"role": "user", "content": """Please summarize these avatars.
-        #         mention their income in summary. /n {}""".format(model_response)}
-        #     ]
-        #     model_response1 = get_chatgpt_response(messages1, selected_model)
-        #     prompt2 = """Below are the pros and cons we have identified from customer reviews for our product:
-        #     -----------------------------------------------
-        #     {}
-        #     -----------------------------------------------
-        #     please exclude these avatar as we already have them. this is summary of earlier avatars
-        #     {}
-        #     -----------------------------------------------
-        #     You need to use above information and Write a list of the top 5 Avatars (persona) that you identify from the product reviews of the listing.
-        #     For each avatar pls specify the following:
-        #     - Name
-        #     - Gender
-        #     - Age range
-        #     - Income range
-        #     - Demographics
-        #     - General description of the avatar
-        #     - Personality Traits
-        #     - Interests and Hobbies
-        #     - Pains (Psychological, Spiritual, Physical)
-        #     - Top 10 insecurities
-        #     - Top 10 books that they read with title and author
-        #     - Top 5 movies
-        #     - Top 5 TV shows
-        #     - List their top 10 desires
-        #     - List the social media they use and channels they follow
-        #     - List top 10 lead magnets used for marketing purpose
-        #     And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(pros_and_cons, model_response1)
-        #     # Editable prompt
-        #     prompt2 = st.text_area("Edit the prompt:", value=prompt2, height=200)
-        #     if st.button('Generate additional avatars'):
-        #         st.session_state.more_avatars_generated = True
-        #         messages2 = [
-        #             {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
-        #             {"role": "user", "content": prompt2}
-        #         ]
-        #         # Get model response
-        #         model_response2 = get_chatgpt_response(messages2, selected_model)
-        #         final_avatars= model_response + '\n\n' + model_response2
-        #         st.write(final_avatars)
-        #         st.session_state.avatars = final_avatars
-        #     else:
-        #         st.write(st.session_state.avatars)
@@ -863,3 +760,228 @@ Please make this manual very exhaustive and detailed. explain each of the point
         except Exception as e:
             st.error(f"An error occurred while writing to Google Sheets: {e}")

+    st.title('Identify Avatars Page')
+    # Authenticate Google Sheets API
+    gc = gspread.service_account(filename='arctic-rite-381810-e8bee8664772.json')
     # Ask user for Google Sheet URL
     sheet_url = st.text_input('Enter the URL of your Google Sheet')
     # Let user choose between pros/cons and reviews
+    user_choice = st.selectbox("Select input for avatars:", ('review summary', 'Reviews'))
+    if user_choice == 'review summary':
+        worksheet = sheet.worksheet('reviews summary')
+        # Get all records of the data
+        df = get_as_dataframe(worksheet)
+        # Select only the 'Body' column and drop rows with NaN values
+        body_series = df['reviews summary'].dropna()
+        #### first chunking
+        separator = '\n\n'
+        pros_and_cons = separator.join(body_series)
+        # Clean up data
+        reviews_ = body_series.str.cat(sep='\n\n')
+        st.session_state.reviews_ = reviews_
     elif user_choice == 'Reviews':
         worksheet = sheet.worksheet('Reviews')
         # Get all records of the data
         df = get_as_dataframe(worksheet)
         # Select only the 'Body' column and drop rows with NaN values
         body_series = df['Body'].dropna()
         pros_and_cons = separator.join(body_series)
         # Clean up data
+        reviews_ = body_series.str.cat(sep='\n\n')
+        st.session_state.reviews_ = reviews_
     # Prepare the prompt
+    prompt = """Below are the customer reviews/ review summary for our product:
     -----------------------------------------------
     {}
     - List the social media they use and channels they follow
     - List top 10 lead magnets used for marketing purpose
+    And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(reviews_)
     # Editable prompt
     prompt = st.text_area("Edit the prompt:", value=prompt, height=200)
     if st.button('Generate avatars'):
+        #formatted_prompt = user_edited_prompt.format(reviews=reviews_)
         messages = [
             {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
             {"role": "user", "content": prompt}
         # Store the result in the session state
         st.session_state.avatars = model_response
         except Exception as e:
             st.error(f"An error occurred while writing to Google Sheets: {e}")
+if page == 'review summary':
+    # Review-summary page: reads the 'Body' column of the 'Reviews' worksheet,
+    # summarises it chunk by chunk through get_chatgpt_response, shows the
+    # result, and can write it to a 'reviews summary' worksheet on request.
+    def _sheet_id_from_url(url):
+        """Return the 6th '/'-separated segment of *url*, or None if absent.
+
+        For https://docs.google.com/spreadsheets/d/<ID>/edit that segment is
+        the spreadsheet ID.  Returning None (instead of raising IndexError)
+        lets the caller show a message for an empty or malformed URL.
+        """
+        parts = url.strip().split('/')
+        return parts[5] if len(parts) > 5 and parts[5] else None
+    def _chunk_texts(texts, char_limit):
+        """Group consecutive texts so each group's summed length <= char_limit.
+
+        A single text longer than the limit gets a group of its own, so the
+        grouping always makes progress (the previous index-based loop spun
+        forever on such a text).  Separator characters are not counted.
+        """
+        chunks, current, current_len = [], [], 0
+        for text in texts:
+            if current and current_len + len(text) > char_limit:
+                chunks.append(current)
+                current, current_len = [], 0
+            current.append(text)
+            current_len += len(text)
+        if current:
+            chunks.append(current)
+        return chunks
+    def _ask_with_retry(messages, model):
+        """Call get_chatgpt_response, retrying once after 30 s on a rate limit.
+
+        Any other exception, or a failure of the retry itself, propagates.
+        """
+        try:
+            return get_chatgpt_response(messages, model)
+        except Exception as e:
+            # Compare the class *name*: comparing the class object itself to a
+            # string is always False.  Presumably this is the OpenAI client's
+            # RateLimitError -- TODO confirm against get_chatgpt_response.
+            if type(e).__name__ != "RateLimitError":
+                raise
+            time.sleep(30)
+            return get_chatgpt_response(messages, model)
+    def _summarize_round(texts, char_limit, prompt_template, model, tolerate_errors=False):
+        """Summarise *texts* chunk by chunk and return the list of summaries.
+
+        prompt_template must contain a '{}' placeholder that receives the
+        newline-joined chunk.  With tolerate_errors=True a failing chunk is
+        reported with st.warning and skipped; otherwise the error propagates.
+        """
+        summaries = []
+        for chunk_count, chunk in enumerate(_chunk_texts(texts, char_limit), start=1):
+            reviews_string = '\n'.join(chunk)
+            print(f'Chunk {chunk_count}:')
+            print(f'Number of reviews: {len(chunk)}')
+            print(f'Character length: {len(reviews_string)}')
+            messages = [
+                {"role": "system", "content": "You are helpful assistant"},
+                {"role": "user", "content": prompt_template.format(reviews_string)}
+            ]
+            try:
+                summaries.append(_ask_with_retry(messages, model))
+            except Exception as e:
+                if not tolerate_errors:
+                    raise
+                # Best effort: surface the failure instead of dropping the chunk silently.
+                st.warning(f"Skipping chunk {chunk_count}: {e}")
+        return summaries
+    def _load_reviews(client, url):
+        """Return the non-empty 'Body' cells of the 'Reviews' worksheet as strings.
+
+        Returns None after showing a message when the URL is unusable, the
+        'Body' column is missing, or there are no reviews.
+        """
+        sheet_id = _sheet_id_from_url(url)
+        if sheet_id is None:
+            st.error("Please enter a valid Google Sheet URL.")
+            return None
+        worksheet = client.open_by_key(sheet_id).worksheet('Reviews')
+        df = get_as_dataframe(worksheet)
+        if 'Body' not in df.columns:
+            st.error("The 'Reviews' worksheet has no 'Body' column.")
+            return None
+        # Drop missing cells BEFORE the string cast: casting first turns NaN
+        # into the literal text 'nan', which dropna can no longer remove.
+        reviews = df['Body'].dropna().astype(str).tolist()
+        if not reviews:
+            st.warning("No reviews found in the 'Body' column.")
+            return None
+        return reviews
+    st.title('Review summarys Page')
+    gc = gspread.service_account_from_dict(credentials_dict)
+    # Ask user for Google Sheet URL
+    sheet_url = st.text_input('Enter the URL of your Google Sheet')
+    # Let the user pick the chat model
+    models = ["gpt-3.5-turbo", "gpt-4"]
+    selected_model = st.selectbox("Choose a model:", models)
+    # Default prompts; each '{}' is filled with the text to summarise
+    phase1_prompt = "Analyze a set of customer reviews and create a summary of reviews. don't miss any point in your summary\n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the reviews\n {}"
+    phase2_prompt = "Analyze a set of customer reviews summary and create a summary covering all points. don't create too small summary. it should be exhaustive. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the review summary \n {}"
+    final_prompt = "please analyze below reviews summary and create final review summary in less than 2500 words. don't miss any point in your summary. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience.\n {}"
+    editable_phase1_prompt = st.text_area("Edit the first prompt:", value=phase1_prompt, height=200)
+    user_char_limit = st.number_input("Enter character limit:", value=9000, step=500)
+    if st.button('generate'):
+        reviews = _load_reviews(gc, sheet_url)
+        if reviews is not None:
+            # Phase 1: summarise the raw reviews with the (possibly edited) prompt.
+            char_limit = max(int(user_char_limit), 1)
+            joiner = "\n" + "-" * 80 + "\n"
+            top_reviews = _summarize_round(reviews, char_limit, editable_phase1_prompt, selected_model)
+            reviews_string_1 = joiner.join(top_reviews)
+            print(len(reviews_string_1))
+            if len(reviews_string_1) > 14000:
+                # Phase 2: re-summarise the summaries until they fit in 11000 characters.
+                while len(reviews_string_1) > 11000:
+                    previous_length = len(reviews_string_1)
+                    top_reviews = _summarize_round(top_reviews, 9000, phase2_prompt, selected_model, tolerate_errors=True)
+                    reviews_string_1 = joiner.join(top_reviews)
+                    if len(reviews_string_1) >= previous_length:
+                        # The model stopped shrinking the text; bail out rather than loop forever.
+                        break
+                if len(reviews_string_1) > 14000:
+                    # Only reachable when the reduction above gave up early:
+                    # ask for one final, bounded summary.
+                    messages = [
+                        {"role": "system", "content": "You are helpful assistant who analyzes reviews"},
+                        {"role": "user", "content": final_prompt.format(reviews_string_1)}
+                    ]
+                    final_output = get_chatgpt_response(messages, selected_model)
+                    st.text('Here are the top pros and cons:')
+                else:
+                    final_output = reviews_string_1
+            else:
+                final_output = reviews_string_1
+                print('we are here')
+            # Display the output and keep it for the "write" button (survives the rerun)
+            st.markdown(final_output)
+            st.session_state.df_final_output = final_output
+    # Create a button that will trigger writing to Google Sheets
+    if st.button('Write output to sheet'):
+        sheet_id = _sheet_id_from_url(sheet_url)
+        if 'df_final_output' not in st.session_state:
+            # Nothing has been generated in this session yet.
+            st.warning("Nothing to write yet - click 'generate' first.")
+        elif sheet_id is None:
+            st.error("Please enter a valid Google Sheet URL.")
+        else:
+            final_output = st.session_state.df_final_output
+            st.write("Button clicked, processing data...")
+            try:
+                # Open the Google spreadsheet (inside the try so failures are reported below)
+                sheet = gc.open_by_key(sheet_id)
+                # Reuse the "reviews summary" worksheet if it already exists
+                worksheet_output = next(
+                    (wks for wks in sheet.worksheets() if wks.title == 'reviews summary'),
+                    None,
+                )
+                # If the worksheet does not exist, create a new one
+                if worksheet_output is None:
+                    worksheet_output = sheet.add_worksheet(title="reviews summary", rows="100", cols="20")
+                # Read the existing data and drop columns that are entirely empty
+                existing_data = get_as_dataframe(worksheet_output).dropna(how='all', axis=1)
+                # One-row frame holding the new summary
+                new_data = pd.DataFrame([final_output], columns=['reviews summary'])
+                # NOTE(review): axis=1 adds the summary as a new COLUMN, so repeated
+                # writes produce several columns with the same header -- confirm
+                # this layout is what readers of the sheet expect.
+                updated_data = pd.concat([existing_data, new_data], axis=1)
+                # Clear the worksheet before writing the updated data
+                worksheet_output.clear()
+                set_with_dataframe(worksheet_output, updated_data)
+                st.write("Data written successfully to Google Sheets.")
+            except Exception as e:
+                st.error(f"An error occurred while writing to Google Sheets: {e}")