amoldwalunj committed on
Commit
19ae87d
·
1 Parent(s): 39fe2fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +258 -136
app.py CHANGED
@@ -472,6 +472,15 @@ if page == 'Identify Avatars':
472
 
473
 
474
 
 
 
 
 
 
 
 
 
 
475
  # Ask user for Google Sheet URL
476
  sheet_url = st.text_input('Enter the URL of your Google Sheet')
477
 
@@ -486,69 +495,37 @@ if page == 'Identify Avatars':
486
 
487
 
488
  # Let user choose between pros/cons and reviews
489
- user_choice = st.selectbox("Select input for avatars:", ('Top Pros/Cons', 'Reviews'))
490
-
491
-
492
- if user_choice == 'Top Pros/Cons':
493
- worksheet = sheet.worksheet('top_pros_cons')
494
 
495
- #st.write("Worksheet:", worksheet)
496
- # df_pros_cons = get_as_dataframe(worksheet).dropna()
497
-
498
- data = worksheet.get_all_records()
499
- df_pros_cons = pd.DataFrame(data)
500
- df_pros_cons.dropna(how='all', inplace=True)
501
 
502
- #st.write(df_pros_cons)
 
 
 
503
 
504
- # Filter pros and cons separately
505
- pros_df = df_pros_cons[df_pros_cons['Type'] == 'Pros']
506
- cons_df = df_pros_cons[df_pros_cons['Type'] == 'Cons']
507
 
508
- # Initialize pros and cons strings
509
- pros_str = "Pros:\n"
510
- cons_str = "Cons:\n"
511
 
512
- # Iterate through pros DataFrame and append Theme and Description
513
- for _, row in pros_df.iterrows():
514
- theme = row['Theme']
515
- description = row['Description']
516
- pros_str += f"{theme}: {description}\n"
517
 
518
- # Iterate through cons DataFrame and append Theme and Description
519
- for _, row in cons_df.iterrows():
520
- theme = row['Theme']
521
- description = row['Description']
522
- cons_str += f"{theme}: {description}\n"
523
 
524
- # Combine pros and cons into a single string
525
- pros_and_cons = pros_str + "\n" + cons_str
526
 
527
- #st.write(pros_and_cons)
528
 
529
  elif user_choice == 'Reviews':
530
  worksheet = sheet.worksheet('Reviews')
531
  # Get all records of the data
532
  df = get_as_dataframe(worksheet)
533
 
534
- #st.write(df)
535
-
536
- # df['Body']= df['Body'].astype(str)
537
- # # drop rows where 'Body' column has NaN
538
- # df = df.dropna(subset=['Body'])
539
-
540
- # ####first chunking
541
-
542
- # separator = '\n\n'
543
-
544
- # pros_and_cons= separator.join(df['Body'])
545
-
546
- # # df['Body']= df['Body'].astype(str)
547
- # # # drop rows where 'Body' column has NaN
548
- # # df = df.dropna(subset=['Body'])
549
-
550
- # # Clean up data
551
- # pros_and_cons = df['Body'].str.cat(sep='\n\n')
552
 
553
  # Select only the 'Body' column and drop rows with NaN values
554
  body_series = df['Body'].dropna()
@@ -558,23 +535,12 @@ if page == 'Identify Avatars':
558
  pros_and_cons = separator.join(body_series)
559
 
560
  # Clean up data
561
- pros_and_cons = body_series.str.cat(sep='\n\n')
562
-
563
 
564
 
565
-
566
-
567
- # Get all records of the data
568
- #df = get_as_dataframe(worksheet)
569
-
570
-
571
-
572
- # Display pros and cons or reviews to the user
573
- #st.text('Selected pros and cons or reviews:')
574
- #st.write(pros_and_cons)
575
-
576
  # Prepare the prompt
577
- prompt = """Below are the pros and cons we have identified from customer reviews for our product:
578
 
579
  -----------------------------------------------
580
  {}
@@ -602,12 +568,15 @@ if page == 'Identify Avatars':
602
  - List the social media they use and channels they follow
603
  - List top 10 lead magnets used for marketing purpose
604
 
605
- And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(pros_and_cons)
606
 
607
  # Editable prompt
608
  prompt = st.text_area("Edit the prompt:", value=prompt, height=200)
 
 
609
 
610
  if st.button('Generate avatars'):
 
611
  messages = [
612
  {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
613
  {"role": "user", "content": prompt}
@@ -623,78 +592,6 @@ if page == 'Identify Avatars':
623
  # Store the result in the session state
624
  st.session_state.avatars = model_response
625
 
626
- # if "generate_more" not in st.session_state:
627
- # st.session_state.generate_more = False
628
-
629
- # if st.button('Generate more avatars'):
630
- # st.session_state.generate_more = True
631
-
632
-
633
- # if st.session_state.generate_more:
634
-
635
- # messages1 = [
636
- # {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
637
- # {"role": "user", "content": """Please summarize these avatars.
638
- # mention their income in summary. /n {}""".format(model_response)}
639
- # ]
640
-
641
- # model_response1 = get_chatgpt_response(messages1, selected_model)
642
-
643
- # prompt2 = """Below are the pros and cons we have identified from customer reviews for our product:
644
-
645
- # -----------------------------------------------
646
- # {}
647
-
648
- # -----------------------------------------------
649
- # please exclude these avatar as we already have them. this is summary of earlier avatars
650
-
651
- # {}
652
- # -----------------------------------------------
653
- # You need to use above information and Write a list of the top 5 Avatars (persona) that you identify from the product reviews of the listing.
654
- # For each avatar pls specify the following:
655
-
656
- # - Name
657
- # - Gender
658
- # - Age range
659
- # - Income range
660
- # - Demographics
661
- # - General description of the avatar
662
- # - Personality Traits
663
- # - Interests and Hobbies
664
- # - Pains (Psychological, Spiritual, Physical)
665
- # - Top 10 insecurities
666
- # - Top 10 books that they read with title and author
667
- # - Top 5 movies
668
- # - Top 5 TV shows
669
- # - List their top 10 desires
670
- # - List the social media they use and channels they follow
671
- # - List top 10 lead magnets used for marketing purpose
672
-
673
- # And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(pros_and_cons, model_response1)
674
-
675
- # # Editable prompt
676
- # prompt2 = st.text_area("Edit the prompt:", value=prompt2, height=200)
677
-
678
- # if st.button('Generate additional avatars'):
679
- # st.session_state.more_avatars_generated = True
680
-
681
- # messages2 = [
682
- # {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
683
- # {"role": "user", "content": prompt2}
684
- # ]
685
-
686
- # # Get model response
687
- # model_response2 = get_chatgpt_response(messages2, selected_model)
688
-
689
- # final_avatars= model_response + '\n\n' + model_response2
690
-
691
- # st.write(final_avatars)
692
-
693
-
694
- # st.session_state.avatars = final_avatars
695
-
696
- # else:
697
- # st.write(st.session_state.avatars)
698
 
699
 
700
 
@@ -863,3 +760,228 @@ Please make this manual very exhaustive and detailed. explain each of the point
863
  except Exception as e:
864
  st.error(f"An error occurred while writing to Google Sheets: {e}")
865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
 
474
 
475
+
476
+
477
+ st.title('Identify Avatars Page')
478
+
479
+ # Authenticate Google Sheets API
480
+ gc = gspread.service_account(filename='arctic-rite-381810-e8bee8664772.json')
481
+
482
+
483
+
484
  # Ask user for Google Sheet URL
485
  sheet_url = st.text_input('Enter the URL of your Google Sheet')
486
 
 
495
 
496
 
497
  # Let user choose between pros/cons and reviews
498
+ user_choice = st.selectbox("Select input for avatars:", ('review summary', 'Reviews'))
 
 
 
 
499
 
500
+
 
 
 
 
 
501
 
502
+ if user_choice == 'review summary':
503
+ worksheet = sheet.worksheet('reviews summary')
504
+ # Get all records of the data
505
+ df = get_as_dataframe(worksheet)
506
 
 
 
 
507
 
508
+ # Select only the 'Body' column and drop rows with NaN values
509
+ body_series = df['reviews summary'].dropna()
 
510
 
511
+ #### first chunking
512
+ separator = '\n\n'
513
+ pros_and_cons = separator.join(body_series)
 
 
514
 
515
+ # Clean up data
516
+ reviews_ = body_series.str.cat(sep='\n\n')
517
+ st.session_state.reviews_ = reviews_
518
+
 
519
 
520
+
 
521
 
522
+
523
 
524
  elif user_choice == 'Reviews':
525
  worksheet = sheet.worksheet('Reviews')
526
  # Get all records of the data
527
  df = get_as_dataframe(worksheet)
528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
  # Select only the 'Body' column and drop rows with NaN values
531
  body_series = df['Body'].dropna()
 
535
  pros_and_cons = separator.join(body_series)
536
 
537
  # Clean up data
538
+ reviews_ = body_series.str.cat(sep='\n\n')
539
+ st.session_state.reviews_ = reviews_
540
 
541
 
 
 
 
 
 
 
 
 
 
 
 
542
  # Prepare the prompt
543
+ prompt = """Below are the customer reviews/ review summary for our product:
544
 
545
  -----------------------------------------------
546
  {}
 
568
  - List the social media they use and channels they follow
569
  - List top 10 lead magnets used for marketing purpose
570
 
571
+ And anything else can be used for sales and marketing purpose to target those specific avatars. Please make sure you provide long info for each of the point you mention in avatar""".format(reviews_)
572
 
573
  # Editable prompt
574
  prompt = st.text_area("Edit the prompt:", value=prompt, height=200)
575
+
576
+
577
 
578
  if st.button('Generate avatars'):
579
+ #formatted_prompt = user_edited_prompt.format(reviews=reviews_)
580
  messages = [
581
  {"role": "system", "content": "You are a helpful assistant who helps identify avatars from product reviews"},
582
  {"role": "user", "content": prompt}
 
592
  # Store the result in the session state
593
  st.session_state.avatars = model_response
594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
 
596
 
597
 
 
760
  except Exception as e:
761
  st.error(f"An error occurred while writing to Google Sheets: {e}")
762
 
763
+
764
+
765
+
766
def _extract_sheet_id(url):
    """Return the spreadsheet key from a Google Sheets URL, or None.

    The key is taken as the sixth '/'-separated segment, i.e. the part
    after '/d/' in 'https://docs.google.com/spreadsheets/d/<key>/edit'.
    None is returned when the URL is empty or too short to contain a key,
    so callers can report the problem instead of hitting an IndexError.
    """
    segments = url.strip().split('/')
    if len(segments) > 5 and segments[5]:
        return segments[5]
    return None


def _chunk_by_char_limit(texts, char_limit):
    """Group consecutive texts into chunks whose combined length fits char_limit.

    Separators are not counted towards the limit. A single text that is
    longer than the limit gets a chunk of its own, so every chunk is
    non-empty and chunking always terminates.
    """
    chunks = []
    current = []
    current_len = 0
    for text in texts:
        if current and current_len + len(text) > char_limit:
            chunks.append(current)
            current = []
            current_len = 0
        current.append(text)
        current_len += len(text)
    if current:
        chunks.append(current)
    return chunks


def _ask_model(messages, model, retry_delay=30):
    """Call get_chatgpt_response, retrying once after a pause on a rate limit.

    The exception is matched by class name so this block does not need to
    import the client's exception type. Any other exception propagates to
    the caller rather than being silently dropped.
    """
    try:
        return get_chatgpt_response(messages, model)
    except Exception as exc:
        if type(exc).__name__ != "RateLimitError":
            raise
        import time  # local import: the block must not rely on a file-level one

        time.sleep(retry_delay)
        return get_chatgpt_response(messages, model)


def _summarize_in_chunks(texts, prompt_template, char_limit, model):
    """Summarize texts chunk by chunk; return one model response per chunk.

    prompt_template must contain a '{}' placeholder that receives the
    newline-joined texts of the chunk.
    """
    summaries = []
    chunks = _chunk_by_char_limit(texts, char_limit)
    for chunk_number, chunk in enumerate(chunks, start=1):
        chunk_text = '\n'.join(chunk)
        print(f'Chunk {chunk_number}:')
        print(f'Number of reviews: {len(chunk)}')
        print(f'Character length: {len(chunk_text)}')

        messages = [
            {"role": "system", "content": "You are helpful assistant"},
            {"role": "user", "content": prompt_template.format(chunk_text)},
        ]
        summaries.append(_ask_model(messages, model))
    return summaries


def _build_review_summary(reviews, phase1_prompt, phase2_prompt, final_prompt,
                          char_limit, model):
    """Condense reviews into one summary via repeated chunked summarization.

    Phase 1 summarizes the raw reviews in chunks of at most char_limit
    characters. If the joined result is longer than 14000 characters, the
    summaries are re-summarized (9000-character chunks) until the text is
    at most 11000 characters or a round stops shrinking it. If the text is
    still longer than 14000 characters, final_prompt is used for one last
    consolidation call.

    Returns a (summary, consolidated) tuple; consolidated is True only
    when the final consolidation call was made.
    """
    divider = "\n" + "-" * 80 + "\n"

    summaries = _summarize_in_chunks(reviews, phase1_prompt, char_limit, model)
    combined = divider.join(summaries)
    print(len(combined))

    if len(combined) > 14000:
        while len(combined) > 11000:
            summaries = _summarize_in_chunks(summaries, phase2_prompt, 9000, model)
            condensed = divider.join(summaries)
            shrank = len(condensed) < len(combined)
            combined = condensed
            if not shrank:
                # The model is no longer shortening the text; stop instead
                # of looping (and paying for API calls) forever.
                break
    else:
        print('we are here')

    if len(combined) > 14000:
        messages = [
            {"role": "system", "content": "You are helpful assistant who analyzes reviews"},
            {"role": "user", "content": final_prompt.format(combined)},
        ]
        return _ask_model(messages, model), True
    return combined, False


if page == 'review summary':
    st.title('Review summarys Page')

    gc = gspread.service_account_from_dict(credentials_dict)

    # Ask user for Google Sheet URL
    sheet_url = st.text_input('Enter the URL of your Google Sheet')

    # Let the user pick which chat model performs the summarization
    models = ["gpt-3.5-turbo", "gpt-4"]
    selected_model = st.selectbox("Choose a model:", models)

    # Default prompts; each '{}' receives the text to be summarized
    phase1_prompt = "Analyze a set of customer reviews and create a summary of reviews. don't miss any point in your summary\n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the reviews\n {}"
    phase2_prompt = "Analyze a set of customer reviews summary and create a summary covering all points. don't create too small summary. it should be exhaustive. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience. here are the review summary \n {}"
    final_prompt = "please analyze below reviews summary and create final review summary in less than 2500 words. don't miss any point in your summary. \n\n In your analysis, consider the language and tone used in the reviews to accurately represent the customer's experience.\n {}"

    editable_phase1_prompt = st.text_area("Edit the first prompt:", value=phase1_prompt, height=200)
    user_char_limit = st.number_input("Enter character limit:", value=9000, step=500)

    # Read the 'Reviews' worksheet and build the summary
    if st.button('generate'):
        sheet_id = _extract_sheet_id(sheet_url)
        if sheet_id is None:
            st.error('Please enter a valid Google Sheet URL.')
        else:
            sheet = gc.open_by_key(sheet_id)
            worksheet = sheet.worksheet('Reviews')
            df = get_as_dataframe(worksheet)

            # Drop empty cells BEFORE casting to str; casting first would
            # turn every NaN into the literal review text 'nan'.
            reviews = df['Body'].dropna().astype(str).tolist()

            if not reviews:
                st.warning("No reviews found in the 'Body' column.")
            else:
                final_output, consolidated = _build_review_summary(
                    reviews,
                    editable_phase1_prompt,
                    phase2_prompt,
                    final_prompt,
                    user_char_limit,
                    selected_model,
                )

                # Display the output
                if consolidated:
                    st.text('Here are the top pros and cons:')
                st.markdown(final_output)

                # Keep the result across reruns so the write button can use it
                st.session_state.df_final_output = final_output

    # Create a button that will trigger writing to Google Sheets
    if st.button('Write output to sheet'):
        sheet_id = _extract_sheet_id(sheet_url)
        if 'df_final_output' not in st.session_state:
            st.warning('Generate a summary before writing it to the sheet.')
        elif sheet_id is None:
            st.error('Please enter a valid Google Sheet URL.')
        else:
            final_output = st.session_state.df_final_output

            st.write("Button clicked, processing data...")

            try:
                # Open the Google spreadsheet
                sheet = gc.open_by_key(sheet_id)

                # Reuse the "reviews summary" worksheet if it already exists
                worksheet_output = next(
                    (wks for wks in sheet.worksheets() if wks.title == 'reviews summary'),
                    None,
                )

                # If the worksheet does not exist, create a new one
                if worksheet_output is None:
                    worksheet_output = sheet.add_worksheet(title="reviews summary", rows="100", cols="20")

                # Read the existing data and discard completely empty columns
                existing_data = get_as_dataframe(worksheet_output)
                existing_data = existing_data.dropna(how='all', axis=1)

                # One-row frame holding the new summary
                new_data = pd.DataFrame([final_output], columns=['reviews summary'])

                # NOTE(review): this adds the summary as an extra column, so
                # repeated writes produce several 'reviews summary' columns --
                # confirm whether replacing the existing column is intended.
                updated_data = pd.concat([existing_data, new_data], axis=1)

                # Clear the worksheet before writing the updated data
                worksheet_output.clear()

                # Write the updated data to the "reviews summary" worksheet
                set_with_dataframe(worksheet_output, updated_data)
                st.write("Data written successfully to Google Sheets.")
            except Exception as e:
                st.error(f"An error occurred while writing to Google Sheets: {e}")
986
+
987
+