long1104 commited on
Commit
72e913d
·
verified ·
1 Parent(s): 3d6e3bf

Create setup_code.py

Browse files
Files changed (1) hide show
  1. setup_code.py +83 -0
setup_code.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain.vectorstores import Pinecone
3
+ from pinecone import Pinecone, ServerlessSpec
4
+ from tqdm.notebook import tqdm
5
+ import langchain
6
+ import openai
7
+ from openai import OpenAI
8
+ import string
9
+ import pandas as pd
10
+ import urllib.request
11
+ from io import BytesIO
12
+ from PIL import Image
13
+ import pillow_heif
14
+ from itertools import islice
15
+ from sklearn.metrics.pairwise import cosine_similarity
16
+ import gc
17
+ import ast
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from sentence_transformers import SentenceTransformer
20
+ import streamlit as st
21
+ import re
22
+ import Levenshtein
23
+
24
+ # from google.colab import drive
25
+ # from dotenv import load_dotenv, find_dotenv
26
+ import os
27
+
28
+ # open_ai_key_file = "/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt" # Your OPEN AI Key in this file
29
+
30
+ # with open(open_ai_key_file, "r") as f:
31
+ #for line in f:
32
+ #OPENAI_KEY = line.strip()
33
+ #OPEN_AI_API_KEY = line
34
+ #break
35
+
36
+ #_ = load_dotenv(find_dotenv())
37
+
38
+ # GETTING OpenAI and Pinecone api key
39
# Credentials are read from Streamlit's secrets store rather than from a
# key file or environment loader (see the commented-out code above).
pc_apikey = st.secrets['pc_apikey']
openai.api_key = st.secrets['OPENAI_KEY']

# Module-level OpenAI client built from the same key set on the `openai` module.
openai_client = OpenAI(api_key=openai.api_key)
43
+
44
+ # Function to get the embeddings of the text using OpenAI text-embedding-ada-002 model
45
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by an OpenAI embedding model.

    Newline characters in ``text`` are replaced with single spaces before the
    request is made.

    Args:
        openai_client: Object exposing ``embeddings.create(input=..., model=...)``.
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    flattened = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[flattened], model=model)
    first_entry = response.data[0]
    return first_entry.embedding
48
+
49
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    The request is issued through the ``client`` argument and uses the
    ``model`` argument; no module-level client or fixed model name is
    consulted, so callers control both.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: Text placed in the ``content`` field of the user message.
        model: Name of the chat model to request.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content
56
+
57
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Query ``index`` for the vectors nearest to ``query_embeddn``.

    The namespace to search is obtained from ``get_namespace(index)``. Both
    vector values and metadata are requested in the result.

    Args:
        index: Object exposing ``query(namespace=..., top_k=..., vector=...,
            include_values=..., include_metadata=...)``.
        query_embeddn: The query vector.
        top_k: Number of matches to request.

    Returns:
        Whatever ``index.query`` returns.
    """
    query_kwargs = {
        "namespace": get_namespace(index),
        "top_k": top_k,
        "vector": query_embeddn,
        "include_values": True,
        "include_metadata": True,
    }
    return index.query(**query_kwargs)
67
+
68
def get_top_k_text(matches, k=5):
    """Join the ``metadata['text']`` of the first ``k`` matches with spaces.

    At most ``k`` entries are read, so a response holding fewer than ``k``
    matches yields the text of the matches that exist instead of raising
    ``IndexError``.

    Args:
        matches: Mapping-like query response; ``matches.get('matches')`` must
            return a sequence of items indexable as ``item['metadata']['text']``.
        k: Maximum number of matches to include. ``None`` includes all of them.

    Returns:
        The selected texts joined by a single space; an empty string when
        there are no matches.
    """
    # A missing or empty 'matches' entry is treated as "no results".
    hits = matches.get('matches') or []
    return ' '.join(hit['metadata']['text'] for hit in hits[:k])
75
+
76
def is_Yes(response) -> bool:
    """Report whether ``response`` is closer to "Yes" than to "No".

    Closeness is the score returned by ``Levenshtein.ratio``. The result is
    True only when the "Yes" score is strictly greater than the "No" score;
    a tie counts as not-yes.
    """
    yes_score, no_score = (
        Levenshtein.ratio(label, response) for label in ("Yes", "No")
    )
    return no_score < yes_score
81
+
82
def contains_sorry(response) -> bool:
    """Return True when the case-sensitive text "Sorry" occurs in ``response``."""
    marker = "Sorry"
    return marker in response