Spaces:
Paused
Paused
File size: 3,902 Bytes
4c95dc7 31f9732 4c95dc7 d523035 31f9732 4c95dc7 8187b01 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 8187b01 4c95dc7 31f9732 4c95dc7 8187b01 4c95dc7 d523035 31f9732 d523035 31f9732 8187b01 31f9732 8187b01 31f9732 4c95dc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.tracers import LangChainTracer
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore, Qdrant
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from qdrant_client import QdrantClient
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_cohere import CohereRerank
import constants
import os
# LangSmith tracing configuration: copy the settings held in the local
# `constants` module into the process environment before the tracer is built.
os.environ["LANGCHAIN_API_KEY"] = constants.LANGCHAIN_API_KEY
# str(True) is "True", but the tracing flag is conventionally the lowercase
# string "true"/"false"; lower-casing keeps the flag recognisable when the
# constant is a Python bool and is a no-op when it is already lowercase.
os.environ["LANGCHAIN_TRACING_V2"] = str(constants.LANGCHAIN_TRACING_V2).lower()
os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT

# A single tracer wrapped in a callback manager; the chat models in this
# module receive this manager through their `callbacks` argument.
tracer = LangChainTracer()
callback_manager = CallbackManager([tracer])
########################
### Chat Models ###
########################
# Keyword arguments common to both Claude models: the Anthropic key,
# temperature 0, and the shared tracing callback manager.
# NOTE(review): the attribute is spelled ANTRHOPIC here; it has to match the
# name declared in the `constants` module - confirm before renaming either.
_anthropic_shared = {
    "api_key": constants.ANTRHOPIC_API_KEY,
    "temperature": 0,
    "callbacks": callback_manager,
}

# Claude 3 Opus; max_tokens is not passed, so the library default applies.
opus3 = ChatAnthropic(model='claude-3-opus-20240229', **_anthropic_shared)

# Claude 3.5 Sonnet with max_tokens set explicitly to 4096.
sonnet35 = ChatAnthropic(
    model='claude-3-5-sonnet-20240620',
    max_tokens=4096,
    **_anthropic_shared,
)
# Every OpenAI chat model below is built with the same settings and differs
# only in the model name, so the shared keyword arguments live in one place.
_openai_shared = {
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
    "api_key": constants.OPENAI_API_KEY,
    "callbacks": callback_manager,
}

gpt4 = ChatOpenAI(model="gpt-4", **_openai_shared)
gpt4o = ChatOpenAI(model="gpt-4o", **_openai_shared)
gpt4o_mini = ChatOpenAI(model="gpt-4o-mini", **_openai_shared)
########################
### Embedding Models ###
########################
# Hugging Face embedding models, selected by model name: a stock Snowflake
# Arctic model and a second model whose name marks it as marketing-tuned.
basic_embeddings = HuggingFaceEmbeddings(
    model_name="snowflake/snowflake-arctic-embed-l",
)
tuned_embeddings = HuggingFaceEmbeddings(
    model_name="CoExperiences/snowflake-l-marketing-tuned",
)

# OpenAI embedding model, authenticated with the key from `constants`.
te3_small = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=constants.OPENAI_API_KEY,
)
#######################
### Text Splitters ###
#######################
# Both semantic chunkers use the "percentile" breakpoint type. They differ in
# the embedding model they are given and in whether the threshold amount is
# set explicitly (85 for the tuned variant) or left at the library default.
semanticChunker = SemanticChunker(te3_small, breakpoint_threshold_type="percentile")
semanticChunker_tuned = SemanticChunker(
    tuned_embeddings,
    breakpoint_threshold_amount=85,
    breakpoint_threshold_type="percentile",
)

# Recursive character splitter: chunks of at most 500 units with an overlap of
# 25, where length is measured with the built-in `len`.
RCTS = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=500,
    chunk_overlap=25,
)
#######################
### Vector Stores ###
#######################
# One Qdrant client, configured from `constants`, backs every vector store.
qdrant_client = QdrantClient(
    url=constants.QDRANT_ENDPOINT,
    api_key=constants.QDRANT_API_KEY,
)


def _collection_store(collection_name, embedding):
    """Return a QdrantVectorStore for `collection_name` on the shared client."""
    return QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embedding=embedding,
    )


# Each store pairs one collection with the embedding model passed alongside it.
semantic_Qdrant_vs = _collection_store("docs_from_ripped_urls", te3_small)
rcts_Qdrant_vs = _collection_store("docs_from_ripped_urls_recursive", te3_small)
semantic_tuned_Qdrant_vs = _collection_store(
    "docs_from_ripped_urls_semantic_tuned",
    tuned_embeddings,
)
#######################
### Retrievers ###
#######################
# Base retriever over the tuned-embedding collection, configured with k=10
# in its search kwargs.
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k": 10})

# Cohere reranker used as the compressor. No API key is passed here, so the
# client must find its credentials elsewhere.
# NOTE(review): presumably the COHERE_API_KEY environment variable - confirm.
compressor = CohereRerank(model="rerank-english-v3.0")

# Retriever that takes the base retriever's results and passes them through
# the reranker. The stray trailing "|" that followed the closing parenthesis
# was a dangling operator (a SyntaxError) and has been removed.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=semantic_tuned_retriever,
)