Spaces:
Running
Running
Enhance app.py with improved user interface and instructions, update model ID in llm.py, and add image classification capabilities across various components. Introduce segment anything functionality and refine README for clarity on model capabilities.
518d841
import datasets | |
import faiss | |
from langchain_community.docstore.in_memory import InMemoryDocstore | |
from langchain_community.vectorstores import FAISS | |
from langchain_core.documents import Document | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from rag.settings import get_embeddings_model | |
def get_vector_store():
    """Create an empty FAISS vector store sized for the project's embeddings.

    The index dimension is discovered by embedding a short probe string with
    the model returned by ``get_embeddings_model`` and measuring the length of
    the resulting vector.

    Returns:
        A ``FAISS`` store built on a flat L2 index, with an empty
        ``InMemoryDocstore`` and an empty index-to-docstore-id mapping.
    """
    embedder = get_embeddings_model()

    # One throwaway embedding tells us the vector width the index must accept.
    probe_vector = embedder.embed_query("hello world")
    flat_index = faiss.IndexFlatL2(len(probe_vector))

    return FAISS(
        embedding_function=embedder,
        index=flat_index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )
def get_docs(dataset):
    """Turn dataset rows into chunked documents ready for indexing.

    Each row becomes one ``Document`` whose text is the model card followed by
    the model's label list, and whose metadata carries ``model_id`` and
    ``model_labels``. The documents are then split into overlapping chunks.

    Previously the text template was passed to ``Document`` without being
    formatted, so every document contained the literal ``{models}`` and
    ``{labels}`` placeholders instead of the row's data.

    Args:
        dataset: Iterable of mapping-like rows. Each row must provide
            ``"model_id"`` and ``"model_labels"``.

    Returns:
        The list of document chunks produced by the text splitter.
    """
    template = (
        "\n"
        "Model card:\n"
        "{models}\n"
        "List of labels:\n"
        "{labels}\n"
    )

    def _render(model):
        # NOTE(review): the column holding the model card text is not visible
        # in this file; "model_card" is assumed, with the model id as a
        # fallback so the document still identifies the model -- confirm
        # against the dataset schema.
        card = model["model_card"] if "model_card" in model else model["model_id"]
        labels = model["model_labels"]
        # Render a sequence of labels as readable text rather than a Python
        # list repr; any other value is formatted as-is.
        if isinstance(labels, (list, tuple)):
            labels = ", ".join(str(label) for label in labels)
        return template.format(models=card, labels=labels)

    source_docs = [
        Document(
            page_content=_render(model),
            metadata={
                "model_id": model["model_id"],
                "model_labels": model["model_labels"],
            },
        )
        for model in dataset
    ]

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,  # Characters per chunk
        chunk_overlap=50,  # Overlap between chunks to maintain context
        add_start_index=True,
        strip_whitespace=True,
        separators=["\n\n", "\n", ".", " ", ""],  # Priority order for splitting
    )
    docs_processed = text_splitter.split_documents(source_docs)
    print(f"Knowledge base prepared with {len(docs_processed)} document chunks")
    return docs_processed
if __name__ == "__main__":
    # Script entry point: load the model dataset, chunk it, index the chunks
    # in a fresh vector store, and save that store locally.
    model_rows = datasets.load_dataset(
        "stevenbucaille/image-classification-models-dataset",
        split="train",
    )
    chunks = get_docs(model_rows)

    store = get_vector_store()
    store.add_documents(chunks)
    store.save_local(
        folder_path="vector_store/image-classification",
        index_name="faiss_index",
    )