amezi committed on
Commit
2e8783e
·
1 Parent(s): 008b520

more minor bugs fixed

Browse files
Files changed (3) hide show
  1. requirements.txt +3 -1
  2. src/embedder.py +2 -2
  3. src/pinecone_store.py +13 -16
requirements.txt CHANGED
@@ -11,4 +11,6 @@ numpy
11
  python-dotenv
12
  together
13
  einops
14
- opencv-python
 
 
 
11
  python-dotenv
12
  together
13
  einops
14
+ opencv-python
15
+ timm
16
+ json
src/embedder.py CHANGED
@@ -6,8 +6,8 @@ import decord
6
  class InternVLEmbedder:
7
  def __init__(self):
8
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
9
- self.model = AutoModel.from_pretrained("OpenGVLab/InternVL2_5-8B-MPO", trust_remote_code=True).to(self.device)
10
- self.processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL2_5-8B-MPO")
11
 
12
  def embed_video(self, video_path):
13
  vr = decord.VideoReader(video_path)
 
6
  class InternVLEmbedder:
7
  def __init__(self):
8
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
9
+ self.model = AutoModel.from_pretrained("OpenGVLab/InternVL2_5-1B-MPO", trust_remote_code=True).to(self.device)
10
+ self.processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL2_5-1B-MPO", trust_remote_code=True)
11
 
12
  def embed_video(self, video_path):
13
  vr = decord.VideoReader(video_path)
src/pinecone_store.py CHANGED
@@ -1,4 +1,4 @@
1
- import pinecone
2
  import os
3
  from dotenv import load_dotenv
4
 
@@ -6,27 +6,24 @@ load_dotenv()
6
 
7
  class PineconeStore:
8
  def __init__(self):
9
- api_key = os.getenv("PINECONE_API_KEY")
10
- environment = os.getenv("PINECONE_ENV")
11
- pinecone.init(api_key=api_key, environment=environment)
12
 
13
- self.index_name = "soccer-highlights"
14
- if self.index_name not in pinecone.list_indexes():
15
- pinecone.create_index(
16
- name=self.index_name,
17
- dimension=1024, # Ensure this matches your embedding model's output dimension
18
- metric="cosine" # Choose the appropriate metric (e.g., cosine, euclidean)
19
  )
20
- self.index = pinecone.Index(self.index_name)
 
21
 
22
  def upsert(self, id, vector, metadata):
23
  self.index.upsert([(id, vector.tolist(), metadata)])
24
 
25
  def query(self, vector, filter_key, top_k):
 
26
  return [
27
- m["metadata"] for m in self.index.query(
28
- vector.tolist(),
29
- top_k=top_k,
30
- include_metadata=True
31
- )["matches"] if filter_key in m["id"]
32
  ]
 
1
+ from pinecone import Pinecone, ServerlessSpec
2
  import os
3
  from dotenv import load_dotenv
4
 
 
6
 
7
  class PineconeStore:
8
  def __init__(self):
9
+ self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
 
 
10
 
11
+ if 'blip-soccer-highlights' not in self.pc.list_indexes().names():
12
+ self.pc.create_index(
13
+ name='blip-soccer-highlights',
14
+ dimension=1024, # Must match the embedder's output dimension (embedder now loads InternVL2_5-1B-MPO; TODO confirm 1024 is still correct)
15
+ metric='cosine',
16
+ spec=ServerlessSpec(cloud='aws', region=os.getenv("PINECONE_ENV", "us-east-1"))
17
  )
18
+
19
+ self.index = self.pc.Index('blip-soccer-highlights')
20
 
21
  def upsert(self, id, vector, metadata):
22
  self.index.upsert([(id, vector.tolist(), metadata)])
23
 
24
  def query(self, vector, filter_key, top_k):
25
+ results = self.index.query(vector.tolist(), top_k=top_k, include_metadata=True)
26
  return [
27
+ match["metadata"] for match in results["matches"]
28
+ if filter_key in match["id"]
 
 
 
29
  ]