ccm committed on
Commit
cceda69
·
verified ·
1 Parent(s): 3f75e1c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +0 -7
main.py CHANGED
@@ -25,13 +25,6 @@ LLM_MODEL_NAME = "Qwen/Qwen2-7B-Instruct"
25
  # Load the dataset and convert to pandas
26
  data = pandas.read_parquet("hf://datasets/ccm/rag-idetc/data/train-00000-of-00001.parquet")
27
 
28
- # Filter out any publications without an abstract
29
- abstract_is_null = [
30
- '"abstract": null' in json.dumps(bibdict) for bibdict in data["bib_dict"].values
31
- ]
32
- data = data[~pandas.Series(abstract_is_null)]
33
- data.reset_index(inplace=True)
34
-
35
  # Load the model for later use in embeddings
36
  model = sentence_transformers.SentenceTransformer(EMBEDDING_MODEL_NAME)
37
 
 
25
  # Load the dataset and convert to pandas
26
  data = pandas.read_parquet("hf://datasets/ccm/rag-idetc/data/train-00000-of-00001.parquet")
27
 
 
 
 
 
 
 
 
28
  # Load the model for later use in embeddings
29
  model = sentence_transformers.SentenceTransformer(EMBEDDING_MODEL_NAME)
30