update scripts
Browse files- environment.yml +166 -0
- scripts/crawling.sh +1 -0
- scripts/embedding.sh +1 -0
- scripts/env.sh +16 -0
- scripts/generator.sh +5 -0
- scripts/retriever_eval.sh +7 -0
environment.yml
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: scipip
|
2 |
+
channels:
|
3 |
+
- pytorch
|
4 |
+
- nvidia
|
5 |
+
- defaults
|
6 |
+
dependencies:
|
7 |
+
- _libgcc_mutex=0.1=main
|
8 |
+
- _openmp_mutex=5.1=1_gnu
|
9 |
+
- blas=1.0=mkl
|
10 |
+
- brotli-python=1.0.9=py310h6a678d5_8
|
11 |
+
- bzip2=1.0.8=h5eee18b_6
|
12 |
+
- ca-certificates=2024.9.24=h06a4308_0
|
13 |
+
- certifi=2024.8.30=py310h06a4308_0
|
14 |
+
- cffi=1.17.1=py310h1fdaa30_0
|
15 |
+
- charset-normalizer=3.3.2=pyhd3eb1b0_0
|
16 |
+
- cryptography=43.0.0=py310hdda0065_0
|
17 |
+
- cuda-cudart=11.8.89=0
|
18 |
+
- cuda-cupti=11.8.87=0
|
19 |
+
- cuda-libraries=11.8.0=0
|
20 |
+
- cuda-nvrtc=11.8.89=0
|
21 |
+
- cuda-nvtx=11.8.86=0
|
22 |
+
- cuda-runtime=11.8.0=0
|
23 |
+
- cuda-version=12.6=3
|
24 |
+
- filelock=3.13.1=py310h06a4308_0
|
25 |
+
- gmp=6.2.1=h295c915_3
|
26 |
+
- gmpy2=2.1.2=py310heeb90bb_0
|
27 |
+
- idna=3.7=py310h06a4308_0
|
28 |
+
- intel-openmp=2023.1.0=hdb19cb5_46306
|
29 |
+
- jinja2=3.1.4=py310h06a4308_0
|
30 |
+
- ld_impl_linux-64=2.40=h12ee557_0
|
31 |
+
- libcublas=11.11.3.6=0
|
32 |
+
- libcufft=10.9.0.58=0
|
33 |
+
- libcufile=1.11.1.6=0
|
34 |
+
- libcurand=10.3.7.77=0
|
35 |
+
- libcusolver=11.4.1.48=0
|
36 |
+
- libcusparse=11.7.5.86=0
|
37 |
+
- libffi=3.4.4=h6a678d5_1
|
38 |
+
- libgcc-ng=11.2.0=h1234567_1
|
39 |
+
- libgomp=11.2.0=h1234567_1
|
40 |
+
- libnpp=11.8.0.86=0
|
41 |
+
- libnvjpeg=11.9.0.86=0
|
42 |
+
- libstdcxx-ng=11.2.0=h1234567_1
|
43 |
+
- libuuid=1.41.5=h5eee18b_0
|
44 |
+
- llvm-openmp=14.0.6=h9e868ea_0
|
45 |
+
- markupsafe=2.1.3=py310h5eee18b_0
|
46 |
+
- mkl=2023.1.0=h213fc3f_46344
|
47 |
+
- mpc=1.1.0=h10f8cd9_1
|
48 |
+
- mpfr=4.0.2=hb69a4c5_1
|
49 |
+
- mpmath=1.3.0=py310h06a4308_0
|
50 |
+
- ncurses=6.4=h6a678d5_0
|
51 |
+
- networkx=3.3=py310h06a4308_0
|
52 |
+
- openssl=3.0.15=h5eee18b_0
|
53 |
+
- pip=24.2=py310h06a4308_0
|
54 |
+
- pycparser=2.21=pyhd3eb1b0_0
|
55 |
+
- pyopenssl=24.2.1=py310h06a4308_0
|
56 |
+
- pysocks=1.7.1=py310h06a4308_0
|
57 |
+
- python=3.10.13=h955ad1f_0
|
58 |
+
- pytorch=2.1.0=py3.10_cuda11.8_cudnn8.7.0_0
|
59 |
+
- pytorch-cuda=11.8=h7e8668a_5
|
60 |
+
- pytorch-mutex=1.0=cuda
|
61 |
+
- pyyaml=6.0=py310h5eee18b_1
|
62 |
+
- readline=8.2=h5eee18b_0
|
63 |
+
- requests=2.31.0=py310h06a4308_0
|
64 |
+
- setuptools=68.0.0=py310h06a4308_0
|
65 |
+
- sqlite=3.45.3=h5eee18b_0
|
66 |
+
- sympy=1.13.2=py310h06a4308_0
|
67 |
+
- tbb=2021.8.0=hdb19cb5_0
|
68 |
+
- tk=8.6.14=h39e8969_0
|
69 |
+
- torchtriton=2.1.0=py310
|
70 |
+
- tqdm=4.65.0=py310h2f386ee_0
|
71 |
+
- typing_extensions=4.11.0=py310h06a4308_0
|
72 |
+
- urllib3=1.26.19=py310h06a4308_0
|
73 |
+
- wheel=0.44.0=py310h06a4308_0
|
74 |
+
- xz=5.4.6=h5eee18b_1
|
75 |
+
- yaml=0.2.5=h7b6447c_0
|
76 |
+
- yaml-cpp=0.7.0=h295c915_1
|
77 |
+
- zlib=1.2.13=h5eee18b_0
|
78 |
+
- pip:
|
79 |
+
- annotated-types==0.7.0
|
80 |
+
- antlr4-python3-runtime==4.9.3
|
81 |
+
- anyio==4.6.2.post1
|
82 |
+
- arrow==1.3.0
|
83 |
+
- beautifulsoup4==4.12.3
|
84 |
+
- bibtexparser==1.4.2
|
85 |
+
- blis==0.7.11
|
86 |
+
- cachetools==5.5.0
|
87 |
+
- catalogue==2.0.10
|
88 |
+
- click==8.1.7
|
89 |
+
- cloudpathlib==0.16.0
|
90 |
+
- confection==0.1.5
|
91 |
+
- cymem==2.0.8
|
92 |
+
- deprecated==1.2.14
|
93 |
+
- distro==1.9.0
|
94 |
+
- exceptiongroup==1.2.2
|
95 |
+
- free-proxy==1.1.2
|
96 |
+
- fsspec==2024.10.0
|
97 |
+
- h11==0.14.0
|
98 |
+
- httpcore==1.0.6
|
99 |
+
- httpx==0.27.2
|
100 |
+
- huggingface-hub==0.26.2
|
101 |
+
- interchange==2021.0.4
|
102 |
+
- joblib==1.4.2
|
103 |
+
- langcodes==3.4.1
|
104 |
+
- language-data==1.2.0
|
105 |
+
- loguru==0.7.2
|
106 |
+
- lxml==5.3.0
|
107 |
+
- marisa-trie==1.2.1
|
108 |
+
- markdown-it-py==3.0.0
|
109 |
+
- mdurl==0.1.2
|
110 |
+
- monotonic==1.6
|
111 |
+
- murmurhash==1.0.10
|
112 |
+
- neo4j==5.21.0
|
113 |
+
- numpy==1.26.0
|
114 |
+
- omegaconf==2.3.0
|
115 |
+
- openai==1.12.0
|
116 |
+
- outcome==1.3.0.post0
|
117 |
+
- pandas==2.2.3
|
118 |
+
- pansi==2020.7.3
|
119 |
+
- pillow==10.4.0
|
120 |
+
- preshed==3.0.9
|
121 |
+
- py2neo==2021.2.4
|
122 |
+
- pydantic==2.9.2
|
123 |
+
- pydantic-core==2.23.4
|
124 |
+
- pygments==2.18.0
|
125 |
+
- pyjwt==2.8.0
|
126 |
+
- pymupdf==1.24.5
|
127 |
+
- pymupdfb==1.24.3
|
128 |
+
- pyparsing==3.2.0
|
129 |
+
- pyphen==0.17.0
|
130 |
+
- python-dateutil==2.9.0.post0
|
131 |
+
- pytz==2024.2
|
132 |
+
- regex==2024.9.11
|
133 |
+
- rich==13.9.3
|
134 |
+
- safetensors==0.4.5
|
135 |
+
- scikit-learn==1.5.2
|
136 |
+
- scipy==1.14.1
|
137 |
+
- selenium==4.25.0
|
138 |
+
- sentence-transformers==3.0.1
|
139 |
+
- shellingham==1.5.4
|
140 |
+
- six==1.16.0
|
141 |
+
- smart-open==6.4.0
|
142 |
+
- sniffio==1.3.1
|
143 |
+
- sortedcontainers==2.4.0
|
144 |
+
- soupsieve==2.6
|
145 |
+
- spacy==3.7.4
|
146 |
+
- spacy-legacy==3.0.12
|
147 |
+
- spacy-loggers==1.0.5
|
148 |
+
- srsly==2.4.8
|
149 |
+
- textstat==0.7.4
|
150 |
+
- thinc==8.2.5
|
151 |
+
- threadpoolctl==3.5.0
|
152 |
+
- tokenizers==0.19.1
|
153 |
+
- transformers==4.44.0
|
154 |
+
- trio==0.27.0
|
155 |
+
- trio-websocket==0.11.1
|
156 |
+
- typer==0.9.4
|
157 |
+
- types-python-dateutil==2.9.0.20241003
|
158 |
+
- tzdata==2024.2
|
159 |
+
- wasabi==1.1.3
|
160 |
+
- weasel==0.3.4
|
161 |
+
- websocket-client==1.8.0
|
162 |
+
- wrapt==1.16.0
|
163 |
+
- wsproto==1.2.0
|
164 |
+
- zhipuai==2.1.5.20230904
|
165 |
+
- streamlit==1.39.0
|
166 |
+
- huggingface-hub==0.26.2
|
scripts/crawling.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Invoke paper_manager's `update` subcommand for every year of venue "iccv".
python src/paper_manager.py update \
  --year all \
  --venue-name iccv
|
scripts/embedding.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Invoke paper_manager's `embedding` subcommand with the dataset config.
# Paths are relative to the repository root and follow the layout used by the
# other scripts in this change: entry points under src/, config at
# configs/datasets.yaml (the previous data/ and .configs/ paths looked like
# typos -- NOTE(review): confirm against the repository tree).
python src/paper_manager.py embedding -c configs/datasets.yaml
|
scripts/env.sh
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Bolt URL of the Neo4j instance (local host, port 7687).
NEO4J_URL='bolt://127.0.0.1:7687'
export NEO4J_URL
|
2 |
+
# Neo4j login name; "neo4j" is the default account.
NEO4J_USERNAME='neo4j'
export NEO4J_USERNAME
|
3 |
+
# Neo4j password -- placeholder value, replace with your own.
NEO4J_PASSWD='****'
export NEO4J_PASSWD
|
4 |
+
## Use Qwen
|
5 |
+
# Model identifier for the Qwen configuration.
MODEL_NAME='qwen-turbo'
export MODEL_NAME
|
6 |
+
# Model type label; both the Qwen and the GPT-4o settings in this file use "OpenAI".
MODEL_TYPE='OpenAI'
export MODEL_TYPE
|
7 |
+
# API key for the model endpoint -- placeholder value, replace with your own.
MODEL_API_KEY='sk-****'
export MODEL_API_KEY
|
8 |
+
# Base URL of the DashScope "compatible-mode" v1 endpoint.
BASE_URL='https://dashscope.aliyuncs.com/compatible-mode/v1'
export BASE_URL
|
9 |
+
|
10 |
+
## Use GPT-4o
|
11 |
+
# export MODEL_NAME="gpt-4o"
|
12 |
+
# export MODEL_TYPE="OpenAI"
|
13 |
+
# export MODEL_API_KEY="sk-***"
|
14 |
+
# export BASE_URL="https://openai.***"
|
15 |
+
|
16 |
+
|
scripts/generator.sh
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Invoke the generator's `backtracking` subcommand in "with_cited_paper" mode,
# with cue words set to True and Qwen selected for both the --sum-api and
# --gen-api options.
python src/generator.py backtracking \
  -m with_cited_paper \
  --use-cue-words True \
  --sum-api Qwen \
  --gen-api Qwen
|
2 |
+
# python src/generator.py backtracking -m with_cited_paper --use-cue-words False &&
|
3 |
+
# python src/generator.py backtracking -m with_retrieved_paper --use-cue-words True &&
|
4 |
+
# python src/generator.py backtracking -m with_retrieved_paper --use-cue-words False &&
|
5 |
+
# python src/generator.py new-idea
|
scripts/retriever_eval.sh
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
# Run the retriever's `retrieve` subcommand with the dataset config and the id
# file assets/data/test_acl_2024.json (paths are relative to the current
# working directory).
# The former bare `wait` was dropped: nothing is started in the background in
# this script, so there was nothing for it to wait on.
if python src/retriever.py retrieve \
  -c configs/datasets.yaml \
  --ids-path assets/data/test_acl_2024.json; then
  echo "Retriever Eval Finish..."
else
  # Propagate the failure instead of announcing completion with status 0.
  status=$?
  echo "Retriever Eval failed with exit code ${status}" >&2
  exit "${status}"
fi
|