nopperl committed on
Commit
176b16b
·
1 Parent(s): 2f37cf4

implement inference using llama.cpp

Browse files
Files changed (6) hide show
  1. .gitmodules +3 -0
  2. README.md +0 -3
  3. app.py +7 -2
  4. dependencies.txt +1 -0
  5. install-llamacpp.sh +8 -0
  6. llama.cpp +1 -0
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "llama.cpp"]
2
+ path = llama.cpp
3
+ url = https://github.com/nopperl/llama.cpp
README.md CHANGED
@@ -8,9 +8,6 @@ sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- preload_from_hub:
12
- - mistralai/Mistral-7B-Instruct-v0.2
13
- - nopperl/emissions-extraction-lora
14
  datasets:
15
  - nopperl/sustainability-report-emissions-instruction-style
16
  - nopperl/corporate-emission-reports
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
 
 
11
  datasets:
12
  - nopperl/sustainability-report-emissions-instruction-style
13
  - nopperl/corporate-emission-reports
app.py CHANGED
@@ -1,11 +1,16 @@
 
 
1
  import gradio as gr
 
2
 
3
  from corporate_emission_reports.inference import extract_emissions
4
 
 
 
5
 
6
  def predict(input_method, document_file, document_url):
7
  document_path = document_file if input_method == "File" else document_url
8
- emissions = extract_emissions(document_path, "mistralai/Mistral-7B-Instruct-v0.2", lora="nopperl/emissions-extraction-lora", engine="hf", low_cpu_mem_usage=True)
9
  return emissions.model_dump_json()
10
 
11
  with open("description.md", "r") as f:
@@ -28,5 +33,5 @@ interface = gr.Interface(
28
  analytics_enabled=False,
29
  cache_examples=False,
30
  )
31
- interface.queue().launch(debug=True, share=True)
32
 
 
1
+ from subprocess import run
2
+
3
  import gradio as gr
4
+ from huggingface_hub import snapshot_download
5
 
6
  from corporate_emission_reports.inference import extract_emissions
7
 
8
+ run(["sh", "install-llamacpp.sh"])
9
+ MODEL_PATH = snapshot_download("nopperl/emissions-extraction-lora-merged-GGUF")
10
 
11
  def predict(input_method, document_file, document_url):
12
  document_path = document_file if input_method == "File" else document_url
13
+ emissions = extract_emissions(document_path, MODEL_PATH, model_name="ggml-model-q8_0.gguf")
14
  return emissions.model_dump_json()
15
 
16
  with open("description.md", "r") as f:
 
33
  analytics_enabled=False,
34
  cache_examples=False,
35
  )
36
+ interface.queue().launch()
37
 
dependencies.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ cmake
install-llamacpp.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+ if [ ! -f llama.cpp/build/bin/main ]; then
3
+ cd llama.cpp
4
+ mkdir build
5
+ cd build
6
+ cmake ..
7
+ cmake --build . --config Release
8
+ fi
llama.cpp ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit f172de03f11465dc6c5a0fc3a22f8ec254c6832c