initial commit
Browse files- .gitignore +171 -0
- README.md +1 -1
- configs/prompts.toml +9 -0
- main.py +117 -0
- requirements.txt +3 -0
- statics/styles.css +3 -0
- utils.py +22 -0
.gitignore
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# UV
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
#uv.lock
|
102 |
+
|
103 |
+
# poetry
|
104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
106 |
+
# commonly ignored for libraries.
|
107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
108 |
+
#poetry.lock
|
109 |
+
|
110 |
+
# pdm
|
111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
112 |
+
#pdm.lock
|
113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
114 |
+
# in version control.
|
115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
116 |
+
.pdm.toml
|
117 |
+
.pdm-python
|
118 |
+
.pdm-build/
|
119 |
+
|
120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
121 |
+
__pypackages__/
|
122 |
+
|
123 |
+
# Celery stuff
|
124 |
+
celerybeat-schedule
|
125 |
+
celerybeat.pid
|
126 |
+
|
127 |
+
# SageMath parsed files
|
128 |
+
*.sage.py
|
129 |
+
|
130 |
+
# Environments
|
131 |
+
.env
|
132 |
+
.venv
|
133 |
+
env/
|
134 |
+
venv/
|
135 |
+
ENV/
|
136 |
+
env.bak/
|
137 |
+
venv.bak/
|
138 |
+
|
139 |
+
# Spyder project settings
|
140 |
+
.spyderproject
|
141 |
+
.spyproject
|
142 |
+
|
143 |
+
# Rope project settings
|
144 |
+
.ropeproject
|
145 |
+
|
146 |
+
# mkdocs documentation
|
147 |
+
/site
|
148 |
+
|
149 |
+
# mypy
|
150 |
+
.mypy_cache/
|
151 |
+
.dmypy.json
|
152 |
+
dmypy.json
|
153 |
+
|
154 |
+
# Pyre type checker
|
155 |
+
.pyre/
|
156 |
+
|
157 |
+
# pytype static type analyzer
|
158 |
+
.pytype/
|
159 |
+
|
160 |
+
# Cython debug symbols
|
161 |
+
cython_debug/
|
162 |
+
|
163 |
+
# PyCharm
|
164 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
165 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
166 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
167 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
168 |
+
#.idea/
|
169 |
+
|
170 |
+
# PyPI configuration file
|
171 |
+
.pypirc
|
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
configs/prompts.toml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[summarization]
|
2 |
+
prompt = """
|
3 |
+
based on the last conversation between user and AI assistant,
|
4 |
+
: last conversation($previous_summary)
|
5 |
+
|
6 |
+
Update the following summary. Update only the relevant part of the summary.
|
7 |
+
Otherwise, keep the summary as it is.
|
8 |
+
: $latest_conversation
|
9 |
+
"""
|
main.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import argparse
|
4 |
+
from functools import partial
|
5 |
+
from string import Template
|
6 |
+
from utils import load_prompt, setup_gemini_client
|
7 |
+
|
8 |
+
def parse_args():
    """Parse the command-line options of the demo application.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv`` with the
        attributes ``ai_studio_api_key``, ``vertexai``, ``vertexai_project``,
        ``vertexai_location``, ``model``, ``prompt_tmpl_path`` and
        ``css_path``.
    """
    # Options are declared as data, in the order they should be registered.
    # The API-key default is read from the GEMINI_API_KEY environment
    # variable each time this function is called.
    option_specs = [
        ("--ai-studio-api-key", {"type": str, "default": os.getenv("GEMINI_API_KEY")}),
        ("--vertexai", {"action": "store_true", "default": False}),
        ("--vertexai-project", {"type": str, "default": "gcp-ml-172005"}),
        ("--vertexai-location", {"type": str, "default": "us-central1"}),
        ("--model", {"type": str, "default": "gemini-1.5-flash"}),
        ("--prompt-tmpl-path", {"type": str, "default": "configs/prompts.toml"}),
        ("--css-path", {"type": str, "default": "statics/styles.css"}),
    ]

    cli = argparse.ArgumentParser()
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
|
20 |
+
|
21 |
+
def find_attached_file(filename, attached_files):
    """Look up an attachment record by file name.

    Args:
        filename: Name to match against each record's ``'name'`` entry.
        attached_files: Iterable of dict records, each with a ``'name'`` key.

    Returns:
        The first record whose ``'name'`` equals ``filename``, or ``None``
        when no record matches.
    """
    matches = (record for record in attached_files if record['name'] == filename)
    return next(matches, None)
|
26 |
+
|
27 |
+
def echo(message, history, state):
    """Answer one chat turn and refresh the running summary.

    Used as the ``fn`` of the ``gr.ChatInterface`` built in ``main``; relies on
    the module-level ``client`` and ``prompt_tmpl`` that ``main`` sets up.

    Args:
        message: Multimodal message dict; ``message['text']`` is the user text
            and ``message['files']`` a (possibly empty) list of local paths.
            Only the first file is handled.
        history: Chat history supplied by Gradio; not used here.
        state: Per-session dict with the keys ``"messages"``,
            ``"attached_files"`` and ``"summary"``; it is updated in place.

    Returns:
        A tuple ``(reply_text, state, summary_text)``.
    """
    uploaded = None
    if message['files']:
        path_local = message['files'][0]
        filename = os.path.basename(path_local)

        cached = find_attached_file(filename, state["attached_files"])
        if cached is None:
            uploaded = client.files.upload(path=path_local)
            state["attached_files"].append({
                "name": filename,
                "path_local": path_local,
                "gcp_entity": uploaded,
                "path_gcp": uploaded.name,
                # Key fixed: it was previously spelled "mime_type=".
                "mime_type": uploaded.mime_type,
                "expiration_time": uploaded.expiration_time,
            })
        else:
            # Reuse the handle stored at upload time; the bookkeeping record
            # itself is not something to send to the model.
            uploaded = cached["gcp_entity"]

    user_message = [message['text']]
    if uploaded is not None:
        user_message.append(uploaded)

    state['messages'] = state['messages'] + user_message

    # NOTE(review): the model name is hard-coded; the --model option parsed in
    # parse_args is not consulted here.
    chat_response = client.models.generate_content(
        model="gemini-1.5-flash",
        contents=state['messages'],
    )
    # Keep the chat answer in its own variable so the summarization call below
    # cannot replace what is shown to the user.
    reply = chat_response.text

    if state['summary'] == "":
        # First turn: the reply itself seeds the summary.
        state['summary'] = reply
    else:
        # NOTE(review): in configs/prompts.toml the text around
        # $previous_summary reads "last conversation" while
        # $latest_conversation follows the "Update the following summary"
        # instruction -- the two labels look swapped; confirm.
        summary_prompt = Template(
            prompt_tmpl['summarization']['prompt']
        ).safe_substitute(
            previous_summary=state['summary'],
            latest_conversation=str({"user": message['text'], "assistant": reply}),
        )
        summary_response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=[summary_prompt],
        )
        state['summary'] = summary_response.text

    return reply, state, state['summary']
|
79 |
+
|
80 |
+
def main(args):
    """Create the Gradio UI and initialise the shared client and prompts.

    Sets the module-level globals ``client`` (from ``setup_gemini_client``)
    and ``prompt_tmpl`` (from ``load_prompt``), which ``echo`` reads.

    Args:
        args: Parsed options; ``args.css_path`` is read here and ``args`` is
            forwarded to ``setup_gemini_client`` and ``load_prompt``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    global client, prompt_tmpl

    # Use a context manager so the stylesheet's file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(args.css_path, "r") as css_file:
        style_css = css_file.read()

    client = setup_gemini_client(args)
    prompt_tmpl = load_prompt(args)

    ## Gradio Blocks
    with gr.Blocks(css=style_css) as demo:
        # State per session
        state = gr.State({
            "messages": [],
            "attached_files": [],
            "summary": ""
        })

        gr.Markdown("# Adaptive Summarization")
        gr.Markdown("AdaptSum stands for Adaptive Summarization. This project focuses on developing an LLM-powered system for dynamic summarization. Instead of generating entirely new summaries with each update, the system intelligently identifies and modifies only the necessary parts of the existing summary. This approach aims to create a more efficient and fluid summarization process within a continuous chat interaction with an LLM.")

        with gr.Row(elem_id="chat-interface"):
            with gr.Column(scale=3, elem_id="summary-window"):
                summary = gr.Markdown(label="Summary so far")

            with gr.Column(scale=7):
                # echo receives the session state as an extra input and
                # returns the updated state plus the summary text.
                gr.ChatInterface(
                    multimodal=True,
                    type="messages",
                    fn=echo,
                    additional_inputs=[state],
                    additional_outputs=[state, summary],
                )

    return demo
|
113 |
+
|
114 |
+
if __name__ == "__main__":
    # Script entry point: parse the CLI options, build the Gradio app, and
    # start serving it.
    args = parse_args()
    demo = main(args)
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
google-genai
|
2 |
+
toml
|
3 |
+
gradio
|
statics/styles.css
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
/* #summary-window {
|
2 |
+
text-align: center;
|
3 |
+
} */
|
utils.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import toml
|
2 |
+
from google import genai
|
3 |
+
|
4 |
+
def load_prompt(args):
    """Load the prompt templates from a TOML file.

    Args:
        args: Object with a ``prompt_tmpl_path`` attribute naming the file.

    Returns:
        Whatever ``toml.load`` returns for that file (the parsed document).
    """
    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's default encoding.
    with open(args.prompt_tmpl_path, 'r', encoding='utf-8') as f:
        return toml.load(f)
|
9 |
+
|
10 |
+
def setup_gemini_client(args):
    """Construct the ``genai.Client`` selected by the parsed options.

    Args:
        args: Object with ``vertexai``, ``vertexai_project``,
            ``vertexai_location`` and ``ai_studio_api_key`` attributes.

    Returns:
        A ``genai.Client`` built with the Vertex AI project/location when
        ``args.vertexai`` is truthy, otherwise one built with the API key.
    """
    if not args.vertexai:
        # API-key mode.
        return genai.Client(api_key=args.ai_studio_api_key)

    # Vertex AI mode.
    return genai.Client(
        vertexai=args.vertexai,
        project=args.vertexai_project,
        location=args.vertexai_location,
    )
|