김탱 committed on
Commit
1b3c845
·
1 Parent(s): fc02665
Files changed (7) hide show
  1. .gitignore +176 -0
  2. .python-version +1 -0
  3. README.md +9 -0
  4. app.py +194 -0
  5. pyproject.toml +19 -0
  6. requirements.txt +91 -0
  7. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python
3
+
4
+ ### Python ###
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+ cover/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ .pybuilder/
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ # For a library or package, you might want to ignore these files since the code is
91
+ # intended to run in multiple environments; otherwise, check them in:
92
+ # .python-version
93
+
94
+ # pipenv
95
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
97
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
98
+ # install all needed dependencies.
99
+ #Pipfile.lock
100
+
101
+ # poetry
102
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
104
+ # commonly ignored for libraries.
105
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106
+ #poetry.lock
107
+
108
+ # pdm
109
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110
+ #pdm.lock
111
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112
+ # in version control.
113
+ # https://pdm.fming.dev/#use-with-ide
114
+ .pdm.toml
115
+
116
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117
+ __pypackages__/
118
+
119
+ # Celery stuff
120
+ celerybeat-schedule
121
+ celerybeat.pid
122
+
123
+ # SageMath parsed files
124
+ *.sage.py
125
+
126
+ # Environments
127
+ .env
128
+ .venv
129
+ env/
130
+ venv/
131
+ ENV/
132
+ env.bak/
133
+ venv.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
152
+
153
+ # pytype static type analyzer
154
+ .pytype/
155
+
156
+ # Cython debug symbols
157
+ cython_debug/
158
+
159
+ # PyCharm
160
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
163
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164
+ #.idea/
165
+
166
+ ### Python Patch ###
167
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
168
+ poetry.toml
169
+
170
+ # ruff
171
+ .ruff_cache/
172
+
173
+ # LSP config files
174
+ pyrightconfig.json
175
+
176
+ # End of https://www.toptal.com/developers/gitignore/api/python
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
README.md CHANGED
@@ -11,3 +11,12 @@ short_description: tutorial
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ # How to do it
16
+
17
+
18
+
19
+
20
+ # Reference
21
+
22
+ - https://medium.com/@james.irving.phd/creating-your-personal-chatbot-using-hugging-face-spaces-and-streamlit-596a54b9e3ed
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain_huggingface import HuggingFaceEndpoint
4
+ import streamlit as st
5
+ from langchain_core.prompts import PromptTemplate
6
+ from langchain_core.output_parsers import StrOutputParser
7
+
8
# Load variables from a local .env file into the process environment.
load_dotenv()

# Hugging Face model repository ID used by this app.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
13
+
14
+
15
def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
    """
    Build the language model object used for Hugging Face inference.

    Parameters:
    - model_id (str): ID of the Hugging Face model repository.
    - max_new_tokens (int): Maximum number of new tokens that may be generated.
    - temperature (float): Sampling temperature handed to the model.

    Returns:
    - llm (HuggingFaceEndpoint): The configured HuggingFaceEndpoint instance.
    """
    # The API token is looked up in the HF_TOKEN environment variable;
    # os.getenv returns None when the variable is not set.
    endpoint_options = {
        "repo_id": model_id,
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "token": os.getenv("HF_TOKEN"),
    }
    return HuggingFaceEndpoint(**endpoint_options)
35
+
36
+
37
# Configure the Streamlit page and render the title and description.
st.set_page_config(page_title="HuggingFace ChatBot", page_icon="🤗")
st.title("개인 HuggingFace 챗봇")
st.markdown(
    f"*이것은 HuggingFace transformers 라이브러리를 사용하여 텍스트 입력에 대한 응답을 생성하는 간단한 챗봇입니다. {model_id} 모델을 사용합니다.*"
)

# Seed session state with initial values. A key that is already present is
# left untouched, so values survive Streamlit reruns.
_session_defaults = {
    # Avatars shown next to user / assistant messages.
    "avatars": {"user": None, "assistant": None},
    # Most recent text entered by the user.
    "user_text": None,
    # Model parameter: maximum response length.
    "max_response_length": 256,
    # System message for the conversation.
    "system_message": "인간 사용자와 대화하는 친절한 AI",
    # First assistant message shown in a new chat.
    "starter_message": "안녕하세요! 오늘 무엇을 도와드릴까요?",
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
63
+
64
+
65
# Sidebar holding the app settings.
with st.sidebar:
    st.header("시스템 설정")

    # AI settings: the current widget values are written to session state on
    # every script run.
    st.session_state.system_message = st.text_area(
        "시스템 메시지", value="당신은 인간 사용자와 대화하는 친절한 AI입니다."
    )
    st.session_state.starter_message = st.text_area(
        "첫 번째 AI 메시지", value="안녕하세요! 오늘 무엇을 도와드릴까요?"
    )

    # Model settings: this value is forwarded as max_new_tokens when a reply is
    # requested, so the widget enforces a lower bound of 1 instead of letting
    # zero or negative token counts through.
    st.session_state.max_response_length = st.number_input(
        "최대 응답 길이", min_value=1, value=128
    )

    # Avatar selection, one column per role.
    st.markdown("*아바타 선택:*")
    col1, col2 = st.columns(2)
    with col1:
        st.session_state.avatars["assistant"] = st.selectbox(
            "AI 아바타", options=["🤗", "💬", "🤖"], index=0
        )
    with col2:
        st.session_state.avatars["user"] = st.selectbox(
            "사용자 아바타", options=["👤", "👱‍♂️", "👨🏾", "👩", "👧🏾"], index=0
        )
    # Button that resets the chat history.
    reset_history = st.button("채팅 기록 초기화")

# Create the chat history on the first run, or recreate it when the reset
# button was pressed; it always starts with the assistant's starter message.
if "chat_history" not in st.session_state or reset_history:
    st.session_state.chat_history = [
        {"role": "assistant", "content": st.session_state.starter_message}
    ]
99
+
100
+
101
def get_response(
    system_message,
    chat_history,
    user_text,
    eos_token_id=None,
    max_new_tokens=256,
    get_llm_hf_kws=None,
):
    """
    Generate a reply from the chatbot model and record the exchange.

    Parameters:
        system_message (str): System message for the conversation.
        chat_history (list): Previous chat messages, as dicts with "role" and
            "content" keys. The list is updated in place.
        user_text (str): The user's input text.
        eos_token_id (list, optional): End-of-sequence token IDs. Accepted for
            backward compatibility; this function does not use it.
        max_new_tokens (int, optional): Maximum number of new tokens to generate.
        get_llm_hf_kws (dict, optional): Extra keyword arguments forwarded to
            get_llm_hf_inference. Entries here take precedence over the
            max_new_tokens / temperature values set by this function.

    Returns:
        tuple: (response, chat_history) - the generated reply text and the
        same chat_history list with the new user and assistant messages
        appended.
    """
    # Set up the model. Caller-supplied keyword arguments are merged last so
    # they can override the defaults chosen here.
    llm_kwargs = {"max_new_tokens": max_new_tokens, "temperature": 0.1}
    llm_kwargs.update(get_llm_hf_kws or {})
    hf = get_llm_hf_inference(**llm_kwargs)

    # Build the prompt template. chat_history is interpolated as-is (the list
    # object is formatted directly into the prompt text).
    prompt = PromptTemplate.from_template(
        (
            "[INST] {system_message}"
            "\n현재 대화:\n{chat_history}\n\n"
            "\n사용자: {user_text}.\n [/INST]"
            "\nAI:"
        )
    )
    # Chain the prompt, the model and a string output parser together.
    chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key="content")

    # Generate the response.
    response = chat.invoke(
        input=dict(
            system_message=system_message,
            user_text=user_text,
            chat_history=chat_history,
        )
    )
    # Keep only the text after the last "AI:" marker, dropping the prefix if
    # the model echoed it.
    response = response.split("AI:")[-1]

    # Record both sides of the exchange in the chat history.
    chat_history.append({"role": "user", "content": user_text})
    chat_history.append({"role": "assistant", "content": response})
    return response, chat_history
154
+
155
+
156
# Chat interface: a bordered container holding the message area followed by
# the chat input box.
chat_interface = st.container(border=True)
with chat_interface:
    output_container = st.container()
    st.session_state.user_text = st.chat_input(
        placeholder="여기에 텍스트를 입력하세요."
    )

# Render the conversation inside the message area.
with output_container:
    avatars = st.session_state["avatars"]

    # Replay the stored history, leaving out any system messages.
    visible_messages = (
        entry for entry in st.session_state.chat_history if entry["role"] != "system"
    )
    for message in visible_messages:
        with st.chat_message(message["role"], avatar=avatars[message["role"]]):
            st.markdown(message["content"])

    # When the user has submitted new text:
    new_text = st.session_state.user_text
    if new_text:
        # Show the user's new message right away.
        with st.chat_message("user", avatar=avatars["user"]):
            st.markdown(new_text)

        # Show a spinner in the assistant bubble while waiting for the reply.
        with st.chat_message("assistant", avatar=avatars["assistant"]):
            with st.spinner("생각 중..."):
                # Call the inference API with the system prompt, the user
                # text and the history; the returned history is stored back.
                response, st.session_state.chat_history = get_response(
                    system_message=st.session_state.system_message,
                    user_text=new_text,
                    chat_history=st.session_state.chat_history,
                    max_new_tokens=st.session_state.max_response_length,
                )
                st.markdown(response)
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "first-chatbot"
3
+ version = "0.1.0"
4
+ description = "Streamlit chatbot that queries a Hugging Face hosted model through LangChain"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "accelerate>=1.5.2",
9
+ "huggingface-hub>=0.29.3",
10
+ "langchain-community>=0.3.20",
11
+ "langchain-core>=0.3.49",
12
+ "langchain-huggingface>=0.1.2",
13
+ "langchain-text-splitters>=0.3.7",
14
+ "python-dotenv>=1.1.0",
15
+ "streamlit>=1.44.0",
16
+ "tqdm>=4.67.1",
17
+ "transformers>=4.50.2",
18
+ "watchdog>=6.0.0",
19
+ ]
requirements.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.5.2
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.11.14
4
+ aiosignal==1.3.2
5
+ altair==5.5.0
6
+ annotated-types==0.7.0
7
+ anyio==4.9.0
8
+ attrs==25.3.0
9
+ blinker==1.9.0
10
+ cachetools==5.5.2
11
+ certifi==2025.1.31
12
+ charset-normalizer==3.4.1
13
+ click==8.1.8
14
+ dataclasses-json==0.6.7
15
+ filelock==3.18.0
16
+ frozenlist==1.5.0
17
+ fsspec==2025.3.0
18
+ gitdb==4.0.12
19
+ gitpython==3.1.44
20
+ h11==0.14.0
21
+ httpcore==1.0.7
22
+ httpx==0.28.1
23
+ httpx-sse==0.4.0
24
+ huggingface-hub==0.29.3
25
+ idna==3.10
26
+ jinja2==3.1.6
27
+ joblib==1.4.2
28
+ jsonpatch==1.33
29
+ jsonpointer==3.0.0
30
+ jsonschema==4.23.0
31
+ jsonschema-specifications==2024.10.1
32
+ langchain==0.3.21
33
+ langchain-community==0.3.20
34
+ langchain-core==0.3.49
35
+ langchain-huggingface==0.1.2
36
+ langchain-text-splitters==0.3.7
37
+ langsmith==0.3.19
38
+ markupsafe==3.0.2
39
+ marshmallow==3.26.1
40
+ mpmath==1.3.0
41
+ multidict==6.2.0
42
+ mypy-extensions==1.0.0
43
+ narwhals==1.32.0
44
+ networkx==3.4.2
45
+ numpy==2.2.4
46
+ orjson==3.10.16
47
+ packaging==24.2
48
+ pandas==2.2.3
49
+ pillow==11.1.0
50
+ propcache==0.3.1
51
+ protobuf==5.29.4
52
+ psutil==7.0.0
53
+ pyarrow==19.0.1
54
+ pydantic==2.10.6
55
+ pydantic-core==2.27.2
56
+ pydantic-settings==2.8.1
57
+ pydeck==0.9.1
58
+ python-dateutil==2.9.0.post0
59
+ python-dotenv==1.1.0
60
+ pytz==2025.2
61
+ pyyaml==6.0.2
62
+ referencing==0.36.2
63
+ regex==2024.11.6
64
+ requests==2.32.3
65
+ requests-toolbelt==1.0.0
66
+ rpds-py==0.24.0
67
+ safetensors==0.5.3
68
+ scikit-learn==1.6.1
69
+ scipy==1.15.2
70
+ sentence-transformers==4.0.1
71
+ six==1.17.0
72
+ smmap==5.0.2
73
+ sniffio==1.3.1
74
+ sqlalchemy==2.0.39
75
+ streamlit==1.44.0
76
+ sympy==1.13.1
77
+ tenacity==9.0.0
78
+ threadpoolctl==3.6.0
79
+ tokenizers==0.21.1
80
+ toml==0.10.2
81
+ torch==2.6.0
82
+ tornado==6.4.2
83
+ tqdm==4.67.1
84
+ transformers==4.50.2
85
+ typing-extensions==4.13.0
86
+ typing-inspect==0.9.0
87
+ tzdata==2025.2
88
+ urllib3==2.3.0
89
+ watchdog==6.0.0
90
+ yarl==1.18.3
91
+ zstandard==0.23.0
uv.lock ADDED
The diff for this file is too large to render. See raw diff