tien314 committed on
Commit
e2982b0
·
verified ·
1 Parent(s): 7ce3015

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -27
app.py CHANGED
@@ -4,7 +4,7 @@ from operator import itemgetter
4
  import os
5
  import re
6
  import pandas as pd
7
-
8
 
9
  @st.cache_data
10
  def load_data():
@@ -17,42 +17,92 @@ def load_data():
17
 
18
  return retriever
19
 
20
- def extract_hscode(text):
21
- match = re.search(r'hs_code:\s*(\d+)', text)
22
- if match:
23
- return match.group(1)
24
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- df2 = pd.read_csv("hscode_main.csv")
27
- new_col = [len(str(code))for code in df2['hs_code'].to_list()]
28
- df2['len'] = new_col
29
 
30
- new_hscode = [str(code) for code in df2['hs_code']]
31
 
32
- for i in range(len(new_col)):
33
- if new_col[i]==5:
34
- new_hscode[i] = '0'+ new_hscode[i]
35
- df2['hs_code'] = new_hscode
36
- df2=df2.drop(columns='len')
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  if 'retriever' not in st.session_state:
39
  st.session_state.retriever = None
40
 
 
 
 
41
  if st.session_state.retriever is None:
42
  st.session_state.retriever = load_data()
43
 
44
-
 
 
45
  sentence = st.text_input("please enter description:")
46
 
47
  if sentence !='':
48
- results,_ = st.session_state.retriever.retrieve(bm25s.tokenize(sentence), k=5)
49
- doc = [d for d in results]
50
- hscodes = [extract_hscode(item) for item in doc[0]]
51
- for code in hscodes:
52
- if len(code)==5:
53
- code = '0'+ code
54
-
55
- filter_df = df2[df2['hs_code']==code]
56
- answer = filter_df['description'].iloc[0]
57
- st.write("Hscode:",code)
58
- st.write("Description:",answer.lower())
 
4
  import os
5
  import re
6
  import pandas as pd
7
+ from langchain_groq import ChatGroq
8
 
9
  @st.cache_data
10
  def load_data():
 
17
 
18
  return retriever
19
 
20
+ # def extract_hscode(text):
21
+ # match = re.search(r'hs_code:\s*(\d+)', text)
22
+ # if match:
23
+ # return match.group(1)
24
+ # return None
25
+
26
+ # df2 = pd.read_csv("hscode_main.csv")
27
+ # new_col = [len(str(code))for code in df2['hs_code'].to_list()]
28
+ # df2['len'] = new_col
29
+
30
+ # new_hscode = [str(code) for code in df2['hs_code']]
31
+
32
+ # for i in range(len(new_col)):
33
+ # if new_col[i]==5:
34
+ # new_hscode[i] = '0'+ new_hscode[i]
35
+ # df2['hs_code'] = new_hscode
36
+ # df2=df2.drop(columns='len')
37
+
38
+ # if 'retriever' not in st.session_state:
39
+ # st.session_state.retriever = None
40
 
41
+ # if st.session_state.retriever is None:
42
+ # st.session_state.retriever = load_data()
 
43
 
 
44
 
45
+ # sentence = st.text_input("please enter description:")
 
 
 
 
46
 
47
+ # if sentence !='':
48
+ # results,_ = st.session_state.retriever.retrieve(bm25s.tokenize(sentence), k=5)
49
+ # doc = [d for d in results]
50
+ # hscodes = [extract_hscode(item) for item in doc[0]]
51
+ # for code in hscodes:
52
+ # if len(code)==5:
53
+ # code = '0'+ code
54
+
55
+ # filter_df = df2[df2['hs_code']==code]
56
+ # answer = filter_df['description'].iloc[0]
57
+ # st.write("Hscode:",code)
58
+ # st.write("Description:",answer.lower())
59
+
60
def load_model():
    """Build the prompt-to-LLM chain used to pick an HS code.

    The prompt exposes two placeholders, ``context`` and ``description``,
    which the caller supplies as keys of the mapping passed to
    ``chain.invoke``.

    Returns:
        The runnable obtained by piping the chat prompt into the Groq chat
        model (``prompt | llm``).

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is unset
            or empty.
    """
    # Secrets must never live in source control: read the key from the
    # environment instead of a string literal.
    # NOTE(review): the keys that were previously committed in this function
    # should be treated as compromised and revoked.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "configure it as a secret before starting the app."
        )

    # Plain (non-f) string: the single braces are template placeholders that
    # the prompt template fills in at invoke time. The wording now asks for a
    # 6-digit code throughout, matching the "Only return ..." line and the
    # example (the original opening sentence said "8-digit").
    template = """
    Extract the appropriate 6-digit HS Code base on the product description and retrieved document by thoroughly analyzing its details and utilizing a reliable and up-to-date HS Code database for accurate results.
    Only return the HS Code as a 6-digit number .
    Example: 123456
    Context: {context}
    Description: {description}
    Answer:
    """
    prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(template)]
    )

    # temperature=0 keeps the answer as deterministic as the model allows.
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
        api_key=api_key,
    )
    return prompt | llm
83
+
84
def process_input(sentence):
    """Retrieve candidate documents for a product description.

    Tokenizes ``sentence`` with ``bm25s.tokenize``, asks the retriever stored
    in ``st.session_state`` for the top 15 hits, and wraps the ``'text'``
    field of each hit for the first (only) query in a ``Document``.

    Args:
        sentence: The free-text product description entered by the user.

    Returns:
        A list of ``Document`` objects, one per retrieved hit.
    """
    query_tokens = bm25s.tokenize(sentence)
    hits, _scores = st.session_state.retriever.retrieve(query_tokens, k=15)
    # ``hits`` is indexed per query; a single query was submitted, so only
    # row 0 is read.
    return [Document(hit['text']) for hit in hits[0]]
90
+
91
# Make sure both session slots exist before they are inspected below.
for _slot in ('retriever', 'chain'):
    if _slot not in st.session_state:
        st.session_state[_slot] = None

# Populate each slot only while it is still empty, so reruns of the script
# within the same session reuse the stored objects.
if st.session_state.retriever is None:
    st.session_state.retriever = load_data()

if st.session_state.chain is None:
    st.session_state.chain = load_model()

sentence = st.text_input("please enter description:")

# Only query the chain once the user has actually typed something.
if sentence != "":
    documents = process_input(sentence)
    payload = {'context': documents, 'description': sentence}
    hscode = st.session_state.chain.invoke(payload)
    st.write("answer:", hscode.content)