sivan22 committed on
Commit
a2e592b
verified
1 Parent(s): 9dc7296

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +132 -118
tools.py CHANGED
@@ -1,118 +1,132 @@
1
- from langchain_core.tools import tool
2
- from sefaria import get_text as sefaria_get_text, get_commentaries as sefaria_get_commentaries
3
- from tantivy_search import TantivySearch
4
- from typing import Optional
5
- from pydantic import BaseModel, Field
6
-
7
- from app import INDEX_PATH
8
-
9
class ReadTextArgs(BaseModel):
    # Argument schema handed to the `read_text` tool through `args_schema`.
    # The Hebrew example was mis-encoded (UTF-8 bytes rendered as GBK); it is
    # restored here to the intended text.
    reference: str = Field(
        description="The reference to retrieve the text for. examples: בראשית א פרק א, Genesis 1:1"
    )
11
-
12
class SearchArgs(BaseModel):
    # Argument schema handed to the `search` tool through `args_schema`.
    #
    # The `query` description doubles as a query-syntax guide. Compared with
    # the previous text:
    #   * Hebrew terms that were mis-encoded (UTF-8 rendered as GBK) are restored.
    #   * The "Mixed" example had one more ")" than "(", i.e. an invalid query;
    #     the stray parenthesis is removed.
    #   * The examples are numbered consecutively (5 instead of 6).
    # The description lines are intentionally flush-left: any indentation here
    # would become part of the string.
    query: str = Field(description="""the query for the search.
Instructions for generating a query:

1. Boolean Operators:

- AND: term1 AND term2 (both required)
- OR: term1 OR term2 (either term)
- Multiple words default to OR operation (cloud network = cloud OR network)
- AND takes precedence over OR
- Example: Shabath AND (walk OR go)

2. Field-specific Terms:
- Field-specific terms: field:term
- Example: text:אדם AND reference:בראשית
- available fields: text, reference, topics
- text contains the text of the document
- reference contains the citation of the document, e.g. בראשית, פרק א
- topics contains the topics of the document. available topics includes: תנך, הלכה, מדרש, etc.

3. Required/Excluded Terms:
- Required (+): +term (must contain)
- Excluded (-): -term (must not contain)
- Example: +security cloud -deprecated
- Equivalent to: security AND cloud AND NOT deprecated

4. Phrase Search:
- Use quotes: "exact phrase"
- Both single/double quotes work
- Escape quotes with \\"
- Slop operator: "term1 term2"~N
- Example: "cloud security"~2
- the above will find "cloud framework and security "
- Prefix matching: "start of phrase"*

5. Wildcards:
- ? for single character
- * for any number of characters
- Example: sec?rity cloud*

6. Special Features:
- All docs: *
- Boost terms: term^2.0 (positive numbers only)
- Example: security^2.0 cloud
- the above will boost security by 2.0

Query Examples:
1. Basic: +שבת +חולה +אסור
2. Field-specific: text:סיני AND topics:תנך
3. Phrase with slop: "security framework"~2
4. Complex: +reference:בראשית +text:"הבל"^2.0 +(דמי OR דמים) -הבלים
5. Mixed: (text:"רבנו משה"^2.0 OR reference:"משנה תורה") AND topics:הלכה AND text:"תורה המלך"~3 AND NOT topics:מדרש

Tips:
- Group complex expressions with parentheses
- Use quotes for exact phrases
- Add + for required terms, - for excluded terms
- Boost important terms with ^N
- use field-specific terms for better results.
- the corpus to search in is an ancient Hebrew corpus: Tora and Talmud. so Try to use ancient Hebrew terms and or Talmudic expressions and prevent modern words that are not common in talmudic texts
""")
    # Upper bound on the number of hits returned by the search.
    num_results: int = Field(description="the maximum number of results to return. Default: 10", default=10)
74
-
75
-
76
-
77
# Module-level index handle: opened and validated once at import time and
# then used by the `search` tool. Import fails loudly if the index is unusable.
index_path = INDEX_PATH
try:
    tantivy = TantivySearch(index_path)
    tantivy.validate_index()
except Exception as e:
    # Chain explicitly so the traceback marks the original error as the
    # direct cause instead of an unrelated error raised during handling.
    raise Exception(f"failed to create index: {e}") from e
83
-
84
-
85
-
86
@tool(args_schema=SearchArgs)
def search(query: str, num_results: int = 10):
    """Searches the index for the given query."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool
    # description. Each hit is reduced to its 'text' and 'reference' fields,
    # with 'N/A' standing in for whichever one is missing.
    return [
        {
            'text': hit.get('text', 'N/A'),
            'reference': hit.get('reference', 'N/A'),
        }
        for hit in tantivy.search(query, num_results)
    ]
98
-
99
-
100
@tool(args_schema=ReadTextArgs)
def read_text(reference: str) -> dict:
    """Retrieves the text for a given reference.
    """
    # The return annotation used to say `str`, but the function has always
    # returned a dict with 'text' and 'reference' keys; the annotation now
    # matches. The docstring is left untouched because `@tool` exposes it as
    # the tool description.
    text = sefaria_get_text(reference)
    return {
        # Whatever the Sefaria helper returns is stringified for the payload.
        'text': str(text),
        # The requested reference is echoed back unchanged.
        'reference': reference
    }
109
-
110
@tool
def get_commentaries(reference: str, num_results: int = 10) -> dict:
    """Retrieves references to the available commentaries on the given verse (at most num_results)."""
    # Fixes relative to the previous version:
    #   * `num_results` was accepted (and advertised in the tool schema) but
    #     silently ignored; it now caps the number of commentaries returned.
    #   * the return annotation said `str` although a dict is returned.
    #   * list items are stringified before joining, so a non-string entry no
    #     longer raises TypeError.
    commentaries = sefaria_get_commentaries(reference)
    if isinstance(commentaries, list):
        # A negative limit is treated as zero rather than slicing from the end.
        limit = max(num_results, 0)
        text = '\n'.join(str(item) for item in commentaries[:limit])
    else:
        # Non-list results are passed through as their string form.
        text = str(commentaries)
    return {
        'text': text,
        'reference': f"Commentaries on {reference}"
    }
118
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from sefaria import get_text as sefaria_get_text, get_commentaries as sefaria_get_commentaries
3
+ from tantivy_search import TantivySearch
4
+ from typing import Optional
5
+ from pydantic import BaseModel, Field
6
+
7
+ from app import INDEX_PATH
8
+
9
def download_index_from_gdrive(self) -> bool:
    """Download the zipped search index from Google Drive and unpack it.

    Fetches ``https://drive.google.com/uc?id=<self.gdrive_index_id>`` into
    ``index.zip`` in the current working directory, extracts the archive
    into ``.`` and deletes the zip file.

    Returns:
        True when the archive was downloaded, extracted and removed;
        False when any step failed (the error is reported via ``st.error``).
    """
    # NOTE(review): this is a module-level function that takes ``self`` and
    # relies on ``gdown`` and ``st`` being module globals; neither is imported
    # in the visible part of this file -- confirm they are in scope.
    # ``os`` and ``zipfile`` were also used without an import, so they are
    # imported locally here.
    import os
    import zipfile

    zip_path = "index.zip"
    try:
        try:
            url = f"https://drive.google.com/uc?id={self.gdrive_index_id}"
            gdown.download(url, zip_path, quiet=False)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(".")
        finally:
            # Remove the archive on failure too, so a partial or corrupt
            # download is not left behind in the working directory.
            if os.path.exists(zip_path):
                os.remove(zip_path)
        return True

    except Exception as e:
        st.error(f"Failed to download index: {str(e)}")
        return False
22
+
23
class ReadTextArgs(BaseModel):
    # Argument schema handed to the `read_text` tool through `args_schema`.
    # The Hebrew example was mis-encoded (UTF-8 bytes rendered as GBK); it is
    # restored here to the intended text.
    reference: str = Field(
        description="The reference to retrieve the text for. examples: בראשית א פרק א, Genesis 1:1"
    )
25
+
26
class SearchArgs(BaseModel):
    # Argument schema handed to the `search` tool through `args_schema`.
    #
    # The `query` description doubles as a query-syntax guide. Compared with
    # the previous text:
    #   * Hebrew terms that were mis-encoded (UTF-8 rendered as GBK) are restored.
    #   * The "Mixed" example had one more ")" than "(", i.e. an invalid query;
    #     the stray parenthesis is removed.
    #   * The examples are numbered consecutively (5 instead of 6).
    # The description lines are intentionally flush-left: any indentation here
    # would become part of the string.
    query: str = Field(description="""the query for the search.
Instructions for generating a query:

1. Boolean Operators:

- AND: term1 AND term2 (both required)
- OR: term1 OR term2 (either term)
- Multiple words default to OR operation (cloud network = cloud OR network)
- AND takes precedence over OR
- Example: Shabath AND (walk OR go)

2. Field-specific Terms:
- Field-specific terms: field:term
- Example: text:אדם AND reference:בראשית
- available fields: text, reference, topics
- text contains the text of the document
- reference contains the citation of the document, e.g. בראשית, פרק א
- topics contains the topics of the document. available topics includes: תנך, הלכה, מדרש, etc.

3. Required/Excluded Terms:
- Required (+): +term (must contain)
- Excluded (-): -term (must not contain)
- Example: +security cloud -deprecated
- Equivalent to: security AND cloud AND NOT deprecated

4. Phrase Search:
- Use quotes: "exact phrase"
- Both single/double quotes work
- Escape quotes with \\"
- Slop operator: "term1 term2"~N
- Example: "cloud security"~2
- the above will find "cloud framework and security "
- Prefix matching: "start of phrase"*

5. Wildcards:
- ? for single character
- * for any number of characters
- Example: sec?rity cloud*

6. Special Features:
- All docs: *
- Boost terms: term^2.0 (positive numbers only)
- Example: security^2.0 cloud
- the above will boost security by 2.0

Query Examples:
1. Basic: +שבת +חולה +אסור
2. Field-specific: text:סיני AND topics:תנך
3. Phrase with slop: "security framework"~2
4. Complex: +reference:בראשית +text:"הבל"^2.0 +(דמי OR דמים) -הבלים
5. Mixed: (text:"רבנו משה"^2.0 OR reference:"משנה תורה") AND topics:הלכה AND text:"תורה המלך"~3 AND NOT topics:מדרש

Tips:
- Group complex expressions with parentheses
- Use quotes for exact phrases
- Add + for required terms, - for excluded terms
- Boost important terms with ^N
- use field-specific terms for better results.
- the corpus to search in is an ancient Hebrew corpus: Tora and Talmud. so Try to use ancient Hebrew terms and or Talmudic expressions and prevent modern words that are not common in talmudic texts
""")
    # Upper bound on the number of hits returned by the search.
    num_results: int = Field(description="the maximum number of results to return. Default: 10", default=10)
88
+
89
+
90
+
91
# Module-level index handle: opened and validated once at import time and
# then used by the `search` tool. Import fails loudly if the index is unusable.
index_path = INDEX_PATH
try:
    tantivy = TantivySearch(index_path)
    tantivy.validate_index()
except Exception as e:
    # Chain explicitly so the traceback marks the original error as the
    # direct cause instead of an unrelated error raised during handling.
    raise Exception(f"failed to create index: {e}") from e
97
+
98
+
99
+
100
@tool(args_schema=SearchArgs)
def search(query: str, num_results: int = 10):
    """Searches the index for the given query."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool
    # description. Each hit is reduced to its 'text' and 'reference' fields,
    # with 'N/A' standing in for whichever one is missing.
    return [
        {
            'text': hit.get('text', 'N/A'),
            'reference': hit.get('reference', 'N/A'),
        }
        for hit in tantivy.search(query, num_results)
    ]
112
+
113
+
114
@tool(args_schema=ReadTextArgs)
def read_text(reference: str) -> dict:
    """Retrieves the text for a given reference.
    """
    # The return annotation used to say `str`, but the function has always
    # returned a dict with 'text' and 'reference' keys; the annotation now
    # matches. The docstring is left untouched because `@tool` exposes it as
    # the tool description.
    text = sefaria_get_text(reference)
    return {
        # Whatever the Sefaria helper returns is stringified for the payload.
        'text': str(text),
        # The requested reference is echoed back unchanged.
        'reference': reference
    }
123
+
124
@tool
def get_commentaries(reference: str, num_results: int = 10) -> dict:
    """Retrieves references to the available commentaries on the given verse (at most num_results)."""
    # Fixes relative to the previous version:
    #   * `num_results` was accepted (and advertised in the tool schema) but
    #     silently ignored; it now caps the number of commentaries returned.
    #   * the return annotation said `str` although a dict is returned.
    #   * list items are stringified before joining, so a non-string entry no
    #     longer raises TypeError.
    commentaries = sefaria_get_commentaries(reference)
    if isinstance(commentaries, list):
        # A negative limit is treated as zero rather than slicing from the end.
        limit = max(num_results, 0)
        text = '\n'.join(str(item) for item in commentaries[:limit])
    else:
        # Non-list results are passed through as their string form.
        text = str(commentaries)
    return {
        'text': text,
        'reference': f"Commentaries on {reference}"
    }
132
+