Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -111,28 +111,38 @@ def split_text(text, max_tokens=500):
|
|
111 |
return chunks
|
112 |
|
113 |
def create_bibtex_entry(data):
|
114 |
-
author = data.get('Author', '')
|
115 |
-
title = data.get('Title', '')
|
116 |
-
journal = data.get('Journal', '')
|
117 |
-
year = data.get('Year', '')
|
118 |
-
volume = data.get('Volume', '')
|
119 |
-
pages = data.get('Pages', '')
|
120 |
-
doi = data.get('Doi', '')
|
121 |
|
122 |
# Remove "doi: " prefix if present
|
123 |
doi = doi.replace('doi: ', '')
|
124 |
|
125 |
-
bibtex =
|
126 |
-
author = {{{author}}}
|
127 |
-
title = {{{title}}}
|
128 |
-
journal = {{{journal}}}
|
129 |
-
year = {{{year}}}
|
130 |
-
volume = {{{volume}}}
|
131 |
-
pages = {{{pages}}}
|
132 |
-
doi = {{{doi}}}
|
133 |
-
}
|
|
|
134 |
return bibtex
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
def transform_chunks(marianne_segmentation):
|
137 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
138 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
@@ -155,17 +165,24 @@ def transform_chunks(marianne_segmentation):
|
|
155 |
else:
|
156 |
bibtex_data[entity_group] = word
|
157 |
current_entity = entity_group
|
158 |
-
elif entity_group == 'None'
|
159 |
-
|
|
|
|
|
|
|
160 |
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
169 |
|
170 |
bibtex_entry = create_bibtex_entry(bibtex_data)
|
171 |
|
|
|
111 |
return chunks
|
112 |
|
113 |
def create_bibtex_entry(data):
    """Build a BibTeX ``@article`` entry from extracted reference fields.

    Args:
        data: Mapping of capitalised field names (``'Author'``, ``'Title'``,
            ``'Journal'``, ``'Year'``, ``'Volume'``, ``'Pages'``, ``'Doi'``)
            to their extracted text. Missing keys, ``None`` values and
            values that are empty after stripping are skipped.

    Returns:
        The BibTeX entry as a string. The citation key is the fixed
        placeholder ``idnothing``; only fields with content are emitted,
        one per line, in the order listed above.
    """

    def clean(key):
        # Tolerate absent keys, explicit None and non-string values
        # (e.g. an integer year) instead of failing on ``.strip()``.
        value = data.get(key)
        return '' if value is None else str(value).strip()

    doi = clean('Doi')
    # Remove a leading "doi:" label only when it really is a prefix,
    # regardless of letter case or of the whitespace that follows it.
    if doi.lower().startswith('doi:'):
        doi = doi[len('doi:'):].lstrip()

    fields = [
        ('author', clean('Author')),
        ('title', clean('Title')),
        ('journal', clean('Journal')),
        ('year', clean('Year')),
        ('volume', clean('Volume')),
        ('pages', clean('Pages')),
        ('doi', doi),
    ]

    lines = ["@article{idnothing,"]
    # Each field keeps its trailing comma, which BibTeX accepts on the
    # last field as well.
    lines.extend(f" {name} = {{{value}}}," for name, value in fields if value)
    lines.append("}")
    return "\n".join(lines)
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
def transform_chunks(marianne_segmentation):
|
147 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
148 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
|
|
165 |
else:
|
166 |
bibtex_data[entity_group] = word
|
167 |
current_entity = entity_group
|
168 |
+
elif entity_group == 'None':
|
169 |
+
if current_entity:
|
170 |
+
bibtex_data[current_entity] += ' ' + word
|
171 |
+
else:
|
172 |
+
bibtex_data['None'] = bibtex_data.get('None', '') + ' ' + word
|
173 |
|
174 |
+
html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
|
175 |
+
|
176 |
+
# Extract year from the 'None' field if present
|
177 |
+
none_content = bibtex_data.get('None', '')
|
178 |
+
year_match = re.search(r'\((\d{4})\)', none_content)
|
179 |
+
if year_match:
|
180 |
+
bibtex_data['Year'] = year_match.group(1)
|
181 |
+
|
182 |
+
# Extract volume from the 'None' field if present
|
183 |
+
volume_match = re.search(r',\s*(\d+),', none_content)
|
184 |
+
if volume_match:
|
185 |
+
bibtex_data['Volume'] = volume_match.group(1)
|
186 |
|
187 |
bibtex_entry = create_bibtex_entry(bibtex_data)
|
188 |
|