Something

Running

App Files Community

Pclanglais committed on Jul 6, 2024

Commit

4405d10

verified ·

1 Parent(s): 7a72935

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -26

app.py CHANGED Viewed

@@ -111,28 +111,38 @@ def split_text(text, max_tokens=500):
     return chunks
 def create_bibtex_entry(data):
-    author = data.get('Author', '')
-    title = data.get('Title', '')
-    journal = data.get('Journal', '')
-    year = data.get('Year', '')
-    volume = data.get('Volume', '')
-    pages = data.get('Pages', '')
-    doi = data.get('Doi', '')
     # Remove "doi: " prefix if present
     doi = doi.replace('doi: ', '')
-    bibtex = f"""@article{{idnothing,
-  author = {{{author}}},
-  title = {{{title}}},
-  journal = {{{journal}}},
-  year = {{{year}}},
-  volume = {{{volume}}},
-  pages = {{{pages}}},
-  doi = {{{doi}}}
-}}"""
     return bibtex
 def transform_chunks(marianne_segmentation):
     marianne_segmentation = pd.DataFrame(marianne_segmentation)
     marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
@@ -155,17 +165,24 @@ def transform_chunks(marianne_segmentation):
             else:
                 bibtex_data[entity_group] = word
             current_entity = entity_group
-        elif entity_group == 'None' and current_entity:
-            bibtex_data[current_entity] += ' ' + word
-        if entity_group == 'title':
-            html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content title-content"><h2>{word}</h2></div></div>')
-        elif entity_group == 'bibliography':
-            html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content bibliography-content">{word}</div></div>')
-        elif entity_group == 'paratext':
-            html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content paratext-content">{word}</div></div>')
-        else:
-            html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
     bibtex_entry = create_bibtex_entry(bibtex_data)

     return chunks
 def create_bibtex_entry(data):
+    author = data.get('Author', '').strip()
+    title = data.get('Title', '').strip()
+    journal = data.get('Journal', '').strip()
+    year = data.get('Year', '').strip()
+    volume = data.get('Volume', '').strip()
+    pages = data.get('Pages', '').strip()
+    doi = data.get('Doi', '').strip()
     # Remove "doi: " prefix if present
     doi = doi.replace('doi: ', '')
+    bibtex = "@article{idnothing,\n"
+    if author: bibtex += f"  author = {{{author}}},\n"
+    if title: bibtex += f"  title = {{{title}}},\n"
+    if journal: bibtex += f"  journal = {{{journal}}},\n"
+    if year: bibtex += f"  year = {{{year}}},\n"
+    if volume: bibtex += f"  volume = {{{volume}}},\n"
+    if pages: bibtex += f"  pages = {{{pages}}},\n"
+    if doi: bibtex += f"  doi = {{{doi}}},\n"
+    bibtex += "}"
     return bibtex
+These changes should result in a more complete and accurate BibTeX entry. The fields will only be included if they have content, and all the information from the input should now be properly captured.
+If you're still experiencing issues after making these changes, please provide an example of the input text you're using and the output you're getting. This will help me further diagnose and resolve any remaining problems.
+Claude does not have the ability to run the code it generates yet.
+Claude can make mistakes. Please double-check responses.
 def transform_chunks(marianne_segmentation):
     marianne_segmentation = pd.DataFrame(marianne_segmentation)
     marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
             else:
                 bibtex_data[entity_group] = word
             current_entity = entity_group
+        elif entity_group == 'None':
+            if current_entity:
+                bibtex_data[current_entity] += ' ' + word
+            else:
+                bibtex_data['None'] = bibtex_data.get('None', '') + ' ' + word
+        html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
+    # Extract year from the 'None' field if present
+    none_content = bibtex_data.get('None', '')
+    year_match = re.search(r'\((\d{4})\)', none_content)
+    if year_match:
+        bibtex_data['Year'] = year_match.group(1)
+    # Extract volume from the 'None' field if present
+    volume_match = re.search(r',\s*(\d+),', none_content)
+    if volume_match:
+        bibtex_data['Volume'] = volume_match.group(1)
     bibtex_entry = create_bibtex_entry(bibtex_data)