Abhilashvj committed on
Commit
d947145
·
1 Parent(s): 5c80a9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -30,8 +30,19 @@ def read_docx(file):
30
 
31
  def compare_texts(text1, text2):
32
  d = difflib.Differ()
33
- diff = d.compare(text1.splitlines(), text2.splitlines())
34
- return '\n'.join(diff)
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  st.title('PDF and DOCX Comparison Tool')
37
 
 
30
 
31
  def compare_texts(text1, text2):
32
  d = difflib.Differ()
33
+ diff = list(d.compare(text1.splitlines(), text2.splitlines()))
34
+
35
+ result = []
36
+ page_no = 1
37
+ for line in diff:
38
+ if 'Page' in line: # if a new page starts
39
+ page_no += 1
40
+ elif line.startswith('+ '): # text present in text2 but not in text1
41
+ result.append(f'Additional text detected on page {page_no}')
42
+ elif line.startswith('- '): # text present in text1 but not in text2
43
+ result.append(f'Less text detected on page {page_no}')
44
+
45
+ return "\n".join(set(result)) # using set to remove duplicates
46
 
47
  st.title('PDF and DOCX Comparison Tool')
48