I was just tracking down a bug (it turns out that Python’s datetime handling is a lot of fun) and I came across a very cute bit of code for dumping out the differences between two dataframes.

You’re welcome.

def DiffDataFrames(df1, df2):
  """Return the cell-by-cell differences between two DataFrames.

  Both frames must carry identical row and column labels; the elementwise
  comparison ``df1 != df2`` raises otherwise. Cells that are NaN in both
  frames are treated as equal and are not reported.

  Args:
    df1: The "before" DataFrame.
    df2: The "after" DataFrame, labelled identically to ``df1``.

  Returns:
    A DataFrame with one row per differing cell, indexed by a two-level
    index named ``('ID', 'Column')`` (row label, column label), with the
    value from ``df1`` in column ``'DF1'`` and the value from ``df2`` in
    column ``'DF2'``. The result is empty when nothing differs.
  """
  # NaN != NaN evaluates to True elementwise, so a plain inequality would
  # flag every cell that is missing in both frames; mask those out.
  differs = (df1 != df2) & ~(df1.isna() & df2.isna())

  # Stack the boolean mask into a (row, column)-indexed Series and keep only
  # the True entries; their index labels the differing cells.
  stacked = differs.stack()
  changed = stacked[stacked]
  changed.index.names = ['ID', 'Column']

  # The same mask drives the positional lookup, so the extracted values line
  # up one-to-one (row-major order) with the entries of `changed.index`.
  difference_locations = np.where(differs.values)
  changed_from = df1.values[difference_locations]
  changed_to = df2.values[difference_locations]

  return pd.DataFrame(
      {'DF1': changed_from, 'DF2': changed_to}, index=changed.index)