Category Archives: Python

Comparing Dataframes with Python

I created a "class" that will help compare two dataframes in Python. This is an easy-to-use version compared to the one that was created earlier.
Calling the functions in this class is easy, e.g., Compare.compare(df[3:8], df[3:4])
import pandas as pd

class Compare(object):
    """Convenience helpers for loading, comparing and exporting dataframes.

    Every helper is a static method, so it can be called on the class
    (``Compare.compare(a, b)``) as well as on an instance.
    """

    @staticmethod
    def compare(x, y):
        """Return an outer merge of ``x`` and ``y`` with a difference marker.

        The frames are joined on every column of ``x``. The extra
        ``_difference_`` column is pandas' merge indicator and holds
        ``left_only``, ``right_only`` or ``both`` for each row.

        Example: ``Compare.compare(df[3:8], df[3:4])``
        """
        return pd.merge(x, y, on=list(x), how='outer', indicator='_difference_')

    @staticmethod
    def readxl(file, sheet_name=0, header=0, skiprows=0):
        """Read one worksheet of an Excel workbook into a dataframe.

        The arguments are forwarded unchanged to ``pandas.read_excel``.

        Example: ``Compare.readxl("Financial Sample.xlsx")[3:8]``
        """
        return pd.read_excel(file, sheet_name=sheet_name, header=header, skiprows=skiprows)

    @staticmethod
    def readcboard(sep=','):
        """Read the clipboard contents into a dataframe, split on ``sep``.

        Example: ``Compare.readcboard(sep=',')``
        """
        return pd.read_clipboard(sep=sep)

    @staticmethod
    def writexl(df, file):
        """Write ``df`` to the Excel workbook ``file``.

        Example: ``Compare.writexl(pd.read_clipboard(sep='a'), "prp.xlsx")``
        """
        # The context manager saves and closes the workbook; without it the
        # writer is created but nothing ever reaches the file.
        with pd.ExcelWriter(file) as writer:
            # The row index is omitted, matching writecboard.
            df.to_excel(writer, index=False)

    @staticmethod
    def writecboard(df, excel=True):
        """Copy ``df`` to the clipboard without its row index.

        ``excel`` is forwarded to ``DataFrame.to_clipboard``.

        Example: ``Compare.writecboard(pd.read_clipboard(sep='a'))``
        """
        df.to_clipboard(excel=excel, index=False)

Retrieving Oracle table results from Python

A simple two-liner to retrieve data results from Oracle.

# 1. Download and unzip 
#      Instant Client Package - Basic (64 bit or 32 bit same as your Python client) 
#      from Oracle's Instant Client downloads page
# 2. Add ..\instantclient_12_2 to environment variable "Path"
# 3. Install module cx_Oracle: 
#      pip install C:/cx_Oracle-6.0.2-cp36-cp36m-win_amd64.whl

import cx_Oracle as ora
import pandas as pd
# Open the Oracle connection, load the query result into a dataframe, and
# always release the connection, even if the query raises.
conn = ora.connect("uname", "pwd", "host:port/sid")
try:
    dset = pd.read_sql("SELECT * FROM TABLE", conn)
finally:
    conn.close()

Combining multiple files to one Excel file

The Python code below lets you choose multiple files (.csv, .xls, .xlsx) and combines them into a single multi-worksheet Excel file.

# -*- coding: utf-8 -*-

from tkinter import filedialog
from tkinter import *
from pathlib import Path
import pandas as pd

# Ask the user which files to combine. The root window is hidden so that only
# the file dialog is shown, and destroyed once the selection has been made.
root = Tk()
root.withdraw()
infiles = filedialog.askopenfilenames(
                title = "Select the file you wish to convert",
                filetypes = (("Excel files", "*.xls;*.xlsx"),
                             ("CSV files", "*.csv"),
                             ("all files", "*.*")))
root.destroy()

if not infiles:
    # An explicit check replaces the dangling "except NameError" handler.
    print("No file selected")
else:
    # The workbook is written next to the first selected file.
    outfile = str(Path(infiles[0]).parent / "newfile.xlsx")
    # The context manager saves and closes the workbook when the loop ends.
    with pd.ExcelWriter(outfile, engine='xlsxwriter') as writer:
        for counter, infile in enumerate(infiles, start=1):
            p = Path(infile)
            suffix = p.suffix.lower()
            if suffix == ".csv":
                inp = pd.read_csv(infile, sep = ",")
            elif suffix in (".xlsx", ".xls"):
                inp = pd.read_excel(infile, dtype = str)
            else:
                # Skip rather than reuse the previous file's data.
                print("Skipping unsupported file: " + str(infile))
                continue

            # NOTE(review): the original sheet-name expression was lost; the
            # counter prefix is assumed and keeps same-named files apart.
            sheetname = str(counter) + "_" + p.stem
            inp.to_excel(writer, index = False, sheet_name = sheetname[0:30])