Category Archives: Tools

Converting CSV to Table rows in SQL Server and Oracle

Another useful block of code that will help in converting CSV to a table row.

with temp as
(
    select 108 Id, 'australia, australia, united states of america, iran' str
    UNION ALL
    select 109, 'india, iran, iraq'
    UNION ALL
    SELECT 109, 'israel, australia'
), xmlData as
(
     SELECT Id,  
         CAST ('<M>' + REPLACE(str, ',', '</M><M>') + '</M>' AS XML) AS Data  
     FROM  temp
)
SELECT xmlData.Id,  
     ltrim(rtrim(Split.a.value('.', 'VARCHAR(100)'))) AS Data  
FROM  xmlData CROSS APPLY Data.nodes ('/M') AS Split(a)

and in Oracle,

with temp as
(
    select 108 Id, 'australia, australia, united states of america, iran' str  from dual
    UNION ALL
    select 109, 'india, iran, iraq' from dual
    UNION ALL
    SELECT 109, 'israel, australia' FROM dual
)
SELECT DISTINCT ID, ltrim(regexp_substr(str, '[^' || ',' || ']+', 1, LEVEL)) str
FROM temp T
CONNECT BY regexp_substr(str, '[^' || ',' || ']+', 1, LEVEL) IS NOT NULL
ORDER BY ID;

Preparing NULL counts for SQL and Oracle data tables

I am posting this one after a very long time. Just thought this information will be useful for someone.

Here is the T-SQL for SQL Server that will fetch the columns in a table and provide you the NULL, ZERO, NEGATIVE and UNIQUE count of each table columns.

USE <database>
GO

BEGIN
	SET NOCOUNT ON
	DECLARE @TAB NVARCHAR(100) = <table_name>
	DECLARE @SQL NVARCHAR(500)
	DECLARE @RECORDS INT, @TOTRECORDS INT
	DECLARE @OBJECT_ID INT,
		@TOTAL INT
	DECLARE @COL NVARCHAR(100),
		@COLTYPE NVARCHAR(100),
		@TS NVARCHAR(100)
	SELECT @OBJECT_ID = OBJECT_ID FROM SYS.TABLES WHERE NAME = @TAB

	DECLARE col_cursor CURSOR FOR 
	SELECT	COLUMNS.NAME COLNAME, SYSTYPES.NAME COLTYPE
	FROM SYS.COLUMNS 
		INNER JOIN SYS.systypes ON SYSTEM_TYPE_ID = XUSERTYPE
		WHERE object_id = @OBJECT_ID;

	OPEN col_cursor
	FETCH NEXT FROM col_cursor INTO @COL, @COLTYPE

	set @sql = N'SELECT @ptotrecords = count(1) FROM ' + @TAB 
	EXECUTE sp_executeSQL
		@sql,
        N'@ptotrecords INT OUTPUT', @ptotrecords=@totrecords OUTPUT

    WHILE @@FETCH_STATUS = 0
    BEGIN
		set @sql = N'SELECT @precords = count(1) FROM ' + @TAB + ' WHERE ' + @COL + ' IS NULL'
		EXECUTE sp_executeSQL
			@sql,
            N'@precords INT OUTPUT', @precords=@records OUTPUT
		PRINT(convert(varchar, CURRENT_TIMESTAMP) + CHAR(9)+ @TAB + '.' + @COL + '[' + @COLTYPE + ']' + CHAR(9) + 'NULL' + CHAR(9) + convert(varchar, @records) + CHAR(9) + convert(varchar, @totrecords)+ CHAR(9) + convert(varchar, @records*1.00/@totrecords*100))

		set @sql = N'SELECT @precords = count(distinct ' + @COL + ') FROM ' + @TAB  + ' WHERE ' + @COL + ' IS NOT NULL'
		EXECUTE sp_executeSQL
			@sql,
            N'@precords INT OUTPUT', @precords=@records OUTPUT
		PRINT(convert(varchar, CURRENT_TIMESTAMP) + CHAR(9)+ @TAB + '.' + @COL + '[' + @COLTYPE + ']' + CHAR(9) + 'UNIQUE' + CHAR(9) + convert(varchar, @records) + CHAR(9) + convert(varchar, @totrecords)+ CHAR(9) + convert(varchar, @records*1.00/@totrecords*100))

		IF @COLTYPE IN ('MONEY', 'NUMERIC', 'INT') 
		BEGIN
			set @sql = N'SELECT @precords = count(1) FROM ' + @TAB + ' WHERE ' + @COL + ' = 0'
			EXECUTE sp_executeSQL
				@sql,
				N'@precords INT OUTPUT', @precords=@records OUTPUT
			PRINT(convert(varchar, CURRENT_TIMESTAMP) + CHAR(9)+ @TAB + '.' + @COL + '[' + @COLTYPE + ']' + CHAR(9) + 'ZERO' + CHAR(9) + convert(varchar, @records) + CHAR(9) + convert(varchar, @totrecords)+ CHAR(9) + convert(varchar, @records*1.00/@totrecords*100))
			set @sql = N'SELECT @precords = count(1) FROM ' + @TAB + ' WHERE ' + @COL + ' < 0'
			EXECUTE sp_executeSQL
				@sql,
				N'@precords INT OUTPUT', @precords=@records OUTPUT
			PRINT(convert(varchar, CURRENT_TIMESTAMP) + CHAR(9)+ @TAB + '.' + @COL + '[' + @COLTYPE + ']' + CHAR(9) + 'NEGATIVE' + CHAR(9) + convert(varchar, @records) + CHAR(9) + convert(varchar, @totrecords)+ CHAR(9) + convert(varchar, @records*1.00/@totrecords*100))
		END
		FETCH NEXT FROM col_cursor INTO @COL, @COLTYPE
    END
	CLOSE col_cursor;
	DEALLOCATE col_cursor;
END

PL/SQL code below does a similar job in Oracle.

SET SERVEROUTPUT ON SIZE 1000000
DECLARE
   L_ROW_COUNT                        PLS_INTEGER;
   L_TOTAL_COUNT                        PLS_INTEGER;
   COL  VARCHAR(200);
BEGIN
  FOR Y IN (SELECT * FROM ALL_TABLES WHERE TABLESPACE_NAME = '<table space name>' AND TABLE_NAME LIKE '<table name>%'  ORDER BY OWNER, TABLE_NAME ) LOOP
    EXECUTE IMMEDIATE    'SELECT COUNT(*)FROM ' || Y.OWNER || '.' || Y.TABLE_NAME INTO L_TOTAL_COUNT;
     FOR x IN (SELECT * FROM all_tab_columns WHERE table_name = Y.TABLE_NAME and owner = Y.OWNER ORDER BY  column_id ) LOOP
        COL := X.TABLE_NAME || '.' || X.COLUMN_NAME || '[' || X.DATA_TYPE || ']' || CHR(9) ;
  
        EXECUTE IMMEDIATE    'SELECT COUNT(*) FROM ' || X.OWNER || '.' || X.TABLE_NAME || ' WHERE ' || X.TABLE_NAME || '.' || X.COLUMN_NAME || ' IS NULL' INTO L_ROW_COUNT;
        DBMS_OUTPUT.PUT_LINE (COL || 'NULL' || CHR(9) || TO_CHAR (l_row_count, 'fm999,999,999,990') || CHR(9) || TO_CHAR (L_TOTAL_COUNT, 'fm999,999,999,990') || CHR(9) || TO_CHAR (L_ROW_COUNT*1.0/L_TOTAL_COUNT*100.0, 'fm999,999,999,990'));
  
        EXECUTE IMMEDIATE    'SELECT COUNT(DISTINCT ' || X.TABLE_NAME || '.' || X.COLUMN_NAME || ') FROM ' || X.OWNER || '.' || X.TABLE_NAME INTO L_ROW_COUNT;
        DBMS_OUTPUT.PUT_LINE (COL || 'UNIQUE' || CHR(9) || TO_CHAR (L_ROW_COUNT, 'fm999,999,999,990')|| CHR(9) || TO_CHAR (L_TOTAL_COUNT, 'fm999,999,999,990') || CHR(9) || TO_CHAR (L_ROW_COUNT*1.0/L_TOTAL_COUNT*100.0, 'fm999,999,999,990'));
        IF X.DATA_TYPE IN ( 'NUMBER') THEN
          EXECUTE IMMEDIATE    'SELECT COUNT(*) FROM ' || X.OWNER || '.' || X.TABLE_NAME || ' WHERE ' || X.TABLE_NAME || '.' || X.COLUMN_NAME || ' = 0' INTO L_ROW_COUNT;
          DBMS_OUTPUT.PUT_LINE (COL || 'ZERO' || CHR(9) || TO_CHAR (L_ROW_COUNT, 'fm999,999,999,990')|| CHR(9) || TO_CHAR (L_TOTAL_COUNT, 'fm999,999,999,990') || CHR(9) || TO_CHAR (L_ROW_COUNT*1.0/L_TOTAL_COUNT*100.0, 'fm999,999,999,990'));
          
          EXECUTE IMMEDIATE    'SELECT COUNT(*) FROM ' || X.OWNER || '.' || X.TABLE_NAME || ' WHERE ' || X.TABLE_NAME || '.' || X.COLUMN_NAME || ' < 0' INTO L_ROW_COUNT;
          DBMS_OUTPUT.PUT_LINE (COL || 'NEGATIVE' || CHR(9) || TO_CHAR (L_ROW_COUNT, 'fm999,999,999,990')|| CHR(9) || TO_CHAR (L_TOTAL_COUNT, 'fm999,999,999,990') || CHR(9) || TO_CHAR (L_ROW_COUNT*1.0/L_TOTAL_COUNT*100.0, 'fm999,999,999,990'));
        END IF;
     END LOOP;
  END LOOP;
END;

Converting an image to text

There would have been many instances when you were looking for a way to convert text inside an image to a text (OCR). Here is a solution for you. Copy the image containing the text to Microsoft OneNote. Then right click the image and press Copy Text from Picture.

Four apps that adds value to my Google Nexus 7 tablet

First I paid only around $250 for the tablet with $25 on your Google Play account with which you can buy paid apps. But let us find what value it brings. Here are four free apps that I love.

  • ES File Explorer allows music streaming from Windows computer to Nexus through WiFi. This app can also be used to explore the tablet’s file system, or an FTP address, or Dropbox, Skydrive, and more more.
  • Google Translate is cool and it simply translates your speech to nearly any language. Oh, that’s good time-pass or can  be treated a language learning tool.
  • Navigation replaces your GPS Navigation system.
  • I can even pair my LG TV with my YouTube app and stream my videos directly on my TV

How much have you saved now with this?

Levenshtein and Damerau–Levenshtein edit distance

I have put the functions that can be embedded as excel functions to calculate the distance between two strings using the algorithms Levenshtein and Damerau–Levenshtein. The four functions available are:

  • Levenshtein – returns the edit distance between two words
  • Damerau – returns the edit distance between two words (a variant of Levenshtein with Transposition)
  • NormalizedLevenshtein – returns Levenshtein score between 0 and 1
  • NormalizedDamerau – returns Damerau-Levenshtein score between 0 and 1
Function Levenshtein(ByVal String1 As String, ByVal String2 As String) As Long

    Dim i As Long, j As Long
    Dim lngString1 As Long
    Dim lngString2 As Long
    Dim Distance() As Long

    lngString1 = Len(String1)
    lngString2 = Len(String2)
    ReDim Distance(lngString1, lngString2)

    For i = 0 To lngString1
        Distance(i, 0) = i
    Next

    For j = 0 To lngString2
        Distance(0, j) = j
    Next

    For i = 1 To lngString1
        For j = 1 To lngString2
            If Asc(Mid$(String1, i, 1)) = Asc(Mid$(String2, j, 1)) Then
                Distance(i, j) = Distance(i - 1, j - 1)
            Else
                Distance(i, j) = Application.WorksheetFunction.Min _
                (Distance(i - 1, j) + 1, _
                 Distance(i, j - 1) + 1, _
                 Distance(i - 1, j - 1) + 1)
            End If

        Next
    Next

    Levenshtein = Distance(lngString1, lngString2)

End Function

Function Damerau(String1 As String, String2 As String) As Long
    Dim i As Long, j As Long
    Dim lngString1 As Long
    Dim lngString2 As Long
    Dim Distance() As Long
    Dim cost As Integer

    lngString1 = Len(String1)
    lngString2 = Len(String2)
    ReDim Distance(lngString1 + 1, lngString2 + 1)

    For i = 0 To lngString1
        Distance(i, 0) = i
    Next

    For j = 0 To lngString2
        Distance(0, j) = j
    Next

    For i = 1 To lngString1
        For j = 1 To lngString2
            If Asc(Mid$(String1, i, 1)) = Asc(Mid$(String2, j, 1)) Then
                cost = 0
            Else
                cost = 1
            End If

            Distance(i, j) = Application.WorksheetFunction.Min _
            (Distance(i - 1, j) + 1, _
             Distance(i, j - 1) + 1, _
             Distance(i - 1, j - 1) + cost)

            If (i > 1 And j > 1) Then
                If (Asc(Mid$(String1, i, 1)) = Asc(Mid$(String2, j - 1, 1)) And Asc(Mid$(String1, i - 1, 1)) = Asc(Mid$(String2, j, 1))) Then
                Distance(i, j) = Application.WorksheetFunction.Min _
                    (Distance(i, j), _
                     Distance(i - 2, j - 2) + cost)
                End If
            End If

        Next
    Next

    Damerau = Distance(lngString1, lngString2)

End Function

Function NormalizedDamerau(ByVal String1 As String, ByVal String2 As String) As Double
    Dim lngDamerau
    lngDamerau = Damerau(String1, String2)
    NormalizedDamerau = 1 - lngDamerau / IIf((Len(String1) + 1 > Len(String2) + 1), Len(String1) + 1, Len(String2) + 1)
End Function

Function NormalizedLevenshtein(ByVal String1 As String, ByVal String2 As String) As Double
    Dim lngLevenshtein
    lngLevenshtein = Levenshtein(String1, String2)
    NormalizedLevenshtein = 1 - lngLevenshtein / IIf((Len(String1) + 1 > Len(String2) + 1), Len(String1) + 1, Len(String2) + 1)
End Function

Connecting to SQL Server from Oracle’s SQL Developer

All you have to do is download Microsoft JDBC Driver for SQL Server and extract it to a folder and then add the driver in SQL Developer.

To add the driver in SQL Developer click Tools –> Preferences… and select Database –> Third Party JDBC Drivers and add the extract .jar files. Now that you have added the driver, you will see the SQL Server tab under “New Connection…” window.

Oracle’s SQL Developer is free and can be downloaded from here.

Splitting large files using VBScript

Ever thought that you wanted to split a large multi-gigabit file into multiple chunks. Here is a simple VB script file that will do the trick for you. It may take a longer time for larger files, but it will serve its purpose.

''This simple VBScript spilts large text files into multiple files

Dim  Counter
Const InputFile = "C:\input.txt"
Const OutputFile = "C:\output"
Const RecordSize = 200000
Const ForReading = 1
Const ForWriting = 2
Const ForAppending = 8
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objTextFile = objFSO.OpenTextFile (InputFile, ForReading)
Counter = 0
FileCounter = 0
Set objOutTextFile = Nothing

Do Until objTextFile.AtEndOfStream
	if Counter = 0 Or Counter = RecordSize Then
		Counter = 0
		FileCounter = FileCounter + 1
		if Not objOutTextFile is Nothing then objOutTextFile.Close
		Set objOutTextFile = objFSO.OpenTextFile( OutputFile & "_" & FileCounter & ".csv", ForWriting, True)
	end if
	strNextLine = objTextFile.Readline
	objOutTextFile.WriteLine(strNextLine)
	Counter = Counter + 1
Loop
objTextFile.Close
objOutTextFile.Close
Msgbox "Split process complete"

Constants that may require changes are InputFile-the input file name, OutputFile-the output file path along with the output file prefix and Recordsize-number of line in each output file. Multiple files beginning with [OutputFile] will get created.

Running web applications from command prompt

With the ASP.NET Development Server it is easy to run the web application without opening Visual Studio. Usually I have an AutoHotKey entry to start the server and open the web page.

"C:\Program Files\Common Files\Microsoft Shared\DevServer\10.0\WebDev.WebServer40.EXE" /port:9999 /path:"C:\Documents and Settings\prabhuram\My Documents\Visual Studio 2010\Projects\demoapp\webapp"

and call the web page simply like this.

"C:\Documents and Settings\prabhuram\Local Settings\Application Data\Google\Chrome\Application\chrome.exe" "http://localhost:9999/"

You may have to substitute prabhuram with your profile name. In the above statement I am opening the page using Google’s chrome, you may want to use firefox.exe or iexplore.exe. So with the following command in my AutoHotKey file, Windows Key+1 will do both the above mentioned jobs in one key event.

#1::
Run "C:\Program Files\Common Files\Microsoft Shared\DevServer\10.0\WebDev.WebServer40.EXE" /port:9999 /path:"C:\Documents and Settings\prabhuram\My Documents\Visual Studio 2010\Projects\demoapp\webapp"
Run "C:\Documents and Settings\prabhuram\Local Settings\Application Data\Google\Chrome\Application\chrome.exe" "http://localhost:9999/"
return