Integration import and parsing of a CSV file

The process of importing and parsing a CSV file is as follows.

A CSV file is Loaded via the file Content resource (fileContent).

The resulting file is passed as a string to the parsing procedure - as_parsingCSV.

Parsing algorithm.

  1.  The input string is split into strings – the string separator is CHAR(10).
  2.   The first line is considered the header – field names. The number of columns can be variable.
  3. A temporary table with fields from the header is created in the database.
  4.  After creating the table, rows starting from the 2nd are processed. Rows are divided into columns & ndash; the separator is passed in the parameters (@divider). And are inserted into the table. The type of all data will be text - NVARCHAR(MAX).

As a result, we get a table with data from the CSV file.

Procedure parameter as_parsingCSV.

Parameter

Type of data

Default

Note

1

@strCSV

nvarchar(max)

no

 

2

@divider

nvarchar(12)

','

 

3

@param_username

nvarchar(256)

NULL

if isn't specified, the value will be CURRENT_USER

Note.

Tested working with column separators – comma and semicolon.

 

Possible types of errors.

'Error! An empty line was received.'

'Error! Don't find a newline character CHAR(10).'

'Error! Don't find the column separation symbol '

'Error! Don't find the column separation symbol - '+ @divider +' after the first line (title). Perhaps there is no data.'

 

Example of the result of the procedure.

User: admin2;
Received lines: 19;
Column separator: ,
Deleted empty lines: 0;
Table #CSVLoad_tmp: create;
Inserted lines with data: 18;
Columns: [QuotaAmount] , [StartDate] , [OwnerName] , [Username] ;
Lines with data in the table: 18;

CREATE PROCEDURE [dbo].[as_parsingCSV]
				 @strCSV			nvarchar(max) -- -- the input parameter is -- a line
				,@divider			nvarchar(12)  = ','		-- you can specify a different separator, for example ;
			    ,@param_username	nvarchar(256) = NULL	-- v1_4 -- user name
---------------------------------------------------------------------------------

AS
BEGIN

SET NOCOUNT ON;

DECLARE @result_str nvarchar(max)=N'' -- output status string for results

-- user name -- if the user is not specified , we take the server one CURRENT_USER
SET @param_username = ISNULL( @param_username , CURRENT_USER)
SELECT @result_str = @result_str + N'User: '+ @param_username + '; ' + CHAR(10)
----------------------------------------------------------------------------------------------------

-- check the length of the line
DECLARE @lenStr int = 0
SELECT  @lenStr = LEN ( ISNULL( @strCSV , '' ) )

IF @lenStr = 0
BEGIN
	SELECT 0 Result, 'Error! An empty line was received.' Msg
	RETURN
END

-- CHARINDEX -- search for a single character expression inside a second character expression, returning the starting position of the first expression if found.
-- character check - CHAR line translation (10 )/separator - @divider
DECLARE @posCHAR10 int = 0
SELECT  @posCHAR10 = ISNULL( CHARINDEX ( CHAR(10) , @strCSV, 1) , 0 )

IF @posCHAR10 = 0
BEGIN
	SELECT 0 Result, 'Error! Don't find a newline character CHAR(10).' Msg
	RETURN
END

DECLARE @posComma int = 0
SELECT  @posComma = ISNULL( CHARINDEX ( @divider , @strCSV, 1) , 0 )

IF @posComma = 0
BEGIN
	SELECT 0 Result, 'Error! Don't find the column separation symbol '+ @divider AS Msg
	RETURN
END

-- find the first comma in the 2nd line
DECLARE @posComma2 int = 0
SELECT  @posComma2 = CHARINDEX ( @divider , @strCSV, @posCHAR10 )

IF @posComma2 = 0
BEGIN
	SELECT 0 Result, 'Error! Don't find the column separation symbol - '+ @divider +' after the first line (title). Perhaps there is no data.' AS Msg
	RETURN
END

-- The error analysis of the input line is complete -- we believe that the correct line was received
----------------------------------------------------------------------------------------------------
-- 1 Dividing by lines -- 1 line - title

-- save the result to a table variable
DECLARE @T_allStings table ( id int IDENTITY(1,1) NOT NULL
                            ,valueSting nvarchar(max) NULL
							,rowNum int NULL -- number in order -- may not match the id after deletion
                            )

INSERT INTO @T_allStings ( valueSting ) SELECT value FROM STRING_SPLIT( @strCSV, CHAR(10) );


DECLARE @T_allStingsСounter int = 0
SELECT  @T_allStingsСounter = ISNULL ( COUNT( * ) , 0 ) FROM @T_allStings
-- Received NN lines
SELECT @result_str = @result_str + N'Received lines: '+CAST( @T_allStingsСounter AS VARCHAR(10) ) + '; ' + CHAR(10)
SELECT @result_str = @result_str + N'Column separator: '+ @divider + ' '  + CHAR(10)
----------------------------------------------------------------------------------------------------

-- Delete lines with empty values LEN( valueSting ) = 0  -- otherwise an error occurs
DECLARE @valueStingEmptyСounter int = 0
SELECT  @valueStingEmptyСounter = ISNULL ( COUNT( * ) , 0 ) FROM @T_allStings WHERE LEN( valueSting ) = 0


IF @valueStingEmptyСounter > 0
	DELETE  FROM @T_allStings WHERE LEN( valueSting ) = 0

SELECT @result_str = @result_str + N'Deleted empty lines: '+CAST( @valueStingEmptyСounter AS VARCHAR(10) )  + '; '  + CHAR(10)

-- re-sort the - ord field so that the numbering is in order after deletion
UPDATE T_allStings SET T_allStings.rowNum = t1.rowNum
FROM (SELECT   id, row_number() over(ORDER BY id ) rowNum
      FROM     @T_allStings
      ) as t1
     , @T_allStings AS T_allStings
WHERE T_allStings.id = t1.id

----------------------------------------------------------------------------------------------------
DECLARE @tmp_valueSting nvarchar(max)
SELECT  @tmp_valueSting = valueSting FROM @T_allStings WHERE rowNum = 1 -- id = 1

-- The titles is the table's file field
-- The essence of this method is simple: we consistently write value after value to the variable (what is in the variable + the current value), as we read data from the column, adding the necessary separator between the values.
-- https://info-comp.ru/obucheniest/706-get-column-values-in-string-sql.html
DECLARE @ALTER_TABLE_AllText NVARCHAR(MAX);


-- Table #CSVLoad_tmp created separately, below, here is the script for adding fields

SET @ALTER_TABLE_AllText ='ALTER TABLE #CSVLoad_tmp ADD '


 --Declaring a variable for a line - a list of columns with the data type-until all columns NVARCHAR(MAX)
   DECLARE @ALTER_TABLE_СolumnText NVARCHAR(MAX);
   --form line
   SELECT @ALTER_TABLE_СolumnText = ISNULL( @ALTER_TABLE_СolumnText + N' NVARCHAR(MAX) NULL, ','') + QUOTENAME( t.value ) -- QUOTENAME = []
   FROM (
         SELECT value FROM STRING_SPLIT( @tmp_valueSting, @divider )
         ) as t

SET @ALTER_TABLE_СolumnText = @ALTER_TABLE_СolumnText + N' NVARCHAR(MAX) NULL ' -- write the type of the last field

--  in the last column there was a translation to a new line-we remove it-it turned out CHAR(13)
SET @ALTER_TABLE_СolumnText = REPLACE ( @ALTER_TABLE_СolumnText  , CHAR(13) , N'' )

----------------------------------------------------------------------------------------------
-- If the title contains Russian field names, we will recode them function [dbo].[str_cyrillic2Latin]


-- SET @CREATE_TABLE_СolumnText = dbo.str_cyrillic2Latin(lower(ltrim(rtrim( @CREATE_TABLE_СolumnText ))))
-- we don't change the register
SET @ALTER_TABLE_СolumnText = dbo.str_cyrillic2Latin  (ltrim (rtrim ( @ALTER_TABLE_СolumnText )))

----------------------------------------------------------------------------------------------

-- LINE store a comma-separated list of fields to insert, WITHOUT TYPES
DECLARE @insertHeaderСolumn NVARCHAR(MAX);
SET     @insertHeaderСolumn = REPLACE ( @ALTER_TABLE_СolumnText , N'NVARCHAR(MAX) NULL' , N'' )


-- full line ALTER table
SET @ALTER_TABLE_AllText = @ALTER_TABLE_AllText + @ALTER_TABLE_СolumnText -- + ')'
-- Look result -- SELECT @ALTER_TABLE_AllText AS ALTER_TABLE_AllText



-- CREATING TABLE #CSVLoad_tmp
DROP TABLE if exists #CSVLoad_tmp

CREATE TABLE #CSVLoad_tmp ( id int IDENTITY(1,1) NOT NULL , [created] [datetime] DEFAULT GETDATE() )


-- in this script, ALTER columns are added to the table-headers from the file
EXEC ( @ALTER_TABLE_AllText )

IF OBJECT_ID(N'tempdb..#CSVLoad_tmp', N'U') IS NOT NULL
	SELECT @result_str = @result_str + N' Table #CSVLoad_tmp: create' + '; '  + CHAR(10)

IF OBJECT_ID(N'tempdb..#CSVLoad_tmp', N'U') IS  NULL
BEGIN
	SELECT @result_str = @result_str + N' Table #CSVLoad_tmp: IS NOT CREATED !' + '; '  + CHAR(10)
	SELECT 0 Result, 'Error! Table #CSVLoad_tmp: IS NOT CREATED !' Msg
	RETURN
END


----------------------------------------------------------------------------------------------------
-- Filling in data
-- string sorting-line by line
-- the line to separate into fields
-- insert in the table

-- the rowNum field -- number in order -- may not match the id after deletion
-- get the number of lines to insert - subtract the first line-title
DECLARE @dataStringsCounter int = 0
SELECT  @dataStringsCounter = MAX( rowNum ) FROM  @T_allStings

DECLARE @idCounter int = 2 -- counter of string iteration -- 1st line title, started with the 2nd line

--Declaring a variable for a line - column DATA with the data type-until all columns NVARCHAR(MAX)
DECLARE @insertDataСolumn NVARCHAR(MAX);
DECLARE @quotationFirstPos int = 0 -- position of the first quotation mark in the formed line
DECLARE @insertStr NVARCHAR(MAX);  -- Insert line completely with title and data

SELECT @insertHeaderСolumn = N'INSERT INTO #CSVLoad_tmp ( ' + @insertHeaderСolumn + ') VALUES ( ' -- add a permanent part of the insert

WHILE @idCounter <=  @dataStringsCounter
    BEGIN
	    -- SELECT @idCounter AS idCounter

		SELECT  @tmp_valueSting   =  N'' -- reset
		SELECT  @insertDataСolumn =  N'' -- reset
		-- iterating through lines-line by line
		SELECT  @tmp_valueSting = valueSting FROM @T_allStings WHERE rowNum = @idCounter -- id = @idCounter

		-- line - split into fields
		   --Forms line
		   SELECT @insertDataСolumn = ISNULL( @insertDataСolumn + N' , ','') + ''''+( t.value )+'''' -- quotation marks because all text
		   FROM (
				 SELECT value FROM STRING_SPLIT( @tmp_valueSting, @divider )
				 ) as t

		-- position of the first quotation mark in the formed line
		SELECT @quotationFirstPos = CHARINDEX ( '''' , @insertDataСolumn, 1)
		-- if the position is greater than 1, then spaces and a comma appear in front of the line (it is unclear from where?)
		-- crop - to make the line start with quotation marks
		IF  @quotationFirstPos > 1
			SELECT @insertDataСolumn =  SUBSTRING ( @insertDataСolumn , @quotationFirstPos , len(@insertDataСolumn) )

		-- The formation of the insert line
		SELECT @insertStr = @insertHeaderСolumn + @insertDataСolumn + ') '
		--  in the last column there was a translation to a new line-we remove it-it turned out CHAR(13)
		SET @insertStr = REPLACE ( @insertStr , CHAR(13) , N'' )

		-- SELECT @insertStr
		-- SELECT CAST( (@idCounter) AS VARCHAR(10) )  + ' - ' + @insertStr

		-- executing an insert in a table
		EXEC ( @insertStr )

		--
        SET @idCounter = @idCounter + 1
    END;

SELECT @result_str = @result_str + N' Вставлено строк с данными: '+CAST( (@dataStringsCounter -1) AS VARCHAR(10) )  + '; '  + CHAR(10)
----------------------------------------------------------------------------------------------------
-- Data is recorded in the table.
-- data analysis

-- list of columns -- cut part of the line from the insertion script, reference point-brackets
-- the position of the opening bracket
DECLARE @posBracket1 int = 0
SELECT  @posBracket1 = CHARINDEX ( '(' , @insertHeaderСolumn , 1)
-- the position of the closing bracket
DECLARE @posBracket2 int = 0
SELECT  @posBracket2 = CHARINDEX ( ')' , @insertHeaderСolumn , 1)
-- length between the brackets

SELECT @result_str = @result_str + N' Колонки: '+ SUBSTRING ( @insertHeaderСolumn , @posBracket1+1 , ( @posBracket2 - (@posBracket1+1) ) )  + '; '  + CHAR(10)

DECLARE @CSVLoadСounter int = 0
SELECT  @CSVLoadСounter = ISNULL ( COUNT( * ) , 0 ) FROM #CSVLoad_tmp -- [dbo].[CSVLoad_tmp]
SELECT  @result_str = @result_str + N' Cтрок с данными в таблице: '+ CAST( @CSVLoadСounter AS VARCHAR(10) )  + '; '  + CHAR(10)

----------------------------------------------------------------------------------------------------
-- SELECT * FROM #CSVLoad_tmp
-- delete the table in the end --
DROP TABLE #CSVLoad_tmp

-- the result in the format -- 1 SELECT (Result, Msg)
SELECT 1 Result, @result_str AS Msg


END 

Falcon Space is a functional web development platform on a narrow stack MS SQL/Bootstrap. Falcon Space Gettting started
Note