Technical Article

Proper Case A String Or Name

,

SQL 2005 user defined function to convert a string to proper case (title case).

Flexible handling of delimiters and apostrophes to cater for words like "hasn't" and also names like "O'Reilly" and "d'Angelo".

Mac and Mc surnames are handled generically, with some rules and exceptions which may need to be extended.  Since McIntyre and Mcintyre are both valid capitalisations, the input string capitalisation is preserved where capitalisation cannot be conclusively determined as incorrect.  Credits to Kim Ryan's NameParse http://cpan.uwinnipeg.ca/htdocs/Lingua-EN-NameParse/Lingua/EN/NameParse.pm.htm for Mac and Mc exceptions.

This rule could also apply for other family name variants like "d'Angelo" and "D'Angelo" but it has not been applied in this version.

To use in SQL 2000, replace nvarchar(max) with unicode or non-unicode limited length string, and remove semi-colons within the script.

CREATE FUNCTION [dbo].[ProperCase] 
(
@inputnvarchar(max)
) 
/*
Copyright © D.Allen-Williams
SQL 2005 user defined function to convert a string to proper case (title case).

Flexible handling of delimiters and apostrophes to cater for words like "hasn't" and also names like "O'Reilly" and "d'Angelo".

Mac and Mc surnames are handled generically, with some rules and exceptions which may need to be extended.  Since McIntyre and Mcintyre are both valid capitalisations, the input string capitalisation is preserved where capitalisation cannot be conclusively determined as incorrect.  Credits to Kim Ryan's NameParse http://cpan.uwinnipeg.ca/htdocs/Lingua-EN-NameParse/Lingua/EN/NameParse.pm.htm for Mac and Mc exceptions.

This rule could also apply for other name variants like "d'Angelo" and "D'Angelo" but it has not been applied in this version.

To use in SQL 2000, replace nvarchar(max) with unicode or non-unicode limited length string, and remove semi-colons within the script.

Examples:
SELECTdbo.ProperCase(null),
dbo.ProperCase('')
SELECTdbo.ProperCase('gus van der sant'),
dbo.ProperCase('gus van sant')-- this won't work unless the exceptions section is modified
SELECTdbo.ProperCase('machinery macaw mackie macjewski m'),
dbo.ProperCase('MacDonald Macdonald macdonald '),-- middle spelling should be preserved
dbo.ProperCase('McIntyre mcintyre Mcintyre MCINTYRE McinàTyre')
SELECTdbo.ProperCase('23b 8th avenue, o''reilly''s tavern, i''ve found something, haven''t i?')
*/RETURNS nvarchar(max)
AS
BEGIN
-- Return NULL if input string is NULL
IF @input IS NULL RETURN NULL;

-- Declarations
DECLARE @outputnvarchar(max)
,@charnvarchar(1)
,@nextCharnvarchar(1)
,@delimnvarchar(1)
,@posnint
,@nextPosnint
,@lenint
,@lowerExistsbit;

-- Initialisation
SELECT@posn= 1
,@len= LEN(@input)
,@output= LOWER(@input)
,@char= LEFT(LOWER(@input), 1)
,@lowerExists= CASE WHEN LEFT(LOWER(@input), 1) = 'i' THEN 1 ELSE 0 END;

-- Get the first character
WHILE @posn <= @len
BEGIN
-- Skip non-letters (delimiters)
WHILE UNICODE(@char) = UNICODE(UPPER(@char)) AND @posn <= @len
SELECT@delim = @char
,@char  = SUBSTRING(@output, @posn+1, 1)
,@posn  = @posn + 1;

-- Convert the first character after a delimiter to upper case
IF (@delim <> N'''' OR @lowerExists = 0)-- ignore Isn't, but match O'Reilly
   AND (CHARINDEX(@delim,'0123456789') = 0 OR SUBSTRING(@output, @posn, 2) NOT IN ('st','nd','rd','th'))-- ignore 1st, 2nd, 3rd, 4th
   AND @posn <= @len
BEGIN
SET @output = STUFF(@output, @posn, 1, UPPER(@char));

-- Check for McXXX
IF SUBSTRING(@output, @posn, 2) = 'Mc'
BEGIN
-- Find next delimiter
SET @nextPosn = @posn + 1;
WHILE UNICODE(SUBSTRING(@output, @nextPosn, 1)) <> UNICODE(UPPER(SUBSTRING(@output, @nextPosn, 1))) AND @nextPosn <= @len
SET @nextPosn = @nextPosn + 1;

-- Ignore any names less than 5 letters
IF @nextPosn >= @posn + 5
   -- preserve original capitalisation if first letter was capitalised and remaining weren't
   AND (SUBSTRING(@output, @posn, @nextPosn-@posn) COLLATE Latin1_General_CS_AS) NOT LIKE (SUBSTRING(@input, @posn, @nextPosn-@posn) COLLATE Latin1_General_CS_AS)
SET @output = STUFF(@output, @posn + 2, 1, UPPER(SUBSTRING(@output, @posn + 2, 1)));

SET @posn = @nextPosn - 1;-- skip to last character of name
END

-- Check for MacXXX
IF SUBSTRING(@output, @posn, 3) = 'Mac'
BEGIN
-- Find next delimiter and the trailing character of the name
SET @nextPosn = @posn + 2;
WHILE UNICODE(SUBSTRING(@output, @nextPosn, 1)) <> UNICODE(UPPER(SUBSTRING(@output, @nextPosn, 1))) AND @nextPosn <= @len
SELECT@nextChar = SUBSTRING(@output, @nextPosn, 1)
,@nextPosn = @nextPosn + 1;

-- Ignore any names less than 6 letters
IF @nextPosn >= @posn + 6
   -- exclude typical Polish or Italian Mac names ending in a,c,i,o,z or j
   AND @nextChar NOT IN ('a','c','i','o','z','j')
   -- preserve original capitalisation if first letter was capitalised and remaining weren't
   AND (SUBSTRING(@output, @posn, @nextPosn-@posn) COLLATE Latin1_General_CS_AS) NOT LIKE (SUBSTRING(@input, @posn, @nextPosn-@posn) COLLATE Latin1_General_CS_AS)
SET @output = STUFF(@output, @posn + 3, 1, UPPER(SUBSTRING(@output, @posn + 3, 1)));

SET @posn = @nextPosn - 1;-- skip to last character of name
END

END

-- Increment position.  Treat as if the apostrophe follows non capitals for I've, I'm, I'd.
SELECT@lowerExists = CASE WHEN @char = 'i' THEN 1 ELSE 0 END
,@char  = SUBSTRING(@output, @posn+1, 1)
,@posn  = @posn + 1;

-- Skip remaining letters.
WHILE UNICODE(@char) <> UNICODE(UPPER(@char)) AND @posn <= @len
SELECT@lowerExists = 1
,@char  = SUBSTRING(@output, @posn+1, 1)
,@posn  = @posn + 1;
END

-- Handle special name cases
SET @output = ' ' + @output + ' ';-- pad so we can find matches at the start and end of the string
SET @output = REPLACE(@output, ' D''',  ' d''');
SET @output = REPLACE(@output, ' De ',  ' de ');
SET @output = REPLACE(@output, ' Del ', ' del ');
SET @output = REPLACE(@output, ' Der ', ' der ');
SET @output = REPLACE(@output, ' Di ',  ' di ');
SET @output = REPLACE(@output, ' Du ',  ' du ');
SET @output = REPLACE(@output, ' El ',  ' el ');
SET @output = REPLACE(@output, ' La ',  ' la ');
SET @output = REPLACE(@output, ' Le ',  ' le ');
SET @output = REPLACE(@output, ' Lo ',  ' lo ');
SET @output = REPLACE(@output, ' Los ', ' los ');
SET @output = REPLACE(@output, ' Von ', ' von ');
SET @output = REPLACE(@output, ' Van de ', ' van de ');
SET @output = REPLACE(@output, ' Van der ',' van der ');
-- SET @output = REPLACE(@output, ' Van ', ' van '); -- don't replace because this might be a motor vehicle
SET @output = SUBSTRING(@output, 2, @len);-- remove padding

-- Correct Mac exceptions
SET @output = REPLACE(@output, 'MacHin','Machin');-- machine and variants
SET @output = REPLACE(@output, 'MacHlin','Machlin');
SET @output = REPLACE(@output, 'MacKle','Mackle');
SET @output = REPLACE(@output, 'MacKlin','Macklin');
SET @output = REPLACE(@output, 'MacKie','Mackie');
SET @output = REPLACE(@output, 'MacHado','Machado');-- Portuguese
SET @output = REPLACE(@output, 'MacEvicius','Macevicius');-- Lithuanian
SET @output = REPLACE(@output, 'MacIulis','Maciulis');-- Lithuanian
SET @output = REPLACE(@output, 'MacIas','Macias');-- Lithuanian
SET @output = REPLACE(@output, 'Macmurdo','MacMurdo');-- only 'Mac' name ending in 'o'?
-- SET @output = REPLACE(@output, 'MacHar','Machar');-- not an exception, MacHarg is valid

RETURN @output;
END

Rate

5 (2)

You rated this post out of 5. Change rating

Share

Share

Rate

5 (2)

You rated this post out of 5. Change rating