Pattern Search

  • Situation 2:- I want to extract everything after 366,690,304 and just get the movie name. i.e CSI Miami s01e20.HiDef.HDTV - Grave Young Men.avi

    ('21/09/2007 03:31 366,690,304 CSI Miami s01e20.HiDef.HDTV - Grave Young Men.avi')

    but I need to extract the directory name and season i.e Directory of Z:\Series\CSI\CSI Miami\Season 2.

    How do I do that using sql server script

    IF OBJECT_ID('tempdb..#Series') IS NOT NULL

    DROP TABLE #Series

    CREATE TABLE #Series (

    Rowid INT IDENTITY(1,1)

    ,String VARCHAR(200)

    )

    INSERT #Series VALUES('21/09/2007 03:31 366,690,304 CSI Miami s01e20.HiDef.HDTV - Grave Young Men.avi')

    INSERT #Series VALUES('21/09/2007 01:42 366,708,736 CSI Miami s01e21.HiDef.HDTV - Spring Break.avi')

    INSERT #Series VALUES('19/09/2007 04:05 368,050,176 CSI Miami s01e22.HiDef.HDTV - Tinder Box.avi')

    INSERT #Series VALUES('21/09/2007 04:44 366,733,312 CSI Miami s01e23.HiDef.HDTV - Freaks and Tweaks.avi')

    INSERT #Series VALUES('21/09/2007 04:53 367,427,584 CSI Miami s01e24.HiDef.HDTV - Body Count.avi')

    INSERT #Series VALUES(' 24 File(s) 8,802,140,160 bytes')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of Z:\Series\CSI\CSI Miami\Season 2')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('12/05/2009 13:33 <DIR> .')

    INSERT #Series VALUES('12/05/2009 13:33 <DIR> ..')

    INSERT #Series VALUES('24/09/2007 23:51 365,912,064 CSI Miam s02e24 - Innocent.avi')

    INSERT #Series VALUES('25/09/2007 02:34 368,277,504 CSI Miami s02e01 - Blood Brothers.avi')

    INSERT #Series VALUES('24/09/2007 20:55 367,949,824 CSI Miami s02e02 - Dead Zone.avi')

    INSERT #Series VALUES('25/09/2007 01:57 367,446,016 CSI Miami s02e03 - Hard time.avi')

    INSERT #Series VALUES('25/09/2007 02:42 366,909,440 CSI Miami s02e04 - Death Grip.avi')

    INSERT #Series VALUES('25/09/2007 03:57 365,985,792 CSI Miami s02e23 - MIA NYC NonStop.avi')

    INSERT #Series VALUES(' 24 File(s) 8,801,640,448 bytes')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of Z:\Series\CSI\CSI Miami\Season 3')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('12/05/2009 13:33 <DIR> .')

    INSERT #Series VALUES('12/05/2009 13:33 <DIR> ..')

    INSERT #Series VALUES('24/09/2007 17:45 365,987,840 CSI Miami - Season 03 - Episode 01 - Lost Son.avi')

    INSERT #Series VALUES('25/09/2007 04:01 364,562,432 CSI Miami - Season 03 - Episode 22 - Vengeance.avi')

    INSERT #Series VALUES('25/09/2007 03:46 364,847,104 CSI Miami - Season 03 - Episode 23 - Whacked.avi')

    INSERT #Series VALUES('25/09/2007 04:03 367,067,136 CSI Miami - Season 03 - Episode 24 - 10-7.avi')

    INSERT #Series VALUES(' 24 File(s) 9,528,989,696 bytes')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of Z:\Series\CSI\CSI Miami\Season 4')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('15/07/2009 12:50 <DIR> .')

    INSERT #Series VALUES('15/07/2009 12:50 <DIR> ..')

    INSERT #Series VALUES('24/01/2006 23:13 366,571,520 CSI Miami 4x13 Silencer.avi')

    INSERT #Series VALUES('30/01/2006 12:35 366,178,304 CSI Miami 4x14 Fade Out.avi')

    INSERT #Series VALUES('09/04/2007 22:01 160,688,104 X-Men Evolution - 3x12 - Dark Horizon(2).avi')

    INSERT #Series VALUES('15/07/2007 11:56 183,873,536 X-Men Evolution - 3x13 - Cruise Control.avi')

    INSERT #Series VALUES(' 13 File(s) 2,360,287,968 bytes')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of Z:\Series\X-Men Evolution\Season 4')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('28/04/2009 11:56 <DIR> .')

    INSERT #Series VALUES('28/04/2009 11:56 <DIR> ..')

    INSERT #Series VALUES('12/08/2007 17:40 186,238,976 X-Men Evolution - 4x01 - Impact.avi')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of Z:\Series\Zero Punctuation')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('20/03/2009 12:17 <DIR> .')

    INSERT #Series VALUES('20/03/2009 12:17 <DIR> ..')

    INSERT #Series VALUES('14/09/2007 22:25 3,891,021 Zero Punctuation E001 - Darkness Demo For PS3 Review.flv')

    INSERT #Series VALUES('14/09/2007 22:25 4,478,275 Zero Punctuation E002 - Fable The Lost Chapters Review.flv')

    INSERT #Series VALUES('13/08/2007 23:18 1,230,396 Zero Punctuation E003 - And So It Begins.flv')

    INSERT #Series VALUES('13/09/2007 16:46 7,404,214 Zero Punctuation E004 - Heavenly Sword and Other Stuff Review.flv')

    INSERT #Series VALUES('13/09/2007 16:22 5,829,513 Zero Punctuation E005 - Psychonauts Review.flv')

    INSERT #Series VALUES('13/09/2007 16:08 5,368,408 Zero Punctuation E006 - Console Rundown.flv')

    INSERT #Series VALUES('13/09/2007 16:07 4,746,637 Zero Punctuation E007 - Tomb Raider Anniversary Review.flv')

    INSERT #Series VALUES(' 7 File(s) 32,948,464 bytes')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Total Files Listed:')

    INSERT #Series VALUES(' 14843 File(s) 4,189,505,583,082 bytes')

    INSERT #Series VALUES(' 3749 Dir(s) 3,119,153,532,928 bytes free')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Volume in drive C has no label.')

    INSERT #Series VALUES(' Volume Serial Number is 1493-C864')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES(' Directory of C:\BACKUP\Desktop_More')

    INSERT #Series VALUES('NULL')

    INSERT #Series VALUES('12/01/2010 13:33 <DIR> .')

    INSERT #Series VALUES('12/01/2010 13:33 <DIR> ..')

    INSERT #Series VALUES('08/04/2009 20:50 8,434 20090408.xlsx')

    INSERT #Series VALUES('28/04/2009 15:35 10 20090428..txt')

    INSERT #Series VALUES('03/04/2009 09:10 246,272 AppCurrentAccIndividualTIP 14 3 ENG.doc')

    INSERT #Series VALUES('03/04/2009 09:01 52,854 AppCurrentAccIndividualTIP 14 3 ENG.docx')

    INSERT #Series VALUES('12/03/2009 20:29 23,040 Bottom Line.xls')

    INSERT #Series VALUES('28/12/2009 21:25 20,992 BUDGET.xlsx')

  • Why do you put all the data into 1 column ? A little normalization would help.

  • Its directory contents. Thats How I received them in the form of a text file . Its just a string

  • The listing is missing the directory information for season 1. Any chance of you attaching the actual file so I can take a look-see and maybe come up with a rather high speed solution?

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • This might a be a long way around but it will give a heads up, chances are you will optimize it later.

    Split your string according to the default given format (date and time, numbers, season and file name)

    Logic,

    1. Declare your cursor (you need one or alternative).

    2. Declare 3 varchar variables

    3. on your select statement, use substring function to get the date and time which should be around the first 16 characters.

    4. what ever that comes next is the number which is separated by an empty space (so you select your substring from 17th character until value found by Char(" ")[This will give the location of an empty space])

    5. using the the very same char function that got you the space, then you start to substring till the end.

    then inside the cursor you can then save your three string in a table

    😉

  • If I can get an accurate copy of the file, I'll show you how to do this without a cursor. 😉

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • I have attached the sample text file I have received. And this is for Situation 2. Movies is for situation 1

  • Perfect... I'm on my way to work. I'll bang out my version tonight when I get home. It'll be interesting to see what other folks do in the meantime.

    In case anyone wants to give it a try, a simple BULK INSERT with supressed parallelism and a "Quirky Update" should do the job very nicely and very quickly.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Ok... I put your two files in C:\Temp on my home computer. The following imports, rips, and strips the MOVIES.Txt file. About the only thing you need to change to import the SERIES.Txt file is the file name itself.

    As always, the details are in the comments in the code. Fire a flare if you have any questions on the following code...

    --===== Identify a nice safe place to do this demo in

    USE TempDB

    GO

    --CAREFUL!

    --drop table dbo.DirectoryStaging

    --drop view dbo.DirectoryImport

    --===== Create a staging table. It has to be a "real" table

    -- because we're going to build an "import" view for it

    -- instead of using a BCP Format File to control the import.

    CREATE TABLE dbo.DirectoryStaging

    (

    RowNum INT IDENTITY(1,1),

    RawData VARCHAR(1000),

    FilePath VARCHAR(1000),

    FileName VARCHAR(1000),

    FileDate DATETIME,

    FileSize BIGINT

    CONSTRAINT PK_DirectoryStaging

    PRIMARY KEY CLUSTERED (RowNum)

    )

    GO

    --===== Here's the import view. Notice that it contains only the

    -- column we want to import to. Keeps us from having to make

    -- a BCP Format File for this simple task.

    CREATE VIEW dbo.DirectoryImport

    AS

    SELECT RawData

    FROM dbo.DirectoryStaging

    GO

    --===== All set... import the movies file.

    BULK INSERT dbo.DirectoryImport

    FROM 'C:\TEMP\Movies.txt'

    WITH (

    CODEPAGE = 'RAW',

    DATAFILETYPE = 'CHAR',

    TABLOCK

    )

    --===== Declare a very necessary variable to "smear"

    -- the file path (directory) across file rows

    DECLARE @FilePath VARCHAR(1000)

    --===== Change to the "other" date format ;-)

    SET DATEFORMAT dmy

    --===== Split and "smear" the data all in one shot using the "Quirky Update" method.

    UPDATE ds

    SET @FilePath = FilePath =

    CASE

    WHEN RawData NOT LIKE ' Directory of _:\%'

    THEN @FilePath

    ELSE SUBSTRING(RawData,CHARINDEX(':\',RawData)-1,1000)

    END,

    FileName =

    CASE

    WHEN RawData NOT LIKE ' Directory of _:\%'

    THEN SUBSTRING(RAWDATA,37,1000)

    END,

    FileDate =

    CASE

    WHEN RawData NOT LIKE ' Directory of _:\%'

    THEN CAST(LEFT(RAWDATA,17) AS DATETIME)

    END,

    FileSize =

    CASE

    WHEN RawData NOT LIKE ' Directory of _:\%'

    THEN CAST(CAST(SUBSTRING(RAWDATA,19,17) AS MONEY) AS BIGINT)

    END

    FROM dbo.DirectoryStaging ds WITH(TABLOCKX)

    WHERE RawData LIKE ' Directory of _:\%'

    OR (RawData LIKE '[0-3][0-9]/[0-1][0-9]/[0-9][0-9][0-9][0-9] [0-2][0-9]:[0-5][0-9] %'

    AND CHARINDEX('<DIR>',RawData)=0)

    OPTION (MAXDOP 1)

    --====== Let's see what we've got... recommend you also look for dates > today because I found some.

    -- Obviously, you can dump these results into a table

    SELECT * FROM dbo.DirectoryStaging WHERE FileDate > 0

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Thank you.. It works....

  • mranganwa (1/21/2010)


    Thank you.. It works....

    You bet. Thank you for the feedback.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

Viewing 11 posts - 1 through 10 (of 10 total)

You must be logged in to reply to this topic. Login to reply