Ensuring No Duplicates

  • I have this stored procedure that is inserting/updataing a staging tabe and another table. I dont have access to the Azure Data Factory.

    (a) I would like to know if i can be assured if no duplicates gets passed, see SQL:


    ALTER PROCEDURE [dbo].[sp_Process_stageAFS]
    AS
    BEGIN
    WITH CTE AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY sa.FuelEventId ORDER BY sa.FleetId) rn
    FROM stageAFSData AS sa
    )
    DELETE CTE WHERE rn > 1

    /* INSERT MISSING MODELS */
    INSERT INTO lstModel
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT ISNULL(Model, '(To Be Updated)'),
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstModel AS lm
    ON lm.Name = ISNULL(sa.Model, '(To Be Updated)')
    WHERE lm.Id IS NULL


    /* INSERT MISSING MAKES */
    INSERT INTO lstMake
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT ISNULL(Make, '(To Be Updated)'),
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstMake AS lm
    ON lm.Name = ISNULL(sa.Make, '(To Be Updated)')
    WHERE lm.Id IS NULL
    /* INSERT MISSING CONSUMPTION TYPES */
    INSERT INTO lstConsumptionType
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT sa.ConsumptionType,
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstConsumptionType AS lct
    ON lct.Name = sa.ConsumptionType
    WHERE sa.ConsumptionType IS NOT NULL
    AND lct.Id IS NULL



    /* INSERT MISSING EQUIPMENT */

    INSERT INTO lstEquipment
    (
    -- Id -- this column value is auto-generated
    FleetId,
    RegNumber,
    EquipmentDescription,
    ModelId,
    MakeId,
    TankSize,
    ConsumptionTypeId,
    DecommissionDate,
    CreateDate,
    ModifyDate,
    IsActive
    )
    SELECT TRIM(REPLACE(sa.FleetId, '-', '')) /*{ FleetId }*/,
    MIN(sa.RegNumber) /*{ RegNumber }*/,
    NULL /*{ EquipmentDescription }*/,
    MIN(lmo.Id) /*{ ModelId }*/,
    MIN(lma.Id) /*{ MakeId }*/,
    -1 /*{ TankSize }*/,
    MAX(lct.Id) /*{ ConsumptionTypeId }*/,
    NULL /*{ DecommissionDate }*/,
    GETDATE() /*{ CreateDate }*/,
    GETDATE() /*{ ModifyDate }*/,
    1 /*{ IsActive }*/
    FROM stageAFSData AS sa
    LEFT JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstModel AS lmo
    ON lmo.Name = ISNULL(sa.Model, '(To Be Updated)')
    JOIN dbo.lstMake AS lma
    ON lma.Name = ISNULL(sa.Make, '(To Be Updated)')
    LEFT JOIN dbo.lstConsumptionType AS lct
    ON lct.Name = sa.ConsumptionType
    WHERE le.FleetId IS NULL
    AND sa.FleetId IS NOT NULL
    GROUP BY
    TRIM(REPLACE(sa.FleetId, '-', ''))

    /* INSERT MISSING LOCATIONS */
    INSERT INTO lstLocation
    (
    -- Id -- this column value is auto-generated
    [Description],
    IsFixed,
    IsMobile,
    Capacity,
    CreateDate,
    ModifyDate,
    IsActive
    )
    SELECT DISTINCT sa.Location,
    IIF(sa.Location LIKE 'ST%', 1, 0),
    IIF(sa.Location LIKE 'ST%', 0, 1),
    -1,
    GETDATE(),
    GETDATE(),
    1
    FROM stageAFSData AS sa
    LEFT JOIN lstLocation AS ll
    ON ll.[Description] = sa.Location
    WHERE sa.Location IS NOT NULL
    AND ll.Id IS NULL

    /* INSERT MISSING PRODUCTS */

    INSERT INTO lstProduct
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT sa.ProductName,
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstProduct AS lp
    ON lp.Name = sa.ProductName
    WHERE lp.Id IS NULL
    AND sa.ProductName IS NOT NULL


    /* UPDATE Model And Make */

    UPDATE le
    SET le.ModelId = t1.ModelId,
    ModifyDate = GETDATE()
    FROM (
    SELECT DISTINCT sa.FleetId,
    lm4.Id ModelId --, lm3.Name, lm4.Name
    FROM lstEquipment AS le
    JOIN dbo.stageAFSData AS sa
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstModel AS lm3
    ON lm3.Id = le.ModelId
    JOIN dbo.lstModel AS lm4
    ON lm4.Name = sa.Model
    AND lm4.Id != lm3.Id
    WHERE sa.FleetId != 'MERGED-DELETED'
    ) t1
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(t1.FleetId, '-', '')


    UPDATE le
    SET le.MakeId = t1.MakeId,
    ModifyDate = GETDATE()
    FROM (
    SELECT DISTINCT sa.FleetId,
    lm2.Id MakeId--, lm.Name, lm2.Name
    FROM lstEquipment AS le
    JOIN dbo.stageAFSData AS sa
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstMake AS lm
    ON lm.Id = le.MakeId
    JOIN dbo.lstMake AS lm2
    ON lm2.Name = sa.Make
    AND lm2.Id != lm.Id
    WHERE sa.FleetId != 'MERGED-DELETED'
    ) t1
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(t1.FleetId, '-', '')



    INSERT INTO datAFSRecord
    (
    -- Id -- this column value is auto-generated
    EquipmentId,
    TransactionDateTime,
    Litres,
    Location,
    Pump,
    ProductId,
    CostCentre,
    HoursOdoReading,
    IsActive,
    CreateDate,
    ModifyDate,
    LocationId,
    VoucherNumber,
    FuelEventId,
    DeviceId
    )
    SELECT le.Id /*{ EquipmentId }*/,
    sa.TransactionDateTime /*{ TransactionDateTime }*/,
    sa.Liters /*{ Litres }*/,
    sa.Location /*{ Location }*/,
    sa.Pump /*{ Pump }*/,
    lp.Id /*{ ProductId }*/,
    sa.CostCentre /*{ CostCentre }*/,
    sa.ODOReading /*{ HoursOdoReading }*/,
    1 /*{ IsActive }*/,
    GETDATE() /*{ CreateDate }*/,
    GETDATE() /*{ ModifyDate }*/,
    ll.Id /*{ LocationId }*/,
    sa.VoucherNumber,
    sa.FuelEventId,
    sa.DeviceId
    FROM stageAFSData AS sa
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN lstProduct AS lp
    ON lp.Name = sa.ProductName
    JOIN lstLocation AS ll
    ON ll.[Description] = sa.Location
    LEFT JOIN dbo.datAFSRecord AS da3
    ON da3.FuelEventId = sa.FuelEventId
    WHERE da3.Id IS NULL


    UPDATE le
    SET TankSize = MaxL,
    ModifyDate = GETDATE()
    FROM lstEquipment AS le
    JOIN (
    SELECT MAX(Litres) MaxL,
    le2.Id
    FROM lstEquipment AS le2
    JOIN dbo.datAFSRecord AS da
    ON da.EquipmentId = le2.Id
    WHERE ISNULL(le2.TankSize, -1) = -1
    GROUP BY
    le2.Id
    ) t1
    ON t1.Id = le.Id

    DELETE sa
    FROM stageAFSData AS sa
    JOIN datAFSRecord AS da2
    ON da2.FuelEventId = sa.FuelEventId
    END
    ---------------------------



    --EXEC [dbo].[sp_RefreshCache] 180
    CREATE PROCEDURE [dbo].[sp_RefreshCache] (@pNumberOfDays INT = NULL)
    AS BEGIN


    IF @pNumberOfDays IS NULL
    BEGIN
    SET @pNumberOfDays = 180
    END

    DECLARE @StartDate DATE = DATEADD(DAY,-@pNumberOfDays,GETDATE())

    DELETE FROM cacheStorageLogbook WHERE [Date] >= @StartDate OR @pNumberOfDays IS NULL

    INSERT INTO cacheStorageLogbook
    (
    [Date],
    OpeningBalance,
    InvoiceNumber,
    PurchaseDate,
    LitresReceived,
    OpeningBalancePlusReceipts,
    DisposalDate,
    LitresDisposed,
    DisposedToVehicle,
    RegNumber,
    PurposeOfDisposal,
    MeterReadingAfterDisposal,
    LocationId
    )
    SELECT
    fgsrl.[Date],
    fgsrl.OpeningBalance,
    fgsrl.InvoiceNumber,
    fgsrl.PurchaseDate,
    fgsrl.LitresReceived,
    fgsrl.OpeningBalancePlusReceipts,
    fgsrl.DisposalDate,
    fgsrl.LitresDisposed,
    fgsrl.DisposedToVehicle,
    fgsrl.RegNumber,
    fgsrl.PurposeOfDisposal,
    fgsrl.MeterReadingAfterDisposal,
    ll.Id LocationId
    FROM
    lstLocation AS ll
    CROSS APPLY dbo.fn_GetStorageReportLive(ll.Id) AS fgsrl
    WHERE fgsrl.[Date] >= @StartDate OR @pNumberOfDays IS NULL

    DELETE FROM cacheUsageLogbook WHERE TransactionDateTime >= @StartDate OR @pNumberOfDays IS NULL

    INSERT INTO cacheUsageLogbook
    (
    TransactionDateTime,
    ReceivedFromStorageUnitNumber,
    QuantityReceived,
    TypeOfVehicle,
    OpeningBalanceFuel,
    RegNumber,
    OpeningOdo,
    ClosingOdo,
    TotalOdoUsed,
    TotalFuelUsed,
    UnusedBalance,
    SpecificActivityPerformed,
    NonEligible,
    WhenActivityPerformed,
    WhereActivityPerformed,
    EligiblePurchases
    )
    SELECT
    TransactionDateTime,
    ReceivedFromStorageUnitNumber,
    QuantityReceived,
    TypeOfVehicle,
    OpeningBalanceFuel,
    RegNumber,
    OpeningOdo,
    ClosingOdo,
    TotalOdoUsed,
    TotalFuelUsed,
    UnusedBalance,
    SpecificActivityPerformed,
    NonEligible,
    WhenActivityPerformed,
    WhereActivityPerformed,
    EligiblePurchases
    FROM vw_UsageReport AS vur
    WHERE vur.TransactionDateTime >= @StartDate OR @pNumberOfDays IS NULL




    END
    GO


    ------------


    /************************************************************
    * SP To Populate datAFS from stageAFS
    * Time: 2021/07/05 10:54:41
    *
    ************************************************************/

    CREATE PROCEDURE [dbo].[sp_Process_stageAFS]
    AS
    BEGIN
    WITH CTE AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY sa.FuelEventId ORDER BY sa.FleetId) rn
    FROM stageAFSData AS sa
    )
    DELETE CTE WHERE rn > 1

    /* INSERT MISSING MODELS */
    INSERT INTO lstModel
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT ISNULL(Model, '(To Be Updated)'),
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstModel AS lm
    ON lm.Name = ISNULL(sa.Model, '(To Be Updated)')
    WHERE lm.Id IS NULL


    /* INSERT MISSING MAKES */
    INSERT INTO lstMake
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT ISNULL(Make, '(To Be Updated)'),
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstMake AS lm
    ON lm.Name = ISNULL(sa.Make, '(To Be Updated)')
    WHERE lm.Id IS NULL
    /* INSERT MISSING CONSUMPTION TYPES */
    INSERT INTO lstConsumptionType
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT sa.ConsumptionType,
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstConsumptionType AS lct
    ON lct.Name = sa.ConsumptionType
    WHERE sa.ConsumptionType IS NOT NULL
    AND lct.Id IS NULL



    /* INSERT MISSING EQUIPMENT */

    INSERT INTO lstEquipment
    (
    -- Id -- this column value is auto-generated
    FleetId,
    RegNumber,
    EquipmentDescription,
    ModelId,
    MakeId,
    TankSize,
    ConsumptionTypeId,
    DecommissionDate,
    CreateDate,
    ModifyDate,
    IsActive
    )
    SELECT TRIM(REPLACE(sa.FleetId, '-', '')) /*{ FleetId }*/,
    MIN(sa.RegNumber) /*{ RegNumber }*/,
    NULL /*{ EquipmentDescription }*/,
    MIN(lmo.Id) /*{ ModelId }*/,
    MIN(lma.Id) /*{ MakeId }*/,
    -1 /*{ TankSize }*/,
    MAX(lct.Id) /*{ ConsumptionTypeId }*/,
    NULL /*{ DecommissionDate }*/,
    GETDATE() /*{ CreateDate }*/,
    GETDATE() /*{ ModifyDate }*/,
    1 /*{ IsActive }*/
    FROM stageAFSData AS sa
    LEFT JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstModel AS lmo
    ON lmo.Name = ISNULL(sa.Model, '(To Be Updated)')
    JOIN dbo.lstMake AS lma
    ON lma.Name = ISNULL(sa.Make, '(To Be Updated)')
    LEFT JOIN dbo.lstConsumptionType AS lct
    ON lct.Name = sa.ConsumptionType
    WHERE le.FleetId IS NULL
    AND sa.FleetId IS NOT NULL
    GROUP BY
    TRIM(REPLACE(sa.FleetId, '-', ''))

    /* INSERT MISSING LOCATIONS */
    INSERT INTO lstLocation
    (
    -- Id -- this column value is auto-generated
    [Description],
    IsFixed,
    IsMobile,
    Capacity,
    CreateDate,
    ModifyDate,
    IsActive
    )
    SELECT DISTINCT sa.Location,
    IIF(sa.Location LIKE 'ST%', 1, 0),
    IIF(sa.Location LIKE 'ST%', 0, 1),
    -1,
    GETDATE(),
    GETDATE(),
    1
    FROM stageAFSData AS sa
    LEFT JOIN lstLocation AS ll
    ON ll.[Description] = sa.Location
    WHERE sa.Location IS NOT NULL
    AND ll.Id IS NULL

    /* INSERT MISSING PRODUCTS */

    INSERT INTO lstProduct
    (
    -- Id -- this column value is auto-generated
    NAME,
    IsActive,
    CreateDate,
    ModifyDate
    )
    SELECT DISTINCT sa.ProductName,
    1,
    GETDATE(),
    GETDATE()
    FROM stageAFSData AS sa
    LEFT JOIN lstProduct AS lp
    ON lp.Name = sa.ProductName
    WHERE lp.Id IS NULL
    AND sa.ProductName IS NOT NULL


    /* UPDATE Model And Make */

    UPDATE le
    SET le.ModelId = t1.ModelId,
    ModifyDate = GETDATE()
    FROM (
    SELECT DISTINCT sa.FleetId,
    lm4.Id ModelId --, lm3.Name, lm4.Name
    FROM lstEquipment AS le
    JOIN dbo.stageAFSData AS sa
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstModel AS lm3
    ON lm3.Id = le.ModelId
    JOIN dbo.lstModel AS lm4
    ON lm4.Name = sa.Model
    AND lm4.Id != lm3.Id
    WHERE sa.FleetId != 'MERGED-DELETED'
    ) t1
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(t1.FleetId, '-', '')


    UPDATE le
    SET le.MakeId = t1.MakeId,
    ModifyDate = GETDATE()
    FROM (
    SELECT DISTINCT sa.FleetId,
    lm2.Id MakeId--, lm.Name, lm2.Name
    FROM lstEquipment AS le
    JOIN dbo.stageAFSData AS sa
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN dbo.lstMake AS lm
    ON lm.Id = le.MakeId
    JOIN dbo.lstMake AS lm2
    ON lm2.Name = sa.Make
    AND lm2.Id != lm.Id
    WHERE sa.FleetId != 'MERGED-DELETED'
    ) t1
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(t1.FleetId, '-', '')



    INSERT INTO datAFSRecord
    (
    -- Id -- this column value is auto-generated
    EquipmentId,
    TransactionDateTime,
    Litres,
    Location,
    Pump,
    ProductId,
    CostCentre,
    HoursOdoReading,
    IsActive,
    CreateDate,
    ModifyDate,
    LocationId,
    VoucherNumber,
    FuelEventId,
    DeviceId
    )
    SELECT le.Id /*{ EquipmentId }*/,
    sa.TransactionDateTime /*{ TransactionDateTime }*/,
    sa.Liters /*{ Litres }*/,
    sa.Location /*{ Location }*/,
    sa.Pump /*{ Pump }*/,
    lp.Id /*{ ProductId }*/,
    sa.CostCentre /*{ CostCentre }*/,
    sa.ODOReading /*{ HoursOdoReading }*/,
    1 /*{ IsActive }*/,
    GETDATE() /*{ CreateDate }*/,
    GETDATE() /*{ ModifyDate }*/,
    ll.Id /*{ LocationId }*/,
    sa.VoucherNumber,
    sa.FuelEventId,
    sa.DeviceId
    FROM stageAFSData AS sa
    JOIN lstEquipment AS le
    ON REPLACE(le.FleetId, '-', '') = REPLACE(sa.FleetId, '-', '')
    JOIN lstProduct AS lp
    ON lp.Name = sa.ProductName
    JOIN lstLocation AS ll
    ON ll.[Description] = sa.Location
    LEFT JOIN dbo.datAFSRecord AS da3
    ON da3.FuelEventId = sa.FuelEventId
    WHERE da3.Id IS NULL


    UPDATE le
    SET TankSize = MaxL,
    ModifyDate = GETDATE()
    FROM lstEquipment AS le
    JOIN (
    SELECT MAX(Litres) MaxL,
    le2.Id
    FROM lstEquipment AS le2
    JOIN dbo.datAFSRecord AS da
    ON da.EquipmentId = le2.Id
    WHERE ISNULL(le2.TankSize, -1) = -1
    GROUP BY
    le2.Id
    ) t1
    ON t1.Id = le.Id

    DELETE sa
    FROM stageAFSData AS sa
    JOIN datAFSRecord AS da2
    ON da2.FuelEventId = sa.FuelEventId
    END
    GO


    --------------



    CREATE TABLE [dbo].[stageAFSData](
    [FleetId] [nvarchar](25) NULL,
    [FuelEventId] [int] NULL,
    [RegNumber] [nvarchar](15) NULL,
    [Make] [nvarchar](50) NULL,
    [Model] [nvarchar](50) NULL,
    [TransactionDateTime] [datetime] NULL,
    [Liters] [float] NULL,
    [Location] [nvarchar](50) NULL,
    [Pump] [nvarchar](50) NULL,
    [ProductName] [nvarchar](15) NULL,
    [CostCentre] [nvarchar](50) NULL,
    [ConsumptionType] [nvarchar](20) NULL,
    [ODOReading] [float] NULL,
    [VoucherNumber] [nvarchar](50) NULL,
    [DepotId] [int] NULL,
    [EquipmentId] [int] NULL,
    [TankGroupId] [int] NULL,
    [PumpId] [int] NULL,
    [VehicleCategoryDescription] [nvarchar](50) NULL,
    [Group2] [nvarchar](50) NULL,
    [TransactionPlate] [nvarchar](50) NULL,
    [DeviceId] [varchar](max) NULL
    ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
    GO


    (b)Problem is, the 60 day RefreshCache failed on my local with the STRING_AGG issue when my DB timed out.

    (c) The current method is that the FuelEventId is set to the highest value of the AFSDataStage & the AFSDatRecord and they fetch the latest events. If i manually set the FuelEventId to “1” to basically fetch all the records , the data fetched  should populate the AFSDataStage table. If i then run AllStaging stored Proc (which runs all the pipeline tasks manually from the DB side) that data is not overwritten or is overwritten but no duplicated.


    /****** Object: StoredProcedure [dbo].[sp_Process_All_Staging] Script Date: 2022/09/08 17:08:20 ******/
    SET ANSI_NULLS ON
    GO

    SET QUOTED_IDENTIFIER ON
    GO


    CREATE PROCEDURE [dbo].[sp_Process_All_Staging]
    AS BEGIN
    PRINT 'Start - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageEquipment ;
    PRINT 'sp_Process_stageEquipment - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageDepot
    PRINT 'sp_Process_stageDepot - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageAFS
    PRINT 'sp_Process_stageAFS - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageODO
    PRINT 'sp_Process_stageODO - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageSAPInfo
    PRINT 'sp_Process_stageSAPInfo - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageTripData
    PRINT 'sp_Process_stageTripData - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageFuelEventsByModifiedDate
    PRINT 'sp_Process_stageFuelEventsByModifiedDate - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC [sp_Process_stageTankDeliveryEvent]
    PRINT '[sp_Process_stageTankDeliveryEvent] - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_Process_stageTankEvents
    PRINT 'sp_Process_stageTankEvents - ' + CAST(GETDATE() AS NVARCHAR(25))
    EXEC sp_RefreshCache
    @pNumberOfDays = 30
    PRINT 'sp_RefreshCache - ' + CAST(GETDATE() AS NVARCHAR(25))

    END
    GO


    This is the code i used for testing of the Fueliventid:

     

    Attachments:
    You must be logged in to view attached files.
  • This is a very long post containing precisely zero questions.

    Vetting such code is a job for a paid consultant.

    The absence of evidence is not evidence of absence.
    Martin Rees

    You can lead a horse to water, but a pencil must be lead.
    Stan Laurel

  • I have this stored procedure that is inserting/updataing a staging tabe and another table. I dont have access to the Azure Data Factory.

    (a) I would like to know if i can be assured if no duplicates gets passed, see SQL:

    Nope - cannot ensure no duplicates are passed and cannot ensure that no duplicates are inserted.  You haven't defined what actually determines uniqueness in each table - nor have you provided anything to identify the primary key for the tables where you are inserting the data.

    It looks like you put a DISTINCT on every single statement - which either means a bad join or bad design.

    And - right at the beginning you try to eliminate rows from the staging table by deleted from a CTE based on the row number.  I would assume that you don't have multiple Fuel Event ID's - and probably don't have the same Fuel Event ID for different fleets, so that isn't going to 'remove' duplicates either.

     

    Jeffrey Williams
    “We are all faced with a series of great opportunities brilliantly disguised as impossible situations.”

    ― Charles R. Swindoll

    How to post questions to get better answers faster
    Managing Transaction Logs

  • @JeffreyWilliams Thank you for your input. Would you say, definning primary keys for each row would be the quickest way to ensure uniqueness or no duplicate values being imported?

    I agree with you that it is a bad db design, thats why the cartesian joins. In order to avoid cartesian joins and to speed up the db/tables, what would you recommend?

    I did run an update on the db with this code:


    ALTER INDEX [Coordinates] ON [dbo].[CoordRef] DISABLE
    GO

    ALTER INDEX [IX_CostCentre_FuelEventId] ON [dbo].[CostCentre] DISABLE
    GO

    ALTER TABLE [dbo].[datAFSRecord] DROP CONSTRAINT [FK_datAFSRecord_lstProduct]
    GO
    ALTER TABLE [dbo].[datAFSRecord] DROP CONSTRAINT [FK_datAFSRecord_lstLocation]
    GO
    ALTER TABLE [dbo].[datAFSRecord] DROP CONSTRAINT [FK_datAFSRecord_lstEquipment]
    GO

    ALTER INDEX [IX_datAFSRecord_EquipmentId] ON [dbo].[datAFSRecord] DISABLE
    GO
    ALTER INDEX [IX_datAFSRecord_EquipmentId_TransactionDateTime] ON [dbo].[datAFSRecord] DISABLE
    GO
    ALTER TABLE [dbo].[datAFSRecord] WITH NOCHECK ADD CONSTRAINT [FK_datAFSRecord_lstEquipment] FOREIGN KEY ([EquipmentId]) REFERENCES [dbo].[lstEquipment] ([Id])
    GO
    ALTER TABLE [dbo].[datAFSRecord] WITH NOCHECK ADD CONSTRAINT [FK_datAFSRecord_lstLocation] FOREIGN KEY ([LocationId]) REFERENCES [dbo].[lstLocation] ([Id])
    GO
    ALTER TABLE [dbo].[datAFSRecord] WITH NOCHECK ADD CONSTRAINT [FK_datAFSRecord_lstProduct] FOREIGN KEY ([ProductId]) REFERENCES [dbo].[lstProduct] ([Id])
    GO
    ALTER TABLE [dbo].[datAFSRecord] NOCHECK CONSTRAINT [FK_datAFSRecord_lstEquipment]
    GO
    ALTER TABLE [dbo].[datAFSRecord] NOCHECK CONSTRAINT [FK_datAFSRecord_lstLocation]
    GO
    ALTER TABLE [dbo].[datAFSRecord] NOCHECK CONSTRAINT [FK_datAFSRecord_lstProduct]
    GO

    ALTER TABLE [dbo].[datFuelDelivery] DROP CONSTRAINT [FK_datFuelDelivery_lstProduct]
    GO
    ALTER TABLE [dbo].[datFuelDelivery] DROP CONSTRAINT [FK_datFuelDelivery_lstLocation]
    GO

    ALTER TABLE [dbo].[datFuelDelivery] WITH NOCHECK ADD CONSTRAINT [FK_datFuelDelivery_lstLocation] FOREIGN KEY ([LocationId]) REFERENCES [dbo].[lstLocation] ([Id])
    GO
    ALTER TABLE [dbo].[datFuelDelivery] WITH NOCHECK ADD CONSTRAINT [FK_datFuelDelivery_lstProduct] FOREIGN KEY ([ProductId]) REFERENCES [dbo].[lstProduct] ([Id])
    GO
    ALTER TABLE [dbo].[datFuelDelivery] NOCHECK CONSTRAINT [FK_datFuelDelivery_lstLocation]
    GO
    ALTER TABLE [dbo].[datFuelDelivery] NOCHECK CONSTRAINT [FK_datFuelDelivery_lstProduct]
    GO


    ALTER TABLE [dbo].[datLocationVolumeReading] DROP CONSTRAINT [FK_datLocationVolumeReading_lstLocation]
    GO

    ALTER TABLE [dbo].[datLocationVolumeReading] WITH NOCHECK ADD CONSTRAINT [FK_datLocationVolumeReading_lstLocation] FOREIGN KEY ([LocationId]) REFERENCES [dbo].[lstLocation] ([Id])
    GO
    ALTER TABLE [dbo].[datLocationVolumeReading] NOCHECK CONSTRAINT [FK_datLocationVolumeReading_lstLocation]
    GO

    ALTER INDEX [IX_datMeterReading_IsActive] ON [dbo].[datMeterReading] DISABLE
    GO
    ALTER INDEX [IX_datMeterReading_ReadingDate_CounterReading_EquipmentId] ON [dbo].[datMeterReading] DISABLE
    GO

    ALTER TABLE [dbo].[datMeterReading] DROP CONSTRAINT [FK_datMeterReading_lstEquipment]
    GO
    ALTER TABLE [dbo].[datMeterReading] DROP CONSTRAINT [FK_datMeterReading_lstConsumptionType]
    GO

    ALTER TABLE [dbo].[datMeterReading] WITH NOCHECK ADD CONSTRAINT [FK_datMeterReading_lstConsumptionType] FOREIGN KEY ([ConsumptionTypeId]) REFERENCES [dbo].[lstConsumptionType] ([Id])
    GO
    ALTER TABLE [dbo].[datMeterReading] WITH NOCHECK ADD CONSTRAINT [FK_datMeterReading_lstEquipment] FOREIGN KEY ([EquipmentId]) REFERENCES [dbo].[lstEquipment] ([Id])
    GO
    ALTER TABLE [dbo].[datMeterReading] NOCHECK CONSTRAINT [FK_datMeterReading_lstConsumptionType]
    GO
    ALTER TABLE [dbo].[datMeterReading] NOCHECK CONSTRAINT [FK_datMeterReading_lstEquipment]
    GO

    ALTER INDEX [IDX_datTripRecord_EquipmentIdSourceTime] ON [dbo].[datTripRecord] DISABLE
    GO

    ALTER TABLE [dbo].[datTripRecord] DROP CONSTRAINT [FK_datTripRecord_lstEquipment]
    GO

    ALTER TABLE [dbo].[datTripRecord] WITH NOCHECK ADD CONSTRAINT [FK_datTripRecord_lstEquipment] FOREIGN KEY ([EquipmentId]) REFERENCES [dbo].[lstEquipment] ([Id])
    GO
    ALTER TABLE [dbo].[datTripRecord] NOCHECK CONSTRAINT [FK_datTripRecord_lstEquipment]
    GO

    ALTER TABLE [dbo].[lstAlias] DROP CONSTRAINT [FK_lstAlias_lstLocation]
    GO
    ALTER TABLE [dbo].[lstAlias] DROP CONSTRAINT [FK_lstAlias_lstEquipment]
    GO

    ALTER TABLE [dbo].[lstAlias] WITH NOCHECK ADD CONSTRAINT [FK_lstAlias_lstEquipment] FOREIGN KEY ([FleetId]) REFERENCES [dbo].[lstEquipment] ([Id])
    GO
    ALTER TABLE [dbo].[lstAlias] WITH NOCHECK ADD CONSTRAINT [FK_lstAlias_lstLocation] FOREIGN KEY ([LocationId]) REFERENCES [dbo].[lstLocation] ([Id])
    GO
    ALTER TABLE [dbo].[lstAlias] NOCHECK CONSTRAINT [FK_lstAlias_lstEquipment]
    GO
    ALTER TABLE [dbo].[lstAlias] NOCHECK CONSTRAINT [FK_lstAlias_lstLocation]
    GO


    ALTER TABLE [dbo].[lstEquipment] DROP CONSTRAINT [FK_lstEquipment_lstVehicleType]
    GO
    ALTER TABLE [dbo].[lstEquipment] DROP CONSTRAINT [FK_lstEquipment_lstModel]
    GO
    ALTER TABLE [dbo].[lstEquipment] DROP CONSTRAINT [FK_lstEquipment_lstMake]
    GO
    ALTER TABLE [dbo].[lstEquipment] DROP CONSTRAINT [FK_lstEquipment_lstConsumptionType]
    GO

    ALTER TABLE [dbo].[lstEquipment] WITH NOCHECK ADD CONSTRAINT [FK_lstEquipment_lstConsumptionType] FOREIGN KEY ([ConsumptionTypeId]) REFERENCES [dbo].[lstConsumptionType] ([Id])
    GO
    ALTER TABLE [dbo].[lstEquipment] WITH NOCHECK ADD CONSTRAINT [FK_lstEquipment_lstMake] FOREIGN KEY ([MakeId]) REFERENCES [dbo].[lstMake] ([Id])
    GO
    ALTER TABLE [dbo].[lstEquipment] WITH NOCHECK ADD CONSTRAINT [FK_lstEquipment_lstModel] FOREIGN KEY ([ModelId]) REFERENCES [dbo].[lstModel] ([Id])
    GO
    ALTER TABLE [dbo].[lstEquipment] WITH NOCHECK ADD CONSTRAINT [FK_lstEquipment_lstVehicleType] FOREIGN KEY ([VehicleTypeId]) REFERENCES [dbo].[lstVehicleType] ([Id])
    GO
    ALTER TABLE [dbo].[lstEquipment] NOCHECK CONSTRAINT [FK_lstEquipment_lstConsumptionType]
    GO
    ALTER TABLE [dbo].[lstEquipment] NOCHECK CONSTRAINT [FK_lstEquipment_lstMake]
    GO
    ALTER TABLE [dbo].[lstEquipment] NOCHECK CONSTRAINT [FK_lstEquipment_lstModel]
    GO
    ALTER TABLE [dbo].[lstEquipment] NOCHECK CONSTRAINT [FK_lstEquipment_lstVehicleType]
    GO


    ALTER TABLE [dbo].[mapEquipmentLocation] DROP CONSTRAINT [FK_mapEquipmentLocation_lstLocation]
    GO
    ALTER TABLE [dbo].[mapEquipmentLocation] DROP CONSTRAINT [FK_mapEquipmentLocation_lstEquipment]
    GO

    ALTER TABLE [dbo].[mapEquipmentLocation] WITH NOCHECK ADD CONSTRAINT [FK_mapEquipmentLocation_lstEquipment] FOREIGN KEY ([EquipmentId]) REFERENCES [dbo].[lstEquipment] ([Id])
    GO
    ALTER TABLE [dbo].[mapEquipmentLocation] WITH NOCHECK ADD CONSTRAINT [FK_mapEquipmentLocation_lstLocation] FOREIGN KEY ([LocationId]) REFERENCES [dbo].[lstLocation] ([Id])
    GO
    ALTER TABLE [dbo].[mapEquipmentLocation] NOCHECK CONSTRAINT [FK_mapEquipmentLocation_lstEquipment]
    GO
    ALTER TABLE [dbo].[mapEquipmentLocation] NOCHECK CONSTRAINT [FK_mapEquipmentLocation_lstLocation]
    GO



    ALTER INDEX [IX_stageAFSData_ConsumptionType] ON [dbo].[stageAFSData] DISABLE
    GO
    ALTER INDEX [IX_stageAFSData_FleetId] ON [dbo].[stageAFSData] DISABLE
    GO
    ALTER INDEX [IX_stageAFSData_Location] ON [dbo].[stageAFSData] DISABLE
    GO
    ALTER INDEX [IX_stageAFSData_Make] ON [dbo].[stageAFSData] DISABLE
    GO
    ALTER INDEX [IX_stageAFSData_Model] ON [dbo].[stageAFSData] DISABLE
    GO
    ALTER INDEX [IX_stageAFSData_ProductName] ON [dbo].[stageAFSData] DISABLE
    GO
  • yrstruly wrote:

    @JeffreyWilliams Thank you for your input. Would you say, definning primary keys for each row would be the quickest way to ensure uniqueness or no duplicate values being imported?

    I agree with you that it is a bad db design, thats why the cartesian joins. In order to avoid cartesian joins and to speed up the db/tables, what would you recommend?

    I really have no idea what you are asking - you don't define PK's on rows.  You define a PK on a table.  And - I am not convinced the database is a bad design.

    Jeffrey Williams
    “We are all faced with a series of great opportunities brilliantly disguised as impossible situations.”

    ― Charles R. Swindoll

    How to post questions to get better answers faster
    Managing Transaction Logs

  • @jeffwilliams "It looks like you put a DISTINCT on every single statement - which either means a bad join or bad design." What will you recommend here or how would you have done it?

  • The question is too long for me to read.

    If you are inserting with DISTINCT, as you appear to be, that will ensure no duplicates are generated at the source

    If you also add:

    WHERE NOT EXISTS(SELECT * 
    FROM dbo.TargetTable t
    WHERE t.ColumnIdDontWantDuplicated1 = source.ColumnIdDontWantDuplicated1
    AND t.ColumnIdDontWantDuplicated2 = source.ColumnIdDontWantDuplicated2
    AND ...)

    That will ensure that the duplicate row isn't already in the target table.

    You could also consider using a MERGE statement.

  • yrstruly wrote:

    @jeffwilliams "It looks like you put a DISTINCT on every single statement - which either means a bad join or bad design." What will you recommend here or how would you have done it?

    There's no quick fix.

    1. Make sure that you thoroughly understand the entity-relationship model of the database you are working with.
    2. Formulate your queries (joins in particular) according to the E/R model.
    3. If these queries still need lots of DISTINCTs, the DB is probably badly designed and in need of attention.

    The absence of evidence is not evidence of absence.
    Martin Rees

    You can lead a horse to water, but a pencil must be lead.
    Stan Laurel

Viewing 8 posts - 1 through 7 (of 7 total)

You must be logged in to reply to this topic. Login to reply