Remove duplicate geographical distance


  • If Object_Id('tempdb..#TravelDistance') Is Not Null
        Drop Table #TravelDistance

    Create table #TravelDistance
    (
        StartPt        Varchar(200),
        EndPt        Varchar(200),
        Distance    Int
    )

    Insert Into #TravelDistance
    Select 'Mumbai','Delhi',2000
    Union All
    Select 'Delhi','Mumbai',2000
    Union All
    Select 'Varanasi','Mumbai',1500
    Union All
    Select 'Mumbai','Varanasi',1500
    Union All
    Select 'Delhi','Shimla',4000
    Union All
    Select 'Chennai','Shimla',4000
    Union All
    Select 'Pune','Mumbai',4000
    Union All
    Select 'Mumbai','Pune',4000
    Union All
    Select 'Thane','Banglore',4000

    In the above table, Mumbai to Pune and Pune to Mumbai distance are appearing twice. Could you please give a query to remove such duplicate entries across the columns. Also Thane to Banglore is appearing only once so it should also come in output. 

    Query output should be :

    Mumbai    Varanasi    1500
    Delhi    Mumbai    2000
    Mumbai    Pune    4000
    Chennai    Shimla    4000
    Delhi    Shimla    4000
    Thane    Banglore    4000

  • This is one solution. you don't need to do it in 2 CTE's, you could do it in one, but split it for readability:
    --Put the Places in Alphabetical order. So Mumbai Delhi becomes Dehli Mumbai, but Dehli Mumbai remains the same
    --This creates true duplicates. Good!
    WITH Alphabetical AS (
      SELECT CASE WHEN StartPt < EndPt THEN StartPt ELSE EndPt END AS LowerAlpha,
            CASE WHEN StartPt > EndPt THEN StartPt ELSE EndPt END AS UpperAlpha,
            Distance
      FROM #TravelDistance),
    --Now give them Row Numbers
    RNs AS (
      SELECT *,
            ROW_NUMBER() OVER (PARTITION BY LowerAlpha, UpperAlpha, Distance ORDER BY Distance) AS RN
      FROM Alphabetical)
    --Finally, delete the duplicates (those with an RN > 1) Using CTE "Magic".
    DELETE FROM RNs
    WHERE RN > 1;

    Thom~

    Excuse my typos and sometimes awful grammar. My fingers work faster than my brain does.
    Larnu.uk

  • Here's another alternative:
    If OBJECT_ID(N'tempdb..#TravelDistance', N'U') IS NOT NULL
        BEGIN
        DROP TABLE #TravelDistance;
        END;

    CREATE TABLE #TravelDistance (
        StartPt varchar(200),
        EndPt varchar(200),
        Distance int
    );

    CREATE NONCLUSTERED INDEX [IX_#TravelDistance_Distance_StartPt_EndPt] ON #TravelDistance
        (
        Distance ASC,
        StartPt ASC,
        EndPt ASC
        );

    CREATE NONCLUSTERED INDEX [IX_#TravelDistance_StartPt_EndPt] ON #TravelDistance
        (
        StartPt ASC,
        EndPt ASC
        )
        INCLUDE (Distance);

    CREATE NONCLUSTERED INDEX [IX_#TravelDistance_EndPt_StartPt] ON #TravelDistance
        (
        EndPt ASC,
        StartPt ASC
        )
        INCLUDE (Distance);

    INSERT INTO #TravelDistance (StartPt, EndPt, Distance)
    SELECT 'Mumbai','Delhi',2000 UNION ALL
    SELECT 'Delhi','Mumbai',2000 UNION ALL
    SELECT 'Varanasi','Mumbai',1500 UNION ALL
    SELECT 'Mumbai','Varanasi',1500 UNION ALL
    SELECT 'Delhi','Shimla',4000 UNION ALL
    SELECT 'Chennai','Shimla',4000 UNION ALL
    SELECT 'Pune','Mumbai',4000 UNION ALL
    SELECT 'Mumbai','Pune',4000 UNION ALL
    SELECT 'Thane','Banglore',4000;

    SELECT TD.StartPt, TD.EndPt, TD.Distance
    FROM #TravelDistance AS TD WITH (INDEX([IX_#TravelDistance_Distance_StartPt_EndPt]))
        LEFT OUTER JOIN #TravelDistance AS T2 WITH (INDEX([IX_#TravelDistance_EndPt_StartPt]))
            ON TD.StartPt = T2.EndPt
            AND TD.EndPt = T2.StartPt
            AND TD.Distance = T2.Distance
    WHERE T2.Distance IS NULL
        OR T2.StartPt > TD.StartPt
    ORDER BY TD.Distance, TD.StartPt, TD.EndPt;

    DROP TABLE #TravelDistance;

    Steve (aka sgmunson) 🙂 🙂 🙂
    Rent Servers for Income (picks and shovels strategy)

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply