April 18, 2016 at 1:48 am
I need to shuffle the Primary key data such that if a table has auto increment Id running values 1,2,3,7,8,10 and so on, when passed this column to shuffling function, it returns data in same sequence as for Primary key as 1,2,3,7,8,10 but the shuffled column should random number for 1 could be any valid number from (2,3,7,8,10) for 2, it could be (1,3,7,8,10) and so on .... In each select it should generate different combination.
select PK_Column, dbo.funcShuffle(PK_Column) from MyTable
PK_Columndbo.funcShuffle(PK_Column)
-----------------------------------
18
210
31
72
83
107
Next time when we run same query it may give different result
PK_Columndbo.funcShuffle(PK_Column)
-----------------------------------
110
21
38
77
82
103
actually we are trying to mask data by adding another column and put those valid values but with different relevant data so that it could not break referential integrity with other tables.
sort of data masking to protect and shuffle original data, but should be valid (not garbage). Can anyone help me on this?
Shamshad Ali
April 18, 2016 at 2:46 am
Quick suggestion
😎
USE Test;
GO
SET NOCOUNT ON;
GO
IF OBJECT_ID(N'dbo.VNEWID') IS NULL
BEGIN
DECLARE @CREATE_VIEW NVARCHAR(MAX) = N'
CREATE VIEW dbo.VNEWID
WITH SCHEMABINDING
AS
SELECT NEWID() AS NID;
';
EXEC (@CREATE_VIEW);
END
GO
IF OBJECT_ID(N'dbo.funcShuffle') IS NULL
BEGIN
DECLARE @CREATE_FUNCTION NVARCHAR(MAX) = N'CREATE FUNCTION dbo.funcShuffle
(
@SAMPLE_SIZE INT
)
RETURNS TABLE
WITH SCHEMABINDING
AS
/**********************************************************
Generate scrabled set of keys
2016-04-18 Eirikur Eiriksson, Initial coding
Parameter @SAMPLE_SIZE INT Number of rows to match
Usage example
DECLARE @SAMPLE_SIZE INT = 100000;
SELECT
[ALIAS 1].COLUMN_LIST
,[ALIAS 2].ORIGINAL_ID
,[ALIAS 2].PSEUDO_ID
FROM [TABLE_NAME] [ALIAS 1]
CROSS APPLY dbo.funcShuffle( @SAMPLE_SIZE ) [ALIAS 2]
WHERE [ALIAS 1].ID_COLUMN < (@SAMPLE_SIZE + 1)
AND [ALIAS 1].ID_COLUMN = [ALIAS 2].ORIGINAL_ID;
**********************************************************/
RETURN
WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))
, NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7)
,SCRAMBLED_NUMS AS
(
SELECT TOP(@SAMPLE_SIZE)
NM.N AS PSEUDO_ID
FROM NUMS NM
CROSS APPLY dbo.VNEWID VN
ORDER BY CHECKSUM(VN.NID)
)
SELECT
SN.PSEUDO_ID
,ROW_NUMBER() OVER
(
ORDER BY (SELECT NULL)
) AS ORIGINAL_ID
FROM SCRAMBLED_NUMS SN
';
EXEC (@CREATE_FUNCTION);
END
DECLARE @SAMPLE_SIZE INT = 10;
WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))
, NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7)
SELECT
NM.N
,X.PSEUDO_ID
FROM NUMS NM
CROSS APPLY dbo.funcShuffle(@SAMPLE_SIZE) X
WHERE NM.N < (@SAMPLE_SIZE + 1)
AND NM.N = X.ORIGINAL_ID
ORDER BY NM.N;
Sample output
N PSEUDO_ID
---- ----------
1 3
2 8
3 7
4 2
5 10
6 6
7 5
8 1
9 4
10 9
April 18, 2016 at 7:02 am
Ids could be deleted so they won't match from random generated numbers, I need something like existing Ids only to be shuffled.
Tried following and it only uses the random number for such objectIds which does not exists in reality.
SELECT
object_Id
,X.PSEUDO_ID
FROM sys.objects NM
CROSS APPLY dbo.funcShuffle(30) X
WHERE NM.object_Id < (30 + 1)
AND NM.object_Id = X.ORIGINAL_ID
ORDER BY NM.object_Id;
Please help.
Shamshad
April 18, 2016 at 9:44 am
shamshad.ali (4/18/2016)
Ids could be deleted so they won't match from random generated numbers, I need something like existing Ids only to be shuffled.Tried following and it only uses the random number for such objectIds which does not exists in reality.
SELECT
object_Id
,X.PSEUDO_ID
FROM sys.objects NM
CROSS APPLY dbo.funcShuffle(30) X
WHERE NM.object_Id < (30 + 1)
AND NM.object_Id = X.ORIGINAL_ID
ORDER BY NM.object_Id;
Please help.
Shamshad
You don't need a function for this, easy to do in a select
😎
USE Test;
GO
SET NOCOUNT ON;
--SAMPLE DATA SET
IF OBJECT_ID(N'dbo.TBL_SAMPLE_SCRAMBLE') IS NOT NULL DROP TABLE dbo.TBL_SAMPLE_SCRAMBLE;
CREATE TABLE dbo.TBL_SAMPLE_SCRAMBLE
(
SS_ID INT NOT NULL CONSTRAINT PK_DBO_TBL_SAMPLE_SCRAMBLE_SS_ID PRIMARY KEY CLUSTERED
,SS_VAL INT NOT NULL
);
DECLARE @SAMPLE_SIZE INT = 100;
WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))
, NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7)
INSERT INTO dbo.TBL_SAMPLE_SCRAMBLE(SS_ID,SS_VAL)
SELECT
NM.N
,CHECKSUM(NEWID()) % 1000000
FROM NUMS NM
WHERE NM.N % 3 > 0;
-- SCRAMBLING EXISTING IDs QUERY
;WITH SCRAMBLE_SET AS
(
SELECT TOP(100000)
SS.SS_ID AS PSEUDO_ID
FROM TBL_SAMPLE_SCRAMBLE SS
ORDER BY CHECKSUM(NEWID())
)
,SCRAMBLE_WITH_LINKID AS
(
SELECT
ROW_NUMBER() OVER
(
ORDER BY (SELECT NULL)
) AS RID
,SCS.PSEUDO_ID
FROM SCRAMBLE_SET SCS
)
,BASE_SET AS
(
SELECT
ROW_NUMBER() OVER
(
ORDER BY (SELECT NULL)
) AS RID
,SSC.SS_ID
,SSC.SS_VAL
FROM dbo.TBL_SAMPLE_SCRAMBLE SSC
)
SELECT
BS.SS_ID
,SWL.PSEUDO_ID
,BS.SS_VAL
FROM BASE_SET BS
INNER JOIN SCRAMBLE_WITH_LINKID SWL
ON BS.RID = SWL.RID;
Example output
SS_ID PSEUDO_ID SS_VAL
----------- ----------- -----------
1 53 552347
2 10 906627
4 19 204964
5 38 -678924
7 73 300218
8 71 443743
10 91 664690
11 17 262823
13 62 774952
14 11 -907962
16 34 420740
17 44 -434785
19 23 -447531
20 37 973530
22 70 -457080
23 43 -491094
25 41 780831
26 50 792157
28 47 569184
29 25 10273
31 65 -492659
32 40 551954
34 68 -962127
35 95 616930
37 82 426276
38 14 32258
40 4 975796
41 55 -279654
43 16 328136
44 13 -782839
46 83 -300461
47 20 289727
49 26 -911585
50 80 635564
52 1 -738821
53 85 133879
55 97 -859507
56 77 -266033
58 32 -657742
59 52 -891490
61 94 -586774
62 67 -940843
64 98 -211073
65 64 -546324
67 46 -955116
68 89 98590
70 88 663690
71 29 -679942
73 61 -322936
74 74 -930818
76 92 -228937
77 8 376651
79 2 -118888
80 79 -914288
82 58 -636086
83 35 -150468
85 76 -979250
86 22 -476588
88 86 909844
89 31 310657
91 49 453483
92 5 374528
94 7 288590
95 56 -111569
97 28 -982752
98 59 -133487
100 100 -978524
Viewing 4 posts - 1 through 3 (of 3 total)
You must be logged in to reply to this topic. Login to reply