Click or drag to resize
sqx

How To Do PCA On Small Data

Principal Component Analysis On Small Data

The defenition for Small Data is less than 2GB, in other words, the Matrix capacity.

The example below is the same as on Visual Studio Magazine - Principal Component Analysis (PCA) from Scratch Using the Classical Technique with C# (2024) by James McCaffrey.

Principal Component Analysis On Small Data

  • SQL
    USE [SQX]
    GO
    
    SET NOCOUNT ON;
    SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
    DROP TABLE IF EXISTS #SourceData
    ;WITH sample_data AS (SELECT [ID],[1],[2],[3],[4],[5] FROM (VALUES
    ((0),(5.1),(3.5),(1.4),(0.2),(0)),
    ((1),(4.9),(3.1),(1.5),(0.1),(0)),
    ((2),(5.1),(3.8),(1.5),(0.3),(0)),
    ((3),(7),(3.2),(4.7),(1.4),(1)),
    ((4),(5),(2),(3.5),(1),(1)),
    ((5),(5.9),(3.2),(4.8),(1.8),(1)),
    ((6),(6.3),(3.3),(6),(2.5),(2)),
    ((7),(6.5),(3.2),(5.1),(2),(2)),
    ((8),(6.9),(3.2),(5.7),(2.3),(2))
    ) AS T([ID],[1],[2],[3],[4],[5]))
    SELECT ROW_NUMBER() OVER (ORDER BY s.ID) [ID], s.ID [SourceID]
    , s.[1], s.[2], s.[3], s.[4], s.[5]
    INTO #SourceData
    FROM sample_data s
    
    CREATE UNIQUE INDEX IX_SourceData_ID ON #SourceData (ID ASC)
    
    /* SMALL DATA Pipeline Start Here */
    DECLARE @PCAID int, @MEANS [sqx].[Vector], @STDEVS [sqx].[Vector]
    , @DATA [sqx].[Matrix], @COV [sqx].[Matrix]
    DECLARE @Query nvarchar(max) = N'
    select [1], [2], [3], [4]
    from #SourceData
    order by [ID]
    '
    EXEC [sqx].[MatrixFromQuery] @Query = @Query, @SnapShot = 0, @A = @DATA OUTPUT
    
    SELECT @MEANS = @DATA.Avg(), @STDEVS = @DATA.StDevp()
    
    SET @DATA = @DATA.Standardize(1)
    SET @COV = @DATA.Covariance(1)
    
    INSERT [PCA] ([Description],[DATA],[MEANS],[STDEVS],[COV])
    VALUES (N'Hello PCA',@DATA,@MEANS,@STDEVS,@COV)
    SELECT @PCAID = @@IDENTITY
    
    DECLARE @Rotation int, @EIGENVECTORS [sqx].[Matrix]
    , @EIGENVALUES [sqx].[Vector], @VAREXPLAINED [sqx].[Vector]
    
    SELECT @EIGENVECTORS = pcat.EigenVectors
    , @EIGENVALUES = pcat.EigenValues
    , @VAREXPLAINED = pcat.VarianceExplained
    , @Rotation = pcat.Rotations
    FROM sqx.PrincipalComponentAnalysisTable(@R) pcat
    
    UPDATE [PCA]
    SET [EIGENVECTORS] = @EIGENVECTORS, [EIGENVALUES] = @EIGENVALUES
    , [VAREXPLAINED] = @VAREXPLAINED
    WHERE [ID] = @PCAID
    
    -- Transform Data
    UPDATE [PCA]
    SET [DATA] = [DATA].Product([EIGENVECTORS])
    WHERE [ID] = @PCAID
    
    -- Show Results
    PRINT '** Covariance Matrix'
    SELECT @COV = [COV]
    FROM [PCA]
    WHERE [ID] = @PCAID
    EXEC sqx.MatrixToTable @COV
    
    PRINT '** Means, StDevs, Eigen Values and Variance Explained'
    SELECT m.ID FieldNumber, m.Value Mean, s.Value [StDev]
    , a.[Value] [EigenValue], b.[Value] [VarianceExplained]
    FROM sqx.VectorTable(@MEANS) m
    INNER JOIN sqx.VectorTable(@STDEVS) s ON m.ID = s.ID
    INNER JOIN [sqx].[VectorTable](@EIGENVALUES) a ON m.ID = a.ID
    INNER JOIN [sqx].[VectorTable](@VAREXPLAINED) b ON m.[ID] = b.[ID]
    ORDER BY b.[Value] DESC
    
    PRINT '** Eigen Vectors'
    EXEC sqx.MatrixToTable @EIGENVECTORS
    
    PRINT '** Show Data'
    DROP TABLE IF EXISTS #TransformedData
    CREATE TABLE #TransformedData
    ([ID] int NOT NULL PRIMARY KEY CLUSTERED
    ,[1] float,[2] float,[3] float,[4] float)
    
    SELECT @DATA = [DATA]
    FROM [PCA]
    WHERE [ID] = @PCAID
    
    INSERT #TransformedData
    EXEC [sqx].[MatrixToTable] @DATA
    
    SELECT t.*, s.*
    FROM #TransformedData t, #SourceData s
    WHERE t.[ID] = s.[ID]
    
    DROP TABLE IF EXISTS #TransformedData
    DROP TABLE IF EXISTS #SourceData
    -- And it's all saved in PCA table except Source Data
    /*
    PCA Rotations : 1600
    ** Covariance Matrix
    ID          1                      2                      3                      4
    ----------- ---------------------- ---------------------- ---------------------- ----------------------
    1           1                      0.146574230062663      0.84786268242462       0.813104741419685
    2           0.146574230062663      1                      -0.175673686188737     -0.0918692447874022
    3           0.84786268242462       -0.175673686188737     1                      0.986243572832952
    4           0.813104741419685      -0.0918692447874022    0.986243572832952      1
    
    ** Means, StDevs, Eigen Values and Variance Explained
    FieldNumber Mean                   StDev                  EigenValue             VarianceExplained
    ----------- ---------------------- ---------------------- ---------------------- ----------------------
    1           5.85555555555556       0.802926745117319      2.77035570477325       0.692588926193314
    2           3.16666666666667       0.459468291736341      1.06042756746187       0.265106891865467
    3           3.8                    1.77576274691563       0.166001159367613      0.0415002898419033
    4           1.28888888888889       0.877426089884428      0.00321556839726321    0.000803892099315804
    
    ** Eigen Vectors
    ID          1                      2                      3                      4
    ----------- ---------------------- ---------------------- ---------------------- ----------------------
    1           0.549818337635731      0.239524007501859      -0.782298471507741     0.168336409924838
    2           -0.043808150553534     0.963742668683788      0.24202609118867       -0.103461517905271
    3           0.59394004579127       -0.110213667522951     0.218796347641008      -0.766300416126443
    4           0.585675561680717      -0.0410032423064341    0.530622870709076      0.611344616113363
    
    ** Show Data
    ID          1                      2                      3                      4                      ID                   SourceID    1                                       2                                       3                                       4                                       5
    ----------- ---------------------- ---------------------- ---------------------- ---------------------- -------------------- ----------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- -----------
    1           -2.07871621536819      0.673622338895131      -0.0424866147460089    0.0435348734398709     1                    0           5.1                                     3.5                                     1.4                                     0.2                                     0
    2           -2.2108338748977       -0.226580717665575     -0.106479578770823     -0.0211532610246278    2                    1           4.9                                     3.1                                     1.5                                     0.1                                     0
    3           -2.00712349667912      1.2919978330871        0.18833534486084       0.0025033467578206     3                    2           5.1                                     3.8                                     1.5                                     0.3                                     0
    4           1.15568967230191       0.350269293429886      -0.919397986099257     -0.0785324780373214    4                    3           7.0                                     3.2                                     4.7                                     1.4                                     1
    5           -0.76779300117014      -2.67020755287846      0.00736139037068315    0.0115130957365036     5                    4           5.0                                     2.0                                     3.5                                     1.0                                     1
    6           0.702889293398109      -0.00277478560648754   0.40656253289585       -0.073605566375896     6                    5           5.9                                     3.2                                     4.8                                     1.8                                     1
    7           1.83587125085685       0.219111861160725      0.640694005278087      -0.0423781588211668    7                    6           6.3                                     3.3                                     6.0                                     2.5                                     2
    8           1.34758960781168       0.148247487518865      -0.0201090024812672    0.0620761484510532     8                    7           6.5                                     3.2                                     5.1                                     2.0                                     2
    9           2.02242676374659       0.216314242058818      -0.154480091308105     0.0960419998737658     9                    8           6.9                                     3.2                                     5.7                                     2.3                                     2*/
    
See Also