(maxima.info)Functions and Variables for descriptive statistics


Next: Functions and Variables for statistical graphs Prev: Functions and Variables for data manipulation Up: descriptive-pkg
Enter node , (file) or (file)node

50.3 Functions and Variables for descriptive statistics
=======================================================

 -- Function: mean
          mean (<list>)
          mean (<matrix>)

     This is the sample mean, defined as
                                 n
                               ====
                       _   1   \
                       x = -    >    x
                           n   /      i
                               ====
                               i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) mean (s1);
                                         471
          (%o3)                          ---
                                         100
          (%i4) %, numer;
          (%o4)                         4.71
          (%i5) s2 : read_matrix (file_search ("wind.data"))$
          (%i6) mean (s2);
          (%o6)     [9.9485, 10.1607, 10.8685, 15.7166, 14.8441]

 -- Function: var
          var (<list>)
          var (<matrix>)

     This is the sample variance, defined as
                               n
                             ====
                     2   1   \          _ 2
                    s  = -    >    (x - x)
                         n   /       i
                             ====
                             i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) var (s1), numer;
          (%o3)                   8.425899999999999

     See also function 'var1'.

 -- Function: var1
          var1 (<list>)
          var1 (<matrix>)

     This is the sample variance, defined as
                               n
                             ====
                         1   \          _ 2
                        ---   >    (x - x)
                        n-1  /       i
                             ====
                             i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) var1 (s1), numer;
          (%o3)                    8.5110101010101
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) var1 (s2);
          (%o5) [17.39586540404041, 15.13912778787879, 15.63204924242424,
                                      32.50152569696971, 24.66977392929294]

     See also function 'var'.

 -- Function: std
          std (<list>)
          std (<matrix>)

     This is the square root of the function 'var', the variance with
     denominator n.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) std (s1), numer;
          (%o3)                   2.902740084816414
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) std (s2);
          (%o5) [4.149928523480858, 3.871399812729241, 3.933920277534866,
                                      5.672434260526957, 4.941970881136392]

     See also functions 'var' and 'std1'.

 -- Function: std1
          std1 (<list>)
          std1 (<matrix>)

     This is the square root of the function 'var1', the variance with
     denominator n-1.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) std1 (s1), numer;
          (%o3)                   2.917363553109228
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) std1 (s2);
          (%o5) [4.170835096721089, 3.89090320978032, 3.953738641137555,
                                      5.701010936401517, 4.966867617451963]

     See also functions 'var1' and 'std'.

 -- Function: noncentral_moment
          noncentral_moment (<list>, <k>)
          noncentral_moment (<matrix>, <k>)

     The non central moment of order k, defined as
                                 n
                               ====
                           1   \      k
                           -    >    x
                           n   /      i
                               ====
                               i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) noncentral_moment (s1, 1), numer; /* the mean */
          (%o3)                         4.71
          (%i5) s2 : read_matrix (file_search ("wind.data"))$
          (%i6) noncentral_moment (s2, 5);
          (%o6) [319793.8724761505, 320532.1923892463,
                391249.5621381556, 2502278.205988911, 1691881.797742255]

     See also function 'central_moment'.

 -- Function: central_moment
          central_moment (<list>, <k>)
          central_moment (<matrix>, <k>)

     The central moment of order k, defined as
                              n
                            ====
                        1   \          _ k
                        -    >    (x - x)
                        n   /       i
                            ====
                            i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) central_moment (s1, 2), numer; /* the variance */
          (%o3)                   8.425899999999999
          (%i5) s2 : read_matrix (file_search ("wind.data"))$
          (%i6) central_moment (s2, 3);
          (%o6) [11.29584771375004, 16.97988248298583, 5.626661952750102,
                                       37.5986572057918, 25.85981904394192]

     See also functions 'central_moment' and 'mean'.

 -- Function: cv
          cv (<list>)
          cv (<matrix>)

     The variation coefficient is the quotient between the sample
     standard deviation ('std') and the 'mean',

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) cv (s1), numer;
          (%o3)                   .6193977819764815
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) cv (s2);
          (%o5) [.4192426091090204, .3829365309260502, 0.363779605385983,
                                      .3627381836021478, .3346021393989506]

     See also functions 'std' and 'mean'.

 -- Function: smin
          smin (<list>)
          smin (<matrix>)

     This is the minimum value of the sample <list>.  When the argument
     is a matrix, 'smin' returns a list containing the minimum values of
     the columns, which are associated to statistical variables.

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) smin (s1);
          (%o3)                           0
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) smin (s2);
          (%o5)             [0.58, 0.5, 2.67, 5.25, 5.17]

     See also function 'smax'.

 -- Function: smax
          smax (<list>)
          smax (<matrix>)

     This is the maximum value of the sample <list>.  When the argument
     is a matrix, 'smax' returns a list containing the maximum values of
     the columns, which are associated to statistical variables.

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) smax (s1);
          (%o3)                           9
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) smax (s2);
          (%o5)          [20.25, 21.46, 20.04, 29.63, 27.63]

     See also function 'smin'.

 -- Function: range
          range (<list>)
          range (<matrix>)

     The range is the difference between the extreme values.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) range (s1);
          (%o3)                           9
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) range (s2);
          (%o5)          [19.67, 20.96, 17.37, 24.38, 22.46]

 -- Function: quantile
          quantile (<list>, <p>)
          quantile (<matrix>, <p>)

     This is the <p>-quantile, with <p> a number in [0, 1], of the
     sample <list>.  Although there are several definitions for the
     sample quantile (Hyndman, R. J., Fan, Y. (1996) <Sample quantiles
     in statistical packages>.  American Statistician, 50, 361-365), the
     one based on linear interpolation is implemented in package Note:
     descriptive-pkg

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) /* 1st and 3rd quartiles */
                   [quantile (s1, 1/4), quantile (s1, 3/4)], numer;
          (%o3)                      [2.0, 7.25]
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) quantile (s2, 1/4);
          (%o5)    [7.2575, 7.477500000000001, 7.82, 11.28, 11.48]

 -- Function: median
          median (<list>)
          median (<matrix>)

     Once the sample is ordered, if the sample size is odd the median is
     the central value, otherwise it is the mean of the two central
     values.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) median (s1);
                                          9
          (%o3)                           -
                                          2
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) median (s2);
          (%o5)         [10.06, 9.855, 10.73, 15.48, 14.105]

     The median is the 1/2-quantile.

     See also function 'quantile'.

 -- Function: qrange
          qrange (<list>)
          qrange (<matrix>)

     The interquartilic range is the difference between the third and
     first quartiles, 'quantile(<list>,3/4) - quantile(<list>,1/4)',

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) qrange (s1);
                                         21
          (%o3)                          --
                                         4
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) qrange (s2);
          (%o5) [5.385, 5.572499999999998, 6.022500000000001,
                                      8.729999999999999, 6.649999999999999]

     See also function 'quantile'.

 -- Function: mean_deviation
          mean_deviation (<list>)
          mean_deviation (<matrix>)

     The mean deviation, defined as
                               n
                             ====
                         1   \          _
                         -    >    |x - x|
                         n   /       i
                             ====
                             i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) mean_deviation (s1);
                                         51
          (%o3)                          --
                                         20
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) mean_deviation (s2);
          (%o5) [3.287959999999999, 3.075342, 3.23907, 4.715664000000001,
                                                         4.028546000000002]

     See also function 'mean'.

 -- Function: median_deviation
          median_deviation (<list>)
          median_deviation (<matrix>)

     The median deviation, defined as
                           n
                         ====
                     1   \
                     -    >    |x - med|
                     n   /       i
                         ====
                         i = 1
     where 'med' is the median of <list>.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) median_deviation (s1);
                                          5
          (%o3)                           -
                                          2
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) median_deviation (s2);
          (%o5)           [2.75, 2.755, 3.08, 4.315, 3.31]

     See also function 'mean'.

 -- Function: harmonic_mean
          harmonic_mean (<list>)
          harmonic_mean (<matrix>)

     The harmonic mean, defined as
                            n
                         --------
                          n
                         ====
                         \     1
                          >    --
                         /     x
                         ====   i
                         i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) y : [5, 7, 2, 5, 9, 5, 6, 4, 9, 2, 4, 2, 5]$
          (%i3) harmonic_mean (y), numer;
          (%o3)                   3.901858027632205
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) harmonic_mean (s2);
          (%o5) [6.948015590052786, 7.391967752360356, 9.055658197151745,
                                      13.44199028193692, 13.01439145898509]

     See also functions 'mean' and 'geometric_mean'.

 -- Function: geometric_mean
          geometric_mean (<list>)
          geometric_mean (<matrix>)

     The geometric mean, defined as
                           /  n      \ 1/n
                           | /===\   |
                           |  ! !    |
                           |  ! !  x |
                           |  ! !   i|
                           | i = 1   |
                           \         /

     Example:

          (%i1) load ("descriptive")$
          (%i2) y : [5, 7, 2, 5, 9, 5, 6, 4, 9, 2, 4, 2, 5]$
          (%i3) geometric_mean (y), numer;
          (%o3)                   4.454845412337012
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) geometric_mean (s2);
          (%o5) [8.82476274347979, 9.22652604739361, 10.0442675714889,
                                      14.61274126349021, 13.96184163444275]

     See also functions 'mean' and 'harmonic_mean'.

 -- Function: kurtosis
          kurtosis (<list>)
          kurtosis (<matrix>)

     The kurtosis coefficient, defined as
                              n
                            ====
                      1     \          _ 4
                     ----    >    (x - x)  - 3
                        4   /       i
                     n s    ====
                            i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) kurtosis (s1), numer;
          (%o3)                  - 1.273247946514421
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) kurtosis (s2);
          (%o5) [- .2715445622195385, 0.119998784429451,
               - .4275233490482861, - .6405361979019522, - .4952382132352935]

     See also functions 'mean', 'var' and 'skewness'.

 -- Function: skewness
          skewness (<list>)
          skewness (<matrix>)

     The skewness coefficient, defined as
                              n
                            ====
                      1     \          _ 3
                     ----    >    (x - x)
                        3   /       i
                     n s    ====
                            i = 1

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) skewness (s1), numer;
          (%o3)                  .009196180476450424
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) skewness (s2);
          (%o5) [.1580509020000978, .2926379232061854, .09242174416107717,
                                      .2059984348148687, .2142520248890831]

     See also functions 'mean',, 'var' and 'kurtosis'.

 -- Function: pearson_skewness
          pearson_skewness (<list>)
          pearson_skewness (<matrix>)

     Pearson's skewness coefficient, defined as
                          _
                       3 (x - med)
                       -----------
                            s
     where <med> is the median of <list>.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) pearson_skewness (s1), numer;
          (%o3)                   .2159484029093895
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) pearson_skewness (s2);
          (%o5) [- .08019976629211892, .2357036272952649,
                   .1050904062491204, .1245042340592368, .4464181795804519]

     See also functions 'mean', 'var' and 'median'.

 -- Function: quartile_skewness
          quartile_skewness (<list>)
          quartile_skewness (<matrix>)

     The quartile skewness coefficient, defined as
                         c    - 2 c    + c
                          3/4      1/2    1/4
                         --------------------
                             c    - c
                              3/4    1/4
     where c_p is the <p>-quantile of sample <list>.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s1 : read_list (file_search ("pidigits.data"))$
          (%i3) quartile_skewness (s1), numer;
          (%o3)                  .04761904761904762
          (%i4) s2 : read_matrix (file_search ("wind.data"))$
          (%i5) quartile_skewness (s2);
          (%o5) [- 0.0408542246982353, .1467025572005382,
                 0.0336239103362392, .03780068728522298, .2105263157894735]

     See also function 'quantile'.

 -- Function: km
          km (<list>, <option> ...)
          km (<matrix>, <option> ...)

     Kaplan Meier estimator of the survival, or reliability, function
     S(x)=1-F(x).

     Data can be introduced as a list of pairs, or as a two column
     matrix.  The first component is the observed time, and the second
     component a censoring index (1 = non censored, 0 = right censored).

     The optional argument is the name of the variable in the returned
     expression, which is <x> by default.

     Examples:

     Sample as a list of pairs.

          (%i1) load ("descriptive")$
          (%i2) S: km([[2,1], [3,1], [5,0], [8,1]]);
                                 charfun((3 <= x) and (x < 8))
          (%o2) charfun(x < 0) + -----------------------------
                                               2
                          3 charfun((2 <= x) and (x < 3))
                        + -------------------------------
                                         4
                        + charfun((0 <= x) and (x < 2))
          (%i3) load ("draw")$
          (%i4) draw2d(
                  line_width = 3, grid = true,
                  explicit(S, x, -0.1, 10))$

     Estimate survival probabilities.

          (%i1) load ("descriptive")$
          (%i2) S(t):= ''(km([[2,1], [3,1], [5,0], [8,1]], t)) $
          (%i3) S(6);
                                      1
          (%o3)                       -
                                      2

 -- Function: cdf_empirical
          cdf_empirical (<list>, <option> ...)
          cdf_empirical (<matrix>, <option> ...)

     Empirical distribution function F(x).

     Data can be introduced as a list of numbers, or as a one column
     matrix.

     The optional argument is the name of the variable in the returned
     expression, which is <x> by default.

     Example:

     Empirical distribution function.

          (%i1) load ("descriptive")$
          (%i2) F(x):= ''(cdf_empirical([1,3,3,5,7,7,7,8,9]));
          (%o2) F(x) := (charfun(x >= 9) + charfun(x >= 8)
                         + 3 charfun(x >= 7) + charfun(x >= 5)
                         + 2 charfun(x >= 3) + charfun(x >= 1))/9
          (%i3) F(6);
                                     4
          (%o3)                      -
                                     9
          (%i4) load(draw)$
          (%i5) draw2d(
                  line_width = 3,
                  grid       = true,
                  explicit(F(z), z, -2, 12)) $

 -- Function: cov (<matrix>)
     The covariance matrix of the multivariate sample, defined as
                        n
                       ====
                    1  \           _        _
                S = -   >    (X  - X) (X  - X)'
                    n  /       j        j
                       ====
                       j = 1
     where X_j is the j-th row of the sample matrix.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) fpprintprec : 7$  /* change precision for pretty output */
          (%i4) cov (s2);
                [ 17.22191  13.61811  14.37217  19.39624  15.42162 ]
                [                                                  ]
                [ 13.61811  14.98774  13.30448  15.15834  14.9711  ]
                [                                                  ]
          (%o4) [ 14.37217  13.30448  15.47573  17.32544  16.18171 ]
                [                                                  ]
                [ 19.39624  15.15834  17.32544  32.17651  20.44685 ]
                [                                                  ]
                [ 15.42162  14.9711   16.18171  20.44685  24.42308 ]

     See also function 'cov1'.

 -- Function: cov1 (<matrix>)
     The covariance matrix of the multivariate sample, defined as
                        n
                       ====
                   1   \           _        _
             S  = ---   >    (X  - X) (X  - X)'
              1   n-1  /       j        j
                       ====
                       j = 1
     where X_j is the j-th row of the sample matrix.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) fpprintprec : 7$ /* change precision for pretty output */
          (%i4) cov1 (s2);
                [ 17.39587  13.75567  14.51734  19.59216  15.5774  ]
                [                                                  ]
                [ 13.75567  15.13913  13.43887  15.31145  15.12232 ]
                [                                                  ]
          (%o4) [ 14.51734  13.43887  15.63205  17.50044  16.34516 ]
                [                                                  ]
                [ 19.59216  15.31145  17.50044  32.50153  20.65338 ]
                [                                                  ]
                [ 15.5774   15.12232  16.34516  20.65338  24.66977 ]

     See also function 'cov'.

 -- Function: global_variances
          global_variances (<matrix>)
          global_variances (<matrix>, <options> ...)

     Function 'global_variances' returns a list of global variance
     measures:

        * <total variance>: 'trace(S_1)',
        * <mean variance>: 'trace(S_1)/p',
        * <generalized variance>: 'determinant(S_1)',
        * <generalized standard deviation>: 'sqrt(determinant(S_1))',
        * <efective variance> 'determinant(S_1)^(1/p)', (defined in:
          Pen~a, D. (2002) <Ana'lisis de datos multivariantes>;
          McGraw-Hill, Madrid.)
        * <efective standard deviation>: 'determinant(S_1)^(1/(2*p))'.
     where <p> is the dimension of the multivariate random variable and
     S_1 the covariance matrix returned by 'cov1'.

     Option:

        * ''data', default ''true', indicates whether the input matrix
          contains the sample data, in which case the covariance matrix
          'cov1' must be calculated, or not, and then the covariance
          matrix (symmetric) must be given, instead of the data.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) global_variances (s2);
          (%o3) [105.338342060606, 21.06766841212119, 12874.34690469686,
                   113.4651792608501, 6.636590811800795, 2.576158149609762]

     Calculate the 'global_variances' from the covariance matrix.

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) s : cov1 (s2)$
          (%i4) global_variances (s, data=false);
          (%o4) [105.338342060606, 21.06766841212119, 12874.34690469686,
                   113.4651792608501, 6.636590811800795, 2.576158149609762]

     See also 'cov' and 'cov1'.

 -- Function: cor
          cor (<matrix>)
          cor (<matrix>, <logical_value>)

     The correlation matrix of the multivariate sample.

     Option:

        * ''data', default ''true', indicates whether the input matrix
          contains the sample data, in which case the covariance matrix
          'cov1' must be calculated, or not, and then the covariance
          matrix (symmetric) must be given, instead of the data.

     Example:

          (%i1) load ("descriptive")$
          (%i2) fpprintprec : 7 $
          (%i3) s2 : read_matrix (file_search ("wind.data"))$
          (%i4) cor (s2);
                [   1.0     .8476339  .8803515  .8239624  .7519506 ]
                [                                                  ]
                [ .8476339    1.0     .8735834  .6902622  0.782502 ]
                [                                                  ]
          (%o4) [ .8803515  .8735834    1.0     .7764065  .8323358 ]
                [                                                  ]
                [ .8239624  .6902622  .7764065    1.0     .7293848 ]
                [                                                  ]
                [ .7519506  0.782502  .8323358  .7293848    1.0    ]

     Calculate de correlation matrix from the covariance matrix.

          (%i1) load ("descriptive")$
          (%i2) fpprintprec : 7 $
          (%i3) s2 : read_matrix (file_search ("wind.data"))$
          (%i4) s : cov1 (s2)$
          (%i5) cor (s, data=false); /* this is faster */
                [   1.0     .8476339  .8803515  .8239624  .7519506 ]
                [                                                  ]
                [ .8476339    1.0     .8735834  .6902622  0.782502 ]
                [                                                  ]
          (%o5) [ .8803515  .8735834    1.0     .7764065  .8323358 ]
                [                                                  ]
                [ .8239624  .6902622  .7764065    1.0     .7293848 ]
                [                                                  ]
                [ .7519506  0.782502  .8323358  .7293848    1.0    ]

     See also 'cov' and 'cov1'.

 -- Function: list_correlations
          list_correlations (<matrix>)
          list_correlations (<matrix>, <options> ...)

     Function 'list_correlations' returns a list of correlation
     measures:

        * <precision matrix>: the inverse of the covariance matrix S_1,
                      -1     ij
                     S   = (s  )
                      1         i,j = 1,2,...,p

        * <multiple correlation vector>: (R_1^2, R_2^2, ..., R_p^2),
          with
                      2          1
                     R  = 1 - -------
                      i        ii
                              s   s
                                   ii
          being an indicator of the goodness of fit of the linear
          multivariate regression model on X_i when the rest of
          variables are used as regressors.

        * <partial correlation matrix>: with element (i, j) being
                                        ij
                                       s
                     r        = - ------------
                      ij.rest     / ii  jj\ 1/2
                                  |s   s  |
                                  \       /

     Option:

        * ''data', default ''true', indicates whether the input matrix
          contains the sample data, in which case the covariance matrix
          'cov1' must be calculated, or not, and then the covariance
          matrix (symmetric) must be given, instead of the data.

     Example:

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) z : list_correlations (s2)$
          (%i4) fpprintprec : 5$ /* for pretty output */
          (%i5) z[1];  /* precision matrix */
                [  .38486   - .13856   - .15626   - .10239    .031179  ]
                [                                                      ]
                [ - .13856   .34107    - .15233    .038447   - .052842 ]
                [                                                      ]
          (%o5) [ - .15626  - .15233    .47296    - .024816  - .10054  ]
                [                                                      ]
                [ - .10239   .038447   - .024816   .10937    - .034033 ]
                [                                                      ]
                [ .031179   - .052842  - .10054   - .034033   .14834   ]
          (%i6) z[2];  /* multiple correlation vector */
          (%o6)      [.85063, .80634, .86474, .71867, .72675]
          (%i7) z[3];  /* partial correlation matrix */
                [  - 1.0     .38244   .36627   .49908   - .13049 ]
                [                                                ]
                [  .38244    - 1.0    .37927  - .19907   .23492  ]
                [                                                ]
          (%o7) [  .36627    .37927   - 1.0    .10911    .37956  ]
                [                                                ]
                [  .49908   - .19907  .10911   - 1.0     .26719  ]
                [                                                ]
                [ - .13049   .23492   .37956   .26719    - 1.0   ]

     See also 'cov' and 'cov1'.

 -- Function: principal_components
          principal_components (<matrix>)
          principal_components (<matrix>, <options> ...)

     Calculates the principal componentes of a multivariate sample.
     Principal components are used in multivariate statistical analysis
     to reduce the dimensionality of the sample.

     Option:

        * ''data', default ''true', indicates whether the input matrix
          contains the sample data, in which case the covariance matrix
          'cov1' must be calculated, or not, and then the covariance
          matrix (symmetric) must be given, instead of the data.

     The output of function 'principal_components' is a list with the
     following results:

        * variances of the principal components,
        * percentage of total variance explained by each principal
          component,
        * rotation matrix.

     Examples:

     In this sample, the first component explains 83.13 per cent of
     total variance.

          (%i1) load ("descriptive")$
          (%i2) s2 : read_matrix (file_search ("wind.data"))$
          (%i3) fpprintprec:4 $
          (%i4) res: principal_components(s2);
          0 errors, 0 warnings
          (%o4) [[87.57, 8.753, 5.515, 1.889, 1.613],
          [83.13, 8.31, 5.235, 1.793, 1.531],
          [ .4149  .03379   - .4757  - 0.581   - .5126 ]
          [                                            ]
          [ 0.369  - .3657  - .4298   .7237    - .1469 ]
          [                                            ]
          [ .3959  - .2178  - .2181  - .2749    .8201  ]]
          [                                            ]
          [ .5548   .7744    .1857    .2319    .06498  ]
          [                                            ]
          [ .4765  - .4669   0.712   - .09605  - .1969 ]
          (%i5) /* accumulated percentages  */
              block([ap: copy(res[2])],
                for k:2 thru length(ap) do ap[k]: ap[k]+ap[k-1],
                ap);
          (%o5)                 [83.13, 91.44, 96.68, 98.47, 100.0]
          (%i6) /* sample dimension */
                p: length(first(res));
          (%o6)                                  5
          (%i7) /* plot percentages to select number of
                   principal components for further work */
               draw2d(
                  fill_density = 0.2,
                  apply(bars, makelist([k, res[2][k], 1/2], k, p)),
                  points_joined = true,
                  point_type    = filled_circle,
                  point_size    = 3,
                  points(makelist([k, res[2][k]], k, p)),
                  xlabel = "Variances",
                  ylabel = "Percentages",
                  xtics  = setify(makelist([concat("PC",k),k], k, p))) $

     In case de covariance matrix is known, it can be passed to the
     function, but option 'data=false' must be used.

          (%i1) load ("descriptive")$
          (%i2) S: matrix([1,-2,0],[-2,5,0],[0,0,2]);
                                          [  1   - 2  0 ]
                                          [             ]
          (%o2)                           [ - 2   5   0 ]
                                          [             ]
                                          [  0    0   2 ]
          (%i3) fpprintprec:4 $
          (%i4) /* the argumment is a covariance matrix */
                res: principal_components(S, data=false);
          0 errors, 0 warnings
                                                            [ - .3827  0.0  .9239 ]
                                                            [                     ]
          (%o4) [[5.828, 2.0, .1716], [72.86, 25.0, 2.145], [  .9239   0.0  .3827 ]]
                                                            [                     ]
                                                            [   0.0    1.0   0.0  ]
          (%i5) /* transformation to get the principal components
                   from original records */
                matrix([a1,b2,c3],[a2,b2,c2]).last(res);
                       [ .9239 b2 - .3827 a1  1.0 c3  .3827 b2 + .9239 a1 ]
          (%o5)        [                                                  ]
                       [ .9239 b2 - .3827 a2  1.0 c2  .3827 b2 + .9239 a2 ]


automatically generated by info2www version 1.2.2.9