Re: Help with statistics windowing function

  • From: Luis Santos <lsantos@xxxxxxxxx>
  • To: harel.safra@xxxxxxxxx
  • Date: Sun, 25 Nov 2018 18:27:26 -0200

I included your tip along with the PCT_RANK function, showing that it gives
a real normalization.

U71013576@P00MDS.brux0333 [11g]> r

  1  select val
  2  , avg(val) over ()
  3  , val - avg(val) over ()
  4  , min(val) over ()
  5  , max(val) over ()
  6  , PERCENT_RANK() over (order by val)-0.5 PCT_RANK
  7  , (val - MIN(val) OVER())/ ( MAX(val) OVER() - MIN(val) OVER() ) -
0.5 norm
  8* from test
       VAL AVG(VAL)OVER() VAL-AVG(VAL)OVER() MIN(VAL)OVER()
MAX(VAL)OVER()  PCT_RANK      NORM
---------- -------------- ------------------ -------------- --------------
--------- ---------
         8          44,25             -36,25              8
 91    -0,500    -0,500
        12          44,25             -32,25              8
 91    -0,447    -0,452
        14          44,25             -30,25              8
 91    -0,395    -0,428
        14          44,25             -30,25              8
 91    -0,395    -0,428
        20          44,25             -24,25              8
 91    -0,289    -0,355
        25          44,25             -19,25              8
 91    -0,237    -0,295
        26          44,25             -18,25              8
 91    -0,184    -0,283
        32          44,25             -12,25              8
 91    -0,132    -0,211
        34          44,25             -10,25              8
 91    -0,079    -0,187
        35          44,25              -9,25              8
 91    -0,026    -0,175
        35          44,25              -9,25              8
 91    -0,026    -0,175
        49          44,25               4,75              8
 91     0,079    -0,006
        64          44,25              19,75              8
 91     0,132     0,175
        65          44,25              20,75              8
 91     0,184     0,187
        65          44,25              20,75              8
 91     0,184     0,187
        70          44,25              25,75              8
 91     0,289     0,247
        73          44,25              28,75              8
 91     0,342     0,283
        73          44,25              28,75              8
 91     0,342     0,283
        80          44,25              35,75              8
 91     0,447     0,367
        91          44,25              46,75              8
 91     0,500     0,500
20 rows selected.


U71013576@P00MDS.brux0333 [11g]> update test set val=910 where val=91;
1 row updated.



       VAL AVG(VAL)OVER() VAL-AVG(VAL)OVER() MIN(VAL)OVER()
MAX(VAL)OVER()  PCT_RANK      NORM
---------- -------------- ------------------ -------------- --------------
--------- ---------
         8           85,2              -77,2              8
910    -0,500    -0,500
        12           85,2              -73,2              8
910    -0,447    -0,496
        14           85,2              -71,2              8
910    -0,395    -0,493
        14           85,2              -71,2              8
910    -0,395    -0,493
        20           85,2              -65,2              8
910    -0,289    -0,487
        25           85,2              -60,2              8
910    -0,237    -0,481
        26           85,2              -59,2              8
910    -0,184    -0,480
        32           85,2              -53,2              8
910    -0,132    -0,473
        34           85,2              -51,2              8
910    -0,079    -0,471
        35           85,2              -50,2              8
910    -0,026    -0,470
        35           85,2              -50,2              8
910    -0,026    -0,470
        49           85,2              -36,2              8
910     0,079    -0,455
        64           85,2              -21,2              8
910     0,132    -0,438
        65           85,2              -20,2              8
910     0,184    -0,437
        65           85,2              -20,2              8
910     0,184    -0,437
        70           85,2              -15,2              8
910     0,289    -0,431
        73           85,2              -12,2              8
910     0,342    -0,428
        73           85,2              -12,2              8
910     0,342    -0,428
        80           85,2               -5,2              8
910     0,447    -0,420
       910           85,2              824,8              8
910     0,500     0,500
20 rows selected.


*--*
*Att*


*Luis Santos*



Em dom, 25 de nov de 2018 às 18:22, Luis Santos <lsantos@xxxxxxxxx>
escreveu:

Perfect Harel! Thanks a lot!

*--*
*Att*


*Luis Santos*



Em dom, 25 de nov de 2018 às 18:04, Harel Safra <harel.safra@xxxxxxxxx>
escreveu:

You normalize the values to a [0..n] range and them divide by the range
size.
This should work, the trunc is to make the output more readable.

SELECT
    val,
    trunc((val - MIN(val) OVER())/ ( MAX(val) OVER() - MIN(val) OVER()
),3) norm
FROM
    test order by val;

Harel Safra

On Sun, Nov 25, 2018 at 9:47 PM Luis Santos <lsantos@xxxxxxxxx> wrote:

Thanks for all replies. I forgot to mention that I really could use an
inline view or a with subfactoring view. But, to be frank, I was playing
with this because I want a way to normalize the data, creating a rank.

I discover the PERCENT_RANK windowing function. And applied a minus 0.5
to it to get this results.

U71013576@P00MDS.brux0333 [11g]> col pct_rank format 9990D999
U71013576@P00MDS.brux0333 [11g]>
U71013576@P00MDS.brux0333 [11g]> select val
  2  , avg(val) over ()
  3  , val - avg(val) over ()
  4  , min(val) over ()
  5  , max(val) over ()
  6  , PERCENT_RANK() over (order by val)-0.5 PCT_RANK
  7  from test
  8  /
       VAL AVG(VAL)OVER() VAL-AVG(VAL)OVER() MIN(VAL)OVER()
MAX(VAL)OVER()  PCT_RANK
---------- -------------- ------------------ --------------
-------------- ---------
         8          44,25             -36,25              8
 91    -0,500
        12          44,25             -32,25              8
 91    -0,447
        14          44,25             -30,25              8
 91    -0,395
        14          44,25             -30,25              8
 91    -0,395
        20          44,25             -24,25              8
 91    -0,289
        25          44,25             -19,25              8
 91    -0,237
        26          44,25             -18,25              8
 91    -0,184
        32          44,25             -12,25              8
 91    -0,132
        34          44,25             -10,25              8
 91    -0,079
        35          44,25              -9,25              8
 91    -0,026
        35          44,25              -9,25              8
 91    -0,026
        49          44,25               4,75              8
 91     0,079
        64          44,25              19,75              8
 91     0,132
        65          44,25              20,75              8
 91     0,184
        65          44,25              20,75              8
 91     0,184
        70          44,25              25,75              8
 91     0,289
        73          44,25              28,75              8
 91     0,342
        73          44,25              28,75              8
 91     0,342
        80          44,25              35,75              8
 91     0,447
        91          44,25              46,75              8
 91     0,500
20 rows selected.


But the "normalized" scale on PCT_RANK column is positional only. If I
change the MAX value to a real big one the value for PCT_RANK don't change.

U71013576@P00MDS.brux0333 [11g]> update test set val=9100 where val=91;
1 row updated.
U71013576@P00MDS.brux0333 [11g]> col pct_rank format 9990D999
U71013576@P00MDS.brux0333 [11g]>
U71013576@P00MDS.brux0333 [11g]> select val
  2  , avg(val) over ()
  3  , val - avg(val) over ()
  4  , min(val) over ()
  5  , max(val) over ()
  6  , PERCENT_RANK() over (order by val)-0.5 PCT_RANK
  7  from test
  8  /
       VAL AVG(VAL)OVER() VAL-AVG(VAL)OVER() MIN(VAL)OVER()
MAX(VAL)OVER()  PCT_RANK
---------- -------------- ------------------ --------------
-------------- ---------
         8          494,7             -486,7              8
 9100    -0,500
        12          494,7             -482,7              8
 9100    -0,447
        14          494,7             -480,7              8
 9100    -0,395
        14          494,7             -480,7              8
 9100    -0,395
        20          494,7             -474,7              8
 9100    -0,289
        25          494,7             -469,7              8
 9100    -0,237
        26          494,7             -468,7              8
 9100    -0,184
        32          494,7             -462,7              8
 9100    -0,132
        34          494,7             -460,7              8
 9100    -0,079
        35          494,7             -459,7              8
 9100    -0,026
        35          494,7             -459,7              8
 9100    -0,026
        49          494,7             -445,7              8
 9100     0,079
        64          494,7             -430,7              8
 9100     0,132
        65          494,7             -429,7              8
 9100     0,184
        65          494,7             -429,7              8
 9100     0,184
        70          494,7             -424,7              8
 9100     0,289
        73          494,7             -421,7              8
 9100     0,342
        73          494,7             -421,7              8
 9100     0,342
        80          494,7             -414,7              8
 9100     0,447
      9100          494,7             8605,3              8
 9100     0,500
20 rows selected.


Is there a better way to normalize a list of values, using a windowing
function, to a [0..1] scale?

*--*
*Att*


*Luis Santos*


Em dom, 25 de nov de 2018 às 17:21, Harel Safra <harel.safra@xxxxxxxxx>
escreveu:

SELECT
    t.*,
    MIN(dtm) OVER(),
    MAX(dtm) OVER()
FROM
    (
        SELECT
            val,
            AVG(val) OVER() avg,
            val - AVG(val) OVER() dtm,
            MIN(val) OVER() min,
            MAX(val) OVER() max
        FROM
            test
    ) t;

Harel

On Sun, Nov 25, 2018 at 8:59 PM Luis Santos <lsantos@xxxxxxxxx> wrote:

I have the following table, with 20 random values.

U71013576@P00MDS.brux0333 [11g]>  select val from test;
       VAL
----------
        65
        70
        12
        65
        91
        25
         8
        73
        35
        20
        26
        14
        73
        35
        49
        80
        34
        14
        32
        64
20 rows selected.


With the following query I can get the average, the min and max value,
and a calculated distance from mean.

U71013576@P00MDS.brux0333 [11g]> select val
  2  , avg(val) over () AVG
  3  , val - avg(val) over () DTM
  4  , min(val) over () MIN
  5  , max(val) over () MAX
  6  from test;



       VAL        AVG        DTM        MIN        MAX
---------- ---------- ---------- ---------- ----------
        65      44,25      20,75          8         91
        70      44,25      25,75          8         91
        12      44,25     -32,25          8         91
        65      44,25      20,75          8         91
        91      44,25      46,75          8         91
        25      44,25     -19,25          8         91
         8      44,25     -36,25          8         91
        73      44,25      28,75          8         91
        35      44,25      -9,25          8         91
        20      44,25     -24,25          8         91
        26      44,25     -18,25          8         91
        14      44,25     -30,25          8         91
        73      44,25      28,75          8         91
        35      44,25      -9,25          8         91
        49      44,25       4,75          8         91
        80      44,25      35,75          8         91
        34      44,25     -10,25          8         91
        14      44,25     -30,25          8         91
        32      44,25     -12,25          8         91
        64      44,25      19,75          8         91
20 rows selected.



I want to get, in also windowing fashion, the MIN/MAX values from the
DTM column. But...

select val
, avg(val) over ()
, val - avg(val) over ()
, min(val) over ()
, max(val) over ()
, min(val - avg(val) over ()) over()
, max(val - avg(val) over ()) over()
from test
/



, min(val - avg(val) over ()) over()
            *
ERROR at line 6:
ORA-30483: window  functions are not allowed here




*--*
*Att*


*Luis Santos*


Other related posts: