[Libreoffice-commits] core.git: sc/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Fri Dec 21 21:58:31 UTC 2018


 sc/source/ui/StatisticsDialogs/SamplingDialog.cxx |   82 +++++++++++++++++++++-
 sc/source/ui/inc/SamplingDialog.hxx               |    1 
 2 files changed, 82 insertions(+), 1 deletion(-)

New commits:
commit 0a2533aacc2dc98790510fdafd144aad66f231f2
Author:     Eike Rathke <erack at redhat.com>
AuthorDate: Fri Dec 21 20:57:08 2018 +0100
Commit:     Eike Rathke <erack at redhat.com>
CommitDate: Fri Dec 21 22:58:04 2018 +0100

    Data -> Statistics: sample random values in random order
    
    The previous implementation sampled random values in the order of
    the population data. This may be unexpected and is also not what
    other spreadsheet implementations do. Instead, pick the random
    values in random order. Keeping order can be made an option as a
    future feature. The code is prepared to sample WR (WithReplacement)
    as well, in addition to the now (and previously) implemented WOR
    (WithOutReplacement).
    
    Change-Id: I83734d36605b28cf44c0cc2bbc2dfcafaef025f4
    Reviewed-on: https://gerrit.libreoffice.org/65559
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Jenkins

diff --git a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
index 1717baa1282e..b43cef257d4e 100644
--- a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
+++ b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
@@ -219,6 +219,86 @@ ScRange ScSamplingDialog::PerformPeriodicSampling(ScDocShell* pDocShell)
     return ScRange(mOutputAddress, ScAddress(outTab, outRow, outTab) );
 }
 
+/** Samples mpSampleSize values from each column of mInputRange in random order
+ *  and writes them starting at mOutputAddress, one output column per input
+ *  column and one output sheet per input sheet. Only WOR (WithOutReplacement)
+ *  is active; the WR (WithReplacement) path is prepared but not selectable.
+ *  @return the range from mOutputAddress to the last output column written;
+ *          its end row and sheet are one past the last ones written. */
+ScRange ScSamplingDialog::PerformRandomSampling(ScDocShell* pDocShell)
+{
+    ScAddress aStart = mInputRange.aStart;
+    ScAddress aEnd   = mInputRange.aEnd;
+    SCTAB outTab = mOutputAddress.Tab();
+    SCROW outRow = mOutputAddress.Row();
+    SCCOL outLastCol = mOutputAddress.Col(); // last output column written so far
+    const sal_Int64 nSampleSize = mpSampleSize->GetValue();
+    // This implementation groups by columns; rows or area could be other options.
+    const sal_Int64 nPopulationSize = aEnd.Row() - aStart.Row() + 1;
+
+    /* TODO: the previous implementation was WOR, we may want to offer WR too. */
+    bool bWithReplacement = false;
+
+    // WOR can't draw more than the population; the caller should catch that.
+    assert( bWithReplacement || nSampleSize <= nPopulationSize);
+    if (!bWithReplacement && nSampleSize > nPopulationSize)
+        // Would enter an endless loop below, bail out.
+        return ScRange( mOutputAddress);
+
+    for (SCTAB inTab = aStart.Tab(); inTab <= aEnd.Tab(); inTab++)
+    {
+        SCCOL outCol = mOutputAddress.Col();
+        for (SCCOL inCol = aStart.Col(); inCol <= aEnd.Col(); inCol++)
+        {
+            outRow = mOutputAddress.Row();
+            std::vector<bool> vUsed( nPopulationSize, false);
+            while ((outRow - mOutputAddress.Row()) < nSampleSize)
+            {
+                // [a,b] *both* inclusive
+                SCROW nRandom = comphelper::rng::uniform_int_distribution( aStart.Row(), aEnd.Row());
+                if (!bWithReplacement)
+                {
+                    nRandom -= aStart.Row(); // turn the sheet row into an index into vUsed
+                    if (vUsed[nRandom])
+                    {
+                        // Find a nearest unused one, preferring forwards.
+                        // Again: it's essential that the loop is entered only if
+                        // nSampleSize<=nPopulationSize, which is checked above.
+                        SCROW nBack = nRandom;
+                        SCROW nForw = nRandom;
+                        do
+                        {
+                            if (nForw < nPopulationSize - 1 && !vUsed[++nForw])
+                            {
+                                nRandom = nForw;
+                                break;
+                            }
+                            if (nBack > 0 && !vUsed[--nBack])
+                            {
+                                nRandom = nBack;
+                                break;
+                            }
+                        }
+                        while (true);
+                    }
+                    vUsed[nRandom] = true;
+                    nRandom += aStart.Row(); // back to a sheet row
+                }
+
+                const double fValue = mDocument->GetValue( ScAddress(inCol, nRandom, inTab) );
+                pDocShell->GetDocFunc().SetValueCell(ScAddress(outCol, outRow, outTab), fValue, true);
+                outRow++;
+            }
+            outLastCol = outCol;
+            outCol++;
+        }
+        outTab++;
+    }
+
+    // ScAddress takes (column, row, sheet): pass the column here, not the sheet.
+    return ScRange(mOutputAddress, ScAddress(outLastCol, outRow, outTab) );
+}
+
 ScRange ScSamplingDialog::PerformRandomSamplingKeepOrder(ScDocShell* pDocShell)
 {
     ScAddress aStart = mInputRange.aStart;
@@ -277,7 +357,7 @@ void ScSamplingDialog::PerformSampling()
 
     if (mpRandomMethodRadio->IsChecked())
     {
-        aModifiedRange = PerformRandomSamplingKeepOrder(pDocShell);
+        aModifiedRange = PerformRandomSampling(pDocShell);
     }
     else if (mpPeriodicMethodRadio->IsChecked())
     {
diff --git a/sc/source/ui/inc/SamplingDialog.hxx b/sc/source/ui/inc/SamplingDialog.hxx
index eb5f7310b857..4994f73cb8a4 100644
--- a/sc/source/ui/inc/SamplingDialog.hxx
+++ b/sc/source/ui/inc/SamplingDialog.hxx
@@ -67,6 +67,7 @@ private:
     void GetRangeFromSelection();
     void PerformSampling();
 
+    ScRange PerformRandomSampling(ScDocShell* pDocShell);
     ScRange PerformRandomSamplingKeepOrder(ScDocShell* pDocShell);
     ScRange PerformPeriodicSampling(ScDocShell* pDocShell);
 


More information about the Libreoffice-commits mailing list