Lust auf einen Coding contest? Zahlen ohne zurücklegen ziehen



  • Was ist mit der Dummie-Lösung? Also k Zahlen aus dem RNG ziehen und dabei in einem Bitset die bereits vorgekommenen markieren (und dann bei einer Zweitziehung überspringen)?
    Das hätte ich in Kombination mit einem Shuffle für größere k genommen, ist aber anscheinend nicht die Lösung.



  • Edit: Alles Quatsch, hatte einen Fehler im Code.



  • So, jetzt aber:

    #include <algorithm>
    #include <iomanip>
    #include <iostream>
    #include <random>
    #include <set>
    #include <unordered_set>
    #include <vector>
    
    namespace unspannend_detail
    {
    	template <typename T> void reserve(std::set<T>&, size_t) {}
    
    	template <typename T> void reserve(std::unordered_set<T>& s, size_t n) { s.reserve(n); }
    
    	template<bool Sorted, class Rng>
    	std::vector<unsigned int> draw_select(unsigned int k, unsigned int n, Rng& rng)
    	{
    		std::vector<unsigned> v;
    		v.reserve(k);
    		if (3*k > 20*n)   // Experimentell bestimmt! k/n > 3/20 = 0.15
    		{
    			for (unsigned i=0; i<n; ++i) v.push_back(i); // iota_n
    				while (k--)
    				{
    					int del = std::uniform_int_distribution<unsigned>(0, k)(rng);
    					std::swap(v[del], v.back());
    					v.pop_back();
    				}
    			if (Sorted) std::sort(v.begin(), v.end());
    		}
    		else
    		{
    			typename std::conditional<Sorted, std::set<unsigned>, std::unordered_set<unsigned> >::type set;
    			reserve(set, k);
    			std::uniform_int_distribution<unsigned> dis(0, n-1);
    			while (set.size() != k)
    				set.insert(dis(rng));
    			v.assign(set.begin(), set.end());
    		}
    		return v;
    	}
    }
    
    template<class Rng>
    std::vector<unsigned int> drawUniqueSubset_unspannend(unsigned int k, unsigned int n, Rng& rng)
    {
        return unspannend_detail::draw_select<false>(k, n, rng);
    }
    
    template<class Rng>
    std::vector<unsigned> drawUniqueSubset_Arcoth(unsigned k, unsigned n, Rng& rng)
    {
    	using namespace std;
    
    	uniform_int_distribution<unsigned> dis(0, n - 1);
    
    	vector<unsigned> rval;
    	rval.reserve(k);
    
    	if( float(k)/n < 0.6 ) // geschätzt!
    	{
    		vector<bool> previous(n);
    
    		while( k-- )
    		{
    			unsigned i;
    			do
    				i = dis(rng);
    			while( previous[i] );
    
    			previous[i] = true;
    
    			rval.push_back(i);
    		}
    	}
    	else
    	{
    		for( unsigned i = dis(rng); i != n && k != 0; ++i, --k )
    			rval.push_back(i);
    		for( unsigned i = 0; k != 0; ++i, --k )
    			rval.push_back(i);
    
    		shuffle( begin(rval), end(rval), rng );
    	}
    
    	return rval;
    }
    
    #include <ctime>
    
    template<typename PRNG>
    void test_against( std::vector<unsigned>(*first )(unsigned, unsigned, PRNG&),
                       std::vector<unsigned>(*second)(unsigned, unsigned, PRNG&) )
    {
    	PRNG prng(std::random_device{}());
    
    	for( unsigned N = 100000; N < 10000000; N *= 2.5 )
    	{
    		std::cout << "\nN = " << N << '\n';
    
    		auto now = clock();
    		for( float k_fac = 0.01; k_fac < 1; k_fac += 0.01 )
    			first(k_fac * N, N, prng);
    
    		auto time_first = clock() - now;
    		std::cout << "\tfirst  needs " << std::setw(11) << std::right << time_first << ",\n";
    
    		now = clock();
    		for( float k_fac = 0.01; k_fac < 1; k_fac += 0.01 )
    			second(k_fac * N, N, prng);
    
    		auto time_second = clock() - now;
    		std::cout << "\tsecond needs " << std::setw(11) << std::right << time_second << "\n";
    
    		std::cout << "\n\tfirst/second = " << float(time_first)/time_second*100 << "%\n";
    	}
    }
    
    int main()
    {
    	std::cout << "\n** LINEAR CONGRUENCY GENERATOR **\n\n";
    	test_against( drawUniqueSubset_Arcoth    <std::minstd_rand>,
    	              drawUniqueSubset_unspannend<std::minstd_rand> );
    
    	std::cout << "\n\n** MERSENNE TWISTER **\n\n";
    	test_against( drawUniqueSubset_Arcoth    <std::mt19937>,
    	              drawUniqueSubset_unspannend<std::mt19937> );
    }
    

    Läuft mit Clang 3.4 mit -O3 und gibt folgendes aus:

    ** LINEAR CONGRUENCY GENERATOR **
    
    N = 100000
    	first  needs      148951,
    	second needs      848336
    
    	first/second = 17.558%
    
    N = 250000
    	first  needs      356256,
    	second needs     2674922
    
    	first/second = 13.3184%
    
    N = 625000
    	first  needs      927288,
    	second needs     9211660
    
    	first/second = 10.0665%
    
    N = 1562500
    	first  needs     2270358,
    	second needs    27444058
    
    	first/second = 8.27268%
    
    N = 3906250
    	first  needs     6663331,
    	second needs    76202754
    
    	first/second = 8.74421%
    
    N = 9765625
    	first  needs    19572171,
    	second needs   203421893
    
    	first/second = 9.62147%
    
    ** MERSENNE TWISTER **
    
    N = 100000
    	first  needs      215800,
    	second needs      806042
    
    	first/second = 26.7728%
    
    N = 250000
    	first  needs      363053,
    	second needs     2655339
    
    	first/second = 13.6726%
    
    N = 625000
    	first  needs      885865,
    	second needs     9239554
    
    	first/second = 9.58775%
    
    N = 1562500
    	first  needs     2337987,
    	second needs    28214382
    
    	first/second = 8.28651%
    
    N = 3906250
    	first  needs     7117921,
    	second needs    79105437
    
    	first/second = 8.99802%
    
    N = 9765625
    	first  needs    21726353,
    	second needs   230246943
    
    	first/second = 9.43611%
    

    Geht bestimmt von der Implementierung noch optimierter; Otzes Variante ist seiner Aussage nach dann etwa noch doppelt so schnell wie die hier.

    Edit: Gerade erst bemerkt, dass ja die Shuffle-Variante nicht gleichverteilt ist... 🤡

    Damit ist Otzes Methode deutlich schneller als die von Volkard vorgeschlagene.


Anmelden zum Antworten