Effizienteste dynamische Typisierung

Ethon

Hoi,
was sollte denn die effizienteste Möglichkeit sein Operationen bei dynamischer Typisierung zu implementieren?
Kurzes Beispiel was ich meine:

struct Any
{
    Any(long Int)
        : type(Type::Int)
    {
        data.Int = Int;
    }

    Any(double Float)
        : type(Type::Float)
    {
        data.Float = Float;
    }

    enum class Type
    {
        Int,
        Float
    } type;

    union Data
    {
        long Int;
        double Float;
    } data;
};

Any add(Any lhs, Any rhs)
{
    switch(lhs.type)
    {
        case Any::Type::Int:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Int + rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Int + rhs.data.Float);
            }
        }

        case Any::Type::Float:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Float + rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Float + rhs.data.Float);
            }
        }
    }
}

int main()
{
    Any a = 10;
    Any b = 12.0;
    Any c = add(a, b);

    assert(c.type == Any::Type::Float && c.data.Float == 22.0);
}

Da könnte man entweder mit dieser if/switch-Kaskade arbeiten was echt nett wäre wenn der Compiler aus dem switch eine jumptable baut was ja machbar sein sollte.
(Kann man gängige Compiler irgendwie ermutigen Jumptables zu erzeugen ?)

Eine andere Möglichkeit wäre Polymorphie, was sich aber erst bei sehr vielen Typen lohnt schätze ich mal.

Hardmode wäre es eine Jumptable per Hand nachzubauen.

Es ist vor allem bei arithmetischen/logischen Operationen auf numerische/boolsche Datentypen notwendig sich etwas zu überlegen da die halt wirklich ins Gewicht fallen wenn sie langsam sind, wenn sich zb. hinter dem dynamischen Objekt eine Hashtable befindet sind die Operationen ja relativ gesehen so teuer dass der Overhead nicht mehr ins Gewicht fallen sollte.

Was sind denn die effizientesten Implementierungen und Tricks im Umlauf? Finde dazu leider nicht wirklich etwas.

Sind gerade Gedankenspielchen bezüglich Verwendung bei einer dynamisch typisierten Programmiersprache und der Klasse Any in C++ Code.

Danke & Grüße,
Ethon

Kellerautomat

Suchst du double dispatch?

TyRoXx

Etwas besser lesbar ist zumindest das hier:

constexpr int combined(Any::Type left, Any::Type right)
{
	return static_cast<int>(left) | (static_cast<int>(right) << 1);
}

Any add2(Any lhs, Any rhs)
{
	switch (combined(lhs.type, rhs.type))
	{
		case combined(Any::Type::Int, Any::Type::Int): return Any(lhs.data.Int + rhs.data.Int);
		case combined(Any::Type::Int, Any::Type::Float): return Any(lhs.data.Int + rhs.data.Float);
		case combined(Any::Type::Float, Any::Type::Int): return Any(lhs.data.Float + rhs.data.Int);
		case combined(Any::Type::Float, Any::Type::Float): return Any(lhs.data.Float + rhs.data.Float);
	}
}

Der generierte Code von GCC 4.8 mit -O3 für AMD64 ist anders als bei verschwachtelten switch es. Keine Ahnung, was schneller läuft.

Ethon schrieb:

Sind gerade Gedankenspielchen bezüglich Verwendung bei einer dynamisch typisierten Programmiersprache

JITter versuchen in der Regel die Typen herzuleiten, damit Typunterscheidungen wegfallen können. LuaJIT und so ziemlich alle JavaScript-Implementierungen machen das seit Jahren so.

Columbo

Any add2(Any lhs, Any rhs)
{
    static Any(*lookup[2][2])(Any&&, Any&&) // edit: korrigiert
    {
        [](Any&& lhs, Any&& rhs){ return lhs.data.Int   + rhs.data.Int   ; },
        [](Any&& lhs, Any&& rhs){ return lhs.data.Int   + rhs.data.Float ; },
        [](Any&& lhs, Any&& rhs){ return lhs.data.Float + rhs.data.Int   ; },
        [](Any&& lhs, Any&& rhs){ return lhs.data.Float + rhs.data.Float ; }
    };

    return lookup[lhs.type][rhs.type](std::move(lhs), std::move(rhs));
}

?

Ethon

Danke für die Antworten!
Habe mal etwas herumgetestet. Ist jetzt nicht ganz aussagekräftig da 3 mögliche Typen immer noch recht wenig sind, wenn man weitere Typen hinzufügt steigt ja die Komplexität die richtige Operation auszuwählen.

#include <cassert>
#include <iostream>
#include <chrono>
#include <vector>
#include <cstdlib>
#include <algorithm>
#include <stdexcept>

struct invalid_operation : std::runtime_error
{
    invalid_operation(char const* msg)
        : std::runtime_error(msg)
    { }
};

struct Any
{
    Any()
        : type(Type::None)
    { }

    Any(int Int)
        : type(Type::Int)
    {
        data.Int = Int;
    }

    Any(double Float)
        : type(Type::Float)
    {
        data.Float = Float;
    }

    enum class Type : unsigned
    {
        None = 0,
        Int = 1,
        Float = 2
    } type;

    union Data
    {
        int Int;
        double Float;
    } data;
};

struct Any2;
struct VTable
{
    Any2 (*add)(Any2 const&, Any2 const&);
    Any2 (*mul)(Any2 const&, Any2 const&);
    Any::Type type;
};

extern VTable NoneVTable;
extern VTable IntVTable;
extern VTable FloatVTable;

struct Any2
{
    Any2()
        : vtable(&NoneVTable)
    { }

    Any2(int Int)
        : vtable(&IntVTable)
    {
        data.Int = Int;
    }

    Any2(double Float)
        : vtable(&FloatVTable)
    {
        data.Float = Float;
    }

    VTable const* vtable;

    union Data
    {
        int Int;
        double Float;
    } data;
};

VTable NoneVTable =
{
    [](Any2 const&, Any2 const&) -> Any2
    {
        throw invalid_operation("add");
    },

    [](Any2 const&, Any2 const&) -> Any2
    {
        throw invalid_operation("mul");
    },

    Any::Type::None
};

VTable IntVTable =
{
    [](Any2 const& lhs, Any2 const& rhs) -> Any2
    {
        switch(rhs.vtable->type)
        {
            case Any::Type::Int:
                return Any2(lhs.data.Int + rhs.data.Int);

            case Any::Type::Float:
                return Any2(lhs.data.Int + rhs.data.Float);

            case Any::Type::None:
                throw invalid_operation("add");
        }

        assert(false);
    },

    [](Any2 const& lhs, Any2 const& rhs) -> Any2
    {
        switch(rhs.vtable->type)
        {
            case Any::Type::Int:
                return Any2(lhs.data.Int * rhs.data.Int);

            case Any::Type::Float:
                return Any2(lhs.data.Int * rhs.data.Float);

            case Any::Type::None:
                throw invalid_operation("mul");
        }

        assert(false);
    },
    Any::Type::Int
};

VTable FloatVTable =
{
    [](Any2 const& lhs, Any2 const& rhs) -> Any2
    {
        switch(rhs.vtable->type)
        {
            case Any::Type::Int:
                return Any2(lhs.data.Float + rhs.data.Int);

            case Any::Type::Float:
                return Any2(lhs.data.Float + rhs.data.Float);

            case Any::Type::None:
                throw invalid_operation("add");
        }

        assert(false);
    },

    [](Any2 const& lhs, Any2 const& rhs) -> Any2
    {
        switch(rhs.vtable->type)
        {
            case Any::Type::Int:
                return Any2(lhs.data.Float * rhs.data.Int);

            case Any::Type::Float:
                return Any2(lhs.data.Float * rhs.data.Float);

            case Any::Type::None:
                throw invalid_operation("mul");
        }

        assert(false);
    },
    Any::Type::Float
};

Any add1(Any lhs, Any rhs)
{
    switch(lhs.type)
    {
        case Any::Type::Int:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Int + rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Int + rhs.data.Float);

                case Any::Type::None:
                    throw invalid_operation("add");
            }
        }

        case Any::Type::Float:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Float + rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Float + rhs.data.Float);

                case Any::Type::None:
                    throw invalid_operation("add");
            }
        }

        case Any::Type::None:
            throw invalid_operation("add");
    }

    assert(false);
}

constexpr unsigned combined(Any::Type left, Any::Type right)
{
    return static_cast<int>(left) | (static_cast<int>(right) << 16);
}

Any add2(Any lhs, Any rhs)
{
    switch (combined(lhs.type, rhs.type))
    {
        case combined(Any::Type::Int, Any::Type::Int): return Any(lhs.data.Int + rhs.data.Int);
        case combined(Any::Type::Int, Any::Type::Float): return Any(lhs.data.Int + rhs.data.Float);
        case combined(Any::Type::Float, Any::Type::Int): return Any(lhs.data.Float + rhs.data.Int);
        case combined(Any::Type::Float, Any::Type::Float): return Any(lhs.data.Float + rhs.data.Float);

        default:
            if(lhs.type == Any::Type::None || rhs.type == Any::Type::None)
                throw invalid_operation("add");
    }

    assert(false);
}

Any add3(Any lhs, Any rhs)
{
    static Any(*lookup[3][3])(Any&&, Any&&)
    {
        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("add"); },
            [](Any&&, Any&&) -> Any { throw invalid_operation("add"); },
            [](Any&&, Any&&) -> Any { throw invalid_operation("add"); }
        },

        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("add"); },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Int   + rhs.data.Int   ; },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Int   + rhs.data.Float ; }
        },
        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("add"); },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Float + rhs.data.Int   ; },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Float + rhs.data.Float ; }
        }
    };

    return lookup[static_cast<unsigned>(lhs.type)][static_cast<unsigned>(rhs.type)](std::move(lhs), std::move(rhs));
}

Any2 add4(Any2 lhs, Any2 rhs)
{
    return lhs.vtable->add(lhs, rhs);
}

Any mul1(Any lhs, Any rhs)
{
    switch(lhs.type)
    {
        case Any::Type::Int:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Int * rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Int * rhs.data.Float);

                case Any::Type::None:
                    throw invalid_operation("mul");
            }
        }

        case Any::Type::Float:
        {
            switch(rhs.type)
            {
                case Any::Type::Int:
                    return Any(lhs.data.Float * rhs.data.Int);

                case Any::Type::Float:
                    return Any(lhs.data.Float * rhs.data.Float);

                case Any::Type::None:
                    throw invalid_operation("mul");
            }
        }

        case Any::Type::None:
            throw invalid_operation("mul");
    }

    assert(false);
}

Any mul2(Any lhs, Any rhs)
{
    switch (combined(lhs.type, rhs.type))
    {
        case combined(Any::Type::Int, Any::Type::Int): return Any(lhs.data.Int * rhs.data.Int);
        case combined(Any::Type::Int, Any::Type::Float): return Any(lhs.data.Int * rhs.data.Float);
        case combined(Any::Type::Float, Any::Type::Int): return Any(lhs.data.Float * rhs.data.Int);
        case combined(Any::Type::Float, Any::Type::Float): return Any(lhs.data.Float * rhs.data.Float);

        default:
            if(lhs.type == Any::Type::None || rhs.type == Any::Type::None)
                throw invalid_operation("mul");
    }

    assert(false);
}

Any mul3(Any lhs, Any rhs)
{
    static Any(*lookup[3][3])(Any&&, Any&&)
    {
        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("mul"); },
            [](Any&&, Any&&) -> Any { throw invalid_operation("mul"); },
            [](Any&&, Any&&) -> Any { throw invalid_operation("mul"); }
        },

        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("mul"); },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Int   * rhs.data.Int   ; },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Int   * rhs.data.Float ; }
        },
        {
            [](Any&&, Any&&) -> Any { throw invalid_operation("mul"); },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Float * rhs.data.Int   ; },
            [](Any&& lhs, Any&& rhs) -> Any { return lhs.data.Float * rhs.data.Float ; }
        }
    };

    return lookup[static_cast<unsigned>(lhs.type)][static_cast<unsigned>(rhs.type)](std::move(lhs), std::move(rhs));
}

Any2 mul4(Any2 lhs, Any2 rhs)
{
    return lhs.vtable->mul(lhs, rhs);
}

Any randomAny()
{
    if(std::rand() % 2)
        return Any(std::rand());
    else
        return Any(double(std::rand()));
}

Any2 randomAny2()
{
    if(std::rand() % 2)
        return Any2(std::rand());
    else
        return Any2(double(std::rand()));
}

template<   typename T,
            T Add(T lhs, T rhs),
            T Mul(T lhs, T rhs),
            T Random(),
            unsigned N>
void test(char const* name)
{
    using namespace std;
    using namespace std::chrono;

    vector<T> m1(N * N), m2(N * N), m3(N * N, T(0));
    srand(0xAFFEAFFE);
    generate(m1.begin(), m1.end(), Random);
    generate(m2.begin(), m2.end(), Random);

    auto start = high_resolution_clock::now();

    for(unsigned i = 0; i < N; ++i)
    {
        for(unsigned j = 0; j < N; ++j)
        {
            for(unsigned k = 0; k < N; ++k)
            {
                 T tmp = Mul(m1[i * N + k], m2[k * N + j]);
                 m3[i * N + j] = Add(m3[i * N + j], tmp);
            }
        }
    }

    auto end = high_resolution_clock::now();

    cout << name << "\t: " << duration_cast<milliseconds>(end - start).count() << "ms" << endl;
}

double addDouble(double lhs, double rhs)
{
    return lhs + rhs;
}

double mulDouble(double lhs, double rhs)
{
    return lhs * rhs;
}

double randomDouble()
{
    return std::rand();
}

int main()
{
    test<double, addDouble, mulDouble, randomDouble, 700>("double  ");
    test<Any, add1, mul1, randomAny, 700>("version1");
    test<Any, add2, mul2, randomAny, 700>("version2");
    test<Any, add3, mul3, randomAny, 700>("version3");
    test<Any2, add4, mul4, randomAny2, 700>("version4");
}

[ethon@desktop-fedora-20 WorkDir]$ ./any
double : 439ms
version1 : 6388ms
version2 : 6849ms
version3 : 6462ms
version4 : 7770ms
[ethon@desktop-fedora-20 WorkDir]$ ./any
double : 442ms
version1 : 6188ms
version2 : 6454ms
version3 : 6459ms
version4 : 7791ms
[ethon@desktop-fedora-20 WorkDir]$ ./any
double : 446ms
version1 : 6184ms
version2 : 6530ms
version3 : 6442ms
version4 : 7820ms

g++ -Wall -Wextra -O3 -std=c++11

Wenn noch jemand eine Idee hat, bitte her damit!

TyRoXx schrieb:

JITter versuchen in der Regel die Typen herzuleiten, damit Typunterscheidungen wegfallen können. LuaJIT und so ziemlich alle JavaScript-Implementierungen machen das seit Jahren so.

Stimmt. Aber das geht nicht immer. Und dann wird's interessant.