동시성 모델 비교

개요

현대 게임은 렌더링, 물리, AI, 네트워크를 동시에 처리해야 합니다. 동시성 모델을 잘못 선택하면 교착 상태(Deadlock), 경쟁 조건(Race Condition), 불필요한 컨텍스트 스위치가 발생합니다. 네 가지 핵심 동시성 모델의 원리와 적합한 사용 시나리오를 비교합니다.

1. OS 스레드 (Thread)

OS가 스케줄링하는 가장 기본적인 동시성 단위입니다.

#include <thread>
#include <mutex>
#include <vector>

std::mutex g_mutex;
int g_score = 0;

// Adds `delta` to the shared score g_score.
// g_mutex is held for the whole read-modify-write, so concurrent callers
// cannot interleave inside the update.
void UpdateScore(int delta)
{
    const std::lock_guard<std::mutex> guard{g_mutex};
    g_score = g_score + delta;
}

// Thread creation cost: ~tens of microseconds
// Stack size: 1-8 MB by default
// Context switch: ~a few microseconds
//
// Starts ten threads that each call UpdateScore(1), then joins all of them
// before returning.
void RunThreadExample()
{
    constexpr int kWorkerCount = 10;

    std::vector<std::thread> workers;
    int started = 0;
    while (started < kWorkerCount)
    {
        workers.emplace_back(UpdateScore, 1);
        ++started;
    }

    for (std::thread& worker : workers)
    {
        worker.join();
    }
}

적합한 경우: CPU 바운드 병렬 연산, 물리 시뮬레이션 분할, 에셋 로딩

주의: 스레드 수가 CPU 코어 수보다 많으면 컨텍스트 스위치 오버헤드가 증가합니다.

2. 코루틴 (Coroutine)

실행을 중단하고 나중에 재개하는 협력형 멀티태스킹입니다. OS 수준의 컨텍스트 스위치 없이 단일 스레드에서 동시성을 표현합니다.

C++20 코루틴

#include <coroutine>
#include <iostream>

// 단순 제너레이터 코루틴
struct Generator
{
    struct promise_type
    {
        int current_value;
        Generator get_return_object() { return Generator{this}; }
        std::suspend_always initial_suspend() { return {}; }
        std::suspend_always final_suspend() noexcept { return {}; }
        std::suspend_always yield_value(int v)
        {
            current_value = v;
            return {};
        }
        void return_void() {}
        void unhandled_exception() { std::terminate(); }
    };

    std::coroutine_handle<promise_type> handle;
    explicit Generator(promise_type* p)
        : handle(std::coroutine_handle<promise_type>::from_promise(*p)) {}
    ~Generator() { if (handle) handle.destroy(); }

    bool Next() { handle.resume(); return !handle.done(); }
    int  Value() { return handle.promise().current_value; }
};

// Coroutine that yields from, from-1, ..., 0 (inclusive), one value per resume.
// Yields nothing when `from` is negative.
Generator CountDown(int from)
{
    int remaining = from;
    while (remaining >= 0)
    {
        co_yield remaining;
        --remaining;
    }
}

// 사용
auto gen = CountDown(5);
while (gen.Next())
    std::cout << gen.Value() << ' '; // 5 4 3 2 1 0

Unity C# 코루틴

// Unity coroutine: runs on the single main thread, spreading work across frames
IEnumerator LoadLevelCoroutine(string sceneName)
{
    const float readyProgress = 0.9f;
    const float holdSeconds   = 0.5f;

    // Show the loading UI
    loadingScreen.SetActive(true);

    // Start the asynchronous scene load, but keep the scene from activating yet
    AsyncOperation loadOp = SceneManager.LoadSceneAsync(sceneName);
    loadOp.allowSceneActivation = false;

    // Mirror the load progress onto the bar, yielding one frame per iteration
    while (loadOp.progress < readyProgress)
    {
        loadingBar.value = loadOp.progress;
        yield return null; // wait until the next frame
    }

    // Show a full bar, wait 0.5 seconds, then let the scene activate
    loadingBar.value = 1f;
    yield return new WaitForSeconds(holdSeconds);
    loadOp.allowSceneActivation = true;
}

적합한 경우: 프레임 분산 처리, 순차적 타이밍 로직, 단순 비동기 흐름

3. Async/Await

I/O 대기 중 스레드를 반환해 적은 스레드로 높은 처리량을 달성합니다.

// C# async/await — game server example
public class GameServer
{
    private readonly HttpClient _http = new HttpClient();

    // Fetches one player's data over HTTP and deserializes the JSON body.
    // The await releases the calling thread while the request is in flight.
    public async Task<PlayerData> FetchPlayerDataAsync(int playerId)
    {
        var url = $"https://api.game.com/players/{playerId}";
        var body = await _http.GetStringAsync(url);
        return JsonSerializer.Deserialize<PlayerData>(body)!;
    }

    // Loads player data and leaderboard data concurrently.
    public async Task<(PlayerData, LeaderboardData)> LoadGameDataAsync(int playerId)
    {
        // Start both requests before awaiting either of them
        var playerTask = FetchPlayerDataAsync(playerId);
        var boardTask  = FetchLeaderboardAsync();

        // Wait until both have completed
        await Task.WhenAll(playerTask, boardTask);

        // Both tasks are already complete here, so these awaits do not suspend
        return (await playerTask, await boardTask);
    }

    // Placeholder for the example: returns an already-completed result
    private async Task<LeaderboardData> FetchLeaderboardAsync()
    {
        return await Task.FromResult(new LeaderboardData());
    }
}

적합한 경우: 게임 서버, API 클라이언트, 파일 I/O, 네트워크 통신

주의: async void 메서드는 호출자가 await하거나 예외를 catch할 수 없고, 처리되지 않은 예외가 프로세스를 종료시킬 수 있으므로 이벤트 핸들러 외에는 사용하지 않습니다.

4. Actor 모델

각 Actor는 독립적인 상태를 가지며, 메시지 큐를 통해서만 통신합니다. 공유 상태와 락이 없어 락 획득 순서로 인한 교착 상태가 발생하지 않습니다. (단, Actor끼리 서로의 응답을 기다리는 순환 대기는 여전히 피해야 합니다.)

// C# — 단순 Actor 구현 (Channel 기반)
using System.Threading.Channels;

/// <summary>
/// Actor that owns one player's health and score.
/// The state is read and written only inside the <see cref="RunAsync"/> loop,
/// which handles one mailbox message at a time; other code interacts with the
/// actor solely through <see cref="Send"/>.
/// (Assumes RunAsync is started once per actor instance.)
/// </summary>
public class PlayerActor
{
    private readonly Channel<IMessage> _mailbox =
        Channel.CreateUnbounded<IMessage>();

    private int _health = 100;
    private int _score  = 0;

    /// <summary>Marker interface for every message the actor accepts.</summary>
    public interface IMessage {}
    /// <summary>Reduces health by <c>Amount</c>, clamped at zero.</summary>
    public record TakeDamageMsg(int Amount) : IMessage;
    /// <summary>Adds <c>Points</c> to the score.</summary>
    public record AddScoreMsg(int Points)   : IMessage;
    /// <summary>Asks for the current state; the answer is delivered through <c>Reply</c>.</summary>
    public record GetStateMsg(
        TaskCompletionSource<(int hp, int score)> Reply) : IMessage;

    /// <summary>Queues a message on the mailbox without blocking.</summary>
    public void Send(IMessage msg) => _mailbox.Writer.TryWrite(msg);

    /// <summary>
    /// Message loop: reads mailbox messages in order and applies each one to
    /// the actor's state until reading is cancelled through <paramref name="ct"/>
    /// or the mailbox completes.
    /// </summary>
    public async Task RunAsync(CancellationToken ct)
    {
        await foreach (var msg in _mailbox.Reader.ReadAllAsync(ct))
        {
            switch (msg)
            {
                case TakeDamageMsg(var amt):
                    _health = Math.Max(0, _health - amt);
                    break;
                case AddScoreMsg(var pts):
                    _score += pts;
                    break;
                case GetStateMsg(var reply):
                    // TrySetResult instead of SetResult: if the caller already
                    // completed or cancelled this reply source, SetResult would
                    // throw out of the loop and stop the actor for everyone.
                    reply.TrySetResult((_health, _score));
                    break;
            }
        }
    }
}

// Usage
var actor = new PlayerActor();
_ = actor.RunAsync(CancellationToken.None);

actor.Send(new PlayerActor.TakeDamageMsg(30));
actor.Send(new PlayerActor.AddScoreMsg(100));

// RunContinuationsAsynchronously: without it, the code after the await below
// may run inline on the actor's message loop at the moment the actor sets the
// result, stalling the actor until that code yields.
var tcs = new TaskCompletionSource<(int, int)>(
    TaskCreationOptions.RunContinuationsAsynchronously);
actor.Send(new PlayerActor.GetStateMsg(tcs));
var (hp, score) = await tcs.Task;

적합한 경우: 게임 서버 플레이어 세션 관리, 분산 시스템, 공유 상태 없는 독립 유닛

5. 모델 비교 (중간 정리)

항목	Thread	Coroutine	Async/Await	Actor
스케줄링	OS 선점형	협력형	I/O 이벤트 기반	메시지 큐
메모리 비용	높음 (MB)	낮음 (KB)	낮음	낮음
공유 상태	락 필요	단일 스레드	주의 필요	없음
교착 상태 위험	있음	없음	있음 (남용 시)	없음
복잡도	높음	낮음	중간	중간

6. 경쟁 조건과 교착 상태 — 흔한 함정

경쟁 조건 (Race Condition)

// Dangerous code: modifies a shared variable without a lock
int g_score = 0;

void AddScore(int points)
{
    // The read -> add -> write sequence below is three steps and is not atomic:
    //   Thread A: reads g_score (0) -> context switch
    //   Thread B: reads g_score (0) -> +10 -> writes (10)
    //   Thread A: +5 -> writes (5)  <- B's result is overwritten!
    g_score = g_score + points;
}

// Fix 1: mutex
std::mutex g_mutex;

// Adds `points` to g_score while holding g_mutex, so the read-modify-write
// cannot interleave with another caller of this function.
void AddScoreSafe(int points)
{
    const std::lock_guard<std::mutex> guard{g_mutex};
    g_score = g_score + points;
}

// Fix 2: atomic (lightweight; suited to simple integer operations)
std::atomic<int> g_atomicScore{0};

// Adds `points` to the atomic score in a single atomic read-modify-write.
// Relaxed ordering: only the counter itself is made atomic; no ordering of
// other memory operations is requested.
void AddScoreAtomic(int points)
{
    std::atomic_fetch_add_explicit(&g_atomicScore, points, std::memory_order_relaxed);
}

// Fix 3: atomic CAS (compare-and-swap) loop
void AddScoreCAS(int points)
{
    int observed = g_atomicScore.load();
    int desired;
    do
    {
        // On failure compare_exchange_weak writes the current value back into
        // `observed` (another thread modified it first), so the sum is
        // recomputed from fresh data on every retry.
        desired = observed + points;
    } while (!g_atomicScore.compare_exchange_weak(observed, desired));
}

교착 상태 (Deadlock)

// Code that deadlocks: Thread1 and Thread2 lock the same two mutexes in
// opposite order.
std::mutex mutexA, mutexB;

// Locks mutexA, sleeps for 1 ms, then tries to lock mutexB while still
// holding mutexA.
void Thread1()
{
    std::lock_guard<std::mutex> lockA(mutexA);  // acquire A
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
    std::lock_guard<std::mutex> lockB(mutexB);  // wait for B -> deadlock!
}

// Locks mutexB, sleeps for 1 ms, then tries to lock mutexA while still
// holding mutexB.
void Thread2()
{
    std::lock_guard<std::mutex> lockB(mutexB);  // acquire B
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
    std::lock_guard<std::mutex> lockA(mutexA);  // wait for A -> deadlock!
}

// 해결책 1: 항상 같은 순서로 락 획득
void Thread1Fixed()
{
    std::lock_guard<std::mutex> lockA(mutexA);  // 항상 A 먼저
    std::lock_guard<std::mutex> lockB(mutexB);  // 그다음 B
}
void Thread2Fixed()
{
    std::lock_guard<std::mutex> lockA(mutexA);  // 항상 A 먼저
    std::lock_guard<std::mutex> lockB(mutexB);  // 그다음 B
}

// 해결책 2: std::scoped_lock (C++17) — 데드락 없는 다중 락
void ThreadSafe()
{
    // 내부적으로 데드락 방지 알고리즘(try-lock 기반) 사용
    std::scoped_lock lock(mutexA, mutexB);
    // 두 락 모두 안전하게 획득
}

// 해결책 3: try_lock + 타임아웃
void ThreadWithTimeout()
{
    std::unique_lock<std::mutex> lockA(mutexA, std::defer_lock);
    std::unique_lock<std::mutex> lockB(mutexB, std::defer_lock);

    if (std::try_lock(lockA, lockB) == -1)
    {
        // 둘 다 획득 성공
    }
    else
    {
        // 획득 실패 -> 나중에 재시도
    }
}

7. 스레드 풀 (Thread Pool)

매번 스레드를 생성/소멸하는 비용을 줄이기 위해 미리 만들어 둔 스레드 집합에서 작업을 처리합니다.

#include <condition_variable>
#include <cstddef>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <utility>
#include <vector>

// Fixed-size pool of worker threads that run queued tasks in FIFO order.
//
// Tasks already queued when shutdown starts are still executed; the
// destructor blocks until every worker has finished and been joined.
// Submit() may be called from any thread.
class ThreadPool
{
public:
    // Starts the worker threads.
    // A threadCount of 0 is raised to 1: std::thread::hardware_concurrency()
    // may return 0, and a pool without workers would accept tasks that never
    // run, leaving their futures blocked forever.
    explicit ThreadPool(size_t threadCount)
    {
        const size_t workerCount = (threadCount == 0) ? 1 : threadCount;
        _workers.reserve(workerCount);
        try
        {
            for (size_t i = 0; i < workerCount; ++i)
                _workers.emplace_back([this] { WorkerLoop(); });
        }
        catch (...)
        {
            // The destructor does not run for a partially constructed object,
            // and destroying a joinable std::thread calls std::terminate, so
            // stop and join the workers that did start before rethrowing.
            Shutdown();
            throw;
        }
    }

    // Workers capture `this`, so the pool must stay at a fixed address.
    ThreadPool(const ThreadPool&)            = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    // Queues `f` for execution on a worker thread.
    // Returns a std::future for f's result; an exception thrown by `f` is
    // stored in that future rather than escaping on the worker thread.
    // Throws std::runtime_error if the pool is already shutting down.
    template<typename F>
    auto Submit(F&& f) -> std::future<decltype(f())>
    {
        using ReturnType = decltype(f());
        // shared_ptr: packaged_task is move-only, but std::function requires
        // a copyable callable.
        auto task = std::make_shared<std::packaged_task<ReturnType()>>(
            std::forward<F>(f));
        std::future<ReturnType> result = task->get_future();

        {
            std::lock_guard<std::mutex> lock(_mutex);
            if (_stop) throw std::runtime_error("ThreadPool is stopped");
            _tasks.emplace([task]{ (*task)(); });
        }
        _condition.notify_one();
        return result;
    }

    // Requests shutdown, lets the workers drain the queue, then joins them.
    ~ThreadPool()
    {
        Shutdown();
    }

private:
    // Body of every worker thread: take one task at a time from the queue and
    // run it, until shutdown has been requested and the queue is empty.
    void WorkerLoop()
    {
        while (true)
        {
            std::function<void()> task;
            {
                std::unique_lock<std::mutex> lock(_mutex);
                // Sleep until there is work or shutdown has been requested
                _condition.wait(lock, [this]
                {
                    return _stop || !_tasks.empty();
                });

                if (_stop && _tasks.empty())
                    return;

                task = std::move(_tasks.front());
                _tasks.pop();
            }
            task();  // run outside the lock so other workers can dequeue
        }
    }

    // Sets the stop flag, wakes every worker and joins all started threads.
    void Shutdown()
    {
        {
            std::lock_guard<std::mutex> lock(_mutex);
            _stop = true;
        }
        _condition.notify_all();
        for (auto& worker : _workers)
        {
            if (worker.joinable())
                worker.join();
        }
    }

    std::vector<std::thread>          _workers;
    std::queue<std::function<void()>> _tasks;
    std::mutex                        _mutex;      // guards _tasks and _stop
    std::condition_variable           _condition;  // signalled on new task or stop
    bool                              _stop = false;
};

// Usage example
// hardware_concurrency() is only a hint and may return 0 when the value
// cannot be determined, so fall back to a single worker in that case.
const unsigned hardwareThreads = std::thread::hardware_concurrency();
ThreadPool pool(hardwareThreads != 0 ? hardwareThreads : 1);

// Split the physics simulation into chunks and run them on the pool
std::vector<std::future<void>> futures;
for (int i = 0; i < NUM_CHUNKS; ++i)
{
    futures.push_back(pool.Submit([i]
    {
        SimulatePhysicsChunk(i);
    }));
}
for (auto& f : futures)
    f.get();  // wait for every chunk; unlike wait(), get() rethrows an exception thrown by the chunk

8. 모델 비교

항목	Thread	Coroutine	Async/Await	Actor
스케줄링	OS 선점형	협력형	I/O 이벤트 기반	메시지 큐
메모리 비용	높음 (MB)	낮음 (KB)	낮음	낮음
공유 상태	락 필요	단일 스레드	주의 필요	없음
교착 상태 위험	있음	없음	있음 (남용 시)	없음
복잡도	높음	낮음	중간	중간
적합한 작업	CPU 바운드	타이밍 로직	I/O 바운드	독립 상태 관리

요약

CPU 바운드 병렬 작업(물리, AI 배치)에는 스레드 풀이 적합하다.
프레임 분산 처리나 타이밍 로직에는 코루틴이 가장 단순하다.
I/O 바운드 작업(네트워크, 파일)에는 async/await로 스레드를 절약한다.
독립적 상태를 가진 유닛이 많은 게임 서버에는 Actor 모델이 교착 상태 없는 설계를 제공한다.
Race Condition은 atomic 또는 lock_guard로, Deadlock은 항상 동일한 순서의 락 획득 또는 scoped_lock으로 방지한다.