diff --git a/python/pycrdt/__init__.py b/python/pycrdt/__init__.py
index d849660..77446aa 100644
--- a/python/pycrdt/__init__.py
+++ b/python/pycrdt/__init__.py
@@ -35,6 +35,8 @@
 from ._sync import write_var_uint as write_var_uint
 from ._text import Text as Text
 from ._text import TextEvent as TextEvent
+from ._text import get_utf8_index as get_utf8_index
+from ._text import get_utf16_index as get_utf16_index
 from ._transaction import NewTransaction as NewTransaction
 from ._transaction import ReadTransaction as ReadTransaction
 from ._transaction import Transaction as Transaction
diff --git a/python/pycrdt/_base.py b/python/pycrdt/_base.py
index 1443d7d..53f614f 100644
--- a/python/pycrdt/_base.py
+++ b/python/pycrdt/_base.py
@@ -61,6 +61,7 @@ def __init__(
         *,
         client_id: int | None = None,
         skip_gc: bool | None = None,
+        offset_kind: str | None = None,
         doc: _Doc | None = None,
         Model=None,
         allow_multithreading: bool = False,
@@ -68,7 +69,11 @@ def __init__(
     ) -> None:
         super().__init__(**data)
         if doc is None:
-            doc = _Doc(client_id, skip_gc)
+            doc = _Doc(client_id, skip_gc, offset_kind)
+        elif offset_kind is not None and offset_kind != doc.offset_kind:
+            raise ValueError(
+                f"offset_kind={offset_kind!r} does not match doc.offset_kind={doc.offset_kind!r}"
+            )
         self._doc = doc
         self._txn = None
         self._exceptions = []
diff --git a/python/pycrdt/_doc.py b/python/pycrdt/_doc.py
index 07b140c..34f37c6 100644
--- a/python/pycrdt/_doc.py
+++ b/python/pycrdt/_doc.py
@@ -48,6 +48,7 @@ def __init__(
         *,
         client_id: int | None = None,
         skip_gc: bool | None = None,
+        offset_kind: str | None = None,
         doc: _Doc | None = None,
         Model=None,
         allow_multithreading: bool = False,
@@ -58,11 +59,18 @@ def __init__(
             client_id: An optional client ID for the document.
             skip_gc: Whether to skip garbage collection on deleted collections
                 on transaction commit.
+            offset_kind: How yrs counts text positions internally. ``"utf8"``
+                (the yrs default) uses byte offsets; ``"utf16"`` uses UTF-16
+                code unit offsets and is required for cross-runtime
+                compatibility with JS yjs. ``None`` (default) selects the yrs
+                default of ``"utf8"``. Regardless of this setting, the public
+                ``Text`` API always takes Python character indices.
             allow_multithreading: Whether to allow the document to be used in different threads.
         """
         super().__init__(
             client_id=client_id,
             skip_gc=skip_gc,
+            offset_kind=offset_kind,
             doc=doc,
             Model=Model,
             allow_multithreading=allow_multithreading,
@@ -86,6 +94,15 @@ def client_id(self) -> int:
         """The document client ID."""
         return self._doc.client_id()
 
+    @property
+    def offset_kind(self) -> str:
+        """Unit in which the underlying yrs document counts text offsets.
+
+        Either ``"utf8"`` (byte offsets) or ``"utf16"`` (UTF-16 code units);
+        see [Doc.__init__][pycrdt.Doc.__init__] for how the value is chosen.
+        """
+        return self._doc.offset_kind
+
     def transaction(self, origin: Any = None) -> Transaction:
         """
         Creates a new transaction or gets the current one, if any.
diff --git a/python/pycrdt/_pycrdt.pyi b/python/pycrdt/_pycrdt.pyi
index 1498689..73f3ed1 100644
--- a/python/pycrdt/_pycrdt.pyi
+++ b/python/pycrdt/_pycrdt.pyi
@@ -17,7 +17,12 @@ class Snapshot:
 class Doc:
     """Shared document."""
 
-    def __init__(self, client_id: int | None, skip_gc: bool | None) -> None:
+    def __init__(
+        self,
+        client_id: int | None,
+        skip_gc: bool | None,
+        offset_kind: str | None,
+    ) -> None:
         """Create a new document with an optional global client ID.
         If no client ID is passed, a random one will be generated."""
 
@@ -28,6 +33,10 @@ class Doc:
     def client_id(self) -> int:
         """Returns the document unique client identifier."""
 
+    @property
+    def offset_kind(self) -> str:
+        """Returns the offset kind: 'utf8' (byte offsets) or 'utf16' (UTF-16 code units)."""
+
     def guid(self) -> int:
         """Returns the document globally unique identifier."""
 
diff --git a/python/pycrdt/_text.py b/python/pycrdt/_text.py
index 49356c0..d7275e7 100644
--- a/python/pycrdt/_text.py
+++ b/python/pycrdt/_text.py
@@ -11,6 +11,55 @@
     from ._doc import Doc
 
 
+def get_utf16_index(text: str, char_index: int) -> int:
+    """Convert a Python character (code point) index to a UTF-16 code unit index.
+
+    Characters outside the Basic Multilingual Plane (e.g. emoji) occupy 2
+    UTF-16 code units but only 1 Python character. For pure-ASCII / BMP
+    text this is a no-op.
+
+    Args:
+        text: The string against which ``char_index`` is interpreted.
+        char_index: A Python (code point) index into ``text``. Negative or
+            out-of-range values follow slice semantics, as in ``text[:char_index]``.
+
+    Returns:
+        The number of UTF-16 code units in ``text[:char_index]``.
+    """
+    prefix = text[:char_index]
+    # Measure the actual prefix rather than trusting ``char_index``, so that
+    # negative or oversized indices agree with ``get_utf8_index``.
+    # Each code point above 0xFFFF needs a surrogate pair (one extra unit).
+    return len(prefix) + sum(1 for ch in prefix if ord(ch) > 0xFFFF)
+
+
+def get_utf8_index(text: str, char_index: int) -> int:
+    """Translate a Python character (code point) index into a UTF-8 byte index.
+
+    Args:
+        text: String that ``char_index`` refers to.
+        char_index: Position in ``text``, counted in Python characters.
+
+    Returns:
+        Length in bytes of the UTF-8 encoding of ``text[:char_index]``.
+    """
+    if char_index != 0:
+        return len(text[:char_index].encode("utf-8"))
+    return 0
+
+
+def _char_to_offset(text: str, char_index: int, offset_kind: str) -> int:
+    # "utf16" docs count UTF-16 code units; any other kind counts UTF-8 bytes.
+    convert = get_utf16_index if offset_kind == "utf16" else get_utf8_index
+    return convert(text, char_index)
+
+
+def _single_char_unit_len(char: str, offset_kind: str) -> int:
+    if offset_kind != "utf16":
+        return len(char.encode("utf-8"))  # width in UTF-8 bytes
+    return 1 if ord(char) <= 0xFFFF else 2  # surrogate pair above the BMP
+
+
 class Text(Sequence):
     """
     A shared data type used for collaborative text editing, similar to a Python `str`.
@@ -89,10 +138,9 @@ def __len__(self) -> int:
         ```
 
         Returns:
-            The length of the text.
+            The length of the text in Python characters.
         """
-        with self.doc.transaction() as txn:
-            return self.integrated.len(txn._txn)
+        return len(str(self))
 
     def __str__(self) -> str:
         """
@@ -128,7 +176,9 @@ def __iadd__(self, value: str) -> Text:
         """
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
-            self.integrated.insert(txn._txn, len(self), value)
+            current = str(self)
+            offset = _char_to_offset(current, len(current), self.doc.offset_kind)
+            self.integrated.insert(txn._txn, offset, value)
             return self
 
     def _check_slice(self, key: slice) -> tuple[int, int]:
@@ -169,13 +219,18 @@ def __delitem__(self, key: int | slice) -> None:
         """
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
+            current = str(self)
+            ok = self.doc.offset_kind
             if isinstance(key, int):
-                self.integrated.remove_range(txn._txn, key, 1)
+                offset = _char_to_offset(current, key, ok)
+                unit_len = _single_char_unit_len(current[key], ok)
+                self.integrated.remove_range(txn._txn, offset, unit_len)
             elif isinstance(key, slice):
                 start, stop = self._check_slice(key)
-                length = stop - start
-                if length > 0:
-                    self.integrated.remove_range(txn._txn, start, length)
+                if stop - start > 0:
+                    offset_start = _char_to_offset(current, start, ok)
+                    offset_stop = _char_to_offset(current, stop, ok)
+                    self.integrated.remove_range(txn._txn, offset_start, offset_stop - offset_start)
             else:
                 raise RuntimeError(f"Index not supported: {key}")
 
@@ -214,20 +269,26 @@ def __setitem__(self, key: int | slice, value: str) -> None:
         """
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
+            current = str(self)
+            ok = self.doc.offset_kind
             if isinstance(key, int):
                 value_len = len(value)
                 if value_len != 1:
                     raise RuntimeError(
                         f"Single item assigned value must have a length of 1, not {value_len}"
                     )
-                del self[key]
-                self.integrated.insert(txn._txn, key, value)
+                offset = _char_to_offset(current, key, ok)
+                unit_len = _single_char_unit_len(current[key], ok)
+                self.integrated.remove_range(txn._txn, offset, unit_len)
+                self.integrated.insert(txn._txn, offset, value)
             elif isinstance(key, slice):
                 start, stop = self._check_slice(key)
-                length = stop - start
+                offset_start = _char_to_offset(current, start, ok)
+                offset_stop = _char_to_offset(current, stop, ok)
+                length = offset_stop - offset_start
                 if length > 0:
-                    self.integrated.remove_range(txn._txn, start, length)
-                self.integrated.insert(txn._txn, start, value)
+                    self.integrated.remove_range(txn._txn, offset_start, length)
+                self.integrated.insert(txn._txn, offset_start, value)
             else:
                 raise RuntimeError(f"Index not supported: {key}")
 
@@ -251,8 +312,10 @@ def insert(self, index: int, value: str, attrs: dict[str, Any] | None = None) ->
         """
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
+            current = str(self)
+            offset = _char_to_offset(current, index, self.doc.offset_kind)
             self.integrated.insert(
-                txn._txn, index, value, iter(attrs.items()) if attrs is not None else None
+                txn._txn, offset, value, iter(attrs.items()) if attrs is not None else None
             )
 
     def insert_embed(self, index: int, value: Any, attrs: dict[str, Any] | None = None) -> None:
@@ -266,8 +329,10 @@ def insert_embed(self, index: int, value: Any, attrs: dict[str, Any] | None = No
         """
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
+            current = str(self)
+            offset = _char_to_offset(current, index, self.doc.offset_kind)
             self.integrated.insert_embed(
-                txn._txn, index, value, iter(attrs.items()) if attrs is not None else None
+                txn._txn, offset, value, iter(attrs.items()) if attrs is not None else None
             )
 
     def format(self, start: int, stop: int, attrs: dict[str, Any]) -> None:
@@ -282,9 +347,13 @@ def format(self, start: int, stop: int, attrs: dict[str, Any]) -> None:
         with self.doc.transaction() as txn:
             self._forbid_read_transaction(txn)
             start, stop = self._check_slice(slice(start, stop))
-            length = stop - start
+            current = str(self)
+            ok = self.doc.offset_kind
+            offset_start = _char_to_offset(current, start, ok)
+            offset_stop = _char_to_offset(current, stop, ok)
+            length = offset_stop - offset_start
             if length > 0:
-                self.integrated.format(txn._txn, start, length, iter(attrs.items()))
+                self.integrated.format(txn._txn, offset_start, length, iter(attrs.items()))
 
     def diff(self) -> list[tuple[Any, dict[str, Any] | None]]:
         """
diff --git a/src/doc.rs b/src/doc.rs
index 61109cc..455c61f 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -3,7 +3,7 @@ use pyo3::IntoPyObjectExt;
 use pyo3::exceptions::{PyRuntimeError, PyValueError};
 use pyo3::types::{PyBool, PyBytes, PyDict, PyInt, PyList};
 use yrs::{
-    Doc as _Doc, Options, ReadTxn, StateVector, SubdocsEvent as _SubdocsEvent, Transact, TransactionCleanupEvent, TransactionMut, Update, WriteTxn
+    Doc as _Doc, OffsetKind, Options, ReadTxn, StateVector, SubdocsEvent as _SubdocsEvent, Transact, TransactionCleanupEvent, TransactionMut, Update, WriteTxn
 };
 use yrs::updates::encoder::{Encode, Encoder};
 use yrs::updates::decoder::Decode;
@@ -32,6 +32,7 @@ impl Doc {
         let mut options = yrs::Options::default();
         options.client_id = original.doc.client_id();
         options.skip_gc = original.doc.skip_gc();
+        options.offset_kind = original.doc.offset_kind();
         if let Some(collection_id) = original.doc.collection_id() {
             options.collection_id = Some(collection_id);
         }
@@ -68,7 +69,11 @@ impl Doc {
 #[pymethods]
 impl Doc {
     #[new]
-    fn new(client_id: &Bound<'_, PyAny>, skip_gc: &Bound<'_, PyAny>) -> PyResult<Self> {
+    fn new(
+        client_id: &Bound<'_, PyAny>,
+        skip_gc: &Bound<'_, PyAny>,
+        offset_kind: &Bound<'_, PyAny>,
+    ) -> PyResult<Self> {
         let mut options = Options::default();
         if !client_id.is_none() {
             let _client_id: u64 = client_id.cast::<PyInt>()
@@ -84,10 +89,30 @@ impl Doc {
                 .map_err(|_| PyValueError::new_err("skip_gc must be a valid bool"))?;
             options.skip_gc = _skip_gc;
         }
+        if !offset_kind.is_none() {
+            let _offset_kind: String = offset_kind
+                .extract()
+                .map_err(|_| PyValueError::new_err("offset_kind must be a string"))?;
+            options.offset_kind = match _offset_kind.as_str() {
+                "utf8" | "utf-8" => OffsetKind::Bytes,
+                "utf16" | "utf-16" => OffsetKind::Utf16,
+                _ => return Err(PyValueError::new_err(
+                    "offset_kind must be 'utf8' or 'utf16'",
+                )),
+            };
+        }
         let doc = _Doc::with_options(options);
         Ok(Doc { doc })
     }
 
+    #[getter]
+    fn offset_kind(&self) -> &'static str {
+        match self.doc.offset_kind() {
+            OffsetKind::Utf16 => "utf16", // UTF-16 code unit offsets
+            OffsetKind::Bytes => "utf8", // UTF-8 byte offsets
+        }
+    }
+
     #[staticmethod]
     #[pyo3(name = "from_snapshot")]
     pub fn from_snapshot(py: Python<'_>, snapshot: PyRef<'_, crate::snapshot::Snapshot>, doc: PyRef<'_, Doc>) -> PyResult<Py<Doc>> {
diff --git a/tests/test_text.py b/tests/test_text.py
index ba913a0..983e0ac 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -1,7 +1,18 @@
+from difflib import SequenceMatcher
+
 import pytest
 from anyio import TASK_STATUS_IGNORED, Event, create_task_group
 from anyio.abc import TaskStatus
-from pycrdt import Array, Assoc, Doc, Map, StickyIndex, Text
+from pycrdt import (
+    Array,
+    Assoc,
+    Doc,
+    Map,
+    StickyIndex,
+    Text,
+    get_utf8_index,
+    get_utf16_index,
+)
 
 pytestmark = pytest.mark.anyio
 
@@ -228,6 +239,360 @@ def test_sticky_index(serialize: str):
     assert text1[new_idx] == "*"
 
 
+@pytest.fixture(params=["utf8", "utf16"])
+def offset_kind(request):
+    return request.param  # each dependent test runs once per offset kind
+
+
+def test_unicode_emoji_insert(offset_kind):
+    """Inserting past an emoji must honour Python character indices."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ytext = Text()
+    ydoc["text"] = ytext
+    ytext += "A📊B"
+    assert str(ytext) == "A📊B"
+    assert len(ytext) == 3
+
+    # Character index 2 is the gap between 📊 and B.
+    ytext.insert(2, "X")
+    assert str(ytext) == "A📊XB", f"Got {str(ytext)!r}, emoji insert position is wrong"
+
+
+def test_unicode_emoji_sequential_inserts(offset_kind):
+    """Repeated end-of-text inserts after an emoji must land in order."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text()
+
+    ytext += "# Analysis 📊\n"
+    for line in ("model = fit()\n", "# 特征工程\n", 'print("done")\n'):
+        ytext.insert(len(ytext), line)
+    result = str(ytext)
+
+    expected = '# Analysis 📊\nmodel = fit()\n# 特征工程\nprint("done")\n'
+    assert result == expected, f"Got {result!r}"
+
+
+def test_unicode_emoji_iadd(offset_kind):
+    """`+=` on text containing an emoji must append at the very end.
+
+    Regression test: Text.__iadd__ used to hand len(self) to yrs as if it
+    were already expressed in offset units."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text()
+    for chunk in ("A📊B", "X"):
+        ytext += chunk
+
+    assert str(ytext) == "A📊BX"
+
+
+def test_unicode_emoji_len(offset_kind):
+    """len() counts Python characters whatever the offset_kind is."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ytext = Text()
+    ydoc["text"] = ytext
+
+    # Each emoji is one Python character even though it spans several units.
+    for chunk, expected_len in (("A📊B", 3), ("🎉", 4)):
+        ytext += chunk
+        assert len(ytext) == expected_len
+
+
+def test_unicode_emoji_delete(offset_kind):
+    """Removing the character right after an emoji leaves the rest intact."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text("A📊BC")
+    del ytext[2]  # index 2 is "B", the character following the emoji
+    result = str(ytext)
+    assert result == "A📊C", f"Got {result!r}"
+
+
+def test_unicode_emoji_delete_emoji(offset_kind):
+    """Removing the emoji itself drops exactly that one character."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text("A📊B")
+    del ytext[1]  # index 1 is the 📊 emoji
+    result = str(ytext)
+    assert result == "AB", f"Got {result!r}"
+
+
+def test_unicode_emoji_slice_delete(offset_kind):
+    """Deleting a slice that starts and ends on emoji removes the whole span."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text("A📊B🎉C")
+    del ytext[1:4]  # characters 1..3 are 📊, B and 🎉
+    result = str(ytext)
+    assert result == "AC", f"Got {result!r}"
+
+
+def test_unicode_emoji_setitem(offset_kind):
+    """Overwriting the character right after an emoji hits the right spot."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text("A📊BC")
+    ytext[2] = "X"  # index 2 is "B", the character following the emoji
+    result = str(ytext)
+    assert result == "A📊XC", f"Got {result!r}"
+
+
+def test_unicode_emoji_slice_setitem(offset_kind):
+    """Assigning to a slice that spans emoji replaces the whole span."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ydoc["text"] = ytext = Text("A📊B🎉C")
+    ytext[1:4] = "XYZ"  # characters 1..3 (📊, B, 🎉) become X, Y, Z
+    result = str(ytext)
+    assert result == "AXYZC", f"Got {result!r}"
+
+
+def test_unicode_cjk(offset_kind):
+    """BMP CJK characters: one UTF-16 code unit but three UTF-8 bytes each."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ytext = Text()
+    ydoc["text"] = ytext
+    ytext += "价格"
+    ytext.insert(2, "X")
+    assert str(ytext) == "价格X", f"Got {str(ytext)!r}"
+    assert len(ytext) == 3
+
+
+def test_unicode_mixed_scripts(offset_kind):
+    """Mixed ASCII, CJK, Cyrillic, and emoji in one text."""
+    doc = Doc(offset_kind=offset_kind)
+    doc["text"] = text = Text()
+
+    # Indices are Python characters: 📊 counts as 1, not as 2 UTF-16 units.
+    text += "Hello"
+    text.insert(5, " 世界")
+    text.insert(8, " 📊")
+    text.insert(10, " мир")
+    text.insert(14, "!")
+    expected = "Hello 世界 📊 мир!"
+    assert str(text) == expected, f"Got {str(text)!r}"
+    assert len(text) == 15
+
+
+def test_unicode_supplementary_plane(offset_kind):
+    """Non-BMP characters other than emoji are indexed as one character each."""
+    ydoc = Doc(offset_kind=offset_kind)
+    ytext = Text()
+    ydoc["text"] = ytext
+    # 𝒜 is U+1D49C (Mathematical Script Capital A);
+    # 𠀀 is U+20000 (CJK Unified Ideograph Extension B).
+    ytext += "A𝒜B𠀀C"
+    assert len(ytext) == 5
+
+    ytext.insert(2, "X")  # lands between 𝒜 and B
+    assert str(ytext) == "A𝒜XB𠀀C", f"Got {str(ytext)!r}"
+
+    ytext.insert(5, "Y")  # lands between 𠀀 and C
+    assert str(ytext) == "A𝒜XB𠀀YC", f"Got {str(ytext)!r}"
+
+
+def test_unicode_cross_doc_sync(offset_kind):
+    """Unicode edits made in one pycrdt doc must replay identically in another.
+
+    The two docs share the same offset_kind on purpose: peers with mismatched
+    offset kinds are a known incompatibility (yrs and yjs both require all
+    peers in a swarm to agree).
+    """
+    source = Doc(offset_kind=offset_kind)
+    source["text"] = source_text = Text()
+
+    # Record every update emitted by the source doc.
+    captured = []
+    source.observe(lambda event: captured.append(event.update))
+
+    source_text += "# Analysis 📊\n"
+    source_text.insert(len(source_text), "model = fit()\n")
+    source_text.insert(len(source_text), "# 特征工程\n")
+
+    # Replay the recorded updates on a fresh replica.
+    replica = Doc(offset_kind=offset_kind)
+    replica["text"] = Text()
+    for payload in captured:
+        replica.apply_update(payload)
+
+    assert str(replica["text"]) == str(source_text), (
+        f"Docs diverged: doc1={str(source_text)!r} doc2={str(replica['text'])!r}"
+    )
+
+
+# Test cases adapted from jupyter-server/jupyter_ydoc#370 (prior art for
+# the workaround at the jupyter_ydoc layer). These exercise pycrdt's Text
+# operations directly with the same Unicode edge cases. Each test sets
+# initial content, then applies a granular edit (using SequenceMatcher on
+# characters, whereas jupyter_ydoc.YUnicode.set() works on byte offsets), and
+# verifies the result is correct.
+
+
+def _apply_diff(text, old_value, new_value):
+    """Turn ``old_value`` into ``new_value`` on ``text`` via granular edits.
+
+    Opcodes come from a character-level SequenceMatcher (not a byte-level one)."""
+    shift = 0  # how far earlier edits have moved the remaining old indices
+    opcodes = SequenceMatcher(a=old_value, b=new_value).get_opcodes()
+    for tag, old_lo, old_hi, new_lo, new_hi in opcodes:
+        start, stop = old_lo + shift, old_hi + shift
+        piece = new_value[new_lo:new_hi]
+        if tag == "insert":
+            text.insert(start, piece)
+        elif tag == "delete":
+            del text[start:stop]
+        elif tag == "replace":
+            text[start:stop] = piece
+        if tag != "equal":
+            shift += (new_hi - new_lo) - (old_hi - old_lo)
+
+
+@pytest.mark.parametrize(
+    "initial, updated",
+    [
+        # emojis swapped
+        (
+            "I like security 🎨 but I really love painting 🔒",
+            "I like security 🔒 but I really love painting 🎨",
+        ),
+        # text changes, emojis stay in place
+        (
+            "Here is a rocket: ⭐ and a star: 🚀",
+            "Here is a star: ⭐ and a rocket: 🚀",
+        ),
+        # change of text and emojis
+        (
+            "Here are some happy faces: 😀😁😂",
+            "Here are some sad faces: 😞😢😭",
+        ),
+        # change of characters with combining marks
+        (
+            "Combining characters: á é í ó ú",
+            "Combining characters: ú ó í é á",
+        ),
+        # flags (regional indicator sequences)
+        (
+            "Flags: 🇺🇸🇬🇧🇨🇦",
+            "Flags: 🇨🇦🇬🇧🇺🇸",
+        ),
+        # Zero-width joiner sequences (family emoji)
+        (
+            "A family 👨\u200d👩\u200d👧\u200d👦 (with two children)",
+            "A family 👨\u200d👩\u200d👧 (with one child)",
+        ),
+        # Mixed RTL/LTR text
+        (
+            "Hello שלום world",
+            "Hello עולם world",
+        ),
+        # Keycap sequences
+        (
+            "Numbers: 1️⃣2️⃣3️⃣",
+            "Numbers: 3️⃣2️⃣1️⃣",
+        ),
+        # Emoji at boundaries
+        (
+            "👋 middle text 🎉",
+            "🎉 middle text 👋",
+        ),
+        # Japanese characters
+        (
+            "こんにちは世界",
+            "こんにちは地球",
+        ),
+        # Julia math operators
+        (
+            "x ∈ [1, 2, 3] && y ≥ 0",
+            "x ∉ [1, 2, 3] || y ≤ 0",
+        ),
+    ],
+    ids=[
+        "emoji_swap",
+        "text_change_emoji_stay",
+        "emoji_change",
+        "combining_marks",
+        "flags",
+        "zwj_family",
+        "rtl_ltr",
+        "keycap",
+        "emoji_boundaries",
+        "japanese",
+        "math_operators",
+    ],
+)
+def test_unicode_granular_diff(initial, updated, offset_kind):
+    """Character-level granular edits must turn ``initial`` into ``updated``.
+
+    Test cases adapted from jupyter-server/jupyter_ydoc#370.
+    """
+    ydoc = Doc(offset_kind=offset_kind)
+    ytext = Text()
+    ydoc["text"] = ytext
+    ytext += initial
+    assert str(ytext) == initial
+
+    _apply_diff(ytext, initial, updated)
+    assert str(ytext) == updated, f"Got {str(ytext)!r}, expected {updated!r}"
+
+
+def test_get_utf16_index():
+    for string, char_index, expected in (
+        ("hello", 0, 0),  # ASCII: identity
+        ("hello", 5, 5),
+        ("价格", 2, 2),  # BMP CJK: 1 code point = 1 UTF-16 code unit
+        ("A📊B", 1, 1),  # non-BMP emoji: 1 code point = 2 code units
+        ("A📊B", 2, 3),
+        ("A📊B", 3, 4),
+    ):
+        assert get_utf16_index(string, char_index) == expected
+
+
+def test_get_utf8_index():
+    for string, char_index, expected in (
+        ("hello", 0, 0),  # ASCII: identity
+        ("hello", 5, 5),
+        ("价格", 1, 3),  # BMP CJK: 1 code point = 3 UTF-8 bytes
+        ("价格", 2, 6),
+        ("A📊B", 1, 1),  # non-BMP emoji: 1 code point = 4 UTF-8 bytes
+        ("A📊B", 2, 5),
+        ("A📊B", 3, 6),
+    ):
+        assert get_utf8_index(string, char_index) == expected
+
+
+def test_offset_kind_default_is_utf8():
+    assert Doc().offset_kind == "utf8"  # no offset_kind given: yrs default applies
+
+
+def test_offset_kind_explicit():
+    # Hyphenated spellings are accepted and reported in canonical form.
+    spellings = {"utf8": "utf8", "utf16": "utf16", "utf-8": "utf8", "utf-16": "utf16"}
+    for requested, canonical in spellings.items():
+        reported = Doc(offset_kind=requested).offset_kind
+        assert reported == canonical
+
+
+def test_offset_kind_invalid_raises():
+    with pytest.raises(ValueError):
+        Doc(offset_kind="utf32")  # only the utf8/utf16 spellings are accepted
+
+
+def test_offset_kind_doc_mismatch_raises():
+    """Wrapping an existing doc with a conflicting offset_kind is an error."""
+    wrapper = Doc(offset_kind="utf8")
+    with pytest.raises(ValueError, match="does not match"):
+        Doc(doc=wrapper._doc, offset_kind="utf16")
+
+
+def test_offset_kind_snapshot_round_trip(offset_kind):
+    """A doc rebuilt from a snapshot keeps the original doc's offset_kind."""
+    from pycrdt import Snapshot
+
+    origin = Doc(offset_kind=offset_kind, skip_gc=True)
+    origin["text"] = Text("A📊B")
+    rebuilt = Doc.from_snapshot(Snapshot.from_doc(origin), origin)
+    actual_kind = rebuilt.offset_kind
+    assert actual_kind == offset_kind, (
+        f"snapshot lost offset_kind: expected {offset_kind}, got {actual_kind}"
+    )
+    assert str(rebuilt["text"]) == "A📊B"
+
+
 def test_sticky_index_transaction():
     doc = Doc()
     text = doc.get("text", type=Text)