diff --git a/projects/jupyter-server-ydoc/jupyter_server_ydoc/handlers.py b/projects/jupyter-server-ydoc/jupyter_server_ydoc/handlers.py index 727cce0b..93aa563e 100644 --- a/projects/jupyter-server-ydoc/jupyter_server_ydoc/handlers.py +++ b/projects/jupyter-server-ydoc/jupyter_server_ydoc/handlers.py @@ -227,8 +227,6 @@ async def open(self, room_id: str) -> None: # type:ignore[override] """ On connection open. """ - self.create_task(self._websocket_server.serve(self)) - if isinstance(self.room, DocumentRoom): # Close the connection if the document session expired session_id = self.get_query_argument("sessionId", "") @@ -237,16 +235,20 @@ async def open(self, room_id: str) -> None: # type:ignore[override] 1003, f"Document session {session_id} expired. You need to reload this browser tab.", ) + return # cancel the deletion of the room if it was scheduled if self.room.cleaner is not None: self.room.cleaner.cancel() try: - # Initialize the room + # FIX: Initialize the room BEFORE starting to serve clients. + # This prevents a race condition where SYNC_STEP1 is sent before + # the document is loaded, causing cell duplication when the client + # responds with SYNC_STEP2. + # See: https://github.com/jupyterlab/jupyterlab/issues/14031 async with self._room_lock(self._room_id): await self.room.initialize() - self._emit_awareness_event(self.current_user.username, "join") except Exception as e: _, _, file_id = decode_file_path(self._room_id) file = self._file_loaders[file_id] @@ -267,9 +269,15 @@ async def open(self, room_id: str) -> None: # type:ignore[override] self._message_queue.put_nowait(b"") self._cleanup_delay = 0 await self._clean_room() + return # Don't start serving if initialization failed + # NOW start serving - after successful initialization + self.create_task(self._websocket_server.serve(self)) self._emit(LogLevel.INFO, "initialize", "New client connected.") + self._emit_awareness_event(self.current_user.username, "join") else: + # TransientRoom (e.g., awareness) - no initialization needed + self.create_task(self._websocket_server.serve(self)) if self._room_id != "JupyterLab:globalAwareness": self._emit_awareness_event(self.current_user.username, "join") diff --git a/projects/jupyter-server-ydoc/jupyter_server_ydoc/rooms.py b/projects/jupyter-server-ydoc/jupyter_server_ydoc/rooms.py index 0708f076..3eb19cde 100644 --- a/projects/jupyter-server-ydoc/jupyter_server_ydoc/rooms.py +++ b/projects/jupyter-server-ydoc/jupyter_server_ydoc/rooms.py @@ -140,7 +140,6 @@ async def initialize(self) -> None: if not read_from_source: # if YStore updates and source file are out-of-sync, resync updates with source if self._document.source != model["content"]: - # TODO: Delete document from the store. self._emit( LogLevel.INFO, "initialize", @@ -151,6 +150,14 @@ async def initialize(self) -> None: self._file.path, self.ystore.__class__.__name__, ) + # FIX: Clear the document content BEFORE loading from file to prevent + # cell duplication. When out-of-sync is detected, the YDoc contains + # content from the YStore that may have corrupted state (e.g., from + # previous duplications). If we don't clear it, the set() method will + # try to MERGE the YStore content with the file content instead of + # REPLACING it, which causes cell duplication. + # See: https://github.com/jupyterlab/jupyterlab/issues/14031 + self._clear_document() read_from_source = True if read_from_source: @@ -178,6 +185,28 @@ def _emit(self, level: LogLevel, action: str | None = None, msg: str | None = No self._logger.emit(schema_id=JUPYTER_COLLABORATION_EVENTS_URI, data=data) + def _clear_document(self) -> None: + """ + Clears the document content to allow a clean reload from file. + + This is necessary when an out-of-sync condition is detected between the + YStore and the file. Without clearing, the set() method would try to + merge the existing content with the new content, potentially causing + cell duplication in notebooks. + + For notebooks (YNotebook), this clears the cells array. The metadata + and state will be properly updated by the subsequent set() call. + """ + # Check if the document has a ycells attribute (notebooks) + if hasattr(self._document, "ycells"): + # Use a transaction to ensure atomic operation + with self._document.ydoc.transaction(): + self._document.ycells.clear() + self.log.info( + "Cleared document content in room %s to prevent duplication", + self._room_id, + ) + async def stop(self) -> None: """ Stop the room.