Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,24 @@ The API server provides:

For more details, see the [API README](./api/README.md).

### Publishing to understand-quickly (opt-in)

DeepWiki can emit its generated wiki as a [`generic@1`](https://github.com/looptech-ai/understand-quickly/blob/main/schemas/generic@1.json) knowledge graph and (optionally) register it with [`looptech-ai/understand-quickly`](https://github.com/looptech-ai/understand-quickly), a public registry of code-knowledge graphs that ships an MCP server and a stable `registry.json` API.

```bash
# Existing markdown / json export — unchanged.
curl -X POST http://localhost:8001/export/wiki \
-H "content-type: application/json" \
-d '{"repo_url":"https://github.com/owner/repo","format":"json","pages":[...]}' > wiki.json

# New: emit the knowledge graph and (optionally) ping the registry.
curl -X POST http://localhost:8001/export/wiki \
-H "content-type: application/json" \
-d '{"repo_url":"https://github.com/owner/repo","format":"graph","publish":true,"pages":[...]}' > graph.json
```

Set `UNDERSTAND_QUICKLY_TOKEN` in the API server env (a fine-grained PAT with `Repository dispatches: write` on `looptech-ai/understand-quickly` only) to enable the dispatch step. With the token unset, `format=graph` still emits the file — the dispatch is simply skipped. See the [integration protocol](https://github.com/looptech-ai/understand-quickly/blob/main/docs/integrations/protocol.md) for the full contract.

## 🔌 OpenRouter Integration

DeepWiki now supports [OpenRouter](https://openrouter.ai/) as a model provider, giving you access to hundreds of AI models through a single API:
Expand Down
62 changes: 57 additions & 5 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,31 @@ class WikiExportRequest(BaseModel):
"""
repo_url: str = Field(..., description="URL of the repository")
pages: List[WikiPage] = Field(..., description="List of wiki pages to export")
format: Literal["markdown", "json"] = Field(..., description="Export format (markdown or json)")
format: Literal["markdown", "json", "graph"] = Field(
...,
description=(
"Export format. 'markdown' / 'json' are the existing wiki dumps; "
"'graph' emits a generic@1 knowledge graph for the "
"looptech-ai/understand-quickly registry."
),
)
publish: bool = Field(
False,
description=(
"If true, after producing the export also fire a "
"repository_dispatch event at looptech-ai/understand-quickly "
"so the registry resyncs the entry. Opt-in; requires "
"UNDERSTAND_QUICKLY_TOKEN in the server env. No-ops cleanly "
"if the token is missing."
),
)
repo: Optional[str] = Field(
None,
description=(
"Optional 'owner/repo' override for the registry id. If "
"omitted, derived from `repo_url`."
),
)
Comment on lines +136 to +153

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To fully support the commit metadata field in the generated graph (as mentioned in the PR description and implemented in build_graph_payload), consider adding a commit field to the WikiExportRequest model. This allows the caller (e.g., a CI workflow or an orchestrator) to provide the specific git SHA associated with the wiki content.

    repo: Optional[str] = Field(
        None,
        description=(
            "Optional 'owner/repo' override for the registry id. If "
            "omitted, derived from `repo_url`."
        ),
    )
    commit: Optional[str] = Field(
        None,
        description="Optional 40-hex git commit SHA to embed in the graph metadata.",
    )


# --- Model Configuration Models ---
class Model(BaseModel):
Expand Down Expand Up @@ -227,7 +251,7 @@ async def get_model_config():
@app.post("/export/wiki")
async def export_wiki(request: WikiExportRequest):
"""
Export wiki content as Markdown or JSON.
Export wiki content as Markdown, JSON, or a knowledge graph.

Args:
request: The export request containing wiki pages and format
Expand All @@ -245,24 +269,52 @@ async def export_wiki(request: WikiExportRequest):
# Get current timestamp for the filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

publish_status: Optional[Dict[str, Any]] = None
headers: Dict[str, str] = {}

if request.format == "markdown":
# Generate Markdown content
content = generate_markdown_export(request.repo_url, request.pages)
filename = f"{repo_name}_wiki_{timestamp}.md"
media_type = "text/markdown"
elif request.format == "graph":
# generic@1 knowledge graph for looptech-ai/understand-quickly.
from api.publish import (
build_graph_payload,
derive_owner_repo,
publish as publish_to_registry,
)
Comment on lines +293 to +298

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If you apply the suggestion to include the commit SHA, ensure you also import the git_head_sha helper here.

Suggested change
from api.publish import (
build_graph_payload,
derive_owner_repo,
publish as publish_to_registry,
)
from api.publish import (
build_graph_payload,
derive_owner_repo,
git_head_sha,
publish as publish_to_registry,
)


payload = build_graph_payload(
[page.model_dump() for page in request.pages],
repo_url=request.repo_url,
)
Comment on lines +303 to +307

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Pass the commit from the request to the build_graph_payload function so it can be included in the graph metadata.

Suggested change
payload = build_graph_payload(
[page.model_dump() for page in request.pages],
repo_url=request.repo_url,
)
payload = build_graph_payload(
[page.model_dump() for page in request.pages],
repo_url=request.repo_url,
commit=request.commit,
)

Comment on lines +303 to +307

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The PR description mentions that metadata.commit is embedded when available, and api/publish.py includes a git_head_sha helper for this purpose. However, build_graph_payload is currently called without the commit argument, and git_head_sha is never invoked. To fulfill the intended functionality, you should attempt to resolve the current commit SHA.

Suggested change
payload = build_graph_payload(
[page.model_dump() for page in request.pages],
repo_url=request.repo_url,
)
payload = build_graph_payload(
[page.model_dump() for page in request.pages],
repo_url=request.repo_url,
commit=git_head_sha(),
)

content = json.dumps(payload, indent=2)
filename = f"{repo_name}_graph_{timestamp}.json"
media_type = "application/json"

if request.publish:
owner_repo = request.repo or derive_owner_repo(request.repo_url)
publish_status = publish_to_registry(payload, owner_repo=owner_repo)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The publish_to_registry function performs synchronous network I/O using urllib.request.urlopen. Calling this directly within an async def endpoint will block the FastAPI event loop, which can lead to performance degradation or unresponsiveness under load. Since asyncio is already imported and used elsewhere in this file, you should offload this blocking call to a separate thread using asyncio.to_thread.

                publish_status = await asyncio.to_thread(
                    publish_to_registry, payload, owner_repo=owner_repo
                )

headers["X-Understand-Quickly-Dispatched"] = (
"true" if publish_status.get("dispatched") else "false"
)
if publish_status.get("reason"):
headers["X-Understand-Quickly-Reason"] = str(
publish_status["reason"]
)
Comment on lines +312 to +337
else: # JSON format
# Generate JSON content
content = generate_json_export(request.repo_url, request.pages)
filename = f"{repo_name}_wiki_{timestamp}.json"
media_type = "application/json"

# Create response with appropriate headers for file download
headers["Content-Disposition"] = f"attachment; filename={filename}"
response = Response(
content=content,
media_type=media_type,
headers={
"Content-Disposition": f"attachment; filename={filename}"
}
headers=headers,
)

return response
Expand Down
256 changes: 256 additions & 0 deletions api/publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
"""
Opt-in publishing helpers for the looptech-ai/understand-quickly registry.

This module is self-contained. It uses the Python stdlib only (no extra
dependencies beyond what the rest of the API already pulls in) and is
imported lazily from ``api.api`` so that an unused publish path costs
nothing at import time.

The contract is documented at:
https://github.com/looptech-ai/understand-quickly/blob/main/docs/integrations/protocol.md

DeepWiki emits a wiki graph in the ``generic@1`` format: pages are nodes
(``kind="wiki-page"``), and each ``relatedPages`` reference becomes an
edge (``kind="related"``).
"""

from __future__ import annotations

import json
import logging
import os
import re
import subprocess
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
from urllib import error as urllib_error
from urllib import request as urllib_request

# Module-level logger; handlers and levels are left to the hosting application.
logger = logging.getLogger(__name__)

# Tool identity: written into the graph metadata and the dispatch User-Agent.
TOOL_NAME = "deepwiki-open"
DEFAULT_TOOL_VERSION = "1.0.0"

# GitHub REST endpoint that receives ``repository_dispatch`` events for the
# looptech-ai/understand-quickly registry repository.
DISPATCH_URL = (
    "https://api.github.com"
    "/repos/looptech-ai/understand-quickly/dispatches"
)


def derive_owner_repo(remote_url: Optional[str]) -> Optional[str]:
    """
    Parse a GitHub remote URL and return ``owner/repo``.

    Recognised shapes (surrounding whitespace, a trailing ``.git`` and a
    trailing ``/`` are ignored; scheme and host match case-insensitively):

    * scp-like SSH: ``git@github.com:owner/repo``
    * URL style: ``https://``, ``http://``, ``git://`` or ``ssh://``,
      optionally followed by ``user[:password]@`` credentials and/or a
      ``www.`` prefix, then ``github.com/owner/repo``

    Args:
        remote_url: The remote URL to parse. ``None`` or an empty string
            is accepted and yields ``None``.

    Returns:
        ``"owner/repo"`` with the owner and repository spelled exactly as
        they appear in the URL, or ``None`` for anything unrecognised
        (non-GitHub hosts, extra path segments, missing repo). Callers are
        expected to fall back to an explicit ``owner_repo`` argument or
        no-op.
    """
    if not remote_url:
        return None
    url = remote_url.strip()

    # Case-insensitivity is scoped to the scheme/host groups only, so the
    # captured owner/repo and the literal ``.git`` suffix keep their exact
    # original matching behaviour.
    patterns = (
        # scp-like SSH: git@github.com:owner/repo(.git)
        r"^git@(?i:github\.com):([^/]+)/([^/]+?)(?:\.git)?/?$",
        # URL style: scheme://[userinfo@][www.]github.com/owner/repo(.git)
        # The host must directly follow the optional userinfo, so
        # ``https://github.com@evil.example/o/r`` is rejected.
        r"^(?i:https?|git|ssh)://(?:[^/@]+@)?(?i:(?:www\.)?github\.com)/"
        r"([^/]+)/([^/]+?)(?:\.git)?/?$",
    )
    for pattern in patterns:
        m = re.match(pattern, url)
        if m:
            return f"{m.group(1)}/{m.group(2)}"
    return None


def git_head_sha(repo_path: Optional[str] = None) -> Optional[str]:
    """
    Resolve the commit SHA that ``HEAD`` points at.

    Runs ``git rev-parse HEAD`` in ``repo_path`` (the current working
    directory when ``repo_path`` is ``None`` or empty) with a 5 second
    timeout.

    Returns:
        The SHA when git exits with status 0 and prints exactly 40
        lowercase hex characters; ``None`` in every other case (git
        missing, not a checkout, timeout, unexpected output).
    """
    command = ["git", "rev-parse", "HEAD"]
    workdir = repo_path or None
    try:
        completed = subprocess.run(
            command,
            cwd=workdir,
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:  # pragma: no cover
        # Covers a missing git binary, a bad working directory and timeouts.
        logger.debug("git rev-parse failed: %s", exc)
        return None

    if completed.returncode != 0:
        return None

    candidate = completed.stdout.strip()
    return candidate if re.fullmatch(r"[0-9a-f]{40}", candidate) else None
Comment on lines +63 to +85

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The git_head_sha function is defined but appears to be unused in both this module and api/api.py. If it's intended for future use or by external callers, it's fine to keep, but otherwise, it should be removed to avoid dead code.



def build_graph_payload(
    pages: Iterable[Mapping[str, Any]],
    *,
    repo_url: Optional[str] = None,
    tool_version: str = DEFAULT_TOOL_VERSION,
    commit: Optional[str] = None,
    generated_at: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Build a ``generic@1``-shaped graph from a list of WikiPage-like dicts.

    Each page with a non-empty ``id`` becomes a node (``kind="wiki-page"``);
    each ``relatedPages`` reference to another emitted node becomes a
    directed edge ``page -> related_page`` with ``kind="related"``. File
    paths attached to a page are surfaced under ``data.filePaths`` so
    downstream tools can map nodes back to source files.

    Args:
        pages: Page mappings. Keys read here: ``id``, ``title``,
            ``filePaths``, ``importance``, ``relatedPages``. Pages without
            a truthy ``id`` are dropped.
        repo_url: Stored as ``metadata.repo_url`` when truthy.
        tool_version: Stored as ``metadata.tool_version``.
        commit: Stored as ``metadata.commit`` when truthy.
        generated_at: Timestamp string for ``metadata.generated_at``;
            defaults to the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns:
        A dict with ``nodes``, ``edges`` and ``metadata`` keys.
    """
    pages_list = list(pages)

    # Only ids of pages that will actually be emitted as nodes are valid
    # edge targets. Collecting the ids of skipped pages (missing or empty
    # id) would let a ``None`` / ``""`` reference through as a dangling edge.
    page_ids = {page.get("id") for page in pages_list if page.get("id")}

    nodes = []
    edges = []
    for page in pages_list:
        page_id = page.get("id")
        if not page_id:
            continue

        node: Dict[str, Any] = {
            "id": page_id,
            "kind": "wiki-page",
            # Fall back to the id when the title is absent, null or empty.
            "label": page.get("title") or page_id,
        }
        data: Dict[str, Any] = {}
        file_paths = page.get("filePaths") or []
        if file_paths:
            data["filePaths"] = list(file_paths)
        importance = page.get("importance")
        if importance:
            data["importance"] = importance
        if data:
            node["data"] = data
        nodes.append(node)

        for related_id in page.get("relatedPages") or []:
            # Skip dangling refs so the graph stays internally consistent.
            if related_id not in page_ids:
                continue
            edges.append(
                {
                    "source": page_id,
                    "target": related_id,
                    "kind": "related",
                }
            )

    metadata: Dict[str, Any] = {
        "tool": TOOL_NAME,
        "tool_version": tool_version,
        "generated_at": generated_at
        or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
    if commit:
        metadata["commit"] = commit
    if repo_url:
        metadata["repo_url"] = repo_url

    return {"nodes": nodes, "edges": edges, "metadata": metadata}


def dispatch_sync(
    id_: str,
    token: str,
    *,
    url: str = DISPATCH_URL,
    timeout: float = 10.0,
) -> Tuple[bool, Optional[str]]:
    """
    Send a ``repository_dispatch`` event of type ``sync-entry`` to the registry.

    Args:
        id_: Registry entry id, sent as ``client_payload.id``.
        token: Bearer token placed in the ``Authorization`` header.
        url: Dispatch endpoint to POST to.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(True, None)`` on a 2xx response, otherwise ``(False, message)``.
        HTTP and network errors are caught and reported as a soft failure
        rather than raised, so the caller can keep going.
    """
    event = {"event_type": "sync-entry", "client_payload": {"id": id_}}
    request_headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
        "Content-Type": "application/json",
        "User-Agent": f"{TOOL_NAME}/{DEFAULT_TOOL_VERSION}",
    }
    req = urllib_request.Request(
        url,
        data=json.dumps(event).encode("utf-8"),
        method="POST",
        headers=request_headers,
    )

    try:
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            # Prefer the ``status`` attribute; fall back to ``getcode()``.
            status = getattr(resp, "status", 0) or resp.getcode()
        succeeded = 200 <= status < 300
        return (True, None) if succeeded else (False, f"unexpected status {status}")
    except urllib_error.HTTPError as exc:
        # Must precede URLError: HTTPError is a subclass of it.
        return False, f"HTTP {exc.code}: {exc.reason}"
    except urllib_error.URLError as exc:
        return False, f"network error: {exc.reason}"
    except Exception as exc:  # pragma: no cover - defensive
        return False, str(exc)


def publish(
    payload: Mapping[str, Any],
    *,
    owner_repo: Optional[str] = None,
    token: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Best-effort notification of the registry after a graph export.

    Always returns a small status dict. It is designed not to raise, so it
    can sit next to a normal export without a failure here knocking over
    the parent request.

    Args:
        payload: The full graph dict. It is accepted for the caller's
            convenience and is not read by this function.
        owner_repo: Explicit ``owner/repo`` to register against; when
            falsy, no dispatch is attempted.
        token: Dispatch token; falls back to the
            ``UNDERSTAND_QUICKLY_TOKEN`` environment variable.

    Returns:
        ``{"dispatched": True, "id": ...}`` on success. Otherwise
        ``{"dispatched": False, "reason": ..., "message": ...}`` with
        ``reason`` one of ``no-token``, ``no-owner-repo`` or
        ``dispatch-failed`` (the latter also carries ``id`` and ``error``).
    """

    def _skipped(reason: str, message: str) -> Dict[str, Any]:
        # Log at info level and report a dispatch that was never attempted.
        logger.info("[understand-quickly] %s", message)
        return {"dispatched": False, "reason": reason, "message": message}

    auth_token = token if token else os.environ.get("UNDERSTAND_QUICKLY_TOKEN")

    # The token check comes first, so a missing token is reported even when
    # the owner/repo is also unknown.
    if not auth_token:
        return _skipped(
            "no-token",
            "UNDERSTAND_QUICKLY_TOKEN not set; skipping repository_dispatch. "
            "The graph was still produced — register your repo with "
            "`npx @understand-quickly/cli add` and the nightly sync will "
            "pick it up.",
        )

    if not owner_repo:
        return _skipped(
            "no-owner-repo",
            "owner/repo could not be determined; skipping dispatch. "
            "Pass `repo` explicitly or set the git remote.",
        )

    succeeded, failure = dispatch_sync(owner_repo, auth_token)
    if not succeeded:
        details = (
            f"dispatch failed for {owner_repo}: {failure}. "
            "If this repo is not yet in the registry, register it with "
            "`npx @understand-quickly/cli add` or the wizard at "
            "https://looptech-ai.github.io/understand-quickly/add.html."
        )
        logger.warning("[understand-quickly] %s", details)
        return {
            "dispatched": False,
            "reason": "dispatch-failed",
            "id": owner_repo,
            "error": failure,
            "message": details,
        }

    logger.info(
        "[understand-quickly] dispatched sync-entry for %s", owner_repo
    )
    return {"dispatched": True, "id": owner_repo}
Loading
Loading