From dd5c1591081743f420369fcda87ff7f3987887f0 Mon Sep 17 00:00:00 2001 From: Mantao Huang Date: Mon, 9 Mar 2026 16:38:29 -0400 Subject: [PATCH] Add Zotero library and collection targeting --- README.md | 10 +++- zotero_automator.py | 124 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5390f39..ba6d802 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,15 @@ If you want to watch the browser process visually (helpful for debugging if a si uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed ``` +To save into a specific library or collection, pass `--library-name` and/or `--collection-id`: + +```bash +uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library" +uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --collection-id C13 +uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library" --collection-id C13 +``` + ## How It Works - **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome web store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`. -- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and then poll `sessionProgress` until Zotero finishes downloading the PDFs and metadata. +- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and can optionally retarget the save session to a specific library or collection before attachment handling continues. diff --git a/zotero_automator.py b/zotero_automator.py index d2251e7..b0a3443 100644 --- a/zotero_automator.py +++ b/zotero_automator.py @@ -43,7 +43,7 @@ def setup_extension(): print("[*] Zotero Extension setup complete.") return os.path.abspath(EXTENSION_DIR) -async def save_to_zotero(url, headless_mode="new"): +async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None): """Automates Chrome to load a URL and trigger Zotero Connector.""" extension_path = setup_extension() @@ -104,7 +104,102 @@ async def save_to_zotero(url, headless_mode="new"): else: assert worker is not None print("[*] Triggering save via extension service worker evaluation...") - save_result = await worker.evaluate('''async () => { + save_result = await worker.evaluate('''async ({ libraryName, collectionId }) => { + function normalizeCollectionId(value) { + if (!value) return null; + const trimmed = value.trim(); + if (/^[cC]\\d+$/.test(trimmed)) return `C${trimmed.slice(1)}`; + if (/^\\d+$/.test(trimmed)) return `C${trimmed}`; + return trimmed; + } + + function findOwningLibrary(targets, targetId) { + let index = targets.findIndex((target) => target.id === targetId); + if (index === -1) return null; + let currentLevel = targets[index].level || 0; + + for (let i = index - 1; i >= 0; i--) { + let candidate = targets[i]; + let candidateLevel = candidate.level || 0; + if (candidateLevel < currentLevel) { + if (candidate.id.startsWith("L")) { + return candidate; + } + currentLevel = candidateLevel; + } + } + return targets[index].id.startsWith("L") ? targets[index] : null; + } + + async function resolveTarget() { + if (!libraryName && !collectionId) return null; + + let response = await Zotero.Connector.callMethod("getSelectedCollection", { switchToReadableLibrary: true }); + let targets = response.targets || []; + if (!targets.length) { + throw new Error("Zotero did not return any selectable targets."); + } + + let libraryTarget = null; + if (libraryName) { + let normalizedLibraryName = libraryName.trim().toLowerCase(); + let matches = targets.filter((target) => + target.id.startsWith("L") && target.name.trim().toLowerCase() === normalizedLibraryName + ); + if (!matches.length) { + throw new Error(`Library '${libraryName}' was not found.`); + } + if (matches.length > 1) { + throw new Error(`Library '${libraryName}' is ambiguous.`); + } + libraryTarget = matches[0]; + } + + let collectionTarget = null; + if (collectionId) { + let normalizedCollectionId = normalizeCollectionId(collectionId); + collectionTarget = targets.find((target) => target.id === normalizedCollectionId); + if (!collectionTarget) { + throw new Error(`Collection '${collectionId}' was not found.`); + } + if (libraryTarget) { + let owningLibrary = findOwningLibrary(targets, collectionTarget.id); + if (!owningLibrary || owningLibrary.id !== libraryTarget.id) { + throw new Error( + `Collection '${collectionId}' does not belong to library '${libraryName}'.` + ); + } + } + } + + return collectionTarget || libraryTarget; + } + + let target = await resolveTarget(); + let applyTargetToSession = async (sessionID) => { + if (!target || !sessionID) return; + await Zotero.Connector.callMethod("updateSession", { sessionID, target: target.id }); + }; + + let originalCallMethodWithCookies = Zotero.Connector.callMethodWithCookies.bind(Zotero.Connector); + let originalSaveStandaloneAttachment = Zotero.ItemSaver?.saveStandaloneAttachmentToZotero?.bind(Zotero.ItemSaver); + + Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) { + let result = await originalCallMethodWithCookies(method, payload, ...args); + if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) { + await applyTargetToSession(payload.sessionID); + } + return result; + }; + + if (originalSaveStandaloneAttachment) { + Zotero.ItemSaver.saveStandaloneAttachmentToZotero = async function(attachment, sessionID, ...args) { + let result = await originalSaveStandaloneAttachment(attachment, sessionID, ...args); + await applyTargetToSession(sessionID); + return result; + }; + } + try { let tabs = await chrome.tabs.query({ active: true, currentWindow: true }); if (!tabs || tabs.length === 0) return {error: "No active tab found."}; @@ -113,24 +208,32 @@ async def save_to_zotero(url, headless_mode="new"): if (tabInfo && tabInfo.translators && tabInfo.translators.length) { let result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true}); - return { ok: true, mode: "translator", result }; + return { ok: true, mode: "translator", result, target }; } else if (tabInfo) { let result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true }); - return { ok: true, mode: "webpage", result }; + return { ok: true, mode: "webpage", result, target }; } else { return {error: "No translator or webpage saving options available."}; } } catch(e) { return {error: e.message}; + } finally { + Zotero.Connector.callMethodWithCookies = originalCallMethodWithCookies; + if (originalSaveStandaloneAttachment) { + Zotero.ItemSaver.saveStandaloneAttachmentToZotero = originalSaveStandaloneAttachment; + } } - }''') + }''', {"libraryName": library_name, "collectionId": collection_id}) if not save_result or "error" in save_result: print(f"[!] Save trigger failed: {save_result.get('error') if save_result else 'Unknown error'}") else: save_mode = save_result.get("mode", "unknown") returned = save_result.get("result") + target = save_result.get("target") print(f"[*] Save completed successfully via {save_mode}.") + if target: + print(f"[*] Save target: {target.get('name')} ({target.get('id')})") if returned is not None: print(f"[*] Save returned: {returned}") @@ -141,7 +244,16 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.") parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero") parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new") + parser.add_argument("--library-name", help="Save into the library with this exact name") + parser.add_argument("--collection-id", help="Save into the collection with this ID, such as 13 or C13") args = parser.parse_args() headless_arg = "false" if args.headed else "new" - asyncio.run(save_to_zotero(args.url, headless_mode=headless_arg)) + asyncio.run( + save_to_zotero( + args.url, + headless_mode=headless_arg, + library_name=args.library_name, + collection_id=args.collection_id, + ) + )