Add Zotero library and collection targeting

This commit is contained in:
Mantao Huang 2026-03-09 16:38:29 -04:00
parent f3076c52ce
commit dd5c159108
2 changed files with 127 additions and 7 deletions

View File

@ -42,7 +42,15 @@ If you want to watch the browser process visually (helpful for debugging if a si
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed
```
To save into a specific library or collection, pass `--library-name` and/or `--collection-id`:
```bash
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library"
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --collection-id C13
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library" --collection-id C13
```
## How It Works
- **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome web store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`.
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and then poll `sessionProgress` until Zotero finishes downloading the PDFs and metadata.
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker and trigger the programmatic save; when `--library-name` and/or `--collection-id` is given, we also retarget the save session to that library or collection before attachment handling continues.

View File

@ -43,7 +43,7 @@ def setup_extension():
print("[*] Zotero Extension setup complete.")
return os.path.abspath(EXTENSION_DIR)
async def save_to_zotero(url, headless_mode="new"):
async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None):
"""Automates Chrome to load a URL and trigger Zotero Connector."""
extension_path = setup_extension()
@ -104,7 +104,102 @@ async def save_to_zotero(url, headless_mode="new"):
else:
assert worker is not None
print("[*] Triggering save via extension service worker evaluation...")
save_result = await worker.evaluate('''async () => {
save_result = await worker.evaluate('''async ({ libraryName, collectionId }) => {
/**
 * Canonicalize a user-supplied collection id to the "C<digits>" form that
 * target ids are compared against.
 *
 * A bare number (13 or "13") or a C-prefixed id in either case ("c13", "C13")
 * becomes "C13". Any other non-empty value is returned trimmed and otherwise
 * unchanged. A missing, empty, or otherwise falsy value yields null.
 *
 * @param {string|number|null|undefined} value - Raw id as given by the caller.
 * @returns {string|null} The canonical id, the trimmed input, or null.
 */
function normalizeCollectionId(value) {
  if (!value) return null;
  // Coerce first so a numeric id passed programmatically does not throw on trim().
  const trimmed = String(value).trim();
  // [0-9] instead of an escaped digit class: this code lives inside a host
  // string literal, so the pattern should not depend on how that literal
  // treats escape sequences.
  const match = /^[cC]?([0-9]+)$/.exec(trimmed);
  return match ? `C${match[1]}` : trimmed;
}
/**
 * Find the library entry that contains the target with the given id.
 *
 * The list is treated as a flattened tree: each entry may carry a numeric
 * `level` (missing means 0), and an entry's ancestors are the closest
 * preceding entries with strictly smaller levels. Library entries are the
 * ones whose id starts with "L".
 *
 * @param {Array<{id: string, level?: number}>} targets - Flattened target list.
 * @param {string} targetId - Id of the entry whose library is wanted.
 * @returns {object|null} The owning library entry, the entry itself when it
 *   is a library with no shallower library ancestor, or null when the id is
 *   unknown or no library is found.
 */
function findOwningLibrary(targets, targetId) {
  const isLibrary = (entry) => entry.id.startsWith("L");

  const position = targets.findIndex((entry) => entry.id === targetId);
  if (position < 0) {
    return null;
  }

  const self = targets[position];
  let depth = self.level || 0;

  // Walk back towards the start of the list, climbing one ancestor at a time.
  for (const ancestor of targets.slice(0, position).reverse()) {
    const ancestorDepth = ancestor.level || 0;
    if (ancestorDepth >= depth) {
      // Sibling or descendant of an earlier branch, not an ancestor.
      continue;
    }
    if (isLibrary(ancestor)) {
      return ancestor;
    }
    depth = ancestorDepth;
  }

  return isLibrary(self) ? self : null;
}
/**
 * Resolve the requested save destination against the targets Zotero reports.
 *
 * Reads `libraryName` and `collectionId` from the enclosing scope. When
 * neither is set, no lookup is made and null is returned. Otherwise the
 * target list is fetched through the connector's "getSelectedCollection"
 * method, and the collection (if requested) or else the library is returned.
 *
 * Library names are compared trimmed and case-insensitively, and only against
 * entries whose id starts with "L". When both a library and a collection are
 * requested, the collection must sit under that library.
 *
 * @returns {Promise<object|null>} The matching target entry, or null.
 * @throws {Error} When no targets are returned, the library is missing or
 *   ambiguous, the collection is missing, or the collection does not belong
 *   to the requested library.
 */
async function resolveTarget() {
  if (!(libraryName || collectionId)) {
    return null;
  }

  const reply = await Zotero.Connector.callMethod(
    "getSelectedCollection",
    { switchToReadableLibrary: true }
  );
  const available = reply.targets || [];
  if (!available.length) {
    throw new Error("Zotero did not return any selectable targets.");
  }

  let chosenLibrary = null;
  if (libraryName) {
    const wantedName = libraryName.trim().toLowerCase();
    const candidates = available.filter((entry) => {
      if (!entry.id.startsWith("L")) {
        return false;
      }
      return entry.name.trim().toLowerCase() === wantedName;
    });
    if (candidates.length === 0) {
      throw new Error(`Library '${libraryName}' was not found.`);
    }
    if (candidates.length > 1) {
      throw new Error(`Library '${libraryName}' is ambiguous.`);
    }
    [chosenLibrary] = candidates;
  }

  // Without a collection request the library (possibly null) is the answer.
  if (!collectionId) {
    return chosenLibrary;
  }

  const wantedId = normalizeCollectionId(collectionId);
  const chosenCollection = available.find((entry) => entry.id === wantedId);
  if (!chosenCollection) {
    throw new Error(`Collection '${collectionId}' was not found.`);
  }

  if (chosenLibrary) {
    // Reject a collection that lives under a different library than the one named.
    const owner = findOwningLibrary(available, chosenCollection.id);
    if (!(owner && owner.id === chosenLibrary.id)) {
      throw new Error(
        `Collection '${collectionId}' does not belong to library '${libraryName}'.`
      );
    }
  }

  return chosenCollection;
}
let target = await resolveTarget();
let applyTargetToSession = async (sessionID) => {
if (!target || !sessionID) return;
await Zotero.Connector.callMethod("updateSession", { sessionID, target: target.id });
};
let originalCallMethodWithCookies = Zotero.Connector.callMethodWithCookies.bind(Zotero.Connector);
let originalSaveStandaloneAttachment = Zotero.ItemSaver?.saveStandaloneAttachmentToZotero?.bind(Zotero.ItemSaver);
Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) {
let result = await originalCallMethodWithCookies(method, payload, ...args);
if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) {
await applyTargetToSession(payload.sessionID);
}
return result;
};
if (originalSaveStandaloneAttachment) {
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = async function(attachment, sessionID, ...args) {
let result = await originalSaveStandaloneAttachment(attachment, sessionID, ...args);
await applyTargetToSession(sessionID);
return result;
};
}
try {
let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
if (!tabs || tabs.length === 0) return {error: "No active tab found."};
@ -113,24 +208,32 @@ async def save_to_zotero(url, headless_mode="new"):
if (tabInfo && tabInfo.translators && tabInfo.translators.length) {
let result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true});
return { ok: true, mode: "translator", result };
return { ok: true, mode: "translator", result, target };
} else if (tabInfo) {
let result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true });
return { ok: true, mode: "webpage", result };
return { ok: true, mode: "webpage", result, target };
} else {
return {error: "No translator or webpage saving options available."};
}
} catch(e) {
return {error: e.message};
} finally {
Zotero.Connector.callMethodWithCookies = originalCallMethodWithCookies;
if (originalSaveStandaloneAttachment) {
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = originalSaveStandaloneAttachment;
}
}''')
}
}''', {"libraryName": library_name, "collectionId": collection_id})
if not save_result or "error" in save_result:
print(f"[!] Save trigger failed: {save_result.get('error') if save_result else 'Unknown error'}")
else:
save_mode = save_result.get("mode", "unknown")
returned = save_result.get("result")
target = save_result.get("target")
print(f"[*] Save completed successfully via {save_mode}.")
if target:
print(f"[*] Save target: {target.get('name')} ({target.get('id')})")
if returned is not None:
print(f"[*] Save returned: {returned}")
@ -141,7 +244,16 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.")
parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero")
parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new")
parser.add_argument("--library-name", help="Save into the library with this exact name")
parser.add_argument("--collection-id", help="Save into the collection with this ID, such as 13 or C13")
args = parser.parse_args()
headless_arg = "false" if args.headed else "new"
asyncio.run(save_to_zotero(args.url, headless_mode=headless_arg))
asyncio.run(
save_to_zotero(
args.url,
headless_mode=headless_arg,
library_name=args.library_name,
collection_id=args.collection_id,
)
)