Add Zotero library and collection targeting
This commit is contained in:
parent
f3076c52ce
commit
dd5c159108
10
README.md
10
README.md
@ -42,7 +42,15 @@ If you want to watch the browser process visually (helpful for debugging if a si
|
||||
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed
|
||||
```
|
||||
|
||||
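To save into a specific library or collection, pass `--library-name` (matched case-insensitively, ignoring surrounding whitespace) and/or `--collection-id` (either form works, e.g. `13` or `C13`). When both are given, the collection must belong to the named library: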
|
||||
|
||||
```bash
|
||||
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library"
|
||||
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --collection-id C13
|
||||
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library" --collection-id C13
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
- **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome web store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`.
|
||||
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and then poll `sessionProgress` until Zotero finishes downloading the PDFs and metadata.
|
||||
- **`save_to_zotero()`**: Starts an `async_playwright` session that uses a local profile folder (`./chrome_profile/`). The extension injects its translator scripts once the network is idle. We find the extension's background service worker and trigger the save programmatically; when `--library-name` and/or `--collection-id` is given, the save session is retargeted to that library or collection before attachment handling continues.
|
||||
|
||||
@ -43,7 +43,7 @@ def setup_extension():
|
||||
print("[*] Zotero Extension setup complete.")
|
||||
return os.path.abspath(EXTENSION_DIR)
|
||||
|
||||
async def save_to_zotero(url, headless_mode="new"):
|
||||
async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None):
|
||||
"""Automates Chrome to load a URL and trigger Zotero Connector."""
|
||||
extension_path = setup_extension()
|
||||
|
||||
@ -104,7 +104,102 @@ async def save_to_zotero(url, headless_mode="new"):
|
||||
else:
|
||||
assert worker is not None
|
||||
print("[*] Triggering save via extension service worker evaluation...")
|
||||
save_result = await worker.evaluate('''async () => {
|
||||
save_result = await worker.evaluate('''async ({ libraryName, collectionId }) => {
|
||||
/**
 * Normalize a user-supplied collection identifier to the "C<digits>" form.
 *
 * @param {?(string|number)} value - Raw identifier such as "13", "c13" or " C13 ".
 * @returns {?string} "C<digits>" for purely numeric or C-prefixed numeric input,
 *     the trimmed text for any other non-empty input, or null when the input is
 *     missing or blank.
 */
function normalizeCollectionId(value) {
    if (!value) return null;

    // Coerce first so a numeric id (13) is accepted as well as its string form.
    const trimmed = String(value).trim();
    if (!trimmed) return null;

    // [0-9] is used instead of a backslash digit class on purpose: this script
    // is embedded in a Python string literal, and an escape-free pattern means
    // the same thing no matter how that literal is quoted.
    if (/^[cC][0-9]+$/.test(trimmed)) return `C${trimmed.slice(1)}`;
    if (/^[0-9]+$/.test(trimmed)) return `C${trimmed}`;
    return trimmed;
}
|
||||
|
||||
/**
 * Find the library that contains a given target in a flattened target list.
 *
 * The list is scanned backwards from the target. Each time an entry with a
 * smaller `level` than the current one is met, it is treated as the next
 * ancestor; the first such ancestor whose id starts with "L" is returned.
 *
 * @param {Array<{id: string, level?: number}>} targets - Flattened target list;
 *     a missing `level` is treated as 0.
 * @param {string} targetId - Id of the target whose library is wanted.
 * @returns {?Object} The owning library entry, the target itself when it is a
 *     library with no library ancestor, or null when the target is unknown or
 *     has no library.
 */
function findOwningLibrary(targets, targetId) {
    // Entries without a string id can never be libraries; do not throw on them.
    const isLibrary = (entry) => typeof entry?.id === "string" && entry.id.startsWith("L");

    const index = targets.findIndex((entry) => entry.id === targetId);
    if (index === -1) return null;

    let currentLevel = targets[index].level || 0;
    for (let i = index - 1; i >= 0; i--) {
        const candidate = targets[i];
        const candidateLevel = candidate.level || 0;
        if (candidateLevel < currentLevel) {
            if (isLibrary(candidate)) return candidate;
            // A shallower non-library entry is an intermediate ancestor: keep climbing from it.
            currentLevel = candidateLevel;
        }
    }

    return isLibrary(targets[index]) ? targets[index] : null;
}
|
||||
|
||||
/**
 * Resolve the requested library and/or collection to one of the save targets
 * reported by Zotero.
 *
 * Reads `libraryName` and `collectionId` from the enclosing scope. Library
 * names are compared case-insensitively after trimming; collection ids are
 * normalized with normalizeCollectionId before comparison.
 *
 * @returns {Promise<?Object>} The collection target when a collection id was
 *     given, otherwise the library target, or null when neither was requested.
 * @throws {Error} When Zotero returns no targets, when the library is missing
 *     or ambiguous, when the collection is missing, or when the collection
 *     does not belong to the requested library.
 */
async function resolveTarget() {
    if (!libraryName && !collectionId) return null;

    const response = await Zotero.Connector.callMethod("getSelectedCollection", { switchToReadableLibrary: true });
    // An empty response is reported through the same error as an empty target list.
    const targets = response?.targets || [];
    if (!targets.length) {
        throw new Error("Zotero did not return any selectable targets.");
    }

    let libraryTarget = null;
    if (libraryName) {
        const wantedName = libraryName.trim().toLowerCase();
        // Skip entries lacking a string id or name instead of throwing on them.
        const matches = targets.filter((entry) =>
            typeof entry.id === "string" &&
            entry.id.startsWith("L") &&
            typeof entry.name === "string" &&
            entry.name.trim().toLowerCase() === wantedName
        );
        if (!matches.length) {
            throw new Error(`Library '${libraryName}' was not found.`);
        }
        if (matches.length > 1) {
            throw new Error(`Library '${libraryName}' is ambiguous.`);
        }
        libraryTarget = matches[0];
    }

    let collectionTarget = null;
    if (collectionId) {
        const wantedId = normalizeCollectionId(collectionId);
        collectionTarget = targets.find((entry) => entry.id === wantedId);
        if (!collectionTarget) {
            throw new Error(`Collection '${collectionId}' was not found.`);
        }
        if (libraryTarget) {
            // When both are given, the collection must live inside the named library.
            const owningLibrary = findOwningLibrary(targets, collectionTarget.id);
            if (!owningLibrary || owningLibrary.id !== libraryTarget.id) {
                throw new Error(
                    `Collection '${collectionId}' does not belong to library '${libraryName}'.`
                );
            }
        }
    }

    return collectionTarget || libraryTarget;
}
|
||||
|
||||
let target = await resolveTarget();
|
||||
let applyTargetToSession = async (sessionID) => {
|
||||
if (!target || !sessionID) return;
|
||||
await Zotero.Connector.callMethod("updateSession", { sessionID, target: target.id });
|
||||
};
|
||||
|
||||
let originalCallMethodWithCookies = Zotero.Connector.callMethodWithCookies.bind(Zotero.Connector);
|
||||
let originalSaveStandaloneAttachment = Zotero.ItemSaver?.saveStandaloneAttachmentToZotero?.bind(Zotero.ItemSaver);
|
||||
|
||||
Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) {
|
||||
let result = await originalCallMethodWithCookies(method, payload, ...args);
|
||||
if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) {
|
||||
await applyTargetToSession(payload.sessionID);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
if (originalSaveStandaloneAttachment) {
|
||||
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = async function(attachment, sessionID, ...args) {
|
||||
let result = await originalSaveStandaloneAttachment(attachment, sessionID, ...args);
|
||||
await applyTargetToSession(sessionID);
|
||||
return result;
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
|
||||
if (!tabs || tabs.length === 0) return {error: "No active tab found."};
|
||||
@ -113,24 +208,32 @@ async def save_to_zotero(url, headless_mode="new"):
|
||||
|
||||
if (tabInfo && tabInfo.translators && tabInfo.translators.length) {
|
||||
let result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true});
|
||||
return { ok: true, mode: "translator", result };
|
||||
return { ok: true, mode: "translator", result, target };
|
||||
} else if (tabInfo) {
|
||||
let result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true });
|
||||
return { ok: true, mode: "webpage", result };
|
||||
return { ok: true, mode: "webpage", result, target };
|
||||
} else {
|
||||
return {error: "No translator or webpage saving options available."};
|
||||
}
|
||||
} catch(e) {
|
||||
return {error: e.message};
|
||||
} finally {
|
||||
Zotero.Connector.callMethodWithCookies = originalCallMethodWithCookies;
|
||||
if (originalSaveStandaloneAttachment) {
|
||||
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = originalSaveStandaloneAttachment;
|
||||
}
|
||||
}
|
||||
}''')
|
||||
}''', {"libraryName": library_name, "collectionId": collection_id})
|
||||
|
||||
if not save_result or "error" in save_result:
|
||||
print(f"[!] Save trigger failed: {save_result.get('error') if save_result else 'Unknown error'}")
|
||||
else:
|
||||
save_mode = save_result.get("mode", "unknown")
|
||||
returned = save_result.get("result")
|
||||
target = save_result.get("target")
|
||||
print(f"[*] Save completed successfully via {save_mode}.")
|
||||
if target:
|
||||
print(f"[*] Save target: {target.get('name')} ({target.get('id')})")
|
||||
if returned is not None:
|
||||
print(f"[*] Save returned: {returned}")
|
||||
|
||||
@ -141,7 +244,16 @@ if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.")
|
||||
parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero")
|
||||
parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new")
|
||||
parser.add_argument("--library-name", help="Save into the library with this exact name")
|
||||
parser.add_argument("--collection-id", help="Save into the collection with this ID, such as 13 or C13")
|
||||
args = parser.parse_args()
|
||||
|
||||
headless_arg = "false" if args.headed else "new"
|
||||
asyncio.run(save_to_zotero(args.url, headless_mode=headless_arg))
|
||||
asyncio.run(
|
||||
save_to_zotero(
|
||||
args.url,
|
||||
headless_mode=headless_arg,
|
||||
library_name=args.library_name,
|
||||
collection_id=args.collection_id,
|
||||
)
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user