Add Zotero library and collection targeting
This commit is contained in:
parent
f3076c52ce
commit
dd5c159108
10
README.md
10
README.md
@ -42,7 +42,15 @@ If you want to watch the browser process visually (helpful for debugging if a si
|
|||||||
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed
|
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To save into a specific library or collection, pass `--library-name` and/or `--collection-id`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library"
|
||||||
|
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --collection-id C13
|
||||||
|
uv run zotero_automator.py "https://arxiv.org/abs/1706.03762" --library-name "My Library" --collection-id C13
|
||||||
|
```
|
||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
|
|
||||||
- **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome Web Store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`.
|
- **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome Web Store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`.
|
||||||
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and then poll `sessionProgress` until Zotero finishes downloading the PDFs and metadata.
|
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We find the extension's background service worker, trigger the programmatic save, and can optionally retarget the save session to a specific library or collection before attachment handling continues.
|
||||||
|
|||||||
@ -43,7 +43,7 @@ def setup_extension():
|
|||||||
print("[*] Zotero Extension setup complete.")
|
print("[*] Zotero Extension setup complete.")
|
||||||
return os.path.abspath(EXTENSION_DIR)
|
return os.path.abspath(EXTENSION_DIR)
|
||||||
|
|
||||||
async def save_to_zotero(url, headless_mode="new"):
|
async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None):
|
||||||
"""Automates Chrome to load a URL and trigger Zotero Connector."""
|
"""Automates Chrome to load a URL and trigger Zotero Connector."""
|
||||||
extension_path = setup_extension()
|
extension_path = setup_extension()
|
||||||
|
|
||||||
@ -104,7 +104,102 @@ async def save_to_zotero(url, headless_mode="new"):
|
|||||||
else:
|
else:
|
||||||
assert worker is not None
|
assert worker is not None
|
||||||
print("[*] Triggering save via extension service worker evaluation...")
|
print("[*] Triggering save via extension service worker evaluation...")
|
||||||
save_result = await worker.evaluate('''async () => {
|
save_result = await worker.evaluate('''async ({ libraryName, collectionId }) => {
|
||||||
|
function normalizeCollectionId(value) {
|
||||||
|
if (!value) return null;
|
||||||
|
const trimmed = value.trim();
|
||||||
|
if (/^[cC]\\d+$/.test(trimmed)) return `C${trimmed.slice(1)}`;
|
||||||
|
if (/^\\d+$/.test(trimmed)) return `C${trimmed}`;
|
||||||
|
return trimmed;
|
||||||
|
}
|
||||||
|
|
||||||
|
function findOwningLibrary(targets, targetId) {
|
||||||
|
let index = targets.findIndex((target) => target.id === targetId);
|
||||||
|
if (index === -1) return null;
|
||||||
|
let currentLevel = targets[index].level || 0;
|
||||||
|
|
||||||
|
for (let i = index - 1; i >= 0; i--) {
|
||||||
|
let candidate = targets[i];
|
||||||
|
let candidateLevel = candidate.level || 0;
|
||||||
|
if (candidateLevel < currentLevel) {
|
||||||
|
if (candidate.id.startsWith("L")) {
|
||||||
|
return candidate;
|
||||||
|
}
|
||||||
|
currentLevel = candidateLevel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return targets[index].id.startsWith("L") ? targets[index] : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function resolveTarget() {
|
||||||
|
if (!libraryName && !collectionId) return null;
|
||||||
|
|
||||||
|
let response = await Zotero.Connector.callMethod("getSelectedCollection", { switchToReadableLibrary: true });
|
||||||
|
let targets = response.targets || [];
|
||||||
|
if (!targets.length) {
|
||||||
|
throw new Error("Zotero did not return any selectable targets.");
|
||||||
|
}
|
||||||
|
|
||||||
|
let libraryTarget = null;
|
||||||
|
if (libraryName) {
|
||||||
|
let normalizedLibraryName = libraryName.trim().toLowerCase();
|
||||||
|
let matches = targets.filter((target) =>
|
||||||
|
target.id.startsWith("L") && target.name.trim().toLowerCase() === normalizedLibraryName
|
||||||
|
);
|
||||||
|
if (!matches.length) {
|
||||||
|
throw new Error(`Library '${libraryName}' was not found.`);
|
||||||
|
}
|
||||||
|
if (matches.length > 1) {
|
||||||
|
throw new Error(`Library '${libraryName}' is ambiguous.`);
|
||||||
|
}
|
||||||
|
libraryTarget = matches[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
let collectionTarget = null;
|
||||||
|
if (collectionId) {
|
||||||
|
let normalizedCollectionId = normalizeCollectionId(collectionId);
|
||||||
|
collectionTarget = targets.find((target) => target.id === normalizedCollectionId);
|
||||||
|
if (!collectionTarget) {
|
||||||
|
throw new Error(`Collection '${collectionId}' was not found.`);
|
||||||
|
}
|
||||||
|
if (libraryTarget) {
|
||||||
|
let owningLibrary = findOwningLibrary(targets, collectionTarget.id);
|
||||||
|
if (!owningLibrary || owningLibrary.id !== libraryTarget.id) {
|
||||||
|
throw new Error(
|
||||||
|
`Collection '${collectionId}' does not belong to library '${libraryName}'.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return collectionTarget || libraryTarget;
|
||||||
|
}
|
||||||
|
|
||||||
|
let target = await resolveTarget();
|
||||||
|
let applyTargetToSession = async (sessionID) => {
|
||||||
|
if (!target || !sessionID) return;
|
||||||
|
await Zotero.Connector.callMethod("updateSession", { sessionID, target: target.id });
|
||||||
|
};
|
||||||
|
|
||||||
|
let originalCallMethodWithCookies = Zotero.Connector.callMethodWithCookies.bind(Zotero.Connector);
|
||||||
|
let originalSaveStandaloneAttachment = Zotero.ItemSaver?.saveStandaloneAttachmentToZotero?.bind(Zotero.ItemSaver);
|
||||||
|
|
||||||
|
Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) {
|
||||||
|
let result = await originalCallMethodWithCookies(method, payload, ...args);
|
||||||
|
if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) {
|
||||||
|
await applyTargetToSession(payload.sessionID);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (originalSaveStandaloneAttachment) {
|
||||||
|
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = async function(attachment, sessionID, ...args) {
|
||||||
|
let result = await originalSaveStandaloneAttachment(attachment, sessionID, ...args);
|
||||||
|
await applyTargetToSession(sessionID);
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
|
let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
|
||||||
if (!tabs || tabs.length === 0) return {error: "No active tab found."};
|
if (!tabs || tabs.length === 0) return {error: "No active tab found."};
|
||||||
@ -113,24 +208,32 @@ async def save_to_zotero(url, headless_mode="new"):
|
|||||||
|
|
||||||
if (tabInfo && tabInfo.translators && tabInfo.translators.length) {
|
if (tabInfo && tabInfo.translators && tabInfo.translators.length) {
|
||||||
let result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true});
|
let result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true});
|
||||||
return { ok: true, mode: "translator", result };
|
return { ok: true, mode: "translator", result, target };
|
||||||
} else if (tabInfo) {
|
} else if (tabInfo) {
|
||||||
let result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true });
|
let result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true });
|
||||||
return { ok: true, mode: "webpage", result };
|
return { ok: true, mode: "webpage", result, target };
|
||||||
} else {
|
} else {
|
||||||
return {error: "No translator or webpage saving options available."};
|
return {error: "No translator or webpage saving options available."};
|
||||||
}
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
return {error: e.message};
|
return {error: e.message};
|
||||||
|
} finally {
|
||||||
|
Zotero.Connector.callMethodWithCookies = originalCallMethodWithCookies;
|
||||||
|
if (originalSaveStandaloneAttachment) {
|
||||||
|
Zotero.ItemSaver.saveStandaloneAttachmentToZotero = originalSaveStandaloneAttachment;
|
||||||
}
|
}
|
||||||
}''')
|
}
|
||||||
|
}''', {"libraryName": library_name, "collectionId": collection_id})
|
||||||
|
|
||||||
if not save_result or "error" in save_result:
|
if not save_result or "error" in save_result:
|
||||||
print(f"[!] Save trigger failed: {save_result.get('error') if save_result else 'Unknown error'}")
|
print(f"[!] Save trigger failed: {save_result.get('error') if save_result else 'Unknown error'}")
|
||||||
else:
|
else:
|
||||||
save_mode = save_result.get("mode", "unknown")
|
save_mode = save_result.get("mode", "unknown")
|
||||||
returned = save_result.get("result")
|
returned = save_result.get("result")
|
||||||
|
target = save_result.get("target")
|
||||||
print(f"[*] Save completed successfully via {save_mode}.")
|
print(f"[*] Save completed successfully via {save_mode}.")
|
||||||
|
if target:
|
||||||
|
print(f"[*] Save target: {target.get('name')} ({target.get('id')})")
|
||||||
if returned is not None:
|
if returned is not None:
|
||||||
print(f"[*] Save returned: {returned}")
|
print(f"[*] Save returned: {returned}")
|
||||||
|
|
||||||
@ -141,7 +244,16 @@ if __name__ == "__main__":
|
|||||||
parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.")
|
parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.")
|
||||||
parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero")
|
parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero")
|
||||||
parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new")
|
parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new")
|
||||||
|
parser.add_argument("--library-name", help="Save into the library with this exact name")
|
||||||
|
parser.add_argument("--collection-id", help="Save into the collection with this ID, such as 13 or C13")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
headless_arg = "false" if args.headed else "new"
|
headless_arg = "false" if args.headed else "new"
|
||||||
asyncio.run(save_to_zotero(args.url, headless_mode=headless_arg))
|
asyncio.run(
|
||||||
|
save_to_zotero(
|
||||||
|
args.url,
|
||||||
|
headless_mode=headless_arg,
|
||||||
|
library_name=args.library_name,
|
||||||
|
collection_id=args.collection_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user