diff --git a/zotero_automator.py b/zotero_automator.py index f25d574..e7bca9c 100644 --- a/zotero_automator.py +++ b/zotero_automator.py @@ -1,12 +1,10 @@ import argparse import asyncio import io -import json import os import tempfile import urllib.request import zipfile -from datetime import datetime from patchright.async_api import async_playwright @@ -23,16 +21,12 @@ EXTENSION_DIR = os.path.join(BASE_DIR, "zotero_extension") WINDOW_SIZE = {"width": 1280, "height": 800} TRANSLATOR_WAIT_SECONDS = 3 WELCOME_TAB_WAIT_SECONDS = 2 -POST_SAVE_WAIT_SECONDS = 5 +POST_SAVE_WAIT_SECONDS = 10 SERVICE_WORKER_POLL_ATTEMPTS = 60 SERVICE_WORKER_POLL_INTERVAL = 0.5 -SERVICE_WORKER_DEBUG_ATTEMPTS = {0, 5, 10, 20, 40, 59} SAVE_SCRIPT = r""" async ({ libraryName, collectionId }) => { - const debug = []; - const pushDebug = (label, value = null) => debug.push({ label, value }); - function normalizeCollectionId(value) { if (!value) return null; const trimmed = value.trim(); @@ -41,23 +35,6 @@ async ({ libraryName, collectionId }) => { return trimmed; } - function summarizeTargets(targets) { - return targets.map((target) => ({ - id: target.id, - name: target.name, - level: target.level - })); - } - - function summarizeTranslators(translators = []) { - return translators.map((translator) => ({ - translatorID: translator.translatorID, - label: translator.label, - itemType: translator.itemType, - priority: translator.priority - })); - } - function findOwningLibrary(targets, targetId) { const index = targets.findIndex((target) => target.id === targetId); if (index === -1) return null; @@ -75,17 +52,12 @@ async ({ libraryName, collectionId }) => { } async function resolveTarget() { - if (!libraryName && !collectionId) { - pushDebug("resolved target", null); - return null; - } + if (!libraryName && !collectionId) return null; const response = await Zotero.Connector.callMethod("getSelectedCollection", { switchToReadableLibrary: true }); const targets = response.targets || []; - pushDebug("available targets", summarizeTargets(targets)); - if (!targets.length) { throw new Error("Zotero did not return any selectable targets."); } @@ -124,20 +96,7 @@ async ({ libraryName, collectionId }) => { } } - const target = collectionTarget || libraryTarget; - pushDebug("resolved target", target ? { id: target.id, name: target.name } : null); - return target; - } - - function summarizeTabInfo(tabInfo) { - if (!tabInfo) return null; - return { - url: tabInfo.url, - isPDF: Boolean(tabInfo.isPDF), - frameId: tabInfo.frameId, - translatorCount: tabInfo.translators?.length || 0, - translators: summarizeTranslators(tabInfo.translators) - }; + return collectionTarget || libraryTarget; } function installSessionHooks(target) { @@ -148,7 +107,6 @@ async ({ libraryName, collectionId }) => { async function applyTargetToSession(sessionID) { if (!target || !sessionID) return; - pushDebug("apply target to session", { sessionID, targetId: target.id }); await Zotero.Connector.callMethod("updateSession", { sessionID, target: target.id @@ -156,13 +114,7 @@ async ({ libraryName, collectionId }) => { } Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) { - pushDebug("callMethodWithCookies request", { - method, - hasPayload: Boolean(payload), - sessionID: payload?.sessionID || null - }); const result = await originalCallMethodWithCookies(method, payload, ...args); - pushDebug("callMethodWithCookies response", { method, result }); if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) { await applyTargetToSession(payload.sessionID); } @@ -175,17 +127,11 @@ async ({ libraryName, collectionId }) => { sessionID, ...args ) { - pushDebug("saveStandaloneAttachmentToZotero request", { - title: attachment?.title || null, - url: attachment?.url || null, - sessionID - }); const result = await originalSaveStandaloneAttachment( attachment, sessionID, ...args ); - pushDebug("saveStandaloneAttachmentToZotero response", result); await applyTargetToSession(sessionID); return result; }; @@ -201,41 +147,33 @@ async ({ libraryName, collectionId }) => { } async function runSave() { - pushDebug("connector online", await Zotero.Connector.checkIsOnline()); + const isOnline = await Zotero.Connector.checkIsOnline(); + if (!isOnline) { + return { error: "Zotero Connector is offline." }; + } const tabs = await chrome.tabs.query({ active: true, currentWindow: true }); if (!tabs?.length) { - return { error: "No active tab found.", debug }; + return { error: "No active tab found." }; } const tab = tabs[0]; - pushDebug("active tab", { - id: tab.id, - url: tab.url, - title: tab.title, - status: tab.status - }); - const tabInfo = Zotero.Connector_Browser.getTabInfo(tab.id); - pushDebug("tab info", summarizeTabInfo(tabInfo)); - if (!tabInfo) { - return { error: "No translator or webpage saving options available.", debug }; + return { error: "No translator or webpage saving options available." }; } if (tabInfo.translators?.length) { const result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, { fallbackOnFailure: true }); - pushDebug("saveWithTranslator result", result); - return { ok: true, mode: "translator", result, debug }; + return { ok: true, mode: "translator", result }; } const result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true }); - pushDebug("saveAsWebpage result", result); - return { ok: true, mode: "webpage", result, debug }; + return { ok: true, mode: "webpage", result }; } try { @@ -248,31 +186,12 @@ async ({ libraryName, collectionId }) => { restoreHooks(); } } catch (error) { - pushDebug("caught error", { - message: error.message, - stack: error.stack - }); - return { error: error.message, debug }; + return { error: error.message }; } } """ -def debug_log(label, value=None): - timestamp = datetime.now().strftime("%H:%M:%S") - if value is None: - print(f"[debug {timestamp}] {label}") - return - - if isinstance(value, (dict, list, tuple)): - try: - value = json.dumps(value, ensure_ascii=True, default=str, indent=2) - except TypeError: - value = repr(value) - - print(f"[debug {timestamp}] {label}: {value}") - - def setup_extension(): """Download and unpack the Zotero Connector extension if needed.""" manifest_path = os.path.join(EXTENSION_DIR, "manifest.json") @@ -309,99 +228,54 @@ def get_browser_launch_config(extension_path, headless_mode): if headless_mode == "new": args.append("--headless=new") - playwright_headless = False - elif headless_mode == "false": - playwright_headless = False - else: - playwright_headless = True - - return playwright_headless, args + return False, args + if headless_mode == "false": + return False, args + return True, args async def close_extra_pages(browser_context): await asyncio.sleep(WELCOME_TAB_WAIT_SECONDS) - extra_pages = browser_context.pages[1:] - if not extra_pages: - return - - debug_log("closing extra tabs", [page.url for page in extra_pages]) - for extra_page in extra_pages: + for extra_page in browser_context.pages[1:]: await extra_page.close() async def get_primary_page(browser_context): await close_extra_pages(browser_context) - page = browser_context.pages[0] if browser_context.pages else await browser_context.new_page() - debug_log( - "active page before navigation", - {"url": page.url, "page_count": len(browser_context.pages)}, - ) - return page + if browser_context.pages: + return browser_context.pages[0] + return await browser_context.new_page() async def wait_for_service_worker(browser_context): print("[*] Finding Zotero Connector service worker...") - for attempt in range(SERVICE_WORKER_POLL_ATTEMPTS): - workers = list(browser_context.service_workers) - if attempt in SERVICE_WORKER_DEBUG_ATTEMPTS: - debug_log( - "service worker poll", - { - "attempt": attempt + 1, - "known_workers": [worker.url for worker in workers], - }, - ) - - for worker in workers: + for _ in range(SERVICE_WORKER_POLL_ATTEMPTS): + for worker in browser_context.service_workers: if "background-worker.js" in worker.url or "zotero" in worker.url: - debug_log("selected service worker", worker.url) return worker - await asyncio.sleep(SERVICE_WORKER_POLL_INTERVAL) - return None -async def navigate_to_page(page, url, browser_context): +async def navigate_to_page(page, url): print(f"[*] Navigating to {url}...") - response = await page.goto(url, wait_until="load") - debug_log( - "navigation result", - { - "response_url": response.url if response else None, - "status": response.status if response else None, - "final_page_url": page.url, - "title": await page.title(), - }, - ) - + await page.goto(url, wait_until="load") print("[*] Page loaded. Waiting for Zotero translator to initialize...") await asyncio.sleep(TRANSLATOR_WAIT_SECONDS) - debug_log( - "post-load page snapshot", - { - "url": page.url, - "title": await page.title(), - "service_workers": [worker.url for worker in browser_context.service_workers], - }, - ) -def log_save_result(save_result): +def print_save_result(save_result): if not save_result or "error" in save_result: error = save_result.get("error") if save_result else "Unknown error" print(f"[!] Save trigger failed: {error}") - else: - print(f"[*] Save completed successfully via {save_result.get('mode', 'unknown')}.") - target = save_result.get("target") - if target: - print(f"[*] Save target: {target.get('name')} ({target.get('id')})") - if save_result.get("result") is not None: - print(f"[*] Save returned: {save_result['result']}") + return - debug_log("save_result", save_result) - for index, entry in enumerate(save_result.get("debug", []), start=1): - debug_log(f"worker debug #{index} {entry.get('label')}", entry.get("value")) + print(f"[*] Save completed successfully via {save_result.get('mode', 'unknown')}.") + target = save_result.get("target") + if target: + print(f"[*] Save target: {target.get('name')} ({target.get('id')})") + if save_result.get("result") is not None: + print(f"[*] Save returned: {save_result['result']}") async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None): @@ -409,24 +283,9 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection playwright_headless, browser_args = get_browser_launch_config(extension_path, headless_mode) print(f"[*] Launching Chromium browser (headless={headless_mode}) with Zotero Connector...") - debug_log( - "launch configuration", - {"playwright_headless": playwright_headless, "args": browser_args}, - ) async with async_playwright() as playwright: with tempfile.TemporaryDirectory(prefix="zotero-paper-fetcher-") as user_data_dir: - debug_log( - "save_to_zotero arguments", - { - "url": url, - "headless_mode": headless_mode, - "library_name": library_name, - "collection_id": collection_id, - "extension_path": extension_path, - "user_data_dir": user_data_dir, - }, - ) browser_context = await playwright.chromium.launch_persistent_context( user_data_dir, headless=playwright_headless, @@ -435,16 +294,8 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection ) try: - debug_log( - "temporary context launched", - { - "initial_page_count": len(browser_context.pages), - "service_worker_count": len(browser_context.service_workers), - "user_data_dir": user_data_dir, - }, - ) page = await get_primary_page(browser_context) - await navigate_to_page(page, url, browser_context) + await navigate_to_page(page, url) worker = await wait_for_service_worker(browser_context) if not worker: @@ -456,7 +307,7 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection SAVE_SCRIPT, {"libraryName": library_name, "collectionId": collection_id}, ) - log_save_result(save_result) + print_save_result(save_result) print("[*] Waiting 5 seconds for any delayed connector activity...") await asyncio.sleep(POST_SAVE_WAIT_SECONDS)