cleaned up debug code

This commit is contained in:
Mantao Huang 2026-03-12 11:05:22 -04:00
parent 11cf99e63a
commit c21a37a04f

View File

@ -1,12 +1,10 @@
import argparse import argparse
import asyncio import asyncio
import io import io
import json
import os import os
import tempfile import tempfile
import urllib.request import urllib.request
import zipfile import zipfile
from datetime import datetime
from patchright.async_api import async_playwright from patchright.async_api import async_playwright
@ -23,16 +21,12 @@ EXTENSION_DIR = os.path.join(BASE_DIR, "zotero_extension")
WINDOW_SIZE = {"width": 1280, "height": 800} WINDOW_SIZE = {"width": 1280, "height": 800}
TRANSLATOR_WAIT_SECONDS = 3 TRANSLATOR_WAIT_SECONDS = 3
WELCOME_TAB_WAIT_SECONDS = 2 WELCOME_TAB_WAIT_SECONDS = 2
POST_SAVE_WAIT_SECONDS = 5 POST_SAVE_WAIT_SECONDS = 10
SERVICE_WORKER_POLL_ATTEMPTS = 60 SERVICE_WORKER_POLL_ATTEMPTS = 60
SERVICE_WORKER_POLL_INTERVAL = 0.5 SERVICE_WORKER_POLL_INTERVAL = 0.5
SERVICE_WORKER_DEBUG_ATTEMPTS = {0, 5, 10, 20, 40, 59}
SAVE_SCRIPT = r""" SAVE_SCRIPT = r"""
async ({ libraryName, collectionId }) => { async ({ libraryName, collectionId }) => {
const debug = [];
const pushDebug = (label, value = null) => debug.push({ label, value });
function normalizeCollectionId(value) { function normalizeCollectionId(value) {
if (!value) return null; if (!value) return null;
const trimmed = value.trim(); const trimmed = value.trim();
@ -41,23 +35,6 @@ async ({ libraryName, collectionId }) => {
return trimmed; return trimmed;
} }
function summarizeTargets(targets) {
return targets.map((target) => ({
id: target.id,
name: target.name,
level: target.level
}));
}
function summarizeTranslators(translators = []) {
return translators.map((translator) => ({
translatorID: translator.translatorID,
label: translator.label,
itemType: translator.itemType,
priority: translator.priority
}));
}
function findOwningLibrary(targets, targetId) { function findOwningLibrary(targets, targetId) {
const index = targets.findIndex((target) => target.id === targetId); const index = targets.findIndex((target) => target.id === targetId);
if (index === -1) return null; if (index === -1) return null;
@ -75,17 +52,12 @@ async ({ libraryName, collectionId }) => {
} }
async function resolveTarget() { async function resolveTarget() {
if (!libraryName && !collectionId) { if (!libraryName && !collectionId) return null;
pushDebug("resolved target", null);
return null;
}
const response = await Zotero.Connector.callMethod("getSelectedCollection", { const response = await Zotero.Connector.callMethod("getSelectedCollection", {
switchToReadableLibrary: true switchToReadableLibrary: true
}); });
const targets = response.targets || []; const targets = response.targets || [];
pushDebug("available targets", summarizeTargets(targets));
if (!targets.length) { if (!targets.length) {
throw new Error("Zotero did not return any selectable targets."); throw new Error("Zotero did not return any selectable targets.");
} }
@ -124,20 +96,7 @@ async ({ libraryName, collectionId }) => {
} }
} }
const target = collectionTarget || libraryTarget; return collectionTarget || libraryTarget;
pushDebug("resolved target", target ? { id: target.id, name: target.name } : null);
return target;
}
function summarizeTabInfo(tabInfo) {
if (!tabInfo) return null;
return {
url: tabInfo.url,
isPDF: Boolean(tabInfo.isPDF),
frameId: tabInfo.frameId,
translatorCount: tabInfo.translators?.length || 0,
translators: summarizeTranslators(tabInfo.translators)
};
} }
function installSessionHooks(target) { function installSessionHooks(target) {
@ -148,7 +107,6 @@ async ({ libraryName, collectionId }) => {
async function applyTargetToSession(sessionID) { async function applyTargetToSession(sessionID) {
if (!target || !sessionID) return; if (!target || !sessionID) return;
pushDebug("apply target to session", { sessionID, targetId: target.id });
await Zotero.Connector.callMethod("updateSession", { await Zotero.Connector.callMethod("updateSession", {
sessionID, sessionID,
target: target.id target: target.id
@ -156,13 +114,7 @@ async ({ libraryName, collectionId }) => {
} }
Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) { Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) {
pushDebug("callMethodWithCookies request", {
method,
hasPayload: Boolean(payload),
sessionID: payload?.sessionID || null
});
const result = await originalCallMethodWithCookies(method, payload, ...args); const result = await originalCallMethodWithCookies(method, payload, ...args);
pushDebug("callMethodWithCookies response", { method, result });
if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) { if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) {
await applyTargetToSession(payload.sessionID); await applyTargetToSession(payload.sessionID);
} }
@ -175,17 +127,11 @@ async ({ libraryName, collectionId }) => {
sessionID, sessionID,
...args ...args
) { ) {
pushDebug("saveStandaloneAttachmentToZotero request", {
title: attachment?.title || null,
url: attachment?.url || null,
sessionID
});
const result = await originalSaveStandaloneAttachment( const result = await originalSaveStandaloneAttachment(
attachment, attachment,
sessionID, sessionID,
...args ...args
); );
pushDebug("saveStandaloneAttachmentToZotero response", result);
await applyTargetToSession(sessionID); await applyTargetToSession(sessionID);
return result; return result;
}; };
@ -201,41 +147,33 @@ async ({ libraryName, collectionId }) => {
} }
async function runSave() { async function runSave() {
pushDebug("connector online", await Zotero.Connector.checkIsOnline()); const isOnline = await Zotero.Connector.checkIsOnline();
if (!isOnline) {
return { error: "Zotero Connector is offline." };
}
const tabs = await chrome.tabs.query({ active: true, currentWindow: true }); const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
if (!tabs?.length) { if (!tabs?.length) {
return { error: "No active tab found.", debug }; return { error: "No active tab found." };
} }
const tab = tabs[0]; const tab = tabs[0];
pushDebug("active tab", {
id: tab.id,
url: tab.url,
title: tab.title,
status: tab.status
});
const tabInfo = Zotero.Connector_Browser.getTabInfo(tab.id); const tabInfo = Zotero.Connector_Browser.getTabInfo(tab.id);
pushDebug("tab info", summarizeTabInfo(tabInfo));
if (!tabInfo) { if (!tabInfo) {
return { error: "No translator or webpage saving options available.", debug }; return { error: "No translator or webpage saving options available." };
} }
if (tabInfo.translators?.length) { if (tabInfo.translators?.length) {
const result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, { const result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {
fallbackOnFailure: true fallbackOnFailure: true
}); });
pushDebug("saveWithTranslator result", result); return { ok: true, mode: "translator", result };
return { ok: true, mode: "translator", result, debug };
} }
const result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { const result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, {
snapshot: true snapshot: true
}); });
pushDebug("saveAsWebpage result", result); return { ok: true, mode: "webpage", result };
return { ok: true, mode: "webpage", result, debug };
} }
try { try {
@ -248,31 +186,12 @@ async ({ libraryName, collectionId }) => {
restoreHooks(); restoreHooks();
} }
} catch (error) { } catch (error) {
pushDebug("caught error", { return { error: error.message };
message: error.message,
stack: error.stack
});
return { error: error.message, debug };
} }
} }
""" """
def debug_log(label, value=None):
timestamp = datetime.now().strftime("%H:%M:%S")
if value is None:
print(f"[debug {timestamp}] {label}")
return
if isinstance(value, (dict, list, tuple)):
try:
value = json.dumps(value, ensure_ascii=True, default=str, indent=2)
except TypeError:
value = repr(value)
print(f"[debug {timestamp}] {label}: {value}")
def setup_extension(): def setup_extension():
"""Download and unpack the Zotero Connector extension if needed.""" """Download and unpack the Zotero Connector extension if needed."""
manifest_path = os.path.join(EXTENSION_DIR, "manifest.json") manifest_path = os.path.join(EXTENSION_DIR, "manifest.json")
@ -309,89 +228,48 @@ def get_browser_launch_config(extension_path, headless_mode):
if headless_mode == "new": if headless_mode == "new":
args.append("--headless=new") args.append("--headless=new")
playwright_headless = False return False, args
elif headless_mode == "false": if headless_mode == "false":
playwright_headless = False return False, args
else: return True, args
playwright_headless = True
return playwright_headless, args
async def close_extra_pages(browser_context): async def close_extra_pages(browser_context):
await asyncio.sleep(WELCOME_TAB_WAIT_SECONDS) await asyncio.sleep(WELCOME_TAB_WAIT_SECONDS)
extra_pages = browser_context.pages[1:] for extra_page in browser_context.pages[1:]:
if not extra_pages:
return
debug_log("closing extra tabs", [page.url for page in extra_pages])
for extra_page in extra_pages:
await extra_page.close() await extra_page.close()
async def get_primary_page(browser_context): async def get_primary_page(browser_context):
await close_extra_pages(browser_context) await close_extra_pages(browser_context)
page = browser_context.pages[0] if browser_context.pages else await browser_context.new_page() if browser_context.pages:
debug_log( return browser_context.pages[0]
"active page before navigation", return await browser_context.new_page()
{"url": page.url, "page_count": len(browser_context.pages)},
)
return page
async def wait_for_service_worker(browser_context): async def wait_for_service_worker(browser_context):
print("[*] Finding Zotero Connector service worker...") print("[*] Finding Zotero Connector service worker...")
for attempt in range(SERVICE_WORKER_POLL_ATTEMPTS): for _ in range(SERVICE_WORKER_POLL_ATTEMPTS):
workers = list(browser_context.service_workers) for worker in browser_context.service_workers:
if attempt in SERVICE_WORKER_DEBUG_ATTEMPTS:
debug_log(
"service worker poll",
{
"attempt": attempt + 1,
"known_workers": [worker.url for worker in workers],
},
)
for worker in workers:
if "background-worker.js" in worker.url or "zotero" in worker.url: if "background-worker.js" in worker.url or "zotero" in worker.url:
debug_log("selected service worker", worker.url)
return worker return worker
await asyncio.sleep(SERVICE_WORKER_POLL_INTERVAL) await asyncio.sleep(SERVICE_WORKER_POLL_INTERVAL)
return None return None
async def navigate_to_page(page, url, browser_context): async def navigate_to_page(page, url):
print(f"[*] Navigating to {url}...") print(f"[*] Navigating to {url}...")
response = await page.goto(url, wait_until="load") await page.goto(url, wait_until="load")
debug_log(
"navigation result",
{
"response_url": response.url if response else None,
"status": response.status if response else None,
"final_page_url": page.url,
"title": await page.title(),
},
)
print("[*] Page loaded. Waiting for Zotero translator to initialize...") print("[*] Page loaded. Waiting for Zotero translator to initialize...")
await asyncio.sleep(TRANSLATOR_WAIT_SECONDS) await asyncio.sleep(TRANSLATOR_WAIT_SECONDS)
debug_log(
"post-load page snapshot",
{
"url": page.url,
"title": await page.title(),
"service_workers": [worker.url for worker in browser_context.service_workers],
},
)
def log_save_result(save_result): def print_save_result(save_result):
if not save_result or "error" in save_result: if not save_result or "error" in save_result:
error = save_result.get("error") if save_result else "Unknown error" error = save_result.get("error") if save_result else "Unknown error"
print(f"[!] Save trigger failed: {error}") print(f"[!] Save trigger failed: {error}")
else: return
print(f"[*] Save completed successfully via {save_result.get('mode', 'unknown')}.") print(f"[*] Save completed successfully via {save_result.get('mode', 'unknown')}.")
target = save_result.get("target") target = save_result.get("target")
if target: if target:
@ -399,34 +277,15 @@ def log_save_result(save_result):
if save_result.get("result") is not None: if save_result.get("result") is not None:
print(f"[*] Save returned: {save_result['result']}") print(f"[*] Save returned: {save_result['result']}")
debug_log("save_result", save_result)
for index, entry in enumerate(save_result.get("debug", []), start=1):
debug_log(f"worker debug #{index} {entry.get('label')}", entry.get("value"))
async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None): async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None):
extension_path = setup_extension() extension_path = setup_extension()
playwright_headless, browser_args = get_browser_launch_config(extension_path, headless_mode) playwright_headless, browser_args = get_browser_launch_config(extension_path, headless_mode)
print(f"[*] Launching Chromium browser (headless={headless_mode}) with Zotero Connector...") print(f"[*] Launching Chromium browser (headless={headless_mode}) with Zotero Connector...")
debug_log(
"launch configuration",
{"playwright_headless": playwright_headless, "args": browser_args},
)
async with async_playwright() as playwright: async with async_playwright() as playwright:
with tempfile.TemporaryDirectory(prefix="zotero-paper-fetcher-") as user_data_dir: with tempfile.TemporaryDirectory(prefix="zotero-paper-fetcher-") as user_data_dir:
debug_log(
"save_to_zotero arguments",
{
"url": url,
"headless_mode": headless_mode,
"library_name": library_name,
"collection_id": collection_id,
"extension_path": extension_path,
"user_data_dir": user_data_dir,
},
)
browser_context = await playwright.chromium.launch_persistent_context( browser_context = await playwright.chromium.launch_persistent_context(
user_data_dir, user_data_dir,
headless=playwright_headless, headless=playwright_headless,
@ -435,16 +294,8 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection
) )
try: try:
debug_log(
"temporary context launched",
{
"initial_page_count": len(browser_context.pages),
"service_worker_count": len(browser_context.service_workers),
"user_data_dir": user_data_dir,
},
)
page = await get_primary_page(browser_context) page = await get_primary_page(browser_context)
await navigate_to_page(page, url, browser_context) await navigate_to_page(page, url)
worker = await wait_for_service_worker(browser_context) worker = await wait_for_service_worker(browser_context)
if not worker: if not worker:
@ -456,7 +307,7 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection
SAVE_SCRIPT, SAVE_SCRIPT,
{"libraryName": library_name, "collectionId": collection_id}, {"libraryName": library_name, "collectionId": collection_id},
) )
log_save_result(save_result) print_save_result(save_result)
print("[*] Waiting 5 seconds for any delayed connector activity...") print("[*] Waiting 5 seconds for any delayed connector activity...")
await asyncio.sleep(POST_SAVE_WAIT_SECONDS) await asyncio.sleep(POST_SAVE_WAIT_SECONDS)