cleaned up debug code

This commit is contained in:
Mantao Huang 2026-03-12 11:05:22 -04:00
parent 11cf99e63a
commit c21a37a04f

View File

@ -1,12 +1,10 @@
import argparse
import asyncio
import io
import json
import os
import tempfile
import urllib.request
import zipfile
from datetime import datetime
from patchright.async_api import async_playwright
@ -23,16 +21,12 @@ EXTENSION_DIR = os.path.join(BASE_DIR, "zotero_extension")
WINDOW_SIZE = {"width": 1280, "height": 800}
TRANSLATOR_WAIT_SECONDS = 3
WELCOME_TAB_WAIT_SECONDS = 2
POST_SAVE_WAIT_SECONDS = 5
POST_SAVE_WAIT_SECONDS = 10
SERVICE_WORKER_POLL_ATTEMPTS = 60
SERVICE_WORKER_POLL_INTERVAL = 0.5
SERVICE_WORKER_DEBUG_ATTEMPTS = {0, 5, 10, 20, 40, 59}
SAVE_SCRIPT = r"""
async ({ libraryName, collectionId }) => {
const debug = [];
const pushDebug = (label, value = null) => debug.push({ label, value });
function normalizeCollectionId(value) {
if (!value) return null;
const trimmed = value.trim();
@ -41,23 +35,6 @@ async ({ libraryName, collectionId }) => {
return trimmed;
}
function summarizeTargets(targets) {
return targets.map((target) => ({
id: target.id,
name: target.name,
level: target.level
}));
}
function summarizeTranslators(translators = []) {
return translators.map((translator) => ({
translatorID: translator.translatorID,
label: translator.label,
itemType: translator.itemType,
priority: translator.priority
}));
}
function findOwningLibrary(targets, targetId) {
const index = targets.findIndex((target) => target.id === targetId);
if (index === -1) return null;
@ -75,17 +52,12 @@ async ({ libraryName, collectionId }) => {
}
async function resolveTarget() {
if (!libraryName && !collectionId) {
pushDebug("resolved target", null);
return null;
}
if (!libraryName && !collectionId) return null;
const response = await Zotero.Connector.callMethod("getSelectedCollection", {
switchToReadableLibrary: true
});
const targets = response.targets || [];
pushDebug("available targets", summarizeTargets(targets));
if (!targets.length) {
throw new Error("Zotero did not return any selectable targets.");
}
@ -124,20 +96,7 @@ async ({ libraryName, collectionId }) => {
}
}
const target = collectionTarget || libraryTarget;
pushDebug("resolved target", target ? { id: target.id, name: target.name } : null);
return target;
}
function summarizeTabInfo(tabInfo) {
if (!tabInfo) return null;
return {
url: tabInfo.url,
isPDF: Boolean(tabInfo.isPDF),
frameId: tabInfo.frameId,
translatorCount: tabInfo.translators?.length || 0,
translators: summarizeTranslators(tabInfo.translators)
};
return collectionTarget || libraryTarget;
}
function installSessionHooks(target) {
@ -148,7 +107,6 @@ async ({ libraryName, collectionId }) => {
async function applyTargetToSession(sessionID) {
if (!target || !sessionID) return;
pushDebug("apply target to session", { sessionID, targetId: target.id });
await Zotero.Connector.callMethod("updateSession", {
sessionID,
target: target.id
@ -156,13 +114,7 @@ async ({ libraryName, collectionId }) => {
}
Zotero.Connector.callMethodWithCookies = async function(method, payload, ...args) {
pushDebug("callMethodWithCookies request", {
method,
hasPayload: Boolean(payload),
sessionID: payload?.sessionID || null
});
const result = await originalCallMethodWithCookies(method, payload, ...args);
pushDebug("callMethodWithCookies response", { method, result });
if ((method === "saveItems" || method === "saveSnapshot") && payload?.sessionID) {
await applyTargetToSession(payload.sessionID);
}
@ -175,17 +127,11 @@ async ({ libraryName, collectionId }) => {
sessionID,
...args
) {
pushDebug("saveStandaloneAttachmentToZotero request", {
title: attachment?.title || null,
url: attachment?.url || null,
sessionID
});
const result = await originalSaveStandaloneAttachment(
attachment,
sessionID,
...args
);
pushDebug("saveStandaloneAttachmentToZotero response", result);
await applyTargetToSession(sessionID);
return result;
};
@ -201,41 +147,33 @@ async ({ libraryName, collectionId }) => {
}
async function runSave() {
pushDebug("connector online", await Zotero.Connector.checkIsOnline());
const isOnline = await Zotero.Connector.checkIsOnline();
if (!isOnline) {
return { error: "Zotero Connector is offline." };
}
const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
if (!tabs?.length) {
return { error: "No active tab found.", debug };
return { error: "No active tab found." };
}
const tab = tabs[0];
pushDebug("active tab", {
id: tab.id,
url: tab.url,
title: tab.title,
status: tab.status
});
const tabInfo = Zotero.Connector_Browser.getTabInfo(tab.id);
pushDebug("tab info", summarizeTabInfo(tabInfo));
if (!tabInfo) {
return { error: "No translator or webpage saving options available.", debug };
return { error: "No translator or webpage saving options available." };
}
if (tabInfo.translators?.length) {
const result = await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {
fallbackOnFailure: true
});
pushDebug("saveWithTranslator result", result);
return { ok: true, mode: "translator", result, debug };
return { ok: true, mode: "translator", result };
}
const result = await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, {
snapshot: true
});
pushDebug("saveAsWebpage result", result);
return { ok: true, mode: "webpage", result, debug };
return { ok: true, mode: "webpage", result };
}
try {
@ -248,31 +186,12 @@ async ({ libraryName, collectionId }) => {
restoreHooks();
}
} catch (error) {
pushDebug("caught error", {
message: error.message,
stack: error.stack
});
return { error: error.message, debug };
return { error: error.message };
}
}
"""
def debug_log(label, value=None):
timestamp = datetime.now().strftime("%H:%M:%S")
if value is None:
print(f"[debug {timestamp}] {label}")
return
if isinstance(value, (dict, list, tuple)):
try:
value = json.dumps(value, ensure_ascii=True, default=str, indent=2)
except TypeError:
value = repr(value)
print(f"[debug {timestamp}] {label}: {value}")
def setup_extension():
"""Download and unpack the Zotero Connector extension if needed."""
manifest_path = os.path.join(EXTENSION_DIR, "manifest.json")
@ -309,89 +228,48 @@ def get_browser_launch_config(extension_path, headless_mode):
if headless_mode == "new":
args.append("--headless=new")
playwright_headless = False
elif headless_mode == "false":
playwright_headless = False
else:
playwright_headless = True
return playwright_headless, args
return False, args
if headless_mode == "false":
return False, args
return True, args
async def close_extra_pages(browser_context):
await asyncio.sleep(WELCOME_TAB_WAIT_SECONDS)
extra_pages = browser_context.pages[1:]
if not extra_pages:
return
debug_log("closing extra tabs", [page.url for page in extra_pages])
for extra_page in extra_pages:
for extra_page in browser_context.pages[1:]:
await extra_page.close()
async def get_primary_page(browser_context):
await close_extra_pages(browser_context)
page = browser_context.pages[0] if browser_context.pages else await browser_context.new_page()
debug_log(
"active page before navigation",
{"url": page.url, "page_count": len(browser_context.pages)},
)
return page
if browser_context.pages:
return browser_context.pages[0]
return await browser_context.new_page()
async def wait_for_service_worker(browser_context):
print("[*] Finding Zotero Connector service worker...")
for attempt in range(SERVICE_WORKER_POLL_ATTEMPTS):
workers = list(browser_context.service_workers)
if attempt in SERVICE_WORKER_DEBUG_ATTEMPTS:
debug_log(
"service worker poll",
{
"attempt": attempt + 1,
"known_workers": [worker.url for worker in workers],
},
)
for worker in workers:
for _ in range(SERVICE_WORKER_POLL_ATTEMPTS):
for worker in browser_context.service_workers:
if "background-worker.js" in worker.url or "zotero" in worker.url:
debug_log("selected service worker", worker.url)
return worker
await asyncio.sleep(SERVICE_WORKER_POLL_INTERVAL)
return None
async def navigate_to_page(page, url, browser_context):
async def navigate_to_page(page, url):
print(f"[*] Navigating to {url}...")
response = await page.goto(url, wait_until="load")
debug_log(
"navigation result",
{
"response_url": response.url if response else None,
"status": response.status if response else None,
"final_page_url": page.url,
"title": await page.title(),
},
)
await page.goto(url, wait_until="load")
print("[*] Page loaded. Waiting for Zotero translator to initialize...")
await asyncio.sleep(TRANSLATOR_WAIT_SECONDS)
debug_log(
"post-load page snapshot",
{
"url": page.url,
"title": await page.title(),
"service_workers": [worker.url for worker in browser_context.service_workers],
},
)
def log_save_result(save_result):
def print_save_result(save_result):
if not save_result or "error" in save_result:
error = save_result.get("error") if save_result else "Unknown error"
print(f"[!] Save trigger failed: {error}")
else:
return
print(f"[*] Save completed successfully via {save_result.get('mode', 'unknown')}.")
target = save_result.get("target")
if target:
@ -399,34 +277,15 @@ def log_save_result(save_result):
if save_result.get("result") is not None:
print(f"[*] Save returned: {save_result['result']}")
debug_log("save_result", save_result)
for index, entry in enumerate(save_result.get("debug", []), start=1):
debug_log(f"worker debug #{index} {entry.get('label')}", entry.get("value"))
async def save_to_zotero(url, headless_mode="new", library_name=None, collection_id=None):
extension_path = setup_extension()
playwright_headless, browser_args = get_browser_launch_config(extension_path, headless_mode)
print(f"[*] Launching Chromium browser (headless={headless_mode}) with Zotero Connector...")
debug_log(
"launch configuration",
{"playwright_headless": playwright_headless, "args": browser_args},
)
async with async_playwright() as playwright:
with tempfile.TemporaryDirectory(prefix="zotero-paper-fetcher-") as user_data_dir:
debug_log(
"save_to_zotero arguments",
{
"url": url,
"headless_mode": headless_mode,
"library_name": library_name,
"collection_id": collection_id,
"extension_path": extension_path,
"user_data_dir": user_data_dir,
},
)
browser_context = await playwright.chromium.launch_persistent_context(
user_data_dir,
headless=playwright_headless,
@ -435,16 +294,8 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection
)
try:
debug_log(
"temporary context launched",
{
"initial_page_count": len(browser_context.pages),
"service_worker_count": len(browser_context.service_workers),
"user_data_dir": user_data_dir,
},
)
page = await get_primary_page(browser_context)
await navigate_to_page(page, url, browser_context)
await navigate_to_page(page, url)
worker = await wait_for_service_worker(browser_context)
if not worker:
@ -456,7 +307,7 @@ async def save_to_zotero(url, headless_mode="new", library_name=None, collection
SAVE_SCRIPT,
{"libraryName": library_name, "collectionId": collection_id},
)
log_save_result(save_result)
print_save_result(save_result)
print("[*] Waiting 5 seconds for any delayed connector activity...")
await asyncio.sleep(POST_SAVE_WAIT_SECONDS)