first commit

This commit is contained in:
Mantao Huang 2026-03-08 00:26:05 -06:00
commit f949795f71
8 changed files with 354 additions and 0 deletions

16
.gitignore vendored Normal file
View File

@ -0,0 +1,16 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv
# macOS
.DS_Store
# Chrome profile
chrome_profile/

1
.python-version Normal file
View File

@ -0,0 +1 @@
3.13

50
README.md Normal file
View File

@ -0,0 +1,50 @@
# Zotero Paper Fetcher (Automator)
This script automates saving an academic paper (or any URL) to Zotero using the official **Zotero Connector** Google Chrome extension, driven by Playwright in Python.
## Why this approach?
Zotero Connector is a Chrome extension that provides the official, most robust way of getting high-quality metadata and full-text PDFs (if proxy or site access allows). However, standard browser automation (Headless Chrome) blocks Chrome extensions from running.
This script elegantly solves the problem by:
1. Automatically downloading the latest Zotero Connector extension.
2. Unpacking it from its `.crx` format.
3. Launching Chromium using Playwright in `--headless=new` mode (which DOES allow extensions, unlike the old headless mode) with a persistent user data directory.
4. Auto-closing setup tabs.
5. Emulating the official Zotero Connector keyboard shortcut (e.g. `Cmd+Shift+S` on Mac) once the page is fully loaded.
## Prerequisites
1. **Python 3.8+** installed.
2. **Zotero Desktop** must be currently running on your machine (the extension communicates with the Zotero desktop app securely on port `1969`).
## Setup
First, initialize your virtual environment and install dependencies:
```bash
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
playwright install chromium
```
## Usage
Simply pass the URL of the paper you want to add to Zotero:
```bash
source venv/bin/activate
python zotero_automator.py "https://arxiv.org/abs/1706.03762"
```
If you want to watch the browser process visually (helpful for debugging if a site requires a captcha or login, or just to verify the extension is working), pass the `--headed` flag:
```bash
python zotero_automator.py "https://arxiv.org/abs/1706.03762" --headed
```
## How It Works
- **`setup_extension()`**: Locates the `EKHAGK...` identifier for the Zotero extension on the Chrome web store and downloads the raw `.crx` payload. It unpacks the contents into `./zotero_extension/`.
- **`save_to_zotero()`**: Starts an `async_playwright` session pointing to a local profile folder (`./chrome_profile/`). The extension injects its translator scripts on network idle. We fire the platform-specific shortcut for "Save to Zotero" and wait 10 seconds for the backend download to finish.

6
main.py Normal file
View File

@ -0,0 +1,6 @@
def main():
print("Hello from zotero-paper-fetcher!")
if __name__ == "__main__":
main()

10
pyproject.toml Normal file
View File

@ -0,0 +1,10 @@
[project]
name = "zotero-paper-fetcher"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"playwright>=1.58.0",
"playwright-stealth>=2.0.2",
]

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
playwright==1.42.0

104
uv.lock generated Normal file
View File

@ -0,0 +1,104 @@
version = 1
revision = 3
requires-python = ">=3.13"
[[package]]
name = "greenlet"
version = "3.3.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a3/51/1664f6b78fc6ebbd98019a1fd730e83fa78f2db7058f72b1463d3612b8db/greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2", size = 188267, upload-time = "2026-02-20T20:54:15.531Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" },
{ url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" },
{ url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" },
{ url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" },
{ url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" },
{ url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" },
{ url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" },
{ url = "https://files.pythonhosted.org/packages/91/39/5ef5aa23bc545aa0d31e1b9b55822b32c8da93ba657295840b6b34124009/greenlet-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:a7945dd0eab63ded0a48e4dcade82939783c172290a7903ebde9e184333ca124", size = 230961, upload-time = "2026-02-20T20:16:58.461Z" },
{ url = "https://files.pythonhosted.org/packages/62/6b/a89f8456dcb06becff288f563618e9f20deed8dd29beea14f9a168aef64b/greenlet-3.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:394ead29063ee3515b4e775216cb756b2e3b4a7e55ae8fd884f17fa579e6b327", size = 230221, upload-time = "2026-02-20T20:17:37.152Z" },
{ url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" },
{ url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" },
{ url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" },
{ url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" },
{ url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" },
{ url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" },
{ url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" },
{ url = "https://files.pythonhosted.org/packages/f3/ca/2101ca3d9223a1dc125140dbc063644dca76df6ff356531eb27bc267b446/greenlet-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:8c4dd0f3997cf2512f7601563cc90dfb8957c0cff1e3a1b23991d4ea1776c492", size = 232034, upload-time = "2026-02-20T20:20:08.186Z" },
{ url = "https://files.pythonhosted.org/packages/f6/4a/ecf894e962a59dea60f04877eea0fd5724618da89f1867b28ee8b91e811f/greenlet-3.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:cd6f9e2bbd46321ba3bbb4c8a15794d32960e3b0ae2cc4d49a1a53d314805d71", size = 231437, upload-time = "2026-02-20T20:18:59.722Z" },
{ url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" },
{ url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" },
{ url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" },
{ url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" },
{ url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" },
{ url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" },
{ url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" },
{ url = "https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" },
]
[[package]]
name = "playwright"
version = "1.58.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/c9/9c6061d5703267f1baae6a4647bfd1862e386fbfdb97d889f6f6ae9e3f64/playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606", size = 42251098, upload-time = "2026-01-30T15:09:24.028Z" },
{ url = "https://files.pythonhosted.org/packages/e0/40/59d34a756e02f8c670f0fee987d46f7ee53d05447d43cd114ca015cb168c/playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71", size = 41039625, upload-time = "2026-01-30T15:09:27.558Z" },
{ url = "https://files.pythonhosted.org/packages/e1/ee/3ce6209c9c74a650aac9028c621f357a34ea5cd4d950700f8e2c4b7fe2c4/playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117", size = 42251098, upload-time = "2026-01-30T15:09:30.461Z" },
{ url = "https://files.pythonhosted.org/packages/f1/af/009958cbf23fac551a940d34e3206e6c7eed2b8c940d0c3afd1feb0b0589/playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b", size = 46235268, upload-time = "2026-01-30T15:09:33.787Z" },
{ url = "https://files.pythonhosted.org/packages/d9/a6/0e66ad04b6d3440dae73efb39540c5685c5fc95b17c8b29340b62abbd952/playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa", size = 45964214, upload-time = "2026-01-30T15:09:36.751Z" },
{ url = "https://files.pythonhosted.org/packages/0e/4b/236e60ab9f6d62ed0fd32150d61f1f494cefbf02304c0061e78ed80c1c32/playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99", size = 36815998, upload-time = "2026-01-30T15:09:39.627Z" },
{ url = "https://files.pythonhosted.org/packages/41/f8/5ec599c5e59d2f2f336a05b4f318e733077cd5044f24adb6f86900c3e6a7/playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8", size = 36816005, upload-time = "2026-01-30T15:09:42.449Z" },
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
]
[[package]]
name = "playwright-stealth"
version = "2.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "playwright" },
]
sdist = { url = "https://files.pythonhosted.org/packages/61/ee/871901103c7b2a12070011fd4d978191f8f962837bf8bb51847274f528fa/playwright_stealth-2.0.2.tar.gz", hash = "sha256:ac57e51873190da5e653e03720e948c8f0a3d06b098f1d56763103d23ee48143", size = 24902, upload-time = "2026-02-13T02:36:25.137Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/30/f95f087f4b071611a7f63a2a0c9af4df3ac046dae2a693bfdacd70512867/playwright_stealth-2.0.2-py3-none-any.whl", hash = "sha256:37a5733f481b9c0ad602cf71491aa5a7c96c2a2fe4fa1e7ab764d2cd35520f2f", size = 33209, upload-time = "2026-02-13T02:36:26.334Z" },
]
[[package]]
name = "pyee"
version = "13.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8b/04/e7c1fe4dc78a6fdbfd6c337b1c3732ff543b8a397683ab38378447baa331/pyee-13.0.1.tar.gz", hash = "sha256:0b931f7c14535667ed4c7e0d531716368715e860b988770fc7eb8578d1f67fc8", size = 31655, upload-time = "2026-02-14T21:12:28.044Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/c4/b4d4827c93ef43c01f599ef31453ccc1c132b353284fc6c87d535c233129/pyee-13.0.1-py3-none-any.whl", hash = "sha256:af2f8fede4171ef667dfded53f96e2ed0d6e6bd7ee3bb46437f77e3b57689228", size = 15659, upload-time = "2026-02-14T21:12:26.263Z" },
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
]
[[package]]
name = "zotero-paper-fetcher"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "playwright" },
{ name = "playwright-stealth" },
]
[package.metadata]
requires-dist = [
{ name = "playwright", specifier = ">=1.58.0" },
{ name = "playwright-stealth", specifier = ">=2.0.2" },
]

166
zotero_automator.py Normal file
View File

@ -0,0 +1,166 @@
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth import Stealth
import os
import urllib.request
import zipfile
import io
import sys
import argparse
EXTENSION_ID = 'ekhagklcjbdpajgpjgmbionohlpdbjgc'
EXTENSION_URL = f"https://clients2.google.com/service/update2/crx?response=redirect&prodversion=114.0.5735.90&acceptformat=crx2,crx3&x=id%3D{EXTENSION_ID}%2Cinstallsource%3Dondemand%2Cuc"
EXTENSION_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'zotero_extension')
USER_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome_profile')
print(EXTENSION_DIR)
def setup_extension():
"""Downloads and unpacks the Zotero Connector Chrome extension if not already present."""
if os.path.exists(EXTENSION_DIR) and os.path.exists(os.path.join(EXTENSION_DIR, 'manifest.json')):
print("[*] Zotero Extension already unpacked locally.")
return os.path.abspath(EXTENSION_DIR)
print("[*] Downloading Zotero Connector from Chrome Web Store...")
req = urllib.request.Request(
EXTENSION_URL,
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
)
with urllib.request.urlopen(req) as response:
data = response.read()
print("[*] Unpacking CRX file...")
# .crx files are zip files with an extra header. Find the standard ZIP header (PK\x03\x04).
zip_start = data.find(b'PK\x03\x04')
if zip_start == -1:
raise ValueError("Could not find ZIP header in downloaded CRX.")
os.makedirs(EXTENSION_DIR, exist_ok=True)
with zipfile.ZipFile(io.BytesIO(data[zip_start:])) as z:
z.extractall(EXTENSION_DIR)
print("[*] Zotero Extension setup complete.")
return os.path.abspath(EXTENSION_DIR)
async def save_to_zotero(url, headless_mode="new"):
"""Automates Chrome to load a URL and trigger Zotero Connector."""
extension_path = setup_extension()
print(f"[*] Launching Chromium browser (headless={headless_mode}) with Zotero Connector...")
# Prepare playwright arguments
args = [
f"--disable-extensions-except={extension_path}",
f"--load-extension={extension_path}",
]
# Standard headless=True historically blocked extensions.
# We use headless=False by default or can pass `--headless=new` parameter.
if headless_mode == "new":
args.append("--headless=new")
playwright_headless = False
elif headless_mode == "false":
playwright_headless = False
else:
playwright_headless = True # May ignore extensions depending on Chromium version
async with Stealth().use_async(async_playwright()) as p:
browser_context = await p.chromium.launch_persistent_context(
USER_DATA_DIR,
headless=playwright_headless,
args=args,
viewport={'width': 1280, 'height': 800}
)
# Close any welcome tabs the extension might open on first run
await asyncio.sleep(2)
if len(browser_context.pages) > 1:
for p_ext in browser_context.pages[1:]:
await p_ext.close()
# Use the primary tab
page = browser_context.pages[0] if browser_context.pages else await browser_context.new_page()
print(f"[*] Navigating to {url}...")
await page.goto(url, wait_until="load")
print("[*] Page loaded. Waiting for Zotero translator to initialize...")
# Give Zotero connection translator a moment to inject and detect the metadata
await asyncio.sleep(3)
print("[*] Finding Zotero Connector service worker...")
worker = None
for i in range(10):
for w in browser_context.service_workers:
if EXTENSION_ID in w.url:
worker = w
break
if worker:
break
await asyncio.sleep(0.5)
if not worker:
print("[!] Could not find Zotero extension service worker.")
else:
assert worker is not None
print("[*] Triggering save via extension service worker evaluation...")
# We wrap the call in a try/catch and return a structured object so we can extract the sessionID
session_id = await worker.evaluate('''async () => {
try {
let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
if (!tabs || tabs.length === 0) return {error: "No active tab found."};
let tab = tabs[0];
let tabInfo = Zotero.Connector_Browser.getTabInfo(tab.id);
if (tabInfo && tabInfo.translators && tabInfo.translators.length) {
await Zotero.Connector_Browser.saveWithTranslator(tab, 0, {fallbackOnFailure: true});
return { sessionID: tabInfo.instanceID }; // usually acts as sessionID
} else if (tabInfo) {
await Zotero.Connector_Browser.saveAsWebpage(tab, tabInfo.frameId, { snapshot: true });
return { sessionID: tabInfo.instanceID };
} else {
return {error: "No translator or webpage saving options available."};
}
} catch(e) {
return {error: e.message};
}
}''')
if not session_id or "error" in session_id:
print(f"[!] Save trigger failed: {session_id.get('error') if session_id else 'Unknown error'}")
else:
sid = session_id.get("sessionID")
print(f"[*] Save triggered (Session ID: {sid}). Polling for progress...")
# Poll for completion
max_polls = 60
for _ in range(max_polls):
await asyncio.sleep(1)
progress = await worker.evaluate('''async (sid) => {
try {
if (!Zotero.Connector) return {done: false, error: "Zotero.Connector uninitialized"};
let res = await Zotero.Connector.callMethod("sessionProgress", { sessionID: sid });
return res || {done: false};
} catch(e) {
return {done: true, error: e.message}; // typically throws when it's totally done/cleaned up
}
}''', sid)
if progress and progress.get("error"):
# Normally, when the progress tracker cleans up, callMethod("sessionProgress") throws
print(f"[*] Save completed or session ended. Error: {progress.get('error')}")
break
if progress and progress.get("done"):
print("[*] Translation and save processes finished successfully.")
break
print("[*] Operation finished. Closing browser.")
await browser_context.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Automate Zotero Connector via Playwright.")
parser.add_argument("url", nargs="?", default="https://arxiv.org/abs/1706.03762", help="URL to save to Zotero")
parser.add_argument("--headed", action="store_true", help="Show browser UI visually instead of headless=new")
args = parser.parse_args()
headless_arg = "false" if args.headed else "new"
asyncio.run(save_to_zotero(args.url, headless_mode=headless_arg))