Zotero works well when it has a Translator for the site you're on—a script
that tells it how to scrape metadata from JSTOR or Westlaw. But the web is
infinite and the Zotero team is not. For legal researchers especially, the
gaps are everywhere: niche law reviews, government PDF repositories, state
legislative archives. Save something from a site without a translator and you
get a "Webpage" item with an attachment named document(3).pdf. Bartleby is
a fallback for when Zotero's translators come up empty.
Bartleby sends the first 9,000 characters of a PDF or webpage to OpenAI and
attempts to conform the result to Zotero's item schema—distinguishing a
Statute from a Book, a Law Review Article from a Report. It also adds tags,
borrowing a technique from
joshjm's zotero-ai-tag that
restricts the model to tags already in your library. I've added some other
cleanup for personal nits, like converting ALL CAPS TITLES to sentence case.
Of course there's a risk of hallucination, so it's worth checking the outputs.
To make that possible, Bartleby stamps every record it touches with [AI Enriched] in the Extra field.
In use, a progress bar appears with the tagline "I would prefer not to..." before Bartleby parses the text, builds a Parent Item, and stamps the record. A recursion guard ensures it doesn't loop on its own output. It does the job—reluctantly, but well.
- Progress UI triggered via Actions Tags.
- Bartleby builds a parent item and fills bibliographic fields.
- Audit trail goes into Extra for every touched record.

Quickstart: Actions Tags
- Install Zotero Actions Tags.
- In the plugin's preferences, add a new "On Item Add" action and paste the script below.
- Add your OpenAI key and adjust the model if desired. The recursion guards,
tag restrictions, and [AI Enriched] audit stamp are included by default.
→ Bartleby Actions Tags script (click to expand)
// ================= CONFIGURATION =================
// OpenAI API key, sent as the Bearer token on the chat completions request.
// Replace the placeholder with your own key.
const OPENAI_API_KEY = 'sk-proj-YOUR_KEY_HERE' // <--- PASTE KEY
// Model name placed in the request body.
const MODEL = 'gpt-4o-mini'
// Upper bound on how many characters of extracted text are sent to the model.
const MAX_CHARS = 9000
// Upper bound on how many existing library tags are offered to the model as choices.
const MAX_TAG_CONTEXT = 300
// =================================================
;(async () => {
// 1. UNIVERSAL ITEM DETECTION
let itemToProcess = null
if (typeof item !== 'undefined' && item) itemToProcess = item
else if (typeof items !== 'undefined' && items.length > 0) itemToProcess = items[0]
else {
try {
itemToProcess = Zotero.getActiveZoteroPane().getSelectedItems()[0]
} catch (e) {}
}
if (!itemToProcess) return
// --- GUARD 1: RECURSION & HISTORY ---
// If we are currently processing, OR if we have already finished this item, STOP.
let checkExtra = itemToProcess.getField('extra') || ''
if (
checkExtra.includes('[Bartleby') ||
checkExtra.includes('[AI Enriched') ||
checkExtra.includes('[AI Failed')
) {
return
}
// --- GUARD 2: FILED ATTACHMENT ---
// If this is a PDF that is already inside a parent item, ignore it.
// Bartleby only picks up "loose" files.
if (itemToProcess.isAttachment() && itemToProcess.parentID) {
return
}
// 2. SETUP UI
// One progress window holding a single progress line. `progressWin` and `prog`
// are the handles the rest of the script uses to report status.
const progressWin = new Zotero.ProgressWindow({ closeOnClick: true })
progressWin.changeHeadline('Bartleby')
const prog = new progressWin.ItemProgress(
  'chrome://zotero/skin/tick.png',
  'I would prefer not to...'
)
progressWin.show()
// Show `msg` on the progress line and, when `percentage` is truthy, move the
// progress bar to that value. A missing or zero percentage leaves the bar as is.
async function log(msg, percentage) {
  prog.setText(msg)
  if (!percentage) return
  prog.setProgress(percentage)
}
// Finish the run: show the final message at 100%, stamp the record's Extra
// field, and schedule the progress window to close after 4 seconds.
//
// msg      - text shown in the progress window; on failure it is also embedded
//            in the stamp.
// isError  - when true the progress line is flagged as an error and the stamp
//            is `[AI Failed: <msg>]`; otherwise it is `[AI Enriched: YYYY-MM-DD]`.
//
// The stamp goes on the parent when the processed item is an attachment that
// has one, otherwise on the processed item itself. Writing the stamp is
// best-effort: a failure is logged and never rejects, so the window always
// gets its close timer.
async function finalLog(msg, isError = false) {
  prog.setText(msg)
  prog.setProgress(100)
  if (isError) prog.setError()
  // A "]" or a line break inside the message would stop the stamp-removal
  // regex below from matching the whole stamp, so flatten them to spaces.
  const stampText = String(msg).replace(/[\[\]\s]+/g, ' ').trim()
  const stamp = isError
    ? `[AI Failed: ${stampText}]`
    : `[AI Enriched: ${new Date().toISOString().split('T')[0]}]`
  try {
    const target =
      itemToProcess.isAttachment() && itemToProcess.parentID
        ? Zotero.Items.get(itemToProcess.parentID)
        : itemToProcess
    const currentExtra = target.getField('extra') || ''
    // Remove ANY Bartleby/AI stamps to avoid duplicates, then add the final one
    const cleanExtra = currentExtra.replace(/\[(Bartleby|AI).*?\]/g, '').trim()
    // Only separate with a newline when there is existing content to separate from.
    target.setField('extra', cleanExtra ? `${cleanExtra}\n${stamp}` : stamp)
    await target.saveTx()
  } catch (e) {
    Zotero.logError(e)
  } finally {
    progressWin.startCloseTimer(4000)
  }
}
// --- HELPERS ---
// Read the full-text cache file Zotero reports for `target`.
// Resolves to the file's UTF-8 contents, or to null when there is no cache
// file, the file does not exist, or any step throws.
async function getPdfText(target) {
  let contents = null
  try {
    const cache = await Zotero.FullText.getItemCacheFile(target)
    const isReadable = Boolean(cache) && (await IOUtils.exists(cache.path))
    if (isReadable) contents = await IOUtils.readUTF8(cache.path)
  } catch (e) {
    // Best-effort read: a failure is treated the same as "no text available".
  }
  return contents
}
// Poll getPdfText up to 15 times, pausing 1000 ms after each unsuccessful try,
// until it yields more than 50 characters. Resolves to that text, or to null
// when every attempt comes back empty or too short. The progress line shows
// the attempt counter while waiting.
async function waitForText(target) {
  let attempt = 0
  while (attempt < 15) {
    const indexed = await getPdfText(target)
    const hasEnoughText = Boolean(indexed) && indexed.length > 50
    if (hasEnoughText) return indexed
    prog.setText(`Waiting for index... (${attempt}s)`)
    await Zotero.Promise.delay(1000)
    attempt += 1
  }
  return null
}
// Collect the tag names already used in the processed item's library, to be
// offered to the model as the only allowed tag values.
//
// Resolves to a sorted, de-duplicated list of at most MAX_TAG_CONTEXT names.
// Names starting with "_" and names of two characters or fewer are left out.
// Resolves to [] (after logging) when the lookup fails.
async function getExistingTags() {
  try {
    const rawTags = await Zotero.Tags.getAll(itemToProcess.libraryID)
    // Zotero.Tags.getAll resolves to an array of tag objects ({ tag, type }),
    // so Object.keys() on it would yield array indices, not names. A plain
    // name-keyed object is still accepted for safety.
    const candidateNames = Array.isArray(rawTags)
      ? rawTags.map((entry) => (typeof entry === 'string' ? entry : entry?.tag))
      : Object.keys(rawTags ?? {})
    const usableNames = candidateNames.filter(
      (name) => typeof name === 'string' && !name.startsWith('_') && name.length > 2
    )
    // The same name may be listed once per tag type; the list ends up as a
    // JSON-schema enum, so keep each name once.
    return [...new Set(usableNames)].sort().slice(0, MAX_TAG_CONTEXT)
  } catch (e) {
    Zotero.logError(e)
    return []
  }
}
// Set `fieldName` on `targetItem` only when `value` is truthy and the field is
// valid for the item's type. Invalid fields and any error raised along the way
// are ignored, so callers can offer every field to every item type.
function safeSetField(targetItem, fieldName, value) {
  if (value) {
    try {
      const fieldID = Zotero.ItemFields.getID(fieldName)
      const isAllowed = Zotero.ItemFields.isValidForType(fieldID, targetItem.itemTypeID)
      if (!isAllowed) return
      targetItem.setField(fieldName, value)
    } catch (e) {
      // Deliberately ignored: a field that cannot be set is simply skipped.
    }
  }
}
// --- OPENAI CALL ---
// Ask the OpenAI chat completions endpoint to extract bibliographic metadata
// from `text`, constrained to a JSON schema.
//
// text          - source text; only the first MAX_CHARS characters are sent.
// availableTags - tag names the model may choose from. They become the enum
//                 for `tags`; an empty list is replaced by the single
//                 placeholder 'no_tags_available'.
//
// Resolves to the parsed metadata object. Throws an Error when the HTTP
// request is not OK (using the API's own message when the body provides one),
// when the model refuses, or when the reply carries no content.
async function callAI(text, availableTags) {
  // Every property is listed in `required`; fields that may be absent are
  // typed as nullable instead of being optional.
  const schema = {
    name: 'metadata_extraction',
    strict: true,
    schema: {
      type: 'object',
      properties: {
        itemType: {
          type: 'string',
          enum: [
            'journalArticle',
            'book',
            'bookSection',
            'report',
            'statute',
            'bill',
            'case',
            'hearing',
            'newspaperArticle',
            'magazineArticle',
            'blogPost',
            'thesis',
            'manuscript',
            'webpage',
            'presentation',
            'conferencePaper',
            'videoRecording',
            'podcast',
          ],
        },
        title: { type: 'string' },
        shortTitle: { type: ['string', 'null'] },
        date: { type: 'string', description: 'YYYY-MM-DD' },
        publicationTitle: { type: ['string', 'null'] },
        volume: { type: ['string', 'null'] },
        issue: { type: ['string', 'null'] },
        pages: { type: ['string', 'null'] },
        doi: { type: ['string', 'null'] },
        url: { type: ['string', 'null'] },
        institution: { type: ['string', 'null'] },
        docketNumber: { type: ['string', 'null'] },
        court: { type: ['string', 'null'] },
        creators: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              firstName: { type: 'string' },
              lastName: { type: 'string' },
              creatorType: { type: 'string', enum: ['author', 'editor', 'contributor'] },
            },
            required: ['firstName', 'lastName', 'creatorType'],
            additionalProperties: false,
          },
        },
        tags: {
          type: 'array',
          items: {
            type: 'string',
            enum: availableTags.length > 0 ? availableTags : ['no_tags_available'],
          },
        },
      },
      required: [
        'itemType',
        'title',
        'shortTitle',
        'date',
        'publicationTitle',
        'volume',
        'issue',
        'pages',
        'doi',
        'url',
        'institution',
        'docketNumber',
        'court',
        'creators',
        'tags',
      ],
      additionalProperties: false,
    },
  }
  const resp = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: MODEL,
      messages: [
        {
          role: 'system',
          content:
            'You are a bibliographic expert. Extract metadata. Sentence case content. For tags, YOU MUST ONLY SELECT FROM THE PROVIDED LIST.',
        },
        { role: 'user', content: `Analyze this text:\n\n${text.substring(0, MAX_CHARS)}` },
      ],
      response_format: { type: 'json_schema', json_schema: schema },
    }),
  })
  if (!resp.ok) {
    let apiMessage = null
    try {
      const errBody = await resp.json()
      apiMessage = errBody?.error?.message
    } catch (parseError) {
      // An error body is not guaranteed to be JSON; report the HTTP status
      // instead of a misleading parse error.
    }
    throw new Error(apiMessage || `OpenAI API Error (HTTP ${resp.status})`)
  }
  const json = await resp.json()
  const message = json.choices?.[0]?.message
  // A refusal arrives in `refusal` with no content; JSON.parse(null) would
  // quietly return null and crash the caller later with an unrelated error.
  if (message?.refusal) throw new Error(`OpenAI refused the request: ${message.refusal}`)
  if (!message?.content) throw new Error('OpenAI returned no content')
  return JSON.parse(message.content)
}
// ================= MAIN LOGIC =================
// Pipeline: gather text (web page first, attachment full-text as fallback),
// ask the model for metadata, write it to the item (creating a parent when the
// processed item is a loose attachment), then stamp the result. Any failure is
// logged and recorded through finalLog as an error.
try {
  await log('Reading Item...', 10)
  let text = ''
  let pdfItem = null
  // 1. GET TEXT
  if (itemToProcess.isRegularItem() && itemToProcess.getField('url')) {
    await log('Fetching Webpage...', 30)
    try {
      const pageResp = await fetch(itemToProcess.getField('url'))
      // An error page is not the document, and a page that is blank once the
      // markup is stripped is not text: both fall through to the attachment.
      if (pageResp.ok) {
        text = (await pageResp.text()).replace(/<[^>]*>/g, ' ').trim()
      }
    } catch (e) {
      // Best-effort: the attachment fallback below still gets its chance.
      Zotero.logError(e)
    }
  }
  if (!text) {
    if (itemToProcess.isAttachment()) pdfItem = itemToProcess
    else {
      const ids = itemToProcess.getAttachments()
      if (ids.length > 0) pdfItem = Zotero.Items.get(ids[0])
    }
    if (pdfItem) {
      await log('Scanning PDF...', 30)
      text = await waitForText(pdfItem)
    }
  }
  if (!text) throw new Error('No readable text found.')
  // 2. AI PROCESSING
  await log('Fetching Tags...', 50)
  const tagsList = await getExistingTags()
  await log('Consulting Bartleby...', 70)
  const data = await callAI(text, tagsList)
  // 3. APPLY CHANGES
  await log(`Applying: ${data.itemType}...`, 90)
  let writeItem = itemToProcess
  if (itemToProcess.isAttachment()) {
    const parent = new Zotero.Item(Zotero.ItemTypes.getID(data.itemType))
    // Create the parent in the attachment's own library; without this it is
    // saved to the default library, which is wrong for group libraries.
    parent.libraryID = itemToProcess.libraryID
    // --- ANTI-RECURSION STAMP ---
    // Mark the parent immediately so the 'createItem' event ignores it
    parent.setField('extra', '[Bartleby: Processing...]')
    await parent.saveTx()
    itemToProcess.parentID = parent.id
    await itemToProcess.saveTx()
    writeItem = parent
  } else if (data.itemType !== 'webpage') {
    try {
      writeItem.setType(Zotero.ItemTypes.getID(data.itemType))
    } catch (e) {
      // Keep the existing type when the conversion is rejected.
      Zotero.logError(e)
    }
  }
  // WRITE METADATA
  const fields = [
    'title',
    'shortTitle',
    'date',
    'publicationTitle',
    'volume',
    'issue',
    'pages',
    'url',
    'institution',
    'docketNumber',
    'court',
  ]
  for (const fieldName of fields) safeSetField(writeItem, fieldName, data[fieldName])
  // The schema key is lower-case `doi`; the Zotero field name is `DOI`.
  safeSetField(writeItem, 'DOI', data.doi)
  if (data.creators && data.creators.length > 0) writeItem.setCreators(data.creators)
  // WRITE TAGS
  // Drop the schema placeholder and repeats so the reported count matches
  // what is actually added.
  const tagsToAdd = [...new Set(data.tags ?? [])].filter((tag) => tag !== 'no_tags_available')
  for (const tag of tagsToAdd) writeItem.addTag(tag, 1)
  await writeItem.saveTx()
  await finalLog(`Success! ${tagsToAdd.length} tags added.`)
} catch (e) {
  Zotero.logError(e)
  await finalLog(e.message, true)
}
})()

Cost Estimate
Pricing basis: GPT-4o-mini with MAX_CHARS = 9000 and MAX_TAG_CONTEXT = 300
Estimate: $0.0014 per item (about one-seventh of a penny)
Breakdown:
- Input tokens (~4,000): 9,000 chars ≈ 2,250 + tag context ≈ 1,200 + schema/instructions ≈ 550 → 4,000 × $0.25 / 1M = $0.0010
- Output tokens (~200): Structured JSON reply → 200 × $2.00 / 1M = $0.0004
Usage scenarios:
- 1 item → $0.0014
- 100 items → $0.14
- 1,000 items → $1.40
Additional Screens
- Item add hook firing on ingest.
- Structured item with child attachment.
- Diff of fields before and after Bartleby.