Bartleby the Librarian: An AI Metadata Tool for Zotero 7

Zotero works well when it has a Translator for the site you're on—a script that tells it how to scrape metadata from JSTOR or Westlaw. But the web is infinite and the Zotero team is not. For legal researchers especially, the gaps are everywhere: niche law reviews, government PDF repositories, state legislative archives. Save something from a site without a translator and you get a "Webpage" item with an attachment named document(3).pdf. Bartleby is a fallback for when Zotero's translators come up empty.

Bartleby sends the first 9,000 characters of a PDF or webpage to OpenAI and attempts to conform the result to Zotero's item schema—distinguishing a Statute from a Book, a Law Review Article from a Report. It also adds tags, borrowing a technique from joshjm's zotero-ai-tag that restricts the model to tags already in your library. I've added some other cleanup for personal nits, like converting ALL CAPS TITLES to sentence case. Of course there's a risk of hallucination, so it's worth checking the outputs. To make that possible, Bartleby stamps every record it touches with [AI Enriched] in the Extra field.

In use, a progress bar appears with the tagline "I would prefer not to..." before Bartleby parses the text, builds a Parent Item, and stamps the record. A recursion guard ensures it doesn't loop on its own output. It does the job—reluctantly, but well.

◦

Progress UI triggered via Actions Tags.◦ A raw PDF being converted into a Zotero Law Review Article

Bartleby builds a parent item and fills bibliographic fields.◦ Extra field stamped with "[AI Enriched]"

Audit trail goes into Extra for every touched record.

Quickstart: Actions Tags

Install Zotero Actions Tags.
In the plugin's preferences, add a new "On Item Add" action and paste the script below.
Add your OpenAI key and adjust the model if desired. The recursion guards, tag restrictions, and [AI Enriched] audit stamp are included by default.

→ Bartleby Actions Tags script (click to expand)

// ================= CONFIGURATION =================
// Secret key sent as the Bearer token on the OpenAI request. Replace the placeholder before use.
const OPENAI_API_KEY = 'sk-proj-YOUR_KEY_HERE' // <--- PASTE KEY
// Model name placed in the chat-completions request body.
const MODEL = 'gpt-4o-mini'
// Maximum number of characters of extracted text included in the prompt.
const MAX_CHARS = 9000
// Maximum number of existing library tags offered to the model as allowed choices.
const MAX_TAG_CONTEXT = 300
// =================================================

;(async () => {
  // 1. UNIVERSAL ITEM DETECTION
  let itemToProcess = null
  if (typeof item !== 'undefined' && item) itemToProcess = item
  else if (typeof items !== 'undefined' && items.length > 0) itemToProcess = items[0]
  else {
    try {
      itemToProcess = Zotero.getActiveZoteroPane().getSelectedItems()[0]
    } catch (e) {}
  }
  if (!itemToProcess) return

  // --- GUARD 1: RECURSION & HISTORY ---
  // If we are currently processing, OR if we have already finished this item, STOP.
  let checkExtra = itemToProcess.getField('extra') || ''
  if (
    checkExtra.includes('[Bartleby') ||
    checkExtra.includes('[AI Enriched') ||
    checkExtra.includes('[AI Failed')
  ) {
    return
  }

  // --- GUARD 2: FILED ATTACHMENT ---
  // If this is a PDF that is already inside a parent item, ignore it.
  // Bartleby only picks up "loose" files.
  if (itemToProcess.isAttachment() && itemToProcess.parentID) {
    return
  }

  // 2. SETUP UI
  // One progress window with a single row; the row (`prog`) is what the
  // logging helpers update for the rest of the run.
  const progressWin = new Zotero.ProgressWindow({ closeOnClick: true })
  progressWin.changeHeadline('Bartleby')
  const tickIcon = 'chrome://zotero/skin/tick.png'
  const prog = new progressWin.ItemProgress(tickIcon, 'I would prefer not to...')
  progressWin.show()

  /**
   * Show `msg` in the progress row and, when `percentage` is truthy, move the bar.
   *
   * @param {string} msg - Text for the progress row.
   * @param {number} [percentage] - Bar position; omitted or 0 leaves the bar untouched.
   * @returns {Promise<void>}
   */
  async function log(msg, percentage) {
    prog.setText(msg)
    if (!percentage) return
    prog.setProgress(percentage)
  }

  /**
   * Finish the run: show the final message, then write an audit stamp into the
   * Extra field of the processed record (the parent item when the processed
   * attachment has one, otherwise the item itself).
   *
   * Stamping is best-effort: a failure while writing the stamp is logged and
   * never thrown, and the progress window's close timer is always started.
   *
   * @param {string} msg - Text shown in the progress row; on failure it is also embedded in the stamp.
   * @param {boolean} [isError=false] - When true, flags the row as errored and writes "[AI Failed: ...]"
   *   instead of "[AI Enriched: YYYY-MM-DD]".
   * @returns {Promise<void>}
   */
  async function finalLog(msg, isError = false) {
    prog.setText(msg)
    prog.setProgress(100)
    if (isError) prog.setError()

    try {
      // Keep the stamp on a single line and free of ']' so the cleanup regex
      // below can always recognise (and later replace) the whole stamp.
      const safeMsg = String(msg).replace(/\s+/g, ' ').replace(/\]/g, ')').trim()
      const stamp = isError
        ? `[AI Failed: ${safeMsg}]`
        : `[AI Enriched: ${new Date().toISOString().split('T')[0]}]`
      const target =
        itemToProcess.isAttachment() && itemToProcess.parentID
          ? Zotero.Items.get(itemToProcess.parentID)
          : itemToProcess

      const currentExtra = target.getField('extra') || ''
      // Remove only the stamps this script writes, so unrelated bracketed text
      // that merely starts with "AI" survives.
      const cleanExtra = currentExtra
        .replace(/\[(?:Bartleby|AI (?:Enriched|Failed))[^\]]*\]/g, '')
        .trim()

      // No leading blank line when Extra was empty.
      target.setField('extra', cleanExtra ? `${cleanExtra}\n${stamp}` : stamp)
      await target.saveTx()
    } catch (e) {
      // NOTE(review): setField may throw when 'extra' is not a valid field for
      // the target's item type (possibly a still-unfiled attachment) - confirm.
      Zotero.logError(e)
    } finally {
      progressWin.startCloseTimer(4000)
    }
  }

  // --- HELPERS ---
  /**
   * Read the full-text cache file for `target`.
   *
   * @param {object} target - Item handed to Zotero.FullText.getItemCacheFile.
   * @returns {Promise<string|null>} The cache file's UTF-8 contents, or null when
   *   there is no cache file, it does not exist on disk, or any step throws.
   */
  async function getPdfText(target) {
    let cachedText = null
    try {
      const cacheFile = await Zotero.FullText.getItemCacheFile(target)
      const isOnDisk = Boolean(cacheFile) && (await IOUtils.exists(cacheFile.path))
      if (isOnDisk) cachedText = await IOUtils.readUTF8(cacheFile.path)
    } catch (e) {}
    return cachedText
  }

  /**
   * Poll getPdfText up to 15 times, pausing 1000 ms between attempts, until it
   * yields more than 50 characters.
   *
   * @param {object} target - Item whose cached text is being waited on.
   * @returns {Promise<string|null>} The text once it is long enough, or null after the last attempt.
   */
  async function waitForText(target) {
    let attempt = 0
    while (attempt < 15) {
      const candidate = await getPdfText(target)
      if (candidate && candidate.length > 50) return candidate
      prog.setText(`Waiting for index... (${attempt}s)`)
      await Zotero.Promise.delay(1000)
      attempt += 1
    }
    return null
  }

  /**
   * Collect the tag names already present in the processed item's library, for
   * use as the model's allowed tag choices.
   *
   * Zotero.Tags.getAll is expected to resolve to an array of `{ tag, type }`
   * entries, so names are read from each entry's `tag` property; reading the
   * array's keys would yield index strings instead of names. A plain
   * name-keyed object is still accepted as a fallback.
   *
   * @returns {Promise<string[]>} Unique names, excluding those starting with '_'
   *   or shorter than 3 characters, sorted and capped at MAX_TAG_CONTEXT.
   *   Empty when the lookup fails.
   */
  async function getExistingTags() {
    try {
      const tags = await Zotero.Tags.getAll(itemToProcess.libraryID)
      const rawNames = Array.isArray(tags)
        ? tags.map((entry) => (typeof entry === 'string' ? entry : entry?.tag))
        : Object.keys(tags ?? {})
      const usable = rawNames.filter(
        (name) => typeof name === 'string' && !name.startsWith('_') && name.length > 2
      )
      // The same name can be listed once per tag type; the schema enum needs unique values.
      return [...new Set(usable)].sort().slice(0, MAX_TAG_CONTEXT)
    } catch (e) {
      // Tagging is optional: report the problem and carry on without tag context.
      Zotero.logError(e)
      return []
    }
  }

  /**
   * Set `fieldName` on `targetItem` only when the item's type can hold it.
   *
   * When the field is not directly valid for the type, the type-specific
   * counterpart of a base field is looked up via getFieldIDFromTypeAndBase and
   * written instead, so generic names such as 'title' or 'date' still land on
   * types that use a differently named field for them.
   * NOTE(review): presumes Item.setField does not perform this remapping itself
   * and accepts a numeric field ID - confirm against the Zotero version in use.
   *
   * Never throws: empty values, unknown fields, and fields with no counterpart
   * on the type are skipped; unexpected errors are written to the debug log.
   *
   * @param {object} targetItem - Item to modify (not saved here).
   * @param {string} fieldName - Field name to set.
   * @param {*} value - Value to write; any falsy value is ignored.
   * @returns {void}
   */
  function safeSetField(targetItem, fieldName, value) {
    if (!value) return
    try {
      const fieldID = Zotero.ItemFields.getID(fieldName)
      if (!fieldID) return

      const typeID = targetItem.itemTypeID
      if (Zotero.ItemFields.isValidForType(fieldID, typeID)) {
        targetItem.setField(fieldName, value)
        return
      }

      const mappedID = Zotero.ItemFields.getFieldIDFromTypeAndBase(typeID, fieldID)
      if (mappedID) targetItem.setField(mappedID, value)
    } catch (e) {
      Zotero.debug(`Bartleby: could not set field '${fieldName}': ${e}`)
    }
  }

  // --- OPENAI CALL ---
  /**
   * Ask the OpenAI chat-completions endpoint to extract bibliographic metadata
   * from `text`, constrained by a strict JSON schema.
   *
   * @param {string} text - Source text; only the first MAX_CHARS characters are sent.
   * @param {string[]} availableTags - Tag names the model may choose from. When empty,
   *   the single placeholder 'no_tags_available' is offered instead.
   * @returns {Promise<object>} The parsed JSON object produced by the model.
   * @throws {Error} When the HTTP status is not OK (using the API's message when the
   *   error body is JSON, otherwise the status code), when the model refuses or
   *   returns no content, or when the returned content is not valid JSON.
   */
  async function callAI(text, availableTags) {
    const nullableString = { type: ['string', 'null'] }
    // Duplicate names would repeat enum values in the schema, so collapse them.
    const tagChoices = [...new Set(availableTags)]

    const properties = {
      itemType: {
        type: 'string',
        enum: [
          'journalArticle',
          'book',
          'bookSection',
          'report',
          'statute',
          'bill',
          'case',
          'hearing',
          'newspaperArticle',
          'magazineArticle',
          'blogPost',
          'thesis',
          'manuscript',
          'webpage',
          'presentation',
          'conferencePaper',
          'videoRecording',
          'podcast',
        ],
      },
      title: { type: 'string' },
      shortTitle: nullableString,
      date: { type: 'string', description: 'YYYY-MM-DD' },
      publicationTitle: nullableString,
      volume: nullableString,
      issue: nullableString,
      pages: nullableString,
      doi: nullableString,
      url: nullableString,
      institution: nullableString,
      docketNumber: nullableString,
      court: nullableString,
      creators: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            firstName: { type: 'string' },
            lastName: { type: 'string' },
            creatorType: { type: 'string', enum: ['author', 'editor', 'contributor'] },
          },
          required: ['firstName', 'lastName', 'creatorType'],
          additionalProperties: false,
        },
      },
      tags: {
        type: 'array',
        items: {
          type: 'string',
          enum: tagChoices.length > 0 ? tagChoices : ['no_tags_available'],
        },
      },
    }

    const schema = {
      name: 'metadata_extraction',
      strict: true,
      schema: {
        type: 'object',
        properties,
        // Every property is listed as required; optional ones are nullable instead.
        required: Object.keys(properties),
        additionalProperties: false,
      },
    }

    const resp = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${OPENAI_API_KEY}`,
      },
      body: JSON.stringify({
        model: MODEL,
        messages: [
          {
            role: 'system',
            content:
              'You are a bibliographic expert. Extract metadata. Sentence case content. For tags, YOU MUST ONLY SELECT FROM THE PROVIDED LIST.',
          },
          { role: 'user', content: `Analyze this text:\n\n${text.substring(0, MAX_CHARS)}` },
        ],
        response_format: { type: 'json_schema', json_schema: schema },
      }),
    })

    if (!resp.ok) {
      let detail = ''
      try {
        const errBody = await resp.json()
        detail = errBody?.error?.message || ''
      } catch (e) {
        // The error body was not JSON (for example an HTML gateway page);
        // fall through to the status-based message.
      }
      throw new Error(detail || `OpenAI API Error (HTTP ${resp.status})`)
    }

    const json = await resp.json()
    const message = json?.choices?.[0]?.message
    if (message?.refusal) throw new Error(`Model refused: ${message.refusal}`)
    if (typeof message?.content !== 'string' || message.content === '') {
      throw new Error('OpenAI returned no content')
    }
    try {
      return JSON.parse(message.content)
    } catch (e) {
      throw new Error('OpenAI returned malformed JSON', { cause: e })
    }
  }

  // ================= MAIN LOGIC =================
  // Pipeline: gather text (webpage first, then attachment full-text cache),
  // ask the model for metadata, write it to the item (creating a parent item
  // first when the processed item is a loose attachment), then stamp the result.
  // Any failure is logged and recorded through finalLog(..., true).
  try {
    await log('Reading Item...', 10)
    let text = ''
    let pdfItem = null

    // 1. GET TEXT
    if (itemToProcess.isRegularItem() && itemToProcess.getField('url')) {
      await log('Fetching Webpage...', 30)
      try {
        const html = await (await fetch(itemToProcess.getField('url'))).text()
        text = html
          // Drop script/style bodies first; otherwise they consume the character budget.
          .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
          .replace(/<[^>]*>/g, ' ')
          .replace(/\s+/g, ' ')
          .trim()
      } catch (e) {
        // Best effort: fall through to the attachment text below.
        Zotero.debug(`Bartleby: webpage fetch failed, trying attachment text: ${e}`)
      }
    }

    if (!text) {
      if (itemToProcess.isAttachment()) pdfItem = itemToProcess
      else {
        const ids = itemToProcess.getAttachments()
        if (ids.length > 0) pdfItem = Zotero.Items.get(ids[0])
      }
      if (pdfItem) {
        await log('Scanning PDF...', 30)
        text = await waitForText(pdfItem)
      }
    }

    if (!text) throw new Error('No readable text found.')

    // 2. AI PROCESSING
    await log('Fetching Tags...', 50)
    const tagsList = await getExistingTags()

    await log('Consulting Bartleby...', 70)
    const data = await callAI(text, tagsList)

    // 3. APPLY CHANGES
    await log(`Applying: ${data.itemType}...`, 90)

    let writeItem = itemToProcess

    if (itemToProcess.isAttachment()) {
      const typeID = Zotero.ItemTypes.getID(data.itemType)
      const parent = new Zotero.Item(typeID)
      // Create the parent in the attachment's own library; without this the new
      // item is not tied to the library the attachment lives in.
      parent.libraryID = itemToProcess.libraryID

      // --- ANTI-RECURSION STAMP ---
      // Mark the parent immediately so the 'createItem' event ignores it
      parent.setField('extra', '[Bartleby: Processing...]')
      await parent.saveTx()

      itemToProcess.parentID = parent.id
      await itemToProcess.saveTx()
      writeItem = parent
    } else if (data.itemType !== 'webpage') {
      try {
        writeItem.setType(Zotero.ItemTypes.getID(data.itemType))
      } catch (e) {
        // Keep the existing type if the conversion is rejected.
        Zotero.debug(`Bartleby: could not change item type to ${data.itemType}: ${e}`)
      }
    }

    // WRITE METADATA
    const fields = [
      'title',
      'shortTitle',
      'date',
      'publicationTitle',
      'volume',
      'issue',
      'pages',
      'url',
      'institution',
      'docketNumber',
      'court',
    ]
    for (const fieldName of fields) safeSetField(writeItem, fieldName, data[fieldName])
    // The schema key is lower-case 'doi'; the field is written under the name 'DOI'.
    safeSetField(writeItem, 'DOI', data.doi)

    if (data.creators && data.creators.length > 0) writeItem.setCreators(data.creators)

    // WRITE TAGS
    // The placeholder only exists to keep the schema enum non-empty; it is never
    // applied and must not be counted in the success message.
    const chosenTags = (data.tags || []).filter((tag) => tag !== 'no_tags_available')
    for (const tag of chosenTags) writeItem.addTag(tag, 1)

    await writeItem.saveTx()
    await finalLog(`Success! ${chosenTags.length} tags added.`)
  } catch (e) {
    Zotero.logError(e)
    await finalLog(e.message, true)
  }
})()

Cost Estimate

Pricing basis: GPT-4o-mini with MAX_CHARS = 9000 and MAX_TAG_CONTEXT = 300

Estimate: $0.0014 per item (about one-seventh of a penny)

Breakdown:

Input tokens (~4,000): 9,000 chars ≈ 2,250 + tag context ≈ 1,200 + schema/instructions ≈ 550 → 4,000 × $0.25 / 1M = $0.0010
Output tokens (~200): Structured JSON reply → 200 × $2.00 / 1M = $0.0004

Usage scenarios:

1 item → $0.0014
100 items → $0.14
1,000 items → $1.40

Additional Screens

◦

Item add hook firing on ingest.◦ Attachment cleanup view after enrichment

Structured item with child attachment.◦ Before and after comparison of item cleanup

Diff of fields before and after Bartleby.