{
  "name": "cos8-literature-pipeline",
  "nodes": [
    {
      "parameters": {
        "options": {}
      },
      "id": "3882ca76-7424-429d-ac27-a42dd6eec954",
      "name": "When chat message received",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "typeVersion": 1.1,
      "position": [
        240,
        380
      ],
      "webhookId": "f60ef7d6-bedd-4f6b-8eff-1366a8139179"
    },
    {
      "parameters": {
        "jsCode": "const q = $input.first().json.chatInput ?? '';\nconst encoded = encodeURIComponent(q);\nreturn [{ json: {\n  question: q,\n  esearchUrl: `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=${encoded}&retmax=8&retmode=json`\n}}];"
      },
      "id": "0f9ee300-e767-427e-9341-92998bcd4b64",
      "name": "Stage 1 — Search",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        460,
        380
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.esearchUrl }}",
        "options": {}
      },
      "id": "58c55008-76e3-4e77-8595-1d2ebf34a67d",
      "name": "PubMed esearch",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        680,
        380
      ]
    },
    {
      "parameters": {
        "jsCode": "const r = $input.first().json;\nconst ids = (r.esearchresult?.idlist ?? []).slice(0, 5);\nif (!ids.length) return [{ json: { ids: '', efetchUrl: '', question: $('Stage 1 — Search').first().json.question }}];\nconst efetchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${ids.join(',')}&rettype=abstract&retmode=text`;\nreturn [{ json: { ids: ids.join(','), efetchUrl, count: ids.length, question: $('Stage 1 — Search').first().json.question } }];"
      },
      "id": "b201c81a-a6b7-4b93-a062-95add1d52cad",
      "name": "Stage 2 — Fetch",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        900,
        380
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.efetchUrl }}",
        "options": {
          "response": {
            "response": {
              "responseFormat": "text"
            }
          }
        }
      },
      "id": "5789c2f5-0367-4a3d-9439-65e84cb957ff",
      "name": "PubMed efetch",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1120,
        380
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "60521fbe-1a17-4695-b943-c2db7010c497",
              "name": "abstracts",
              "value": "={{ $json.data.substring(0, 10000) }}",
              "type": "string"
            },
            {
              "id": "bf1cabde-ec62-46aa-b8db-781d2492846a",
              "name": "question",
              "value": "={{ $('Stage 2 — Fetch').first().json.question }}",
              "type": "string"
            }
          ]
        }
      },
      "id": "39caaf8a-d33a-4851-8764-7fb94cdf4df1",
      "name": "Stage 3 — Prepare Extraction",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1340,
        380
      ]
    },
    {
      "parameters": {
        "promptType": "define",
        "text": "={{ 'Extract structured information from these abstracts for the topic: ' + $json.question }}",
        "options": {
          "systemMessage": "You are a literature extraction assistant. Extract structured information from these PubMed abstracts.\n\nABSTRACTS:\n{{ $json.abstracts }}\n\nFor EACH paper found, extract and return a JSON array with this structure:\n[\n  {\n    \"title\": \"paper title\",\n    \"year\": \"publication year\",\n    \"method\": \"main methodology used\",\n    \"finding\": \"key finding in one sentence\",\n    \"limitation\": \"main limitation or caveat\"\n  }\n]\n\nReturn ONLY the JSON array, no other text. If a field is not mentioned in the abstract, use null."
        }
      },
      "id": "08644456-3193-430f-93f0-ee297bb12772",
      "name": "Stage 3 — AI Extract",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "typeVersion": 3.1,
      "position": [
        1560,
        380
      ]
    },
    {
      "parameters": {
        "model": "llama-3.3-70b-versatile",
        "options": {
          "temperature": 0.2
        }
      },
      "id": "98f38ae7-25d6-4c79-afa7-4489776bea46",
      "name": "Groq",
      "type": "@n8n/n8n-nodes-langchain.lmChatGroq",
      "typeVersion": 1,
      "position": [
        1560,
        560
      ],
      "credentials": {
        "groqApi": {
          "id": "groq-shared-cred",
          "name": "Groq (shared, students)"
        }
      }
    },
    {
      "parameters": {
        "sessionIdType": "customKey",
        "sessionKey": "={{ $('When chat message received').first().json.sessionId }}",
        "contextWindowLength": 8
      },
      "id": "ec9c770b-e945-4e0d-a357-836b12da36d2",
      "name": "Simple Memory",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "typeVersion": 1.3,
      "position": [
        1760,
        560
      ]
    },
    {
      "parameters": {
        "jsCode": "const output = $input.first().json.output ?? '';\nlet papers;\ntry {\n  const match = output.match(/\\[.*\\]/s);\n  papers = JSON.parse(match ? match[0] : output);\n} catch(e) {\n  return [{ json: { table: '**Extraction failed** — the AI output could not be parsed as JSON. Try a more specific query.\\n\\nRaw output: ' + output.substring(0, 500) } }];\n}\n// Stage 4: Filter out incomplete records\nconst valid = papers.filter(p => p && p.title && p.finding);\nif (!valid.length) return [{ json: { table: 'No papers with sufficient information were found.' } }];\n// Stage 5: Format as Markdown table\nconst header = '| Title | Year | Method | Key Finding | Limitation |';\nconst sep    = '|-------|------|--------|-------------|------------|';\nconst rows   = valid.map(p =>\n  `| ${p.title || '-'} | ${p.year || '-'} | ${p.method || '-'} | ${p.finding || '-'} | ${p.limitation || '-'} |`\n);\nreturn [{ json: { table: [header, sep, ...rows].join('\\n'), count: valid.length } }];"
      },
      "id": "4c86647d-7974-495a-995a-c58cdc8d697d",
      "name": "Stage 4+5 — Filter and Format",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1780,
        380
      ]
    },
    {
      "parameters": {
        "content": "## Session 08 — Multi-stage Literature Pipeline\n\n**5 stages (all in this workflow):**\n1. **Search** — esearch converts your query to PMIDs\n2. **Fetch** — efetch retrieves top 5 full abstracts\n3. **Extract** — AI agent extracts structured fields (JSON)\n4. **Filter** — Code node removes incomplete records\n5. **Format** — Code node assembles a Markdown comparison table\n\n**Try it:** Ask about any research topic, e.g.:\n'mTOR inhibitor resistance in breast cancer'\n\n**Session memory** is set — follow-up questions work.\n\n**Extend:** Add a 6th stage: HTTP Request to Semantic Scholar API to fetch citation counts for each paper.",
        "height": 280,
        "width": 440
      },
      "id": "29a55bf8-d8d4-4ec8-86cc-4097f8d0b5ce",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        240,
        80
      ]
    }
  ],
  "connections": {
    "When chat message received": {
      "main": [
        [
          {
            "node": "Stage 1 — Search",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Stage 1 — Search": {
      "main": [
        [
          {
            "node": "PubMed esearch",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PubMed esearch": {
      "main": [
        [
          {
            "node": "Stage 2 — Fetch",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Stage 2 — Fetch": {
      "main": [
        [
          {
            "node": "PubMed efetch",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PubMed efetch": {
      "main": [
        [
          {
            "node": "Stage 3 — Prepare Extraction",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Stage 3 — Prepare Extraction": {
      "main": [
        [
          {
            "node": "Stage 3 — AI Extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Stage 3 — AI Extract": {
      "main": [
        [
          {
            "node": "Stage 4+5 — Filter and Format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Groq": {
      "ai_languageModel": [
        [
          {
            "node": "Stage 3 — AI Extract",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Simple Memory": {
      "ai_memory": [
        [
          {
            "node": "Stage 3 — AI Extract",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  },
  "pinData": {}
}