// Essay — A Deployment Playbook, Applied to Microsoft Copilot
// UNLISTED preview route (pending author confirmation of the "applied to Copilot,
// not a Copilot case study" framing). Not linked from Writing until cleared.
function EssayCopilotPlaybook() {
  const { go } = useRoute();
  const [progress, setProgress] = React.useState(0);
  const [activeSection, setActiveSection] = React.useState('i');
  const articleRef = React.useRef(null);

  // Table of contents: one entry per <h2 id="sec-…"> heading rendered below.
  // Each row is [id, numeral, title]; `id` is the suffix of the heading's DOM id,
  // `numeral` and `title` are what the sidebar contents list displays.
  const sections = [
    ['i', 'I', 'Use-case identification'],
    ['ii', 'II', 'Data-access governance'],
    ['iii', 'III', 'Pilot-to-scale cadence'],
    ['iv', 'IV', 'Adoption metric design'],
    ['v', 'V', 'Value-realisation tracking'],
    ['checklist', '·', 'The deployment checklist'],
  ].map(([id, numeral, title]) => ({ id, numeral, title }));

  // Keeps the sidebar in sync with the reader's position: updates the progress
  // bar and highlights the contents entry for the section currently in view.
  // Runs once on mount (empty dependency list). `sections` is re-created on every
  // render but always with the same contents, so the captured copy never goes stale.
  React.useEffect(() => {
    // A heading counts as "reached" once its top edge is within this many pixels
    // of the viewport top.
    // NOTE(review): presumably chosen to sit just past the headings' `scroll-mt-24`
    // anchor offset so that a contents-link jump activates its section — confirm.
    const ACTIVATION_OFFSET_PX = 120;

    const syncReadingPosition = () => {
      const el = articleRef.current;
      if (!el) return;

      // Fraction of the article scrolled past the viewport top, clamped to [0, 1].
      // The scrollable distance is floored at 1px: when the article is no taller
      // than the window the raw distance is zero or negative, and dividing by it
      // gives NaN (0/0, rendered as "NaN%") or a ratio with the wrong sign.
      const top = el.getBoundingClientRect().top;
      const scrollable = Math.max(1, el.scrollHeight - window.innerHeight);
      setProgress(Math.min(1, Math.max(0, -top / scrollable)));

      // Walk the sections bottom-up and take the last heading that has been
      // reached. Fall back to the first section when none has, so that jumping
      // back to the top of the page does not leave a later entry highlighted.
      let current = sections[0].id;
      for (let i = sections.length - 1; i >= 0; i--) {
        const heading = document.getElementById(`sec-${sections[i].id}`);
        if (heading && heading.getBoundingClientRect().top < ACTIVATION_OFFSET_PX) {
          current = sections[i].id;
          break;
        }
      }
      setActiveSection(current);
    };

    window.addEventListener('scroll', syncReadingPosition, { passive: true });
    // Progress depends on window.innerHeight, so recompute when the window resizes.
    window.addEventListener('resize', syncReadingPosition);
    syncReadingPosition(); // initialise before the first scroll event

    return () => {
      window.removeEventListener('scroll', syncReadingPosition);
      window.removeEventListener('resize', syncReadingPosition);
    };
  }, []);

  return (
    <main ref={articleRef} className="mx-auto max-w-[1320px] px-6 md:px-10 pt-10 md:pt-16 pb-20">
      <div className="rule-bottom pb-4 flex items-center justify-between smallcaps text-muted">
        <span>Essay</span>
        <span className="hidden md:inline">A deployment playbook, applied to Microsoft Copilot</span>
        <span className="tabular">June 2026</span>
      </div>

      <header className="mt-10 md:mt-16 grid md:grid-cols-12 gap-6 md:gap-10">
        <div className="md:col-span-2">
          <div className="smallcaps text-muted">Essay</div>
        </div>
        <div className="md:col-span-10">
          <h1 className="font-display font-semibold tracking-tight text-balance leading-[1.06]
                         text-[36px] sm:text-[44px] md:text-[58px]">
            A deployment playbook, applied to Microsoft Copilot.
          </h1>
          <p className="mt-5 font-display text-[20px] md:text-[23px] leading-[1.4] max-w-[52ch] text-ink2">
            Why buying the licence is procurement, and everything after it is the deployment.
          </p>
          <div className="mt-8 smallcaps text-muted">
            By <span className="text-ink">Prathyusha Vemula</span> · June 2026 · 11 min read
          </div>
        </div>
      </header>

      <div className="mt-16 grid md:grid-cols-12 gap-6 md:gap-10">
        <aside className="md:col-span-3 order-2 md:order-1">
          <div className="md:sticky md:top-28">
            <div className="smallcaps text-muted">Contents</div>
            <ol className="mt-4 space-y-3">
              {sections.map(s => (
                <li key={s.id}>
                  <a href={`#sec-${s.id}`} className={`flex items-baseline gap-3 transition-colors ${activeSection === s.id ? 'text-sienna' : 'text-ink2 hover:text-ink'}`}>
                    <span className="font-display text-sm w-7 text-right tabular">{s.numeral}</span>
                    <span className="leading-tight">{s.title}</span>
                  </a>
                </li>
              ))}
            </ol>
            <div className="mt-10 rule-top pt-5">
              <div className="smallcaps text-muted">Progress</div>
              <div className="mt-3 h-[2px] bg-ink/10 relative">
                <div className="absolute left-0 top-0 h-full bg-sienna transition-all duration-200" style={{ width: `${Math.round(progress * 100)}%` }} />
              </div>
              <div className="mt-2 tabular text-xs text-muted">{Math.round(progress * 100)}%</div>
            </div>
            <div className="mt-10 rule-top pt-5">
              <div className="smallcaps text-muted">Counter-arguments</div>
              <p className="mt-2 text-sm text-ink2 leading-relaxed">
                Replies welcome at <a className="link-underline text-ink" href="mailto:vemula.prathyusha@gmail.com">vemula.prathyusha@gmail.com</a>. The next revision cites disagreements that landed.
              </p>
            </div>
          </div>
        </aside>

        <article className="md:col-span-9 order-1 md:order-2">
          <div className="measure flow font-display text-[20px] md:text-[21px] leading-[1.65] text-ink">

            {/* Prologue */}
            <div className="mb-12 flow">
              <div className="smallcaps text-sienna">For leaders deciding whether to roll out Copilot</div>
              <p className="mt-4 font-display text-[24px] md:text-[28px] leading-[1.35] text-ink2">
                An enterprise buys Microsoft Copilot licences and calls that a deployment. It is not.
              </p>
              <p className="mt-8">
                The licence is procurement. The deployment is the work that happens after the contract is signed: choosing which workflows Copilot is allowed to touch, governing the data it can reach, running a pilot that predicts behaviour rather than proves a demo, designing for adoption, and tracking whether the workflow it was bought to improve actually moved.
              </p>
              <p>
                Most disappointment with Copilot traces to skipping that work. The product installs in days. The behaviour change it was bought to produce takes a programme. When an organisation treats the install as the finish line, the gap between a licence that shows active and a workflow that has genuinely changed becomes the story of the rollout twelve months later.
              </p>
              <p>
                This playbook sets out the discipline I would apply to a Copilot deployment in a regulated enterprise. It is not a Microsoft-specific insight. It is the same delivery discipline that produced a GenAI Agent Assist deployment across 1,000+ banking associates with a 30% average reduction in average handle time, a GenAI HR assistant, delivered to production, that cut escalations by roughly 40%, and a six-component agentic FinCrime workflow at a Tier-1 bank. Copilot is a different product. The work that decides whether it lands is the same work.
              </p>
              <PullQuote>
                The licence is procurement. The deployment is everything that happens after the contract is signed.
              </PullQuote>
              <p>
                The five sections below follow the order the work actually happens in: use-case identification, data-access governance, pilot-to-scale cadence, adoption metric design, and value-realisation tracking. The closing section is a one-page checklist organised by those five stages.
              </p>
            </div>

            {/* I */}
            <h2 id="sec-i" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">I · Use-case identification</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Pick the one workflow with a measured, owned metric Copilot can move.</span>
            </h2>
            <p className="mt-6">
              "Where could Copilot help" is an unbounded question, and an unbounded question produces an unbounded rollout. Copilot can draft an email, summarise a meeting, and search a document library, which means almost any knowledge-worker task returns a plausible answer. A list of plausible answers is the reason a deployment never reaches a verdict.
            </p>
            <p>
              The question that produces a deployment is narrower. Which workflow has a metric that is already measured and already owned by a named person, and is itself slow or costly enough that moving the metric matters to that person? Three conditions, all of them required. A workflow with no baseline metric cannot be evaluated: there is nothing to compare against after rollout. A metric with no owner cannot be defended, because when the rollout competes for attention nobody is accountable for the result. And if the metric is already healthy, the change is not worth its cost, however well Copilot performs.
            </p>
            <p>
              In the GenAI Agent Assist programme, the workflow was contact-centre call handling, the metric was average handle time, and the owner was the operations leader whose targets depended on it. That is why the result was legible: handle time dropped 30% on average across the deployment, 40% on the strongest single deployment, and the person who owned that number could say so. The same selection test applies to Copilot. A meeting-summary use case is attractive in a demo and weak in a deployment, because no executive owns a "meeting summary quality" metric. A use case tied to proposal turnaround time, or to first-response time on a service queue, has an owner and a baseline, which means it can produce a verdict.
            </p>
            <p>
              Selecting two or three such workflows for the first wave is enough. The instinct to enable Copilot broadly so that "everyone can find their own use" feels generous and is the most reliable way to produce a rollout with no measurable result. Breadth comes after the first workflows produce evidence.
            </p>

            {/* II */}
            <h2 id="sec-ii" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">II · Data-access governance</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Copilot inherits the organisation's existing permission model. Where access governance is loose, it surfaces what was already over-shared.</span>
            </h2>
            <p className="mt-6">
              Copilot does not introduce a new security model. It reads what the signed-in user is already permitted to read across the Microsoft 365 estate, and it makes that reach usable in a way that browsing folders never did. Before Copilot, a document mis-shared to a wide group was technically accessible and practically buried. After Copilot, a user can ask a question and have that document surface as an answer. The exposure existed beforehand. Copilot removes the obscurity that was functioning as accidental security.
            </p>
            <p>
              This is why data-access governance is a precondition to rollout, not a follow-up to it. A salary file in a mislabelled SharePoint library, a board pack in an over-permissioned Teams site, a folder of legal documents shared with "everyone in the company" three reorganisations ago: none of these are Copilot problems, and all of them become visible the week Copilot goes live. An organisation that enables Copilot first and audits permissions afterwards has chosen to discover its over-sharing through its own employees. That is the most expensive way to learn your permissions are wrong.
            </p>
            <p>
              The governance work before any pilot has a defined shape:
            </p>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li><strong>Audit permissions</strong> on the content Copilot will index, sensitive sites and libraries first.</li>
              <li><strong>Apply Microsoft Purview</strong> (Microsoft's governance and compliance suite) sensitivity labels and data-loss-prevention (DLP) policies, so confidential content is governed by classification rather than by who set up a folder.</li>
              <li><strong>Confirm the data boundary for your tenant.</strong> Microsoft documents Copilot processing within the Microsoft 365 service boundary, but the real behaviour shifts with connected agents, web grounding (when Copilot draws on public web data) and Graph connectors (which pull in third-party content). Verify it against your settings and write it into the rollout brief.</li>
              <li><strong>Decide retention, eDiscovery, and audit-log access.</strong> Copilot prompts and responses are records (legal-hold and litigation-search reach them), subject to your tenant's retention and audit configuration; decide who can see interaction history.</li>
            </ul>
            <p className="mt-6">
              In a regulated industry these are hard constraints. They belong in the room before the first workflow decision.
            </p>
            <p>
              The discipline is to hold governance, people, and customer together from the first decision, instead of sequencing them. Governance treated as a post-rollout review fails slowly and expensively. Governance treated as a precondition makes the rest of the deployment safe to run.
            </p>

            {/* III */}
            <h2 id="sec-iii" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">III · Pilot-to-scale cadence</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Run a pilot that predicts how the median employee will behave at scale.</span>
            </h2>
            <p className="mt-6">
              Most Copilot pilots are designed to succeed. They recruit the enthusiasts, the people who asked for access, give them a few weeks, and collect their reactions. The enthusiasts report that Copilot is useful, the pilot is declared a success, and the organisation scales, and then discovers that the median employee behaves nothing like the volunteer who shaped the pilot result. A pilot built from enthusiasts predicts the behaviour of enthusiasts. At scale, most users are not enthusiasts.
            </p>
            <p>
              A pilot that predicts behaviour at scale is designed differently. It recruits a cohort that resembles the eventual population: a deliberate mix of roles and tenure, including people who did not ask for Copilot and have no particular interest in it. It runs long enough to clear the novelty period, the first weeks when usage is inflated by curiosity, and to reach the point where usage reflects settled habit. By then the early curiosity has worn off. It measures the specific workflow metric chosen in section I. Asking "did you find Copilot useful" measures sentiment. Asking "did proposal turnaround time on this team drop" measures the result that predicts what scaling will produce.
            </p>
            <p>
              The pilot also has to surface the failure modes that scale will amplify:
            </p>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Where Copilot returned a confident answer built on an out-of-date document, because the search index lagged behind the source of truth, and what that cost.</li>
              <li>Which permission problems appeared the moment a real cohort asked real questions.</li>
              <li>Where the workflow needed redesign, because Copilot inside an unchanged process often just speeds up a step that should not have existed.</li>
            </ul>
            <p className="mt-6">
              A pilot that surfaces these is more useful than one that produces a clean success slide. The clean slide is what gets contradicted at scale.
            </p>
            <p>
              The cadence is then deliberate. The first cohort produces evidence and a redesigned workflow. The second wave tests whether that holds for a wider, less motivated group. Each wave has an explicit decision gate: proceed, adjust, or stop. The metric decides which.
            </p>

            {/* IV */}
            <h2 id="sec-iv" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">IV · Adoption metric design</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Adoption is workflow change. Design the metric to detect it, beyond an active-licence count.</span>
            </h2>
            <p className="mt-6">
              The most available Copilot metric is the count of licences that are active, and it is the metric most likely to mislead. An active licence records that a person opened Copilot. It does not record that their work changed. An organisation that reports adoption as active licences is measuring whether the software was switched on, and then mistaking that for the outcome the software was bought to produce.
            </p>
            <p>
              Adoption is workflow change, and the metric has to be designed to detect workflow change. In the GenAI HR assistant deployment, which reached production, the real adoption signal was the roughly 40% reduction in escalations, a measured result. It showed the workflow had genuinely shifted: questions that used to become tickets were now resolved at the assistant. The count of employees who had opened it showed nothing of the kind. The escalation rate is an adoption metric, because it measures the behaviour the deployment was meant to change.
            </p>
            <p>
              For a Copilot deployment, the gap to watch sits between two numbers. One is licence-active: people who have used Copilot at all. The other is workflow-changed: people for whom the target workflow now runs differently because Copilot is in it. A healthy deployment closes that gap over successive waves. A deployment in trouble shows a high licence-active number and a flat workflow-changed number: a population that has tried Copilot and a workflow that still runs the old way. That divergence is the single most informative signal in the rollout, and it is invisible to any organisation that only counts active licences.
            </p>
            <PullQuote>
              Watch the gap between licences that are active and workflows that have actually changed. That gap is the real state of the rollout.
            </PullQuote>
            <p>
              Designing the metric well means designing for the behaviour the deployment wants. If the goal is faster, better-grounded customer responses, the metric is response time and response quality on that queue, not Copilot prompt volume, which can rise while the customer outcome does not move at all. The metric should make a real result look like success and a hollow rollout look like the hollow rollout it is.
            </p>

            {/* V */}
            <h2 id="sec-v" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">V · Value-realisation tracking</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Connect Copilot use back to the specific workflow metric it was bought to move.</span>
            </h2>
            <p className="mt-6">
              Value realisation is the discipline of answering one question without flinching: did the workflow metric chosen in section I actually move, and can the owner of that metric confirm it. Everything earlier in the playbook exists to make this question answerable. A use case with no baseline cannot answer it; a pilot of enthusiasts answers it wrongly; an adoption metric built on active licences answers an easier question instead.
            </p>
            <p>
              The tracking has a baseline and a cadence. The baseline is the workflow metric measured before Copilot, captured during section I, because a baseline cannot be reconstructed after the fact. The cadence is a fixed review at 30, 90, and 180 days after each wave, comparing the workflow metric against that baseline, with the metric owner in the room. Thirty days reads early signal. Ninety days clears the novelty period. One hundred and eighty days shows whether the change held once Copilot stopped being new. A result that appears at 30 days and is gone at 180 was a curiosity, not a deployment outcome.
            </p>
            <p>
              This is also where measurement has to be honest about second-order effects. Copilot can reduce the time to draft a document while increasing the time to review it, because more drafts now exist. It can speed up a step and move the bottleneck one step downstream rather than removing it. A value-realisation review that tracks only the headline metric will report a win that the end-to-end workflow did not deliver. The review has to ask whether the metric improved and whether the gain survived contact with the rest of the process and the customer at the end of it.
            </p>
            <p>
              When value realisation is built in from section I, a Copilot deployment can state its result plainly: this workflow ran this way, it now runs this way, the named metric moved by this much, and the owner of that metric agrees. When it is bolted on at the end, the deployment is left defending a count of active licences, which is just procurement reporting on itself.
            </p>

            {/* checklist */}
            <h2 id="sec-checklist" className="scroll-mt-24 mt-16">
              <span className="block smallcaps text-sienna mb-2">The deployment checklist</span>
              <span className="font-display font-semibold text-[24px] md:text-[30px] leading-[1.16] tracking-tight">Decision criteria and risk gates, organised by the five stages.</span>
            </h2>
            <p className="mt-6">
              Each gate is a question with a required answer before the stage is allowed to close.
            </p>

            <h3 className="font-display font-semibold text-xl mt-10 text-teal">Stage I · Use-case identification</h3>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Does each candidate workflow have a metric that is <strong>already measured today</strong>? If there is no baseline, the use case cannot be evaluated, so defer it.</li>
              <li>Does that metric have a <strong>named owner</strong> who is accountable for it? An unowned metric has no defender when the rollout competes for attention.</li>
              <li>Is the workflow <strong>slow or costly enough that moving the metric matters</strong> to that owner? A healthy metric is not worth the change cost.</li>
              <li>Has the first wave been <strong>scoped to two or three workflows</strong>, not "Copilot for everyone"? Breadth comes after evidence.</li>
              <li><strong>Risk gate:</strong> if no candidate workflow clears all three conditions, do not start the rollout. Find a workflow that does.</li>
            </ul>

            <h3 className="font-display font-semibold text-xl mt-10 text-teal">Stage II · Data-access governance</h3>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Have <strong>permissions been audited</strong> on the SharePoint sites, Teams, and libraries Copilot will index, sensitive content first?</li>
              <li>Are <strong>sensitivity labels applied</strong> so confidential content is governed by classification, not by folder-setup memory?</li>
              <li>Is the <strong>Microsoft 365 data boundary</strong> stated explicitly in the rollout brief, so the risk review's first question is already answered?</li>
              <li>Have <strong>retention and eDiscovery treatment</strong> for Copilot prompts and responses been decided? They are records.</li>
              <li><strong>Risk gate:</strong> no pilot begins until the permission audit on in-scope content is complete. Enabling Copilot over un-audited permissions means discovering over-sharing through your own employees.</li>
            </ul>

            <h3 className="font-display font-semibold text-xl mt-10 text-teal">Stage III · Pilot-to-scale cadence</h3>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Does the pilot cohort <strong>resemble the eventual population</strong> (a mix of roles, tenure, and technical confidence, including non-volunteers)?</li>
              <li>Does the pilot run <strong>long enough to clear the novelty period</strong> and reach habit-level usage?</li>
              <li>Is the pilot measuring the <strong>chosen workflow metric</strong>, not satisfaction?</li>
              <li>Has the pilot been used to <strong>surface failure modes</strong> (stale-document answers, permission gaps, workflows needing redesign)?</li>
              <li>Does each wave have an <strong>explicit proceed / adjust / stop gate</strong>, decided by the metric?</li>
              <li><strong>Risk gate:</strong> a pilot staffed only by enthusiasts is not a valid basis for scaling. Rebuild the cohort.</li>
            </ul>

            <h3 className="font-display font-semibold text-xl mt-10 text-teal">Stage IV · Adoption metric design</h3>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Is the adoption metric defined as <strong>workflow change</strong>, not licence-active count?</li>
              <li>Are <strong>both numbers tracked</strong> (licences active and workflows changed), and is the gap between them being watched?</li>
              <li>Is the metric designed for <strong>the behaviour the deployment wants</strong>, not a vanity number like prompt volume?</li>
              <li><strong>Risk gate:</strong> if the only adoption number being reported to leadership is active licences, the deployment does not yet know its real state. Add the workflow-changed metric before scaling further.</li>
            </ul>

            <h3 className="font-display font-semibold text-xl mt-10 text-teal">Stage V · Value-realisation tracking</h3>
            <ul className="mt-4 space-y-2 list-disc pl-6 text-ink2">
              <li>Was the <strong>workflow baseline captured before Copilot</strong>, during Stage I?</li>
              <li>Is there a <strong>fixed 30 / 90 / 180-day review</strong> against that baseline, with the metric owner present?</li>
              <li>Does the review check <strong>second-order effects</strong> (bottlenecks moved downstream, review burden created, customer outcome at the end of the workflow)?</li>
              <li>Can the deployment state its result as <strong>"this metric moved by this much, and its owner confirms it"</strong>?</li>
              <li><strong>Risk gate:</strong> a result visible at 30 days but gone at 180 is not a deployment outcome. Do not report it as one.</li>
            </ul>

            <hr className="mt-16 hairline" />

            <p className="mt-10 text-base text-muted">
              Prathyusha Vemula leads AI transformation and automation at Concentrix as Group Lead (Senior Manager); "Senior Consultant" is the formal HR designation. Her work spans agentic AI, GenAI, RPA, and Automation Centre-of-Excellence strategy across twelve years in BFSI, Telecom, FMCG, and Manufacturing. Microsoft Certified Azure AI Engineer Associate (AI-102).
            </p>
            <p className="mt-6 text-sm text-muted">
              Tags · AI Transformation · Enterprise AI Deployment · AI Adoption · Data Governance · Value Realisation · Microsoft Copilot · Microsoft 365 · Change Management · Azure AI
            </p>
          </div>

          <div className="mt-16 rule-top pt-6 flex flex-wrap items-baseline justify-between gap-4 smallcaps text-muted">
            <span className="tabular">Published · June 2026</span>
            <a className="link-underline text-ink" href="mailto:vemula.prathyusha@gmail.com">vemula.prathyusha@gmail.com</a>
            <button onClick={() => go('writing')} className="link-underline">Back to writing →</button>
          </div>
        </article>
      </div>
    </main>
  );
}

// Publish the component as a property of `window` under its own name.
// NOTE(review): presumably because pages are loaded as plain scripts and look
// each other up as globals rather than through module imports — confirm.
window.EssayCopilotPlaybook = EssayCopilotPlaybook;
