起因 最近借助 Notion 作为数据库搭建了《开源服务指南》的工作流,Notion 是真的好用,无奈复制出来的 Markdown 文本中图片部分格式不对(不知道是不是我使用方法有问题),需要手动修正。一回生二回熟三回咱可不就烦了嘛,所以想着写个油猴插件解决 把 Notion Page 内容复制为标准 Markdown 文本  这个问题。
折腾 获取页面 HTML,然后 HTML 转 Markdown 先查了一下资料,在 GreasyFork  搜索 notion 和 markdown 关键字,找到了这么个脚本:复制为Markdown格式 
使用类似的思路,先获取目标 DOM 的 HTML 代码,然后 HTML 转 Markdown。但是理想很丰满,现实很骨感。
以下是相关代码:
/**
 * Userscript, attempt 1: grab the Notion page HTML, convert it to Markdown
 * with Turndown, repair the image links and copy the result to the clipboard.
 *
 * Requires `TurndownService` to be loaded by the userscript header (@require).
 */
(function () {
    'use strict';

    const urlOrigin = document.location.origin;

    /** Injects the floating "Copy Content" button into the page. */
    function initCopyButton() {
        const copyButton = document.createElement('div');
        Object.assign(copyButton.style, {
            position: 'fixed',
            // Wide enough to keep the label on a single line.
            width: '88px',
            height: '22px',
            lineHeight: '22px',
            top: '14%',
            right: '1%',
            background: '#0084ff',
            fontSize: '14px',
            color: '#fff',
            textAlign: 'center',
            borderRadius: '6px',
            zIndex: '10000',
            cursor: 'pointer',
            opacity: '0.6',
        });
        copyButton.textContent = 'Copy Content';
        copyButton.addEventListener('click', copyPageContent);
        console.log('initCopyButton');
        document.body.prepend(copyButton);
    }

    /**
     * @returns {string|undefined} innerHTML of the Notion page body, or
     *   undefined when the page-content element is not present.
     */
    function getContentElement() {
        const pageContent = document.querySelector('main.notion-frame .notion-page-content');
        if (!pageContent) {
            return undefined;
        }
        return pageContent.innerHTML;
    }

    /**
     * Converts page HTML to Markdown.
     * @param {string|undefined} htmlText
     * @returns {string|undefined} Markdown text, or undefined for empty input.
     */
    function html2Markdown(htmlText) {
        if (!htmlText) {
            return undefined;
        }
        // Normalise figures, images and links before handing over to Turndown.
        const normalizedHtml = htmlText
            .replace(/<figure[\s\S]+?<\/figure>/gi, processFigure)
            .replace(/<img[^>]+>/gi, processImg)
            .replace(/(<a.+?href=")(.*?")(.*?<\/a>)/gi, parseHref);

        const turndownService = new TurndownService();
        const markdownText = turndownService.turndown(normalizedHtml);
        // Drop any raw <img> tags that survived the conversion.
        return markdownText.replace(/<img.+?>/g, '');
    }

    /** Reduces a <figure> to its first <img> tag (ignoring <noscript> copies). */
    function processFigure(str) {
        const withoutNoscript = str.replace(/<noscript>[\s\S]*<\/noscript>/, '');
        const img = withoutNoscript.match(/<img[^>]+?>/);
        return img ? img[0] : withoutNoscript;
    }

    /**
     * Rewrites an <img> tag to a minimal tag with an absolute `src`.
     * `data-original` (lazy-load source) takes precedence over `src`.
     */
    function processImg(imgStr) {
        const readAttr = (name) =>
            (imgStr.match(new RegExp(`\\s${name}=(["'])(.*?)\\1`)) || [])[2];

        const src = readAttr('src');
        if (!src) {
            return '';
        }
        const rawSrc = readAttr('data-original') || src;
        let absoluteSrc = rawSrc;
        try {
            // Resolves protocol-relative, root-relative and bare relative
            // paths against the current page, always including the origin.
            absoluteSrc = new URL(rawSrc, document.location.href).href;
        } catch (err) {
            console.warn('processImg: cannot resolve image URL', rawSrc, err);
        }
        return `<img src="${absoluteSrc}" />`;
    }

    /**
     * Replace callback for `(<a…href=")(link")(…</a>)`: makes the href absolute.
     * Note that `link` still carries the closing double quote.
     */
    function parseHref(match, head, link, tail) {
        const href = link.slice(0, -1);
        const isPlaceholder =
            href === '' ||
            href.startsWith('#') ||
            href.toLowerCase().startsWith('javascript');
        if (isPlaceholder) {
            return `${head}#"${tail}`;
        }
        try {
            const url = new URL(href, document.location.href);
            if (/^https?:/i.test(href)) {
                // Absolute links lose their fragment, but must keep the
                // closing quote so the HTML stays well-formed.
                url.hash = '';
            }
            return `${head}${url.href}"${tail}`;
        } catch (err) {
            console.warn('parseHref: cannot resolve link', href, err);
            return match;
        }
    }

    /**
     * Legacy clipboard fallback based on a hidden textarea and execCommand.
     * @returns {boolean} whether the browser reported a successful copy.
     */
    function copyToClipboard1(text) {
        const input = document.createElement('textarea');
        input.style.position = 'fixed';
        input.style.opacity = '0';
        input.value = text;
        document.body.appendChild(input);
        try {
            input.select();
            return document.execCommand('copy');
        } finally {
            document.body.removeChild(input);
        }
    }

    /**
     * Copies text using the async Clipboard API, falling back to execCommand.
     * @returns {Promise<boolean>} true when the text reached the clipboard.
     */
    async function copyToClipboard(text) {
        try {
            await navigator.clipboard.writeText(text);
            console.log('文本已成功复制到剪贴板');
            return true;
        } catch (err) {
            console.error('复制操作失败', err);
            return copyToClipboard1(text);
        }
    }

    /** Click handler: convert the page to Markdown and copy it exactly once. */
    async function copyPageContent() {
        const markdownContent = fixMarkdownContent(html2Markdown(getContentElement()));
        if (!markdownContent) {
            // Nothing to copy (no page content); never write "undefined".
            showMessage('复制失败');
            return;
        }
        console.log(markdownContent);
        const copied = await copyToClipboard(markdownContent);
        showMessage(copied ? '复制成功' : '复制失败');
    }

    /**
     * Unwraps Notion's image proxy links:
     * `![](<origin>/image/<encoded-url>?query)` becomes `![](<decoded-url>)`.
     * @param {string|undefined} markdown
     * @returns {string|undefined}
     */
    function fixMarkdownContent(markdown) {
        if (!markdown) {
            return undefined;
        }
        // The origin contains regex metacharacters ('.', '/'), so escape it.
        const escapedOrigin = urlOrigin.replace(/[.*+?^${}()|[\]\\/]/g, '\\$&');
        const regex = new RegExp(`!\\[\\]\\(${escapedOrigin}\\/image\\/(http.*?)\\?.*?\\)`, 'g');
        return markdown.replaceAll(regex, (match, group1) => {
            try {
                return `![](${decodeURIComponent(group1)})`;
            } catch (err) {
                // Malformed percent-encoding: leave the link untouched.
                console.warn('fixMarkdownContent: cannot decode', group1, err);
                return match;
            }
        });
    }

    /** Shows a toast at the bottom of the page for three seconds. */
    function showMessage(message) {
        const toast = document.createElement('div');
        Object.assign(toast.style, {
            position: 'fixed',
            bottom: '20px',
            left: '50%',
            transform: 'translateX(-50%)',
            padding: '10px 20px',
            background: 'rgba(0, 0, 0, 0.8)',
            color: 'white',
            borderRadius: '5px',
            zIndex: '9999',
        });
        toast.innerText = message;
        document.body.appendChild(toast);
        setTimeout(() => {
            toast.remove();
        }, 3000);
    }

    function init() {
        initCopyButton();
    }

    init();
})();
借用 clipboard.js 执行复制功能 无意间发现了一个叫作 clipboard.js  的工具,看起来很方便。试了一下,只能复制 DOM 的 innerText,并不能复制 DOM 元素(或者说触发 Notion 本身的复制方法),作罢。
以下是代码:
/**
 * Userscript, attempt 2: wait for the Notion page body to appear, then add a
 * button that copies it through clipboard.js.
 *
 * Requires `ClipboardJS` to be loaded by the userscript header (@require).
 */
(function () {
    'use strict';

    const PAGE_CONTENT_SELECTOR = '#notion-app .notion-page-content';

    init();

    function init() {
        waitFor(PAGE_CONTENT_SELECTOR).then(() => {
            initCopyButton();
        });
    }

    /** Injects the floating button and binds clipboard.js to it. */
    function initCopyButton() {
        const copyButton = document.createElement('div');
        copyButton.id = 'copyButton';
        Object.assign(copyButton.style, {
            position: 'fixed',
            width: '88px',
            height: '22px',
            lineHeight: '22px',
            top: '14%',
            right: '1%',
            background: '#0084ff',
            fontSize: '14px',
            color: '#fff',
            textAlign: 'center',
            borderRadius: '6px',
            zIndex: '10000',
            cursor: 'pointer',
            opacity: '0.6',
        });
        copyButton.textContent = 'Copy Content';

        console.log('initCopyButton');
        document.body.prepend(copyButton);

        const clipboard = new ClipboardJS('#copyButton', {
            // Re-query on every click: Notion may have re-rendered the page.
            // Returning undefined (instead of null) when it is missing lets
            // clipboard.js report an error rather than throw on a bad target.
            target() {
                return document.querySelector(PAGE_CONTENT_SELECTOR) ?? undefined;
            },
        });
        clipboard.on('success', (e) => {
            console.info('Action:', e.action);
            console.info('Text:', e.text);
            console.info('Trigger:', e.trigger);
            e.clearSelection();
        });
        clipboard.on('error', (e) => {
            console.error('Action:', e.action);
            console.error('Trigger:', e.trigger);
        });
    }

    /**
     * Polls the DOM until every selector matches an element.
     * @param {...string} selectors CSS selectors to wait for.
     * @returns {Promise<Element[]>} the matched elements, in selector order.
     */
    function waitFor(...selectors) {
        return new Promise((resolve) => {
            const delay = 500;
            const poll = () => {
                const elements = selectors.map((selector) => document.querySelector(selector));
                // Resolve only once *all* elements exist.
                if (elements.every((element) => element !== null)) {
                    resolve(elements);
                } else {
                    setTimeout(poll, delay);
                }
            };
            poll();
        });
    }
})();
直接调用系统复制功能,然后读取剪切板内容并进行替换 后面我发现这个复制为 Markdown 的功能应该是 Notion 自己实现的,并不是粘贴时编辑器把富文本变成了 Markdown。既然如此,为啥不直接调用 Notion 自己的复制功能,然后我们读取剪切板再做内容修正嘞?
说干就干,搞完之后还是有一个很奇怪的小问题:同一个页面第一次复制内容不对,但是从第二次开始就对了。
以下是代码:
/**
 * Userscript, attempt 3: select the Notion page body, trigger the browser's
 * copy command (so Notion's own copy handler produces the Markdown), then
 * read the clipboard back, repair the image syntax and write it again.
 *
 * Requires the `GM_setClipboard` grant in the userscript header.
 */
(function () {
    'use strict';

    const PAGE_CONTENT_SELECTOR = '#notion-app .notion-page-content';

    init();

    function init() {
        waitFor(PAGE_CONTENT_SELECTOR).then(() => {
            initCopyButton();
        });
    }

    /** Injects the floating "Copy Content" button into the page. */
    function initCopyButton() {
        const copyButton = document.createElement('div');
        Object.assign(copyButton.style, {
            position: 'fixed',
            width: '88px',
            height: '22px',
            lineHeight: '22px',
            top: '14%',
            right: '1%',
            background: '#0084ff',
            fontSize: '14px',
            color: '#fff',
            textAlign: 'center',
            borderRadius: '6px',
            zIndex: '10000',
            cursor: 'pointer',
            opacity: '0.6',
        });
        copyButton.textContent = 'Copy Content';
        copyButton.addEventListener('click', copyPageContentAsync);
        console.log('initCopyButton');
        document.body.prepend(copyButton);
    }

    /** Repairs the clipboard text and reports the outcome to the user. */
    function publishFixedMarkdown(clipboardContent) {
        if (!clipboardContent) {
            showMessage('复制失败');
            return;
        }
        console.log('clipboardContent', clipboardContent);
        const markdownContent = fixMarkdownFormat(clipboardContent);
        console.log('markdown', markdownContent);
        GM_setClipboard(markdownContent);
        showMessage('复制成功');
    }

    /** Promise-chain variant of the click handler (no delay before copying). */
    function copyPageContentSync() {
        if (!copyElementSync(PAGE_CONTENT_SELECTOR)) {
            showMessage('复制失败');
            return;
        }
        navigator.clipboard
            .readText()
            .then(publishFixedMarkdown)
            .catch((error) => {
                console.error('Failed to read clipboard:', error);
                showMessage('复制失败');
            });
    }

    /** Click handler: copy via Notion, then fix the Markdown in the clipboard. */
    async function copyPageContentAsync() {
        if (!(await copyElementAsync(PAGE_CONTENT_SELECTOR))) {
            showMessage('复制失败');
            return;
        }
        publishFixedMarkdown(await readClipboard());
    }

    /**
     * Turns Notion's bare `!https://…/image.ext` into `![](https://…/image.ext)`.
     * @param {string|undefined} markdown
     * @returns {string|undefined}
     */
    function fixMarkdownFormat(markdown) {
        if (!markdown) {
            return undefined;
        }
        return markdown.replaceAll(/!(http.*\.\w+)/g, (match, group1) => {
            let processedText = group1;
            try {
                processedText = decodeURIComponent(group1);
            } catch (error) {
                // Malformed percent-encoding: keep the URL as copied.
                console.warn('fixMarkdownFormat: cannot decode', group1, error);
            }
            console.log('regex', processedText);
            return `![](${processedText})`;
        });
    }

    /** @returns {Promise<string|undefined>} clipboard text, undefined on failure. */
    async function readClipboard() {
        try {
            return await navigator.clipboard.readText();
        } catch (error) {
            console.error('Failed to read clipboard:', error);
            return undefined;
        }
    }

    /**
     * Replaces the current selection with the full contents of the element.
     * @returns {{element: Element, selection: Selection}|null} null when the
     *   selector matches nothing.
     */
    function selectElementContents(selector) {
        const element = document.querySelector(selector);
        if (!element) {
            console.error('Element not found:', selector);
            return null;
        }
        const range = document.createRange();
        range.selectNodeContents(element);
        const selection = window.getSelection();
        selection.removeAllRanges();
        selection.addRange(range);
        return { element, selection };
    }

    /** Selects all child nodes of the element and copies them (no focus change). */
    function copyElementChildren(selector) {
        if (!selectElementContents(selector)) {
            return false;
        }
        return document.execCommand('copy');
    }

    /**
     * Selects the element, waits briefly so the page can react to the new
     * selection, copies, then clears the selection.
     * @returns {Promise<boolean>} result of the copy command.
     */
    async function copyElementAsync(selector) {
        const selected = selectElementContents(selector);
        if (!selected) {
            return false;
        }
        selected.element.focus();
        await sleep(500);
        const copied = document.execCommand('copy');
        selected.selection.removeAllRanges();
        return copied;
    }

    /** Same as copyElementAsync but without the delay; leaves the selection. */
    function copyElementSync(selector) {
        const selected = selectElementContents(selector);
        if (!selected) {
            return false;
        }
        selected.element.focus();
        return document.execCommand('copy');
    }

    /** Shows a toast at the bottom of the page for three seconds. */
    function showMessage(message) {
        const toast = document.createElement('div');
        Object.assign(toast.style, {
            position: 'fixed',
            bottom: '20px',
            left: '50%',
            transform: 'translateX(-50%)',
            padding: '10px 20px',
            background: 'rgba(0, 0, 0, 0.8)',
            color: 'white',
            borderRadius: '5px',
            zIndex: '9999',
        });
        toast.innerText = message;
        document.body.appendChild(toast);
        setTimeout(() => {
            toast.remove();
        }, 3000);
    }

    /**
     * Polls the DOM until every selector matches an element.
     * @param {...string} selectors CSS selectors to wait for.
     * @returns {Promise<Element[]>} the matched elements, in selector order.
     */
    function waitFor(...selectors) {
        return new Promise((resolve) => {
            const delay = 500;
            const poll = () => {
                const elements = selectors.map((selector) => document.querySelector(selector));
                // Resolve only once *all* elements exist.
                if (elements.every((element) => element !== null)) {
                    resolve(elements);
                } else {
                    setTimeout(poll, delay);
                }
            };
            poll();
        });
    }

    function sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
})();
直接追加一个 EventListener 做格式修正 前面的代码,就算加了延时还是会有一个小 bug:一键复制的时候偶尔会少复制最后一个 Notion Block 内容。 更新:出现这个问题的原因不明,但是只要在 Notion 文章最后加入一个空行,就能避免这个问题 
最后还是硬着头皮看了看 Notion 的代码,发现 Notion 的复制事件是放在 window 上的(怪不得我之前在 document 上面找不到),既然找到了  Notion 自己的复制事件就好办了。最简单的方法就是在 window 上面追加一个 copy 事件,做额外的 Markdown 格式修正工作。
以下是代码:
/**
 * Userscript, attempt 4: Notion registers its copy handler on `window`, so
 * add one more `copy` listener there that post-processes the Markdown Notion
 * placed on the clipboard.
 */
(function () {
  'use strict';

  init();

  function init() {
    injectActions();
  }

  function injectActions() {
    window.addEventListener('copy', fixNotionMarkdownInClipboard);
  }

  /** `copy` handler: read the clipboard back and rewrite it only if needed. */
  async function fixNotionMarkdownInClipboard() {
    try {
      const text = await navigator.clipboard.readText();
      const markdown = fixMarkdownFormat(text);
      // Skip the write for empty clipboards (would store "undefined") and
      // for text that needed no fix, so unrelated copies stay untouched.
      if (!markdown || markdown === text) {
        return;
      }
      await navigator.clipboard.writeText(markdown);
    } catch (error) {
      console.log('failed.', error);
    }
  }

  /**
   * Repairs Notion's Markdown export:
   *  1. bare `!https://…/image.ext` becomes `![](https://…/image.ext)`;
   *  2. a caption repeated right after `![caption](url)` is removed.
   * @param {string|undefined} markdown
   * @returns {string|undefined}
   */
  function fixMarkdownFormat(markdown) {
    if (!markdown) {
      return undefined;
    }

    const withImages = markdown.replaceAll(/!(http.*\.\w+)/g, (match, group1) => {
      let processedText = group1;
      try {
        processedText = decodeURIComponent(group1);
      } catch (error) {
        // Malformed percent-encoding: keep the URL as copied.
        console.warn('fixMarkdownFormat: cannot decode', group1, error);
      }
      console.log('regex', processedText);
      return `![](${processedText})`;
    });

    const captionRegex = /(\!\[(?<title>.+?)\]\(.*?\)\s*)\k<title>\s*/g;
    return withImages.replaceAll(captionRegex, '$1');
  }
})();
为了追求简单,这里移除了一键复制操作,后面有空再补。
完美方案 页面分析 
结构如下:
div#notion-app
div.notion-app-inner
div.notion-cursor-listener
 
 
 
 
 
 
插件装载时机 
开局就找 .notion-page-content
如果找得到,装载插件,结束 
如果找不到,不做任何事情,结束。 
 
 
同步查找 #notion-app main.notion-frame .notion-page-content
如果找得到,代表当前页面是正常的 Notion Page 页面,装载插件 
如果找不到,代表当前页面是 view / database 页面,给 #notion-app 添加 MutationObserver(调用其 observe 方法),监听其子节点变动,根据子节点变动情况动态装载 / 卸载插件。结束。 
 
 
 
插件行为 
先给 window 追加一个类型为 copy 的 EventListener,事件触发时,读取剪切板内容并修正 Markdown 格式; 
往页面注入一个“复制”按钮,用户点击按钮时,自动选中 Notion 页面内容并触发 copy 事件; 
 
代码 Github 
后记 技术不到位,就只能各种摸爬滚打。
多看文档,了解一下浏览器有什么 API,每个 API  怎么调用、有什么能力,往往能起到事半功倍的效果。
脚本已发布至 Greasy Fork  可以自行下载使用。
参考文档 
Event  | Events  | DOM-Level-3-Events  | JavaScript 事件顺序   JS 中的事件接口、事件类型、以及事件执行顺序。EventTarget MutationObserver 
MutationRecord 
MutationRecord/addedNodes MutationRecord/removedNodes  
 
ClipboardAPI ClipboardEvent copy_event 创建和触发事件 复制 Notion 页面正文内容  这里有个等待页面 DOM 元素加载完成之后执行操作的方法非常棒,学到就是赚到。复制为Markdown格式  借助 turndown.js 实现 HTML 转 Markdown,开眼。选择(Selection)和范围(Range)  概念讲解,写得很详细,但我没看完。防抖与节流  防抖与节流