From 756832d8e59f2613b7420f06322d5678d6fe7cb3 Mon Sep 17 00:00:00 2001 From: Melroy van den Berg Date: Thu, 24 Apr 2025 16:22:57 +0200 Subject: [PATCH] Parse HTML to markdown --- formatter/formatter.go | 95 +++++++++++++++++------------------------- go.mod | 15 ++++++- go.sum | 42 +++++++++++++++++++ 3 files changed, 95 insertions(+), 57 deletions(-) diff --git a/formatter/formatter.go b/formatter/formatter.go index 3811cf4..753915e 100644 --- a/formatter/formatter.go +++ b/formatter/formatter.go @@ -8,6 +8,7 @@ import ( "github.com/fatih/color" "github.com/tidwall/gjson" + "github.com/vlanse/go-term-markdown" ) // Format takes ActivityPub data and returns a formatted string representation @@ -87,7 +88,8 @@ func formatActor(jsonStr string, parts []string, bold, cyan, green, red, yellow } if summary := gjson.Get(jsonStr, "summary").String(); summary != "" { - parts = append(parts, fmt.Sprintf("%s: %s", bold("Summary"), summary)) + md := htmlToMarkdown(summary) + parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Summary"), renderMarkdown(md))) } if published := gjson.Get(jsonStr, "published").String(); published != "" { @@ -108,12 +110,11 @@ func formatActor(jsonStr string, parts []string, bold, cyan, green, red, yellow // formatContent formats content-type objects (Note, Article, etc.) func formatContent(jsonStr string, parts []string, bold, green, yellow func(a ...interface{}) string) []string { if content := gjson.Get(jsonStr, "content").String(); content != "" { - // Strip HTML tags for display - content = stripHTML(content) - if len(content) > 300 { - content = content[:297] + "..." + md := htmlToMarkdown(content) + if len(md) > 300 { + md = md[:297] + "..." } - parts = append(parts, fmt.Sprintf("%s: %s", bold("Content"), content)) + parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Content"), renderMarkdown(md))) } // Check for attachments (images, videos, etc.) @@ -204,11 +205,8 @@ func formatActivity(jsonStr string, parts []string, bold, green, yellow func(a . parts = append(parts, fmt.Sprintf("%s: %s", bold("Object Type"), yellow(objectType))) if content := gjson.Get(jsonStr, "object.content").String(); content != "" { - content = stripHTML(content) - if len(content) > 300 { - content = content[:297] + "..." - } - parts = append(parts, fmt.Sprintf("%s: %s", bold("Content"), content)) + md := htmlToMarkdown(content) + parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Content"), renderMarkdown(md))) } // Check for attachments in the object @@ -324,11 +322,8 @@ func formatEvent(jsonStr string, parts []string, bold, green, yellow func(a ...i } if content := gjson.Get(jsonStr, "content").String(); content != "" { - content = stripHTML(content) - if len(content) > 300 { - content = content[:297] + "..." - } - parts = append(parts, fmt.Sprintf("%s: %s", bold("Description"), content)) + md := htmlToMarkdown(content) + parts = append(parts, fmt.Sprintf("%s:\n%s", bold("Description"), renderMarkdown(md))) } if startTime := gjson.Get(jsonStr, "startTime").String(); startTime != "" { @@ -359,6 +354,34 @@ func formatTombstone(jsonStr string, parts []string, bold, green, yellow func(a return parts } +// Helper to convert HTML to Markdown and render to terminal +func renderMarkdown(md string) string { + // width=80, no color override, no emoji, no images + return string(markdown.Render(md, 80, 6)) +} + +// Replace stripHTML with htmlToMarkdown +func htmlToMarkdown(html string) string { + // For now, a basic replacement (optionally, use a library for better conversion) + html = strings.ReplaceAll(html, "
", "\n") + html = strings.ReplaceAll(html, "
", "\n") + html = strings.ReplaceAll(html, "

", "\n") + html = strings.ReplaceAll(html, "

", "\n") + // Remove all other tags + for { + startIdx := strings.Index(html, "<") + if startIdx == -1 { + break + } + endIdx := strings.Index(html[startIdx:], ">") + if endIdx == -1 { + break + } + html = html[:startIdx] + html[startIdx+endIdx+1:] + } + return html +} + // formatDate formats an ISO 8601 date string to a more readable format func formatDate(isoDate string) string { t, err := time.Parse(time.RFC3339, isoDate) @@ -368,46 +391,6 @@ func formatDate(isoDate string) string { return t.Format("Jan 02, 2006 15:04:05") } -// stripHTML removes HTML tags from a string -func stripHTML(html string) string { - // Simple HTML tag stripping - in a real implementation, you might want to use a proper HTML parser - result := html - - // Replace common HTML entities - replacements := map[string]string{ - "&": "&", - "<": "<", - ">": ">", - """: "\"", - "'": "'", - " ": " ", - } - - for entity, replacement := range replacements { - result = strings.ReplaceAll(result, entity, replacement) - } - - // Remove HTML tags - for { - startIdx := strings.Index(result, "<") - if startIdx == -1 { - break - } - - endIdx := strings.Index(result[startIdx:], ">") - if endIdx == -1 { - break - } - - result = result[:startIdx] + result[startIdx+endIdx+1:] - } - - // Normalize whitespace - result = strings.Join(strings.Fields(result), " ") - - return result -} - // formatArray formats an array of values into a readable string func formatArray(values []gjson.Result) string { if len(values) == 0 { diff --git a/go.mod b/go.mod index ea433cf..b5df959 100644 --- a/go.mod +++ b/go.mod @@ -9,13 +9,26 @@ require ( ) require ( + github.com/MichaelMure/go-term-text v0.3.1 // indirect + github.com/alecthomas/chroma v0.10.0 // indirect + github.com/disintegration/imaging v1.6.2 // indirect + github.com/dlclark/regexp2 v1.10.0 // indirect + github.com/eliukblau/pixterm v1.3.1 // indirect github.com/go-fed/httpsig v1.1.0 // indirect + github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kyokomi/emoji/v2 v2.2.12 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/rivo/uniseg v0.4.7 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect - golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect + github.com/vlanse/go-term-markdown v0.0.1-dev2 // indirect + golang.org/x/crypto v0.19.0 // indirect + golang.org/x/image v0.15.0 // indirect + golang.org/x/net v0.21.0 // indirect golang.org/x/sys v0.18.0 // indirect ) diff --git a/go.sum b/go.sum index ed3e77c..6bbbe05 100644 --- a/go.sum +++ b/go.sum @@ -1,20 +1,50 @@ +github.com/MichaelMure/go-term-text v0.3.1 h1:Kw9kZanyZWiCHOYu9v/8pWEgDQ6UVN9/ix2Vd2zzWf0= +github.com/MichaelMure/go-term-text v0.3.1/go.mod h1:QgVjAEDUnRMlzpS6ky5CGblux7ebeiLnuy9dAaFZu8o= +github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= +github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= +github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= +github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= +github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/eliukblau/pixterm v1.3.1 h1:XeouQViH+lmzCa7sMUoK2cd7qlgHYGLIjwRKaOdJbKA= +github.com/eliukblau/pixterm v1.3.1/go.mod h1:on5ueknFt+ZFVvIVVzQ7/JXwPjv5fJd8Q1Ybh7XixfU= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/go-fed/httpsig v1.1.0 h1:9M+hb0jkEICD8/cAiNqEB66R87tTINszBRTjwjQzWcI= github.com/go-fed/httpsig v1.1.0/go.mod h1:RCMrTZvN1bJYtofsG4rd5NaO5obxQ5xBkdiS7xsT7bM= +github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k= +github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/kyokomi/emoji/v2 v2.2.12 h1:sSVA5nH9ebR3Zji1o31wu3yOwD1zKXQA2z0zUyeit60= +github.com/kyokomi/emoji/v2 v2.2.12/go.mod h1:JUcn42DTdsXJo1SWanHh4HKDEyPaR5CqkmoirZZP9qE= +github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -22,10 +52,21 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/vlanse/go-term-markdown v0.0.1-dev2 h1:sisNMYZSc2zdetAo7/kK5DRqzwfShlbuMdXEPAYlviQ= +github.com/vlanse/go-term-markdown v0.0.1-dev2/go.mod h1:ujQ7UdQuyzdk827VWflQknUMr7qyQHPHIQA0wDgVWwc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.19.0 h1:ENy+Az/9Y1vSrlrvBSyna3PITt4tiZLf7sgCjZBX7Wo= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20191206065243-da761ea9ff43/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.15.0 h1:kOELfmgrmJlw4Cdb7g/QGuB3CvDrXbqEIww/pNtNBm8= +golang.org/x/image v0.15.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -34,4 +75,5 @@ golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=