From 67053267528f3f6942fdcb9e8796c24277b3ec7a Mon Sep 17 00:00:00 2001 From: tsmethurst Date: Fri, 2 Apr 2021 19:20:04 +0200 Subject: [PATCH] extract mentions from status --- internal/util/status.go | 73 ++++++++++++++++++++++++++++++++++++ internal/util/status_test.go | 59 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 internal/util/status.go create mode 100644 internal/util/status_test.go diff --git a/internal/util/status.go b/internal/util/status.go new file mode 100644 index 0000000..f528a42 --- /dev/null +++ b/internal/util/status.go @@ -0,0 +1,73 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package util + +import ( + "fmt" + "regexp" +) + +// To play around with these regexes, see: https://regex101.com/r/2km2EK/1 +// +// NOTE: hostnameRegexString ends with an unmatched ')' which closes the capture group that +// mentionRegexString opens in front of the first '@'; the pattern is only valid once the two +// are joined by fmt.Sprintf. The compiled mentionRegex requires a space or newline directly +// after the mention. +var ( + hostnameRegexString = `(?:(?:[a-zA-Z]{1})|(?:[a-zA-Z]{1}[a-zA-Z]{1})|(?:[a-zA-Z]{1}[0-9]{1})|(?:[0-9]{1}[a-zA-Z]{1})|(?:[a-zA-Z0-9][a-zA-Z0-9-_]{1,61}[a-zA-Z0-9]))\.(?:[a-zA-Z]{2,6}|[a-zA-Z0-9-]{2,30}\.[a-zA-Z]{2,5}))` + mentionRegexString = fmt.Sprintf(`(?: |^|\W)(@[a-zA-Z0-9_]+@%s(?: |\n)`, hostnameRegexString) + mentionRegex = regexp.MustCompile(mentionRegexString) +) + +// DeriveMentions takes a plaintext (ie., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of accounts +// mentioned in that status. 
//
// It will look for fully-qualified account names in the form "@user@example.org".
// Mentions that are just in the form "@username" will not be detected.
//
// A mention counts when it starts the status or follows a non-word character,
// and is followed by a space, a newline, or the end of the status. Mentions are
// returned in order of first appearance, and the returned slice is never nil.
func DeriveMentions(status string) []string {
	mentions := []string{}

	// Go's regexp has no lookahead, so the character that ends one mention is
	// consumed by the match and cannot also introduce the next one. Resuming
	// the search at the end of the capture group (rather than at the end of
	// the whole match) lets "@a@x.org @b@y.org" yield both mentions.
	//
	// After the first hit, rest always begins with the boundary character that
	// followed the previous mention (or is empty), so the "^" alternative can
	// never match in the middle of the status.
	rest := status
	for {
		loc := mentionScanRegex.FindStringSubmatchIndex(rest)
		if loc == nil {
			break
		}
		// loc[2]:loc[3] is capture group 1, the mention without its boundaries.
		mentions = append(mentions, rest[loc[2]:loc[3]])
		rest = rest[loc[3]:]
	}

	return Unique(mentions)
}

// mentionScanRegex matches a single fully-qualified mention together with the
// boundary on either side of it; capture group 1 is the mention alone.
//
// NOTE(review): this accepts the same hostnames as mentionRegex but also allows
// the mention to end the text. DeriveMentions no longer reads mentionRegex, so
// that variable can be dropped if nothing else uses it.
var mentionScanRegex = regexp.MustCompile(
	`(?: |^|\W)` + // start of text, or any non-word character
		`(@[a-zA-Z0-9_]+@` + // opens group 1: "@user@"
		`(?:(?:[a-zA-Z]{1})|(?:[a-zA-Z]{1}[a-zA-Z]{1})|(?:[a-zA-Z]{1}[0-9]{1})|(?:[0-9]{1}[a-zA-Z]{1})|(?:[a-zA-Z0-9][a-zA-Z0-9-_]{1,61}[a-zA-Z0-9]))` + // first host label
		`\.(?:[a-zA-Z]{2,6}|[a-zA-Z0-9-]{2,30}\.[a-zA-Z]{2,5}))` + // remaining labels; closes group 1
		`(?: |\n|$)`, // space, newline, or end of text
)

// Unique returns a deduplicated version of a given string slice.
//
// The first occurrence of each value is kept and the original order is
// preserved. The input slice is not modified, and the result is never nil.
func Unique(s []string) []string {
	seen := make(map[string]struct{}, len(s))
	list := make([]string, 0, len(s))
	for _, entry := range s {
		if _, dup := seen[entry]; dup {
			continue
		}
		seen[entry] = struct{}{}
		list = append(list, entry)
	}
	return list
}

// HTMLFormat takes a plaintext formatted status string, and converts it into
// a nice HTML-formatted string.
//
// This includes:
//
// - Replacing line-breaks with HTML line-break tags

+// +// - Replacing URLs with hrefs. +// +// - Replacing mentions with links to that account's URL as stored in the database. +// +// NOTE: none of the above is implemented yet; the function currently returns status unchanged. +func HTMLFormat(status string) string { + // TODO: write proper HTML formatting logic for a status + return status +} diff --git a/internal/util/status_test.go b/internal/util/status_test.go new file mode 100644 index 0000000..3e670fa --- /dev/null +++ b/internal/util/status_test.go @@ -0,0 +1,59 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +// StatusTestSuite is the testify suite that holds the DeriveMentions tests below. +type StatusTestSuite struct { + suite.Suite +} + +// TestDeriveMentionsOK checks that DeriveMentions returns the three distinct fully-qualified +// mentions of the sample status, in order of first appearance. +func (suite *StatusTestSuite) TestDeriveMentionsOK() { + statusText := `@dumpsterqueer@example.org testing testing + + is this thing on? + + @someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt + + @thiswontwork though! @NORWILL@THIS.one!! 
+ + here is a duplicate mention: @hello@test.lgbt + ` + + menchies := DeriveMentions(statusText) + assert.Len(suite.T(), menchies, 3) + assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) + assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) + assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) +} + +func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { + statusText := `` + menchies := DeriveMentions(statusText) + assert.Len(suite.T(), menchies, 0) +} + +func TestStatusTestSuite(t *testing.T) { + suite.Run(t, new(StatusTestSuite)) +}