more work on parsing statuses

This commit is contained in:
tsmethurst 2021-05-12 17:31:48 +02:00
parent 77736b926b
commit 884d0ecc8f
9 changed files with 251 additions and 32 deletions

View File

@ -30,10 +30,22 @@ type Mention struct {
CreatedAt time.Time `pg:"type:timestamp,notnull,default:now()"`
// When was this mention last updated?
UpdatedAt time.Time `pg:"type:timestamp,notnull,default:now()"`
// Who created this mention?
// What's the internal account ID of the originator of the mention?
OriginAccountID string `pg:",notnull"`
// Who does this mention target?
// What's the AP URI of the originator of the mention?
OriginAccountURI string `pg:",notnull"`
// What's the internal account ID of the mention target?
TargetAccountID string `pg:",notnull"`
// Prevent this mention from generating a notification?
Silent bool
// NameString is for putting in the namestring of the mentioned user
// before the mention is dereferenced. Should be in a form along the lines of:
// @whatever_username@example.org
//
// This will not be put in the database, it's just for convenience.
NameString string `pg:"-"`
// Href is the web URL (not AP uri!) of the user mentioned.
//
// This will not be put in the database, it's just for convenience.
Href string `pg:"-"`
}

View File

@ -179,7 +179,7 @@ func (p *processor) processLanguage(form *apimodel.AdvancedStatusCreateForm, acc
func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
menchies := []string{}
gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentions(form.Status), accountID, status.ID)
gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentionsFromStatus(form.Status), accountID, status.ID)
if err != nil {
return fmt.Errorf("error generating mentions from status: %s", err)
}
@ -198,7 +198,7 @@ func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, acc
func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
tags := []string{}
gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtags(form.Status), accountID, status.ID)
gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtagsFromStatus(form.Status), accountID, status.ID)
if err != nil {
return fmt.Errorf("error generating hashtags from status: %s", err)
}
@ -217,7 +217,7 @@ func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, account
func (p *processor) processEmojis(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
emojis := []string{}
gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojis(form.Status), accountID, status.ID)
gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojisFromStatus(form.Status), accountID, status.ID)
if err != nil {
return fmt.Errorf("error generating emojis from status: %s", err)
}

View File

@ -30,6 +30,7 @@ import (
"github.com/go-fed/activity/pub"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
func extractPreferredUsername(i withPreferredUsername) (string, error) {
@ -184,12 +185,12 @@ func extractImageURL(i withImage) (*url.URL, error) {
// here in order to find the first one that meets these criteria:
// 1. is an image
// 2. has a URL so we can grab it
for imageIter := imageProp.Begin(); imageIter != imageProp.End(); imageIter = imageIter.Next() {
for iter := imageProp.Begin(); iter != imageProp.End(); iter = iter.Next() {
// 1. is an image
if !imageIter.IsActivityStreamsImage() {
if !iter.IsActivityStreamsImage() {
continue
}
imageValue := imageIter.GetActivityStreamsImage()
imageValue := iter.GetActivityStreamsImage()
if imageValue == nil {
continue
}
@ -210,9 +211,9 @@ func extractSummary(i withSummary) (string, error) {
return "", errors.New("summary property was nil")
}
for summaryIter := summaryProp.Begin(); summaryIter != summaryProp.End(); summaryIter = summaryIter.Next() {
if summaryIter.IsXMLSchemaString() && summaryIter.GetXMLSchemaString() != "" {
return summaryIter.GetXMLSchemaString(), nil
for iter := summaryProp.Begin(); iter != summaryProp.End(); iter = iter.Next() {
if iter.IsXMLSchemaString() && iter.GetXMLSchemaString() != "" {
return iter.GetXMLSchemaString(), nil
}
}
@ -232,9 +233,9 @@ func extractURL(i withURL) (*url.URL, error) {
return nil, errors.New("url property was nil")
}
for urlIter := urlProp.Begin(); urlIter != urlProp.End(); urlIter = urlIter.Next() {
if urlIter.IsIRI() && urlIter.GetIRI() != nil {
return urlIter.GetIRI(), nil
for iter := urlProp.Begin(); iter != urlProp.End(); iter = iter.Next() {
if iter.IsIRI() && iter.GetIRI() != nil {
return iter.GetIRI(), nil
}
}
@ -247,8 +248,8 @@ func extractPublicKeyForOwner(i withPublicKey, forOwner *url.URL) (*rsa.PublicKe
return nil, nil, errors.New("public key property was nil")
}
for publicKeyIter := publicKeyProp.Begin(); publicKeyIter != publicKeyProp.End(); publicKeyIter = publicKeyIter.Next() {
pkey := publicKeyIter.Get()
for iter := publicKeyProp.Begin(); iter != publicKeyProp.End(); iter = iter.Next() {
pkey := iter.Get()
if pkey == nil {
continue
}
@ -449,7 +450,79 @@ func extractEmoji(i Emojiable) (*gtsmodel.Emoji, error) {
if idProp == nil || !idProp.IsIRI() {
return nil, errors.New("no id for emoji")
}
emoji.URI = idProp.GetIRI().String()
uri := idProp.GetIRI()
emoji.URI = uri.String()
emoji.Domain = uri.Host
name, err := extractName(i)
if err != nil {
return nil, err
}
emoji.Shortcode = strings.Trim(name, ":")
if i.GetActivityStreamsIcon() == nil {
return nil, errors.New("no icon for emoji")
}
imageURL, err := extractIconURL(i)
if err != nil {
return nil, errors.New("no url for emoji image")
}
emoji.ImageRemoteURL = imageURL.String()
return emoji, nil
}
// extractMentions walks the tag property of i and converts every tag of
// type 'Mention' into a gtsmodel.Mention via extractMention.
//
// Tags that have no type, are not mentions, do not satisfy Mentionable, or
// fail to parse are skipped instead of being reported, so the returned error
// is always nil. The returned slice is never nil; it is empty when i has no
// tag property or carries no usable mentions.
func extractMentions(i withTag) ([]*gtsmodel.Mention, error) {
	mentions := []*gtsmodel.Mention{}

	tagsProp := i.GetActivityStreamsTag()
	if tagsProp == nil {
		// no tag property set: nothing to iterate, and Begin must not be called on nil
		return mentions, nil
	}

	for iter := tagsProp.Begin(); iter != tagsProp.End(); iter = iter.Next() {
		t := iter.GetType()
		if t == nil {
			continue
		}

		if t.GetTypeName() != "Mention" {
			continue
		}

		mentionable, ok := t.(Mentionable)
		if !ok {
			continue
		}

		mention, err := extractMention(mentionable)
		if err != nil {
			// best effort: one malformed mention should not discard the others
			continue
		}

		mentions = append(mentions, mention)
	}

	return mentions, nil
}
// extractMention builds a gtsmodel.Mention from a single 'Mention' tag.
//
// The tag's name must be a mention string with both a username and a domain
// part (as split by util.ExtractMentionParts), and the tag must carry an IRI
// href. On success the returned mention has NameString set to the tag's name
// and Href set to the string form of the href IRI; no other fields are
// populated here. An error is returned if the name is missing or invalid,
// if either name part is empty, or if the href is absent or not an IRI.
func extractMention(i Mentionable) (*gtsmodel.Mention, error) {
	mention := &gtsmodel.Mention{}

	mentionString, err := extractName(i)
	if err != nil {
		return nil, err
	}

	// just make sure the mention string is valid so we can handle it properly later on...
	username, domain, err := util.ExtractMentionParts(mentionString)
	if err != nil {
		return nil, err
	}
	if username == "" || domain == "" {
		return nil, errors.New("username or domain was empty")
	}
	// keep the validated name string so the mention can be resolved later
	mention.NameString = mentionString

	// the href prop should be the URL of a user we know, eg https://example.org/@whatever_user
	hrefProp := i.GetActivityStreamsHref()
	if hrefProp == nil || !hrefProp.IsIRI() {
		return nil, errors.New("no href prop")
	}
	mention.Href = hrefProp.GetIRI().String()

	return mention, nil
}

View File

@ -73,13 +73,14 @@ type Attachmentable interface {
withFocalPoint
}
// Hashtaggable represents the minimum activitypub interface for representing a 'hashtag'.
// Hashtaggable represents the minimum activitypub interface for representing a 'hashtag' tag.
//
// It is the combination of the type name, href and name accessors.
type Hashtaggable interface {
withTypeName
withHref
withName
}
// Emojiable represents the minimum interface for an 'emoji' tag.
type Emojiable interface {
withJSONLDId
withTypeName
@ -88,6 +89,12 @@ type Emojiable interface {
withIcon
}
// Mentionable represents the minimum interface for a 'mention' tag.
//
// It combines the name accessor (the mention string is read from the name)
// and the href accessor (read via GetActivityStreamsHref).
type Mentionable interface {
withName
withHref
}
// withJSONLDId is satisfied by any type that exposes a JSON-LD id property.
type withJSONLDId interface {
// GetJSONLDId returns the JSON-LD id property of the implementing type.
GetJSONLDId() vocab.JSONLDIdProperty
}

View File

@ -185,9 +185,15 @@ func (c *converter) ASStatusToStatus(statusable Statusable) (*gtsmodel.Status, e
status.GTSTags = hashtags
}
// emojis, err := extractEmojis(statusable)
emojis, err := extractEmojis(statusable)
if err == nil {
status.GTSEmojis = emojis
}
// mentions, err := extractMentions(statusable)
mentions, err := extractMentions(statusable)
if err == nil {
status.GTSMentions = mentions
}
cw, err := extractSummary(statusable)
if err == nil && cw != "" {

View File

@ -37,7 +37,74 @@ type ASToInternalTestSuite struct {
}
const (
statusAsActivityJson = `{
statusWithMentionsActivityJson = `{
"@context": [
"https://www.w3.org/ns/activitystreams",
{
"ostatus": "http://ostatus.org#",
"atomUri": "ostatus:atomUri",
"inReplyToAtomUri": "ostatus:inReplyToAtomUri",
"conversation": "ostatus:conversation",
"sensitive": "as:sensitive",
"toot": "http://joinmastodon.org/ns#",
"votersCount": "toot:votersCount"
}
],
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/activity",
"type": "Create",
"actor": "https://ondergrond.org/users/dumpsterqueer",
"published": "2021-05-12T09:58:38Z",
"to": [
"https://ondergrond.org/users/dumpsterqueer/followers"
],
"cc": [
"https://www.w3.org/ns/activitystreams#Public",
"https://social.pixie.town/users/f0x"
],
"object": {
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552",
"type": "Note",
"summary": null,
"inReplyTo": "https://social.pixie.town/users/f0x/statuses/106221628567855262",
"published": "2021-05-12T09:58:38Z",
"url": "https://ondergrond.org/@dumpsterqueer/106221634728637552",
"attributedTo": "https://ondergrond.org/users/dumpsterqueer",
"to": [
"https://ondergrond.org/users/dumpsterqueer/followers"
],
"cc": [
"https://www.w3.org/ns/activitystreams#Public",
"https://social.pixie.town/users/f0x"
],
"sensitive": false,
"atomUri": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552",
"inReplyToAtomUri": "https://social.pixie.town/users/f0x/statuses/106221628567855262",
"conversation": "tag:ondergrond.org,2021-05-12:objectId=1132361:objectType=Conversation",
"content": "<p><span class=\"h-card\"><a href=\"https://social.pixie.town/@f0x\" class=\"u-url mention\">@<span>f0x</span></a></span> nice there it is:</p><p><a href=\"https://social.pixie.town/users/f0x/statuses/106221628567855262/activity\" rel=\"nofollow noopener noreferrer\" target=\"_blank\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">social.pixie.town/users/f0x/st</span><span class=\"invisible\">atuses/106221628567855262/activity</span></a></p>",
"contentMap": {
"en": "<p><span class=\"h-card\"><a href=\"https://social.pixie.town/@f0x\" class=\"u-url mention\">@<span>f0x</span></a></span> nice there it is:</p><p><a href=\"https://social.pixie.town/users/f0x/statuses/106221628567855262/activity\" rel=\"nofollow noopener noreferrer\" target=\"_blank\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">social.pixie.town/users/f0x/st</span><span class=\"invisible\">atuses/106221628567855262/activity</span></a></p>"
},
"attachment": [],
"tag": [
{
"type": "Mention",
"href": "https://social.pixie.town/users/f0x",
"name": "@f0x@pixie.town"
}
],
"replies": {
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies",
"type": "Collection",
"first": {
"type": "CollectionPage",
"next": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies?only_other_accounts=true&page=true",
"partOf": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies",
"items": []
}
}
}
}`
statusWithEmojisAndTagsAsActivityJson = `{
"@context": [
"https://www.w3.org/ns/activitystreams",
{
@ -309,7 +376,34 @@ func (suite *ASToInternalTestSuite) TestParseGargron() {
// TestParseStatus unmarshals the statusWithEmojisAndTagsAsActivityJson
// fixture, resolves it to a Create activity, converts the first wrapped
// object into an internal status, and checks that three emojis were
// extracted from it.
func (suite *ASToInternalTestSuite) TestParseStatus() {
	m := make(map[string]interface{})
	err := json.Unmarshal([]byte(statusWithEmojisAndTagsAsActivityJson), &m)
	assert.NoError(suite.T(), err)

	t, err := streams.ToType(context.Background(), m)
	assert.NoError(suite.T(), err)

	create, ok := t.(vocab.ActivityStreamsCreate)
	assert.True(suite.T(), ok)

	obj := create.GetActivityStreamsObject()
	assert.NotNil(suite.T(), obj)

	// the status under test is the first (only) object of the Create
	first := obj.Begin()
	assert.NotNil(suite.T(), first)

	rep, ok := first.GetType().(typeutils.Statusable)
	assert.True(suite.T(), ok)

	status, err := suite.typeconverter.ASStatusToStatus(rep)
	assert.NoError(suite.T(), err)

	assert.Len(suite.T(), status.GTSEmojis, 3)
	// assert.Len(suite.T(), status.GTSTags, 2) TODO: implement this first so that it can pick up tags
}
func (suite *ASToInternalTestSuite) TestParseStatusWithMention() {
m := make(map[string]interface{})
err := json.Unmarshal([]byte(statusWithMentionsActivityJson), &m)
assert.NoError(suite.T(), err)
t, err := streams.ToType(context.Background(), m)
@ -331,6 +425,9 @@ func (suite *ASToInternalTestSuite) TestParseStatus() {
assert.NoError(suite.T(), err)
fmt.Printf("%+v", status)
assert.Len(suite.T(), status.GTSMentions, 1)
fmt.Println(status.GTSMentions[0])
}
func (suite *ASToInternalTestSuite) TearDownTest() {

View File

@ -35,6 +35,9 @@ const (
)
var (
mentionNameRegexString = `@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)`
mentionNameRegex = regexp.MustCompile(fmt.Sprintf(`^%s$`, mentionNameRegexString))
// mention regex can be played around with here: https://regex101.com/r/qwM9D3/1
mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)`
mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString)

View File

@ -19,17 +19,18 @@
package util
import (
"fmt"
"strings"
)
// DeriveMentions takes a plaintext (ie., not html-formatted) status,
// DeriveMentionsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of accounts
// mentioned in that status.
//
// It will look for fully-qualified account names in the form "@user@example.org",
// or the form "@username" for local users.
// The case of the returned mentions will be lowered, for consistency.
func DeriveMentions(status string) []string {
func DeriveMentionsFromStatus(status string) []string {
mentionedAccounts := []string{}
for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) {
mentionedAccounts = append(mentionedAccounts, m[1])
@ -37,11 +38,11 @@ func DeriveMentions(status string) []string {
return lower(unique(mentionedAccounts))
}
// DeriveHashtags takes a plaintext (ie., not html-formatted) status,
// DeriveHashtagsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of hashtags
// used in that status, without the leading #. The case of the returned
// tags will be lowered, for consistency.
func DeriveHashtags(status string) []string {
func DeriveHashtagsFromStatus(status string) []string {
tags := []string{}
for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) {
tags = append(tags, m[1])
@ -49,11 +50,11 @@ func DeriveHashtags(status string) []string {
return lower(unique(tags))
}
// DeriveEmojis takes a plaintext (ie., not html-formatted) status,
// DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of emojis
// used in that status, without the surrounding ::. The case of the returned
// emojis will be lowered, for consistency.
func DeriveEmojis(status string) []string {
func DeriveEmojisFromStatus(status string) []string {
emojis := []string{}
for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) {
emojis = append(emojis, m[1])
@ -61,6 +62,26 @@ func DeriveEmojis(status string) []string {
return lower(unique(emojis))
}
// ExtractMentionParts splits a mention string like @test_user@example.org
// into its username (test_user) and domain (example.org). Neither returned
// part includes the '@' separators.
//
// The whole string must match mentionNameRegex. When the pattern matches
// without a domain part, domain is returned as the empty string.
//
// If nothing is matched, it will return an error and both parts are empty.
func ExtractMentionParts(mention string) (username, domain string, err error) {
	matches := mentionNameRegex.FindStringSubmatch(mention)
	// a successful match holds the full match plus one entry per capture
	// group (username, domain), so anything other than 3 entries is a miss
	if len(matches) != 3 {
		return "", "", fmt.Errorf("couldn't match mention %s", mention)
	}
	return matches[1], matches[2], nil
}
// unique returns a deduplicated version of a given string slice.
func unique(s []string) []string {
keys := make(map[string]bool)

View File

@ -42,7 +42,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
here is a duplicate mention: @hello@test.lgbt
`
menchies := util.DeriveMentions(statusText)
menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 4)
assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
@ -52,7 +52,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
statusText := ``
menchies := util.DeriveMentions(statusText)
menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 0)
}
@ -67,7 +67,7 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
#111111 thisalsoshouldn'twork#### ##`
tags := util.DeriveHashtags(statusText)
tags := util.DeriveHashtagsFromStatus(statusText)
assert.Len(suite.T(), tags, 5)
assert.Equal(suite.T(), "testing123", tags[0])
assert.Equal(suite.T(), "also", tags[1])
@ -90,7 +90,7 @@ Here's some normal text with an :emoji: at the end
:underscores_ok_too:
`
tags := util.DeriveEmojis(statusText)
tags := util.DeriveEmojisFromStatus(statusText)
assert.Len(suite.T(), tags, 7)
assert.Equal(suite.T(), "test", tags[0])
assert.Equal(suite.T(), "another", tags[1])