-
-
Notifications
You must be signed in to change notification settings - Fork 93
Expand file tree
/
Copy pathSimilarBlogPostJob.cs
More file actions
75 lines (62 loc) · 2.89 KB
/
Copy pathSimilarBlogPostJob.cs
File metadata and controls
75 lines (62 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using NCronJob;
using LinkDotNet.Blog.Domain;
using LinkDotNet.Blog.Infrastructure.Persistence;
using LinkDotNet.Blog.Web.Features.Services.Similiarity;
namespace LinkDotNet.Blog.Web.Features;
/// <summary>
/// Job that recomputes, for every published blog post, the ids of the other published
/// blog posts that are most similar to it, and replaces the stored
/// <see cref="SimilarBlogPost"/> entries with the freshly computed ones.
/// </summary>
/// <remarks>
/// Similarity is the cosine similarity between TF-IDF vectors built from each post's
/// title, short description and tags.
/// </remarks>
public class SimilarBlogPostJob : IJob
{
    /// <summary>Maximum number of similar blog post ids stored per blog post.</summary>
    private const int MaxSimilarBlogPosts = 3;

    private readonly IRepository<BlogPost> blogPostRepository;
    private readonly IRepository<SimilarBlogPost> similarBlogPostRepository;

    /// <summary>
    /// Initializes a new instance of the <see cref="SimilarBlogPostJob"/> class.
    /// </summary>
    /// <param name="blogPostRepository">Repository the published blog posts are read from.</param>
    /// <param name="similarBlogPostRepository">Repository the similarity entries are deleted from and stored to.</param>
    public SimilarBlogPostJob(
        IRepository<BlogPost> blogPostRepository,
        IRepository<SimilarBlogPost> similarBlogPostRepository)
    {
        this.blogPostRepository = blogPostRepository;
        this.similarBlogPostRepository = similarBlogPostRepository;
    }

    /// <summary>
    /// Rebuilds all <see cref="SimilarBlogPost"/> entries from the currently published blog posts.
    /// </summary>
    /// <param name="context">Execution context; its <c>Parameter</c> and <c>ParentOutput</c> decide whether the job does any work.</param>
    /// <param name="token">Cancellation token supplied by the scheduler. It is not forwarded to the repository calls used here.</param>
    /// <returns>A task that completes once the similarity entries have been replaced, or immediately when there is nothing to do.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
    public async Task RunAsync(IJobExecutionContext context, CancellationToken token)
    {
        ArgumentNullException.ThrowIfNull(context);

        // A non-null parameter marks an explicitly (instantly) triggered run, which always executes.
        var isInstantJobTriggered = context.Parameter is not null;

        // NOTE(review): ParentOutput is presumably the number of posts published by the parent job - confirm.
        var noJobPublished = context.ParentOutput is null or 0;
        if (noJobPublished && !isInstantJobTriggered)
        {
            return;
        }

        var blogPosts = await blogPostRepository.GetAllByProjectionAsync(
            bp => new BlogPostSimilarity(bp.Id, bp.Title, bp.Tags, bp.ShortDescription),
            f => f.IsPublished);

        // documents[i] holds the tokens of the i-th blog post (same enumeration order as blogPosts).
        var documents = blogPosts
            .Select(bp => TextProcessor.TokenizeAndNormalize([bp.Title, bp.ShortDescription, ..bp.Tags]))
            .ToList();

        // Build the vectorizer and every document vector exactly once. Previously both were
        // recreated for each blog post, i.e. N vectorizers and N * N vector computations.
        // Assumes TokenizeAndNormalize and TfIdfVectorizer are deterministic and side-effect free.
        var vectorizer = new TfIdfVectorizer(documents);
        var vectors = documents
            .Select(document => vectorizer.ComputeTfIdfVector(document))
            .ToList();

        // Ranks all other posts against the post at the given index and keeps the best matches.
        SimilarBlogPost CreateSimilarBlogPost(BlogPostSimilarity blogPost, int index)
        {
            var targetVector = vectors[index];

            var similarBlogPostIds = blogPosts
                .Select((candidate, candidateIndex) => new
                {
                    candidate.Id,
                    Similarity = SimilarityCalculator.CosineSimilarity(targetVector, vectors[candidateIndex]),
                })
                .Where(candidate => candidate.Id != blogPost.Id) // never recommend the post itself
                .OrderByDescending(candidate => candidate.Similarity)
                .Take(MaxSimilarBlogPosts)
                .Select(candidate => candidate.Id)
                .ToArray();

            return new SimilarBlogPost { Id = blogPost.Id, SimilarBlogPostIds = similarBlogPostIds };
        }

        var similarities = blogPosts
            .Select((blogPost, index) => CreateSimilarBlogPost(blogPost, index))
            .ToArray();

        // Replace the complete set of stored entries: remove everything, then store the new results.
        var ids = await similarBlogPostRepository.GetAllByProjectionAsync(s => s.Id);
        await similarBlogPostRepository.DeleteBulkAsync(ids);
        await similarBlogPostRepository.StoreBulkAsync(similarities);
    }

    /// <summary>
    /// Projection of a blog post reduced to the fields that feed the similarity computation.
    /// </summary>
    private sealed record BlogPostSimilarity(string Id, string Title, IList<string> Tags, string ShortDescription);
}