using AngleSharp;
using AngleSharp.Dom;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using System.Linq;
using System.Runtime.Serialization;
using System.IO;
using Tools;
using System.Text.RegularExpressions;
using Tools.Crypto;

namespace napdump
{
    /// <summary>
    /// Immutable settings handed to a <see cref="Dumper"/> at construction time.
    /// </summary>
    public readonly struct DumperConfig
    {
        /// <summary>Size of the concurrency pool created when no context is shared.</summary>
        public readonly int MaxThreads;

        /// <summary>When non-null, the new dumper reuses this dumper's pool instead of creating its own.</summary>
        public readonly Dumper ShareContextWith;

        /// <summary>Cookie (url, value) pairs. Not consumed by the code in this file.</summary>
        public readonly (string Url, string Value)[] Cookies;

        /// <summary>Optional AES key. Not consumed by the code in this file.</summary>
        public readonly AESKey? EncryptDeleted;

        /// <summary>
        /// Creates a configuration value.
        /// </summary>
        /// <param name="maxThreads">Pool size used when <paramref name="ShareContext"/> is null.</param>
        /// <param name="ShareContext">Dumper whose pool should be shared, or null.</param>
        /// <param name="Cookies">Cookie (url, value) pairs, or null.</param>
        /// <param name="EncryptDeleted">Optional AES key, or null.</param>
        public DumperConfig(int maxThreads, Dumper ShareContext = null, (string, string)[] Cookies = null, AESKey? EncryptDeleted = null)
        {
            MaxThreads = maxThreads;
            ShareContextWith = ShareContext;
            this.Cookies = Cookies;
            this.EncryptDeleted = EncryptDeleted;
        }

        /// <summary>
        /// Memberwise equality: same thread count, same shared dumper instance,
        /// element-wise equal cookies and equal (or both absent) encryption key.
        /// </summary>
        public bool Equals(in DumperConfig other)
        {
            return other.MaxThreads == this.MaxThreads
                && ReferenceEquals(this.ShareContextWith, other.ShareContextWith)
                && CookiesEqual(Cookies, other.Cookies)
                && KeysEqual(EncryptDeleted, other.EncryptDeleted);
        }

        /// <inheritdoc/>
        public override bool Equals(object obj)
        {
            return obj is DumperConfig conf && this.Equals(conf);
        }

        /// <inheritdoc/>
        public override int GetHashCode()
        {
            // Fold by hand: a seedless Aggregate throws on an empty cookie array.
            int cookieHash = 0;
            if (Cookies != null)
            {
                foreach (var cookie in Cookies)
                {
                    cookieHash ^= cookie.GetHashCode();
                }
            }

            return MaxThreads.GetHashCode()
                ^ (ShareContextWith?.GetHashCode() ?? 0)
                ^ cookieHash
                ^ (EncryptDeleted?.GetHashCode() ?? 0);
        }

        public static bool operator ==(DumperConfig left, DumperConfig right)
        {
            return left.Equals(right);
        }

        public static bool operator !=(DumperConfig left, DumperConfig right)
        {
            return !(left == right);
        }

        // Null-safe sequence comparison; SequenceEqual itself throws on a null argument.
        private static bool CookiesEqual((string Url, string Value)[] a, (string Url, string Value)[] b)
        {
            if (ReferenceEquals(a, b))
            {
                return true;
            }
            return a != null && b != null && a.SequenceEqual(b);
        }

        // Two absent keys are equal; an absent key never equals a present one.
        private static bool KeysEqual(AESKey? a, AESKey? b)
        {
            if (a.HasValue != b.HasValue)
            {
                return false;
            }
            return !a.HasValue || a.Value.Equals(b.Value);
        }
    }

    /// <summary>
    /// A <see cref="SemaphoreSlim"/> wrapper whose acquire methods return an
    /// <see cref="IDisposable"/> that releases the slot when disposed.
    /// </summary>
    public sealed class AsyncMutex : IDisposable
    {
        private readonly SemaphoreSlim sem;

        /// <summary>Creates a mutex (a semaphore with a single slot).</summary>
        public AsyncMutex()
        {
            sem = new SemaphoreSlim(1, 1);
        }

        private AsyncMutex(SemaphoreSlim from)
        {
            sem = from;
        }

        /// <summary>Handle for one acquired slot; disposing it releases the slot once.</summary>
        private class Lock : IDisposable
        {
            private int released;

            public AsyncMutex Parent { get; }

            public Lock(AsyncMutex held)
            {
                Parent = held;
            }

            public void Dispose()
            {
                // Guard against double dispose, which would over-release the semaphore.
                if (Interlocked.Exchange(ref released, 1) == 0)
                {
                    Parent.sem.Release();
                }
            }
        }

        /// <summary>Blocks until a slot is acquired or the timeout elapses.</summary>
        /// <param name="msTimeout">Timeout in milliseconds, passed to <see cref="SemaphoreSlim.Wait(int, CancellationToken)"/>.</param>
        /// <param name="token">Token that cancels the wait.</param>
        /// <returns>A handle that releases the slot when disposed.</returns>
        /// <exception cref="TimeoutException">The slot was not acquired within the timeout.</exception>
        public IDisposable Aquire(int msTimeout, CancellationToken token = default)
        {
            // Wait returns false on timeout; handing out a lock then would release a slot never taken.
            if (!sem.Wait(msTimeout, token))
            {
                throw new TimeoutException($"Could not acquire the lock within {msTimeout} ms.");
            }
            return new Lock(this);
        }

        /// <summary>Blocks until a slot is acquired or <paramref name="token"/> is cancelled.</summary>
        public IDisposable Aquire(CancellationToken token)
        {
            sem.Wait(token);
            return new Lock(this);
        }

        /// <summary>Blocks until a slot is acquired.</summary>
        public IDisposable Aquire()
        {
            sem.Wait();
            return new Lock(this);
        }

        /// <summary>Asynchronously waits until a slot is acquired or the timeout elapses.</summary>
        /// <param name="msTimeout">Timeout in milliseconds, passed to <see cref="SemaphoreSlim.WaitAsync(int, CancellationToken)"/>.</param>
        /// <param name="token">Token that cancels the wait.</param>
        /// <returns>A handle that releases the slot when disposed.</returns>
        /// <exception cref="TimeoutException">The slot was not acquired within the timeout.</exception>
        public async ValueTask<IDisposable> AquireAsync(int msTimeout, CancellationToken token = default)
        {
            if (!await sem.WaitAsync(msTimeout, token))
            {
                throw new TimeoutException($"Could not acquire the lock within {msTimeout} ms.");
            }
            return new Lock(this);
        }

        /// <summary>Asynchronously waits until a slot is acquired or <paramref name="token"/> is cancelled.</summary>
        public async ValueTask<IDisposable> AquireAsync(CancellationToken token = default)
        {
            await sem.WaitAsync(token);
            return new Lock(this);
        }

        /// <summary>Disposes the underlying semaphore.</summary>
        public void Dispose()
        {
            sem.Dispose();
        }

        /// <summary>Creates an instance backed by a semaphore with the given initial and maximum counts.</summary>
        public static AsyncMutex Semaphore(int count, int max)
        {
            SemaphoreSlim sem = new SemaphoreSlim(count, max);
            return new AsyncMutex(sem);
        }

        /// <summary>Creates an instance backed by a semaphore whose initial and maximum counts are both <paramref name="count"/>.</summary>
        public static AsyncMutex Semaphore(int count) => Semaphore(count, count);
    }

    /// <summary>
    /// Base class for board dumpers. Subclasses supply <see cref="GetThreads"/> and
    /// <see cref="GetPosts"/>; <see cref="Parse"/> drives them, limiting concurrent
    /// thread downloads through <see cref="Pool"/>.
    /// </summary>
    public abstract class Dumper : IDisposable
    {
        /// <summary>The configuration this dumper was created with.</summary>
        public DumperConfig Config { get; }

        /// <summary>Concurrency limiter; either owned or borrowed from <see cref="DumperConfig.ShareContextWith"/>.</summary>
        protected readonly AsyncMutex Pool;

        /// <summary>Cancelled by <see cref="CancelAllOperations"/> and on dispose; linked into every operation.</summary>
        protected readonly CancellationTokenSource globalCancel = new CancellationTokenSource();

        protected Dumper(DumperConfig config)
        {
            Config = config;
            Pool = config.ShareContextWith?.Pool ?? AsyncMutex.Semaphore(config.MaxThreads);
        }

        /// <summary>Cancels every operation started by this dumper.</summary>
        public void CancelAllOperations()
        {
            globalCancel.Cancel();
        }

        /// <summary>
        /// Enumerates the threads of a board, downloads the posts of each thread in the
        /// background, and yields each thread once its posts have been attached.
        /// Threads are yielded in completion order, not listing order.
        /// </summary>
        /// <param name="boardUrl">URL stored on the board info that is passed to <see cref="GetThreads"/>.</param>
        /// <param name="hooks">Optional per-call callbacks, invoked in addition to the instance events.</param>
        /// <param name="token">Cancels this enumeration; combined with the dumper-wide cancellation.</param>
        public async IAsyncEnumerable<ThreadInfo> Parse(string boardUrl, Hooks hooks = default, [EnumeratorCancellation] CancellationToken token = default)
        {
            using var cancel = CancellationTokenSource.CreateLinkedTokenSource(globalCancel.Token, token);
            // Capture once: background tasks must not read cancel.Token after the source is disposed.
            CancellationToken linked = cancel.Token;

            try
            {
                //var cataloguePage = await GetCataloguePage(boardUrl, cancel.Token);
                var boardInfo = NewBoardInfo();
                boardInfo.BoardURL = boardUrl;
                if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) gen PostInfo");

                Channel<Task> threadGetters = Channel.CreateUnbounded<Task>();
                Channel<ThreadInfo> completedThreads = Channel.CreateUnbounded<ThreadInfo>();

                // Collects every download task, waits for all of them, then closes the output channel.
                Task completer = Task.Run(async () =>
                {
                    Exception failure = null;
                    try
                    {
                        List<Task> getters = new List<Task>();
                        if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) gen getter");
                        int gi = 0;
                        await foreach (var getter in threadGetters.Reader.ReadAllAsync(linked))
                        {
                            getters.Add(getter);
                            if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) add getter {gi++}");
                        }
                        if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) add getters {getters.Count}");
                        await Task.WhenAll(getters);
                        if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) getters complete");
                    }
                    catch (Exception ex)
                    {
                        failure = ex;
                        throw;
                    }
                    finally
                    {
                        // Always close the output channel so the consumer below cannot wait forever.
                        completedThreads.Writer.TryComplete(failure);
                    }
                });
                if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) start completer");

                int ti = 0;
                await foreach (var thread in GetThreads(boardInfo).WithCancellation(linked))
                {
                    //Thread got.
                    thread.BoardInfo = boardInfo;
                    hooks.OnThreadRetrieved?.Invoke(thread);
                    ThreadRetrievedHook(thread);

                    await threadGetters.Writer.WriteAsync(Task.Run(async () =>
                    {
                        try
                        {
                            await InternalGetPosts(thread, hooks, linked);
                            if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) writing to complete");
                            await completedThreads.Writer.WriteAsync(thread, linked);
                            if (hooks.PrintDebug) Console.WriteLine($"({thread.BoardInfo.BoardURL}) written");
                        }
                        catch (Exception ex)
                        {
                            if (hooks.PrintDebug) Console.WriteLine($"Whoops {thread.PostNumber} failed: {ex.Message}\n{ex.StackTrace}");
                            hooks.OnThreadReadFailed?.Invoke(thread, ex);
                            ThreadReadFailedHook(thread, ex);
                        }
                    }), linked);
                    if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) thread write ({ti++}) {thread.PostNumber}");
                }
                threadGetters.Writer.Complete();
                if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) stop getter");

                await foreach (var completedThread in completedThreads.Reader.ReadAllAsync(linked))
                {
                    boardInfo.AddChildThread(completedThread);
                    yield return completedThread;
                }
                if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) completer complete");

                await completer;
                boardInfo.DumpTimestamp = DateTime.Now;
                hooks.OnBoardRetrieved?.Invoke(boardInfo);
                BoardRetrievedHook(boardInfo);
                if (hooks.PrintDebug) Console.WriteLine($"({boardUrl}) end");
            }
            finally
            {
                // Stops any still-running downloads when the caller abandons the enumeration
                // or an error escapes; a no-op once everything has completed.
                cancel.Cancel();
            }
        }

        /// <summary>Per-call callbacks accepted by <see cref="Parse"/>.</summary>
        public struct Hooks
        {
            public Action<BoardInfo> OnBoardRetrieved;
            public Action<ThreadInfo> OnThreadRetrieved;
            public Action<PostInfo> OnPostRetrieved;
            public Action<ThreadInfo, Exception> OnThreadReadFailed;

            /// <summary>When true, progress messages are written to the console.</summary>
#if DEBUG
            public
#else
            internal
#endif
            bool PrintDebug;
        }

        /// <summary>Raised after a board has been fully processed by <see cref="Parse"/>.</summary>
        public event Action<BoardInfo> OnBoardRetrieved;

        /// <summary>Raised for each thread as soon as it is listed, before its posts are fetched.</summary>
        public event Action<ThreadInfo> OnThreadRetrieved;

        /// <summary>Raised for each post as it is fetched.</summary>
        public event Action<PostInfo> OnPostRetrieved;

        /// <summary>Raised when fetching a thread's posts throws.</summary>
        public event Action<ThreadInfo, Exception> OnThreadReadFailed;

        protected virtual void PostRetrievedHook(PostInfo post) => OnPostRetrieved?.Invoke(post);

        protected virtual void ThreadRetrievedHook(ThreadInfo thread) => OnThreadRetrieved?.Invoke(thread);

        protected virtual void BoardRetrievedHook(BoardInfo board) => OnBoardRetrieved?.Invoke(board);

        protected virtual void ThreadReadFailedHook(ThreadInfo thread, Exception ex) => OnThreadReadFailed?.Invoke(thread, ex);

        // Fetches all posts of one thread while holding a pool slot, then attaches them to the thread.
        private async Task InternalGetPosts(ThreadInfo thread, Hooks hooks, CancellationToken token = default)
        {
            using var cancel = CancellationTokenSource.CreateLinkedTokenSource(globalCancel.Token, token);
            List<PostInfo> posts = new List<PostInfo>();
            if (hooks.PrintDebug) Console.WriteLine($"({thread.BoardInfo.BoardURL}) entering context");

            using (await Pool.AquireAsync(cancel.Token))
            {
                //var threadPage = await GetThreadPage(thread, token);
                if (hooks.PrintDebug) Console.WriteLine($" ctx_aqu ({thread.BoardInfo.BoardURL}) getting posts");
                await foreach (var post in GetPosts(thread).WithCancellation(cancel.Token))
                {
                    //Post got.
                    post.Parent = thread;
                    post.BoardInfo = thread.BoardInfo;
                    cancel.Token.ThrowIfCancellationRequested();
                    hooks.OnPostRetrieved?.Invoke(post);
                    PostRetrievedHook(post);
                    posts.Add(post);
                }
                if (hooks.PrintDebug) Console.WriteLine($" ctx_aqu ({thread.BoardInfo.BoardURL}) posts got");
            }

            if (hooks.PrintDebug) Console.WriteLine($"({thread.BoardInfo.BoardURL}) adding children");
            thread.AddChildPosts(posts);
        }

        /// <summary>
        /// Run a block on this Dumper's thread pool.
        /// </summary>
        /// <returns>A handle that gives the pool slot back when disposed.</returns>
        public async Task<IDisposable> EnterContextAsync(CancellationToken token = default)
        {
            return await Pool.AquireAsync(token);
        }

        /// <summary>
        /// Run a block on this Dumper's thread pool.
        /// </summary>
        /// <returns>A handle that gives the pool slot back when disposed.</returns>
        public IDisposable EnterContext(CancellationToken token = default)
        {
            return Pool.Aquire(token);
        }

        /// <summary>Creates the board info object that <see cref="Parse"/> fills in.</summary>
        protected virtual BoardInfo NewBoardInfo() => new BoardInfo();

        /// <summary>Yields the posts of <paramref name="thread"/>.</summary>
        protected abstract IAsyncEnumerable<PostInfo> GetPosts(ThreadInfo thread, [EnumeratorCancellation] CancellationToken token = default);

        /// <summary>Yields the threads of the board described by <paramref name="boardInfo"/>.</summary>
        protected abstract IAsyncEnumerable<ThreadInfo> GetThreads(BoardInfo boardInfo, [EnumeratorCancellation] CancellationToken token = default);

        #region IDisposable Support
        private bool disposedValue = false;

        protected virtual void Dispose(bool disposing)
        {
            if (!disposedValue)
            {
                if (disposing)
                {
                    if (!globalCancel.IsCancellationRequested) globalCancel.Cancel();
                    // A borrowed pool belongs to the dumper it was shared from.
                    if (Config.ShareContextWith == null) Pool.Dispose();
                    globalCancel.Dispose();
                }
                disposedValue = true;
            }
        }

        ~Dumper()
        {
            Dispose(false);
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }
        #endregion
    }
}