Skip to content

Commit

Permalink
No commit message
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhk0603 committed Dec 10, 2017
1 parent 8a1bd8f commit bf50840
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 30 deletions.
4 changes: 2 additions & 2 deletions Crawler.Simple/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ private static void Main(string[] args)
//SimpleCrawler.CnBlogsCrawler().Run();
//Console.ReadKey();

//SimpleCrawler.RedisCnblogsCrawler().Run();
//Console.ReadKey();
SimpleCrawler.RedisCnblogsCrawler().Run();
Console.ReadKey();

}
}
Expand Down
21 changes: 7 additions & 14 deletions Crawler.Simple/SimpleCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ public static Crawler CnieltsSpider()
DownloadDirectory = @"E:\学习资料\English\新概念第二册\"
})
.UseMultiThread(1)
.SetLogFactory(new NLoggerFactory())
.UseNamed("CnieltsSpider");
return CrawlerBuilder.Current.Builder();
}
Expand All @@ -72,11 +71,10 @@ public static Crawler CnieltsV2Spider()
.UsePipeline<Cnielts_V2.CnieltsPipeline2>(new PipelineOptions())
.UsePipeline<Cnielts_V2.CnieltsPipeline3>(new FileDownloadOptions()
{
DownloadDirectory = @"~/CnieltsV2Spider/",
DownloadDirectory = @"~/CnieltsV2Spider/2017-12-10/",
Downloader = new HttpDownloader()
})
.UseMultiThread(3)
.SetLogFactory(new NLoggerFactory())
.UseParallelMode()
.UseNamed("CnieltsV2Spider");
return CrawlerBuilder.Current.Builder();
Expand All @@ -99,7 +97,6 @@ public static ICrawler UrlFinderPipeline()
Schedulers.SchedulerManager.GetScheduler<Schedulers.RedisScheduler<Site>>("UrlFinderPipeline")
})
.UseMultiThread(10)
.SetLogFactory(new NLoggerFactory())
//.UseBloomFilter(int.MaxValue, 0.001F)
.UseRedisBloomFilter()
.UseNamed("UrlFinderPipeline");
Expand All @@ -113,17 +110,15 @@ public static ICrawler CrawlerFullSite()
CrawlerBuilder.Current.ClearPipelines();
CrawlerBuilder.Current.ClearSites();
CrawlerBuilder.Current
.AddSite("http://cuiqingcai.com/")
.AddSite("https://www.yezismile.com")
.UsePipeline<Yezismile.YezismileUrlFinderPipeline>(new UrlFinderOptons()
{
WaitForComplete = 10000,
UrlValidator = url => url.Contains("cuiqingcai.com"),
UrlValidator = url => url.Contains("yezismile.com"),
Sleep = 500
})
.UsePipeline<FileDownloadPipeline>(new FileDownloadOptions("~/Cuiqingcai/"))
.UseMultiThread(5)
.SetLogFactory(new NLoggerFactory())
.UseBloomFilter(int.MaxValue, 0.001F)
.UsePipeline<FileDownloadPipeline>(new FileDownloadOptions("~/Yezismile/HtmlSources/"))
.UseMultiThread(8)
.UseNamed("CrawlerFullSite");
return CrawlerBuilder.Current.Builder();
}
Expand Down Expand Up @@ -161,8 +156,7 @@ public static ICrawler CnBlogsCrawler()
WaitForComplete = waitForComplete,
Cookie = cookie
})
.SetLogFactory(new NLoggerFactory())
.UseBloomFilter(int.MaxValue, 0.001F)
.UseBloomFilter(int.MaxValue / 21, 0.001F)
.UseMultiThread(5)
.UseParallelMode();

Expand Down Expand Up @@ -202,8 +196,7 @@ public static ICrawler RedisCnblogsCrawler()
WaitForComplete = waitForComplete,
Cookie = cookie
})
.SetLogFactory(new NLoggerFactory())
.UseMultiThread(5)
.UseMultiThread(1)
.UseRedisBloomFilter()
.UseParallelMode();

Expand Down
2 changes: 1 addition & 1 deletion Crawler/Filter/UrlFilterManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ public static void SetUrlFilter(Func<IUrlFilter> func)
_urlFilter = func();
}

public static IUrlFilter Current => _urlFilter;
public static IUrlFilter Current => _urlFilter ?? (_urlFilter = new BloomFilter(int.MaxValue / 21, 0.001F));
}
}
8 changes: 5 additions & 3 deletions Crawler/Logger/LoggerManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public static class LoggerManager
{
private static ILoggerFactory _loggerFactory;

public static ILoggerFactory CurrentFactory => _loggerFactory ?? (_loggerFactory = new NLoggerFactory());

public static void SetLogFactory(ILoggerFactory factory)
{
_loggerFactory = factory ?? throw new ArgumentNullException(nameof(factory));
Expand All @@ -22,7 +24,7 @@ public static ILogger GetLogger(string name)
throw new ArgumentNullException(nameof(name));
}

return _loggerFactory.Create(name);
return CurrentFactory.Create(name);
}

public static ILogger GetLogger(Type type)
Expand All @@ -31,12 +33,12 @@ public static ILogger GetLogger(Type type)
{
throw new ArgumentNullException(nameof(type));
}
return _loggerFactory.Create(type);
return CurrentFactory.Create(type);
}

public static ILogger GetLogger<T>()
{
return _loggerFactory.Create<T>();
return CurrentFactory.Create<T>();
}
}
}
6 changes: 2 additions & 4 deletions Crawler/Schedulers/RedisScheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ namespace Crawler.Schedulers
public class RedisScheduler<T> : IScheduler
{
private readonly string _redisSchedulerKey;
private readonly IUrlFilter _urlFilter;
private readonly ReaderWriterLockSlim _lock = new ReaderWriterLockSlim();
private readonly IDatabase _database;
private long _totalCount;
Expand All @@ -32,7 +31,6 @@ public RedisScheduler(string name, string connectionString)

_redisSchedulerKey = $"Crawler.Schedulers.RedisScheduler.{name}";
_database = ConnectionMultiplexer.Connect(connectionString).GetDatabase();
_urlFilter = UrlFilterManager.Current;
}

object IScheduler.Pop()
Expand Down Expand Up @@ -86,10 +84,10 @@ void IScheduler.Push(object @object)

public virtual void Push(T requestSite)
{
if (_urlFilter == null || !_urlFilter.Contains(_redisSchedulerKey + requestSite))
if (UrlFilterManager.Current == null || !UrlFilterManager.Current.Contains(_redisSchedulerKey + requestSite))
{
_database.ListLeftPush(_redisSchedulerKey, Serialize(requestSite));
_urlFilter?.Add(_redisSchedulerKey + requestSite);
UrlFilterManager.Current?.Add(_redisSchedulerKey + requestSite);
_totalCount++;
}
}
Expand Down
5 changes: 2 additions & 3 deletions Crawler/Schedulers/Scheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ namespace Crawler.Schedulers
{
public class Scheduler<T> : IScheduler
{
private readonly IUrlFilter _urlFilter = UrlFilterManager.Current;
private readonly ReaderWriterLockSlim _lock = new ReaderWriterLockSlim();
private readonly List<T> _stack = new List<T>();
private long _totalCount = 0;
Expand Down Expand Up @@ -78,10 +77,10 @@ public long Count

public virtual void Push(T requestSite)
{
if (_urlFilter == null || !_urlFilter.Contains(requestSite.ToString()))
if (UrlFilterManager.Current == null || !UrlFilterManager.Current.Contains(requestSite.ToString()))
{
_stack.Add(requestSite);
_urlFilter?.Add(requestSite.ToString());
UrlFilterManager.Current?.Add(requestSite.ToString());
_totalCount++;
}
}
Expand Down
8 changes: 5 additions & 3 deletions Crawler/Schedulers/SchedulerManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ internal static IScheduler InternalGetScheduler(Type schedulerType, string key)
{
if (!schedules.TryGetValue(key, out scheduler))
{
scheduler = RedisSchedulerFactory(schedulerType, key) ?? (IScheduler) Activator.CreateInstance(schedulerType);
scheduler = RedisSchedulerFactory(schedulerType, key) ??
(IScheduler) Activator.CreateInstance(schedulerType);
schedules.Add(key, scheduler);
}
}
Expand All @@ -75,7 +76,8 @@ internal static IScheduler InternalGetScheduler(Type schedulerType, string key)

private static IScheduler RedisSchedulerFactory(Type schedulerType, string key)
{
if (schedulerType.GetGenericTypeDefinition() == typeof(RedisScheduler<>))
if (schedulerType.IsGenericType &&
schedulerType.GetGenericTypeDefinition() == typeof(RedisScheduler<>))
{
var constructors = schedulerType.GetConstructors();

Expand All @@ -87,7 +89,7 @@ private static IScheduler RedisSchedulerFactory(Type schedulerType, string key)
var parameterTypes = parameters.Select(p => p.ParameterType).ToArray();
if (parameterTypes.Length != 2) continue;
if (parameterTypes.Any(x => x != typeof(string))) continue;
return (IScheduler)constructor.Invoke(new object[] { schedulerKey, connectionString});
return (IScheduler) constructor.Invoke(new object[] {schedulerKey, connectionString});
}
}
return null;
Expand Down

0 comments on commit bf50840

Please sign in to comment.