This repository was archived by the owner on Dec 17, 2021. It is now read-only.
4 files changed: 14 additions and 12 deletions.
Original file line number | Diff line number | Diff line change
3
3
import argparse
4
4
from lxml import html
5
5
import requests
6
- from lib import uil
6
+ from lib import util
7
7
from lib .backend import Session
8
8
from lib .backend .model import ActivityInMailman
9
9
from datetime import datetime
@@ -13,27 +13,27 @@ def get_activity(verbose=False):
13
13
for l in lists :
14
14
if verbose : print 'Processing activity for %s...' % l ['name' ]
15
15
latest = Session .query (ActivityInMailman )\
16
- .filter (ActivityInMailman .list_name == l ['name' ]
16
+ .filter (ActivityInMailman .list_name == l ['name' ])\
17
17
.order_by (ActivityInMailman .message_id .desc ())\
18
18
.first ()
19
19
# Walk through message history from the web front-end
20
20
archive_url = l ['link' ].replace ('mailman/listinfo' ,'pipermail' )
21
- limit = 100
21
+ limit = 1000
22
22
latest_id = latest .message_id if latest else - 1
23
23
for msg in _yield_messages (archive_url ,latest_id , verbose = verbose ):
24
24
if verbose : print ' -> got msg #%d (%s: "%s")' % (msg ['id' ],msg ['email' ],msg ['subject' ])
25
25
Session .add ( ActivityInMailman (
26
- listname = l ['name' ],
26
+ list_name = l ['name' ],
27
27
message_id = msg ['id' ],
28
28
subject = msg ['subject' ],
29
29
author = msg ['author' ],
30
30
email = msg ['email' ],
31
31
link = msg ['link' ],
32
32
timestamp = msg ['date' ] ) )
33
33
limit -= 1
34
- if limit == 0 :
35
- if verbose : print ' -> Reached activity limit (100)'
36
- break ;
34
+ # if limit==0:
35
+ # if verbose: print ' -> Reached activity limit (100)'
36
+ # break;
37
37
Session .commit ()
38
38
39
39
def _yield_messages (url , latest_id , verbose = False ):
Original file line number Diff line number Diff line change @@ -151,7 +151,7 @@ def toJson(self):
151
151
152
152
class ActivityInMailman (Base ):
153
153
__tablename__ = 'activity_mailman'
154
- listname = Column (String , primary_key = True )
154
+ list_name = Column (String , primary_key = True )
155
155
message_id = Column (Integer , primary_key = True )
156
156
subject = Column (String )
157
157
author = Column (String )
Original file line number Diff line number Diff line change
1
+ from lxml import html
2
+ import requests
1
3
2
4
def list_mailman_lists (verbose = False ):
3
5
"""Scrape the server for a catalogue of all mailman lists."""
Original file line number Diff line number Diff line change @@ -15,13 +15,13 @@ def snapshot_mailman(verbose=False):
15
15
for l in lists :
16
16
if verbose : print 'Processing snapshots for %s...' % l ['name' ]
17
17
latest = Session .query (SnapshotOfMailman )\
18
- .filter (SnapshotOfMailman .name == l ['name' ])\
18
+ .filter (SnapshotOfMailman .list_name == l ['name' ])\
19
19
.order_by (SnapshotOfMailman .timestamp .desc ())\
20
20
.first ()
21
21
# By default, gather 180 days of snapshots
22
- since = today - timedelta (days = 30 )
22
+ since = today - timedelta (days = 180 )
23
23
if latest :
24
- if latest .timestamp >= until :
24
+ if latest .timestamp >= today :
25
25
if verbose : print ' -> most recent snapshots have already been processed.'
26
26
continue
27
27
since = latest .timestamp + timedelta (days = 1 )
@@ -32,7 +32,7 @@ def snapshot_mailman(verbose=False):
32
32
while since < today :
33
33
posts_today = Session .query (ActivityInMailman )\
34
34
.filter (ActivityInMailman .list_name == l ['name' ])\
35
- .filter (ActivityInMailman .timestamp .between (date , date + day ))\
35
+ .filter (ActivityInMailman .timestamp .between (since , since + timedelta ( days = 1 ) ))\
36
36
.count ()
37
37
sn = SnapshotOfMailman (\
38
38
list_name = l ['name' ],\
You can’t perform that action at this time.
0 commit comments