Skip to content

Commit

Permalink
Merge pull request #7 from binux/master
Browse files Browse the repository at this point in the history
merge branch
  • Loading branch information
machinewu committed Oct 16, 2015
2 parents 6da1a2d + e59bd63 commit 6db822f
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 34 deletions.
2 changes: 1 addition & 1 deletion pyspider/libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=Fa
str_time = "%d:%02d" % (local_date.hour, local_date.minute)

return format % {
"month_name": local_date.strftime('%B'),
"month_name": local_date.strftime('%b'),
"weekday": local_date.strftime('%A'),
"day": str(local_date.day),
"year": str(local_date.year),
Expand Down
16 changes: 5 additions & 11 deletions pyspider/processor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pyspider.libs.log import LogFormatter
from pyspider.libs.utils import pretty_unicode, hide_me
from pyspider.libs.response import rebuild_response
from .project_module import ProjectManager, ProjectLoader, ProjectFinder
from .project_module import ProjectManager, ProjectFinder


class ProcessorResult(object):
Expand Down Expand Up @@ -90,15 +90,8 @@ def enable_projects_import(self):
`from project import project_name`
'''
_self = self

class ProcessProjectFinder(ProjectFinder):

def get_loader(self, name):
info = _self.projectdb.get(name)
if info:
return ProjectLoader(info)
sys.meta_path.append(ProcessProjectFinder())
if six.PY2:
sys.meta_path.append(ProjectFinder(self.projectdb))

def __del__(self):
pass
Expand Down Expand Up @@ -176,7 +169,8 @@ def on_task(self, task, response):
# FIXME: unicode_obj should used in scheduler before store to database
# it's used here for performance.
if ret.follows:
self.newtask_queue.put([utils.unicode_obj(newtask) for newtask in ret.follows])
for each in (ret.follows[x:x + 1000] for x in range(0, len(ret.follows), 1000)):
self.newtask_queue.put([utils.unicode_obj(newtask) for newtask in each])

for project, msg, url in ret.messages:
try:
Expand Down
31 changes: 21 additions & 10 deletions pyspider/processor/project_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import imp
import time
import weakref
import logging
import inspect
import traceback
Expand Down Expand Up @@ -154,25 +155,36 @@ def get(self, project_name, updatetime=None, md5sum=None):
class ProjectFinder(object):
'''ProjectFinder class for sys.meta_path'''

def __init__(self, projectdb):
self.get_projectdb = weakref.ref(projectdb)

@property
def projectdb(self):
return self.get_projectdb()

def find_module(self, fullname, path=None):
if fullname == 'projects':
return ProjectsLoader()
return self
parts = fullname.split('.')
if len(parts) == 2 and parts[0] == 'projects':
return self.get_loader(parts[1])


class ProjectsLoader(object):
'''ProjectsLoader class for sys.meta_path package'''
name = parts[1]
if not self.projectdb:
return
info = self.projectdb.get(name)
if info:
return ProjectLoader(info)

def load_module(self, fullname):
mod = sys.modules.setdefault('projects', imp.new_module(fullname))
mod = imp.new_module(fullname)
mod.__file__ = '<projects>'
mod.__loader__ = self
mod.__path__ = []
mod.__path__ = ['<projects>']
mod.__package__ = 'projects'
return mod

def is_package(self, fullname):
return True


class ProjectLoader(object):
'''ProjectLoader class for sys.meta_path'''
Expand All @@ -184,10 +196,9 @@ def __init__(self, project, mod=None):

def load_module(self, fullname):
if self.mod is None:
mod = self.mod = imp.new_module(self.name)
self.mod = mod = imp.new_module(fullname)
else:
mod = self.mod

mod.__file__ = '<%s>' % self.name
mod.__loader__ = self
mod.__project__ = self.project
Expand Down
17 changes: 5 additions & 12 deletions pyspider/webui/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# Created on 2014-02-23 00:19:06


import re
import sys
import time
import socket
Expand All @@ -18,7 +17,7 @@

from pyspider.libs import utils, sample_handler, dataurl
from pyspider.libs.response import rebuild_response
from pyspider.processor.project_module import ProjectManager, ProjectFinder, ProjectLoader
from pyspider.processor.project_module import ProjectManager, ProjectFinder
from .app import app

default_task = {
Expand Down Expand Up @@ -60,13 +59,7 @@ def debug(project):

@app.before_first_request
def enable_projects_import():
class DebuggerProjectFinder(ProjectFinder):

def get_loader(self, name):
info = app.config['projectdb'].get(name)
if info:
return ProjectLoader(info)
sys.meta_path.append(DebuggerProjectFinder())
sys.meta_path.append(ProjectFinder(app.config['projectdb']))


@app.route('/debug/<project>/run', methods=['POST', ])
Expand All @@ -84,7 +77,7 @@ def run(project):
'time': time.time() - start_time,
}
return json.dumps(utils.unicode_obj(result)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}

project_info = {
'name': project,
Expand All @@ -105,7 +98,7 @@ def run(project):
'time': time.time() - start_time,
}
return json.dumps(utils.unicode_obj(result)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}
project_info['script'] = info['script']

fetch_result = {}
Expand Down Expand Up @@ -207,7 +200,7 @@ def get_script(project):
return 'project name is not allowed!', 400
info = projectdb.get(project, fields=['name', 'script'])
return json.dumps(utils.unicode_obj(info)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}


@app.route('/helper.js')
Expand Down
32 changes: 32 additions & 0 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Created on 2014-02-22 14:00:05

import os
import six
import copy
import time
import unittest2 as unittest
Expand Down Expand Up @@ -489,3 +490,34 @@ def test_70_update_project(self):
self.assertEqual(status['track']['process']['ok'], False)

self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 0.1

@unittest.skipIf(six.PY3, "deprecated feature, not work for PY3")
def test_80_import_project(self):
self.projectdb.insert('test_project2', {
'name': 'test_project',
'group': 'group',
'status': 'TODO',
'script': inspect.getsource(sample_handler),
'comments': 'test project',
'rate': 1.0,
'burst': 10,
})
self.projectdb.insert('test_project3', {
'name': 'test_project',
'group': 'group',
'status': 'TODO',
'script': inspect.getsource(sample_handler),
'comments': 'test project',
'rate': 1.0,
'burst': 10,
})

from projects import test_project
self.assertIsNotNone(test_project)
self.assertIsNotNone(test_project.Handler)

from projects.test_project2 import Handler
self.assertIsNotNone(Handler)

import projects.test_project3
self.assertIsNotNone(projects.test_project3.Handler)
1 change: 1 addition & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_format_data(self):
self.assertEqual(utils.format_date(now - 2*60), '2 minutes ago')
self.assertEqual(utils.format_date(now - 30*60), '30 minutes ago')
self.assertEqual(utils.format_date(now - 60*60), '1 hour ago')
self.assertEqual(utils.format_date(1963475336), 'Mar 21, 2032 at 9:48')
self.assertEqual(utils.format_date(now - 12*60*60), '12 hours ago')
self.assertRegex(utils.format_date(now - 24*60*60), r'^yesterday at \d{1,2}:\d{2}$')
self.assertRegex(utils.format_date(now - 2*24*60*60), r'^[A-Z][a-z]+ at \d{1,2}:\d{2}$')
Expand Down

0 comments on commit 6db822f

Please sign in to comment.