Skip to content

Commit

Permalink
Separate new tasks into smaller packages. Disable `from projects import` in
Browse files Browse the repository at this point in the history
PY3
  • Loading branch information
binux committed Oct 15, 2015
1 parent 5d602ca commit e59bd63
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 33 deletions.
16 changes: 5 additions & 11 deletions pyspider/processor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pyspider.libs.log import LogFormatter
from pyspider.libs.utils import pretty_unicode, hide_me
from pyspider.libs.response import rebuild_response
from .project_module import ProjectManager, ProjectLoader, ProjectFinder
from .project_module import ProjectManager, ProjectFinder


class ProcessorResult(object):
Expand Down Expand Up @@ -90,15 +90,8 @@ def enable_projects_import(self):
`from projects import project_name`
'''
_self = self

class ProcessProjectFinder(ProjectFinder):

def get_loader(self, name):
info = _self.projectdb.get(name)
if info:
return ProjectLoader(info)
sys.meta_path.append(ProcessProjectFinder())
if six.PY2:
sys.meta_path.append(ProjectFinder(self.projectdb))

def __del__(self):
pass
Expand Down Expand Up @@ -175,7 +168,8 @@ def on_task(self, task, response):
# FIXME: unicode_obj should be used in the scheduler before storing to the database;
# it is applied here for performance.
if ret.follows:
self.newtask_queue.put([utils.unicode_obj(newtask) for newtask in ret.follows])
for each in (ret.follows[x:x + 1000] for x in range(0, len(ret.follows), 1000)):
self.newtask_queue.put([utils.unicode_obj(newtask) for newtask in each])

for project, msg, url in ret.messages:
try:
Expand Down
31 changes: 21 additions & 10 deletions pyspider/processor/project_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import imp
import time
import weakref
import logging
import inspect
import traceback
Expand Down Expand Up @@ -154,25 +155,36 @@ def get(self, project_name, updatetime=None, md5sum=None):
class ProjectFinder(object):
'''ProjectFinder class for sys.meta_path'''

def __init__(self, projectdb):
self.get_projectdb = weakref.ref(projectdb)

@property
def projectdb(self):
return self.get_projectdb()

def find_module(self, fullname, path=None):
if fullname == 'projects':
return ProjectsLoader()
return self
parts = fullname.split('.')
if len(parts) == 2 and parts[0] == 'projects':
return self.get_loader(parts[1])


class ProjectsLoader(object):
'''ProjectsLoader class for sys.meta_path package'''
name = parts[1]
if not self.projectdb:
return
info = self.projectdb.get(name)
if info:
return ProjectLoader(info)

def load_module(self, fullname):
mod = sys.modules.setdefault('projects', imp.new_module(fullname))
mod = imp.new_module(fullname)
mod.__file__ = '<projects>'
mod.__loader__ = self
mod.__path__ = []
mod.__path__ = ['<projects>']
mod.__package__ = 'projects'
return mod

def is_package(self, fullname):
return True


class ProjectLoader(object):
'''ProjectLoader class for sys.meta_path'''
Expand All @@ -184,10 +196,9 @@ def __init__(self, project, mod=None):

def load_module(self, fullname):
if self.mod is None:
mod = self.mod = imp.new_module(self.name)
self.mod = mod = imp.new_module(fullname)
else:
mod = self.mod

mod.__file__ = '<%s>' % self.name
mod.__loader__ = self
mod.__project__ = self.project
Expand Down
17 changes: 5 additions & 12 deletions pyspider/webui/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# Created on 2014-02-23 00:19:06


import re
import sys
import time
import socket
Expand All @@ -18,7 +17,7 @@

from pyspider.libs import utils, sample_handler, dataurl
from pyspider.libs.response import rebuild_response
from pyspider.processor.project_module import ProjectManager, ProjectFinder, ProjectLoader
from pyspider.processor.project_module import ProjectManager, ProjectFinder
from .app import app

default_task = {
Expand Down Expand Up @@ -60,13 +59,7 @@ def debug(project):

@app.before_first_request
def enable_projects_import():
class DebuggerProjectFinder(ProjectFinder):

def get_loader(self, name):
info = app.config['projectdb'].get(name)
if info:
return ProjectLoader(info)
sys.meta_path.append(DebuggerProjectFinder())
sys.meta_path.append(ProjectFinder(app.config['projectdb']))


@app.route('/debug/<project>/run', methods=['POST', ])
Expand All @@ -84,7 +77,7 @@ def run(project):
'time': time.time() - start_time,
}
return json.dumps(utils.unicode_obj(result)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}

project_info = {
'name': project,
Expand All @@ -105,7 +98,7 @@ def run(project):
'time': time.time() - start_time,
}
return json.dumps(utils.unicode_obj(result)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}
project_info['script'] = info['script']

fetch_result = {}
Expand Down Expand Up @@ -207,7 +200,7 @@ def get_script(project):
return 'project name is not allowed!', 400
info = projectdb.get(project, fields=['name', 'script'])
return json.dumps(utils.unicode_obj(info)), \
200, {'Content-Type': 'application/json'}
200, {'Content-Type': 'application/json'}


@app.route('/helper.js')
Expand Down
32 changes: 32 additions & 0 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Created on 2014-02-22 14:00:05

import os
import six
import copy
import time
import unittest2 as unittest
Expand Down Expand Up @@ -489,3 +490,34 @@ def test_70_update_project(self):
self.assertEqual(status['track']['process']['ok'], False)

self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 0.1

@unittest.skipIf(six.PY3, "deprecated feature, not work for PY3")
def test_80_import_project(self):
    """Check that projects can be imported through the `projects` package.

    Inserts two extra projects into the project database and then exercises
    the three import spellings:

    * ``from projects import <name>``
    * ``from projects.<name> import Handler``
    * ``import projects.<name>``

    The test is skipped on Python 3 (see the skip reason above).
    """
    script = inspect.getsource(sample_handler)
    # Keep each record's 'name' equal to the key it is inserted under so the
    # stored project description is self-consistent.
    for project_name in ('test_project2', 'test_project3'):
        self.projectdb.insert(project_name, {
            'name': project_name,
            'group': 'group',
            'status': 'TODO',
            'script': script,
            'comments': 'test project',
            'rate': 1.0,
            'burst': 10,
        })

    # NOTE(review): 'test_project' is not inserted here; presumably it is
    # created by the test fixtures outside this method -- confirm.
    from projects import test_project
    self.assertIsNotNone(test_project)
    self.assertIsNotNone(test_project.Handler)

    from projects.test_project2 import Handler
    self.assertIsNotNone(Handler)

    import projects.test_project3
    self.assertIsNotNone(projects.test_project3.Handler)

0 comments on commit e59bd63

Please sign in to comment.