-
Notifications
You must be signed in to change notification settings - Fork 461
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
85 additions
and
0 deletions.
There are no files selected for viewing
62 changes: 62 additions & 0 deletions
62
grobid-core/src/test/java/org/grobid/core/sax/TextSaxParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package org.grobid.core.sax; | ||
|
||
import org.grobid.core.layout.LayoutToken; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import javax.xml.parsers.SAXParser; | ||
import javax.xml.parsers.SAXParserFactory; | ||
|
||
import java.io.InputStream; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import static org.easymock.EasyMock.createMock; | ||
import static org.hamcrest.CoreMatchers.is; | ||
import static org.hamcrest.Matchers.greaterThan; | ||
import static org.hamcrest.Matchers.hasSize; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
public class TextSaxParserTest { | ||
SAXParserFactory spf = SAXParserFactory.newInstance(); | ||
|
||
TextSaxParser target; | ||
|
||
@Before | ||
public void setUp() throws Exception { | ||
target = new TextSaxParser(); | ||
target.addFilter("description"); | ||
target.addFilter("p"); | ||
target.addFilter("heading"); | ||
target.addFilter("head"); | ||
} | ||
|
||
@Test | ||
public void testParseSize() throws Exception { | ||
// get a factory | ||
SAXParserFactory spf = SAXParserFactory.newInstance(); | ||
|
||
InputStream is = this.getClass().getResourceAsStream("patent.xml"); | ||
|
||
SAXParser p = spf.newSAXParser(); | ||
p.parse(is, target); | ||
|
||
List<String> segments = target.getTexts(); | ||
assertThat(segments, hasSize(7)); | ||
} | ||
|
||
@Test | ||
public void testParseContent() throws Exception { | ||
// get a factory | ||
SAXParserFactory spf = SAXParserFactory.newInstance(); | ||
|
||
InputStream is = this.getClass().getResourceAsStream("patent.xml"); | ||
|
||
SAXParser p = spf.newSAXParser(); | ||
p.parse(is, target); | ||
|
||
List<String> segments = target.getTexts(); | ||
assertThat(segments.get(0), is("TECHNICAL FIELD")); | ||
} | ||
} |
23 changes: 23 additions & 0 deletions
23
grobid-core/src/test/resources/org/grobid/core/sax/patent.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<description type="description" xml:base="/api/emi/378875770" xml:id="e78df3e0-8e43-4754-b124-f705c030ce1a" xml:lang="en"> | ||
<p xml:id="_2ff0350029"> | ||
</p> | ||
<head xml:id="_2ff0350030">TECHNICAL FIELD</head> | ||
<p n="0001" xml:id="_2ff0350031">This application claims the priority of the USA provisional applications 60/675,311 for which it applied to 2005/4/26 by which the whole is integrated in this specification by the reference.</p> | ||
<p n="0002" xml:id="_2ff0350032">This invention relates to the antibody couple|bonded with P-cadherin, and its antigen binding part.<lb> | ||
</lb>This invention relates also to the composition containing the methods of producing the nucleic acid molecule, P-cadherin antibody, and the antigen binding part which also code such an antibody and an antigen binding part, these antibodies, and an antigen binding part, an antibody, an antigen binding part, and the method of using a composition.</p> | ||
<head xml:id="_2ff0350034">DESCRIPTION OF RELATED ART</head> | ||
<p n="0003" xml:id="_2ff0350035">A cadherin is a super family of the trans-membrane glycoprotein which controls the cell adhesion between generation|occurrence|production and a structure|tissue homeostasis (Gumbiner, J.Cell.Biol., 148:399-404(2000);Yagi et al., Genes Dev., 14:1169-1180(2000)).<lb> | ||
</lb>The intracellular domain of a cadherin is interacted with cytoplasm proteins, such as a catenin and p120, and the foundation of a coupling|bonding with a cadherin and an actin cytoskeleton is formed of it.<lb> | ||
</lb>A cadherin has five extracellular Ca<2+> binding domains and the small cytosolic domain highly preserve|saved between classic cadherins.<lb> | ||
</lb>P-cadherin, E-cadherin, and N-cadherin is contained in the component of a classic cadherin family.<lb> | ||
</lb>It is thought that cell adhesion molecules, such as a cadherin, play the serious|significant role for the cell junction of a cancer cell and a transfer cell (Furukawa et al., Microscopy Res.Technique, 38(4):343-352(1997)).<lb> | ||
</lb>P-cadherin expression in a normal adult tissue is low, and is restrict|limited mainly to the base layer of a myoepithelial cell and the stratified epithelium (Shimoyama et al., Cancer Res., 49:2128-33(1989)).<lb> | ||
</lb>P-cadherin is upregulated by inflammatory bowel diseases, such as Crohn's disease and colitis (Hardy et al., Gut, 50:513-519(2002)).<lb> | ||
</lb>It is clear from a series of extensive proof now that abnormal P-cadherin expression is also related to the tumor of cell growth, the colon, a breast, a lung, a thyroid gland, and a uterine cervix (Gamallo, Modern Pathology, 14:650-654 (2001);, Stefansson et al., J.Clin.Oncol.,<lb> | ||
</lb>22 (7) :1242-1252 (2004)).<lb> | ||
</lb>It was reported that antigen recognition of the human P-cadherin is carried out by the NCC-CAD-299 monoclonal antibody produced with respect to the vulva epidermoid carcinoma (Shimoyama et al., Cancer Res., 49:2128-2133(1989)).<lb> | ||
</lb>When the adhesion|attachment and intracellular signal transduction which P-cadherin carries are adjusted, it will estimate|forecast that the proliferation and survival of the tumor cells in in vivo fall.</p> | ||
<head xml:id="_2ff0350046">PROBLEM TO BE SOLVED BY THE INVENTION</head> | ||
<p n="0004" xml:id="_2ff0350047">Therefore, when the central role considered that P-cadherin has in cell growth and advancing of a solid tumor is considered, it is desirable to produce|generate the antibody with respect to P-cadherin which can bring the patient of various cancers remedial profits.</p> | ||
</description> |