From 5750ad7de9724a073619d14d84d1645cb153719f Mon Sep 17 00:00:00 2001
From: Patrice Lopez
Date: Tue, 6 Feb 2024 17:07:09 +0100
Subject: [PATCH] add tests

---
 .../grobid/core/sax/TextSaxParserTest.java    | 65 +++++++++++++++++++
 .../resources/org/grobid/core/sax/patent.xml  | 23 +++++++
 2 files changed, 88 insertions(+)
 create mode 100644 grobid-core/src/test/java/org/grobid/core/sax/TextSaxParserTest.java
 create mode 100644 grobid-core/src/test/resources/org/grobid/core/sax/patent.xml

diff --git a/grobid-core/src/test/java/org/grobid/core/sax/TextSaxParserTest.java b/grobid-core/src/test/java/org/grobid/core/sax/TextSaxParserTest.java
new file mode 100644
index 0000000000..28a73c5649
--- /dev/null
+++ b/grobid-core/src/test/java/org/grobid/core/sax/TextSaxParserTest.java
@@ -0,0 +1,65 @@
+package org.grobid.core.sax;
+
+import org.grobid.core.layout.LayoutToken;
+import org.junit.Before;
+import org.junit.Test;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.easymock.EasyMock.createMock;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasSize;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link TextSaxParser}, run against the {@code patent.xml} fixture
+ * that is loaded relative to this class from the test classpath.
+ */
+public class TextSaxParserTest {
+    SAXParserFactory spf = SAXParserFactory.newInstance();
+
+    TextSaxParser target;
+
+    /** Creates the parser under test and registers the four filters shared by all tests. */
+    @Before
+    public void setUp() throws Exception {
+        target = new TextSaxParser();
+        target.addFilter("description");
+        target.addFilter("p");
+        target.addFilter("heading");
+        target.addFilter("head");
+    }
+
+    /** Parsing the fixture is expected to yield exactly seven texts. */
+    @Test
+    public void testParseSize() throws Exception {
+        List<String> segments = parseFixture();
+        assertThat(segments, hasSize(7));
+    }
+
+    /** The first text returned for the fixture is expected to be "TECHNICAL FIELD". */
+    @Test
+    public void testParseContent() throws Exception {
+        List<String> segments = parseFixture();
+        assertThat(segments.get(0), is("TECHNICAL FIELD"));
+    }
+
+    /**
+     * Parses {@code patent.xml} with {@link #target} and returns {@code target.getTexts()}.
+     * The fixture stream is closed even when parsing throws.
+     */
+    private List<String> parseFixture() throws Exception {
+        try (InputStream input = this.getClass().getResourceAsStream("patent.xml")) {
+            SAXParser parser = spf.newSAXParser();
+            parser.parse(input, target);
+        }
+        return target.getTexts();
+    }
+}
\ No newline at end of file
diff --git a/grobid-core/src/test/resources/org/grobid/core/sax/patent.xml b/grobid-core/src/test/resources/org/grobid/core/sax/patent.xml
new file mode 100644
index 0000000000..3914fa2dd7
--- /dev/null
+++ b/grobid-core/src/test/resources/org/grobid/core/sax/patent.xml
@@ -0,0 +1,23 @@
+
+
+

+

+ TECHNICAL FIELD +

This application claims the priority of the USA provisional applications 60/675,311 for which it applied to 2005/4/26 by which the whole is integrated in this specification by the reference.

+

This invention relates to the antibody couple|bonded with P-cadherin, and its antigen binding part. + This invention relates also to the composition containing the methods of producing the nucleic acid molecule, P-cadherin antibody, and the antigen binding part which also code such an antibody and an antigen binding part, these antibodies, and an antigen binding part, an antibody, an antigen binding part, and the method of using a composition.

+ DESCRIPTION OF RELATED ART +

A cadherin is a super family of the trans-membrane glycoprotein which controls the cell adhesion between generation|occurrence|production and a structure|tissue homeostasis (Gumbiner, J.Cell.Biol., 148:399-404(2000);Yagi et al., Genes Dev., 14:1169-1180(2000)). + The intracellular domain of a cadherin is interacted with cytoplasm proteins, such as a catenin and p120, and the foundation of a coupling|bonding with a cadherin and an actin cytoskeleton is formed of it. + A cadherin has five extracellular Ca<2+> binding domains and the small cytosolic domain highly preserve|saved between classic cadherins. + P-cadherin, E-cadherin, and N-cadherin is contained in the component of a classic cadherin family. + It is thought that cell adhesion molecules, such as a cadherin, play the serious|significant role for the cell junction of a cancer cell and a transfer cell (Furukawa et al., Microscopy Res.Technique, 38(4):343-352(1997)). + P-cadherin expression in a normal adult tissue is low, and is restrict|limited mainly to the base layer of a myoepithelial cell and the stratified epithelium (Shimoyama et al., Cancer Res., 49:2128-33(1989)). + P-cadherin is upregulated by inflammatory bowel diseases, such as Crohn's disease and colitis (Hardy et al., Gut, 50:513-519(2002)). + It is clear from a series of extensive proof now that abnormal P-cadherin expression is also related to the tumor of cell growth, the colon, a breast, a lung, a thyroid gland, and a uterine cervix (Gamallo, Modern Pathology, 14:650-654 (2001);, Stefansson et al., J.Clin.Oncol., + 22 (7) :1242-1252 (2004)). + It was reported that antigen recognition of the human P-cadherin is carried out by the NCC-CAD-299 monoclonal antibody produced with respect to the vulva epidermoid carcinoma (Shimoyama et al., Cancer Res., 49:2128-2133(1989)). 
+ When the adhesion|attachment and intracellular signal transduction which P-cadherin carries are adjusted, it will estimate|forecast that the proliferation and survival of the tumor cells in in vivo fall.

+ PROBLEM TO BE SOLVED BY THE INVENTION +

Therefore, when the central role considered that P-cadherin has in cell growth and advancing of a solid tumor is considered, it is desirable to produce|generate the antibody with respect to P-cadherin which can bring the patient of various cancers remedial profits.

+