@@ -77,92 +77,85 @@ public String execute(String filePath)
77
77
// Get all of the document text as one big string
78
78
String text = documentResponse .getText ();
79
79
80
- // Read the text recognition output from the processor
81
- //System.out.println("The document contains the following paragraphs:");
82
- Document .Page firstPage = documentResponse .getPages (0 );
83
- List <Document .Page .Paragraph > paragraphs = firstPage .getParagraphsList ();
84
-
85
80
// Read the text recognition output from the processor
86
81
List <Document .Page > pages = documentResponse .getPagesList ();
87
- // System.out.printf("There are %s page(s) in this document.\n", pages.size());
82
+ System .out .printf ("There are %s page(s) in this document.\n " , pages .size ());
88
83
89
84
for (Document .Page page : pages ) {
85
+ System .out .printf ("\n \n **** Page %d ****\n " , page .getPageNumber ());
90
86
91
87
List <Document .Page .Table > tables = page .getTablesList ();
92
-
93
- //영양성분표 없는 상황 추가해야함
94
-
88
+ System .out .printf ("Found %d table(s):\n " , tables .size ());
95
89
for (Document .Page .Table table : tables ) {
96
- nutritionText += printTableInfo (table , text );
90
+ nutritionText += extractTableContents (table , documentResponse . getText () );
97
91
}
98
92
99
- List <Document .Page .FormField > formFields = page .getFormFieldsList ();
100
- for (Document .Page .FormField formField : formFields ) {
101
- String fieldName = getLayoutText (formField .getFieldName ().getTextAnchor (), text );
102
- String fieldValue = getLayoutText (formField .getFieldValue ().getTextAnchor (), text );
103
-
104
- System .out .printf (
105
- " '%s': '%s'\n " , removeNewlines (fieldName ), removeNewlines (fieldValue ));
106
- nutritionText += String .format (" '%s': '%s'" , removeNewlines (fieldName ), removeNewlines (fieldValue ));
107
- }
93
+ // ... (Other code for additional processing, if needed)
108
94
}
109
- }
110
- return nutritionText ;
111
- }
112
95
113
- private static byte [] readImageDataFromUrl (String imageUrl ) throws IOException {
114
- URL url = new URL (imageUrl );
115
96
116
- try (InputStream inputStream = url .openStream ()) {
117
- // Dynamically adjust the buffer size based on the available data
118
- ByteArrayOutputStream outputStream = new ByteArrayOutputStream ();
119
- byte [] tempBuffer = new byte [8192 ];
120
- int bytesRead ;
121
- while ((bytesRead = inputStream .read (tempBuffer )) != -1 ) {
122
- outputStream .write (tempBuffer , 0 , bytesRead );
123
- }
124
- return outputStream .toByteArray ();
125
97
}
98
+ return nutritionText ;
126
99
}
127
100
128
- private static String printTableInfo (Document .Page .Table table , String text ) {
129
-
130
- String tableText = "" ;
131
- Document .Page .Table .TableRow firstBodyRow = table .getBodyRows (0 );
132
- int columnCount = firstBodyRow .getCellsCount ();
133
- /*System.out.printf(
134
- " Table with %d columns and %d rows:\n", columnCount, table.getBodyRowsCount());*/
101
+ private static String extractTableContents (Document .Page .Table table , String text ) {
135
102
103
+ String tableText ="" ;
136
104
Document .Page .Table .TableRow headerRow = table .getHeaderRows (0 );
105
+ // Extract and print header
137
106
StringBuilder headerRowText = new StringBuilder ();
138
107
for (Document .Page .Table .TableCell cell : headerRow .getCellsList ()) {
139
108
String columnName = getLayoutText (cell .getLayout ().getTextAnchor (), text );
140
- headerRowText .append (String .format ("%s " , removeNewlines (columnName )));
109
+ headerRowText .append (String .format ("%s | " , removeNewlines (columnName )));
141
110
}
142
111
headerRowText .setLength (headerRowText .length () - 3 );
143
- // System.out.printf(" Collumns : %s\n", headerRowText.toString());
112
+ System .out .printf ("Columns : %s\n " , headerRowText .toString ());
144
113
tableText += headerRowText .toString ();
114
+ for (Document .Page .Table .TableRow bodyRow : table .getBodyRowsList ()) {
115
+ for (Document .Page .Table .TableCell cell : bodyRow .getCellsList ()) {
116
+ String cellText = getLayoutText (cell .getLayout ().getTextAnchor (), text );
117
+ System .out .printf ("Table cell text: '%s'\n " , removeNewlines (cellText ));
145
118
146
- StringBuilder firstRowText = new StringBuilder ();
147
- for (Document .Page .Table .TableCell cell : firstBodyRow .getCellsList ()) {
148
- String cellText = getLayoutText (cell .getLayout ().getTextAnchor (), text );
149
- firstRowText .append (String .format ("%s " , removeNewlines (cellText )));
119
+ //tableText += String.format(" '%s' |", removeNewlines(cellText));
120
+ tableText += (removeNewlines (cellText )+" | " );
121
+ }
150
122
}
151
- firstRowText .setLength (firstRowText .length () - 3 );
152
- //System.out.printf(" First row data: %s\n", firstRowText.toString());
153
- tableText += firstRowText ;
154
-
155
123
return tableText ;
156
124
}
157
125
126
+ private static byte [] readImageDataFromUrl (String imageUrl ) throws IOException {
127
+ URL url = new URL (imageUrl );
128
+
129
+ try (InputStream inputStream = url .openStream ()) {
130
+ // Dynamically adjust the buffer size based on the available data
131
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream ();
132
+ byte [] tempBuffer = new byte [8192 ];
133
+ int bytesRead ;
134
+ while ((bytesRead = inputStream .read (tempBuffer )) != -1 ) {
135
+ outputStream .write (tempBuffer , 0 , bytesRead );
136
+ }
137
+ return outputStream .toByteArray ();
138
+ }
139
+ }
158
140
// Extract shards from the text field
159
141
private static String getLayoutText (Document .TextAnchor textAnchor , String text ) {
160
- if (textAnchor .getTextSegmentsList ().size () > 0 ) {
142
+ /* if (textAnchor.getTextSegmentsList().size() > 0) {
161
143
int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex();
162
144
int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex();
163
145
return text.substring(startIdx, endIdx);
164
146
}
165
- return "[NO TEXT]" ;
147
+ return "[NO TEXT]";*/
148
+
149
+ StringBuilder result = new StringBuilder ();
150
+
151
+ for (Document .TextAnchor .TextSegment textSegment : textAnchor .getTextSegmentsList ()) {
152
+ int startIdx = (int ) textSegment .getStartIndex ();
153
+ int endIdx = (int ) textSegment .getEndIndex ();
154
+ String segmentText = text .substring (startIdx , endIdx );
155
+ result .append (segmentText );
156
+ }
157
+
158
+ return result .toString ();
166
159
}
167
160
168
161
private static String removeNewlines (String s ) {
0 commit comments