Skip to content

Commit

Permalink
change from year to publication year
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Nov 22, 2023
1 parent 60c4caf commit b0a0e1a
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
6 changes: 2 additions & 4 deletions document_qa/document_qa_engine.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import copy
import json
import os
from pathlib import Path
from typing import Union, Any

from document_qa.grobid_processors import GrobidProcessor
from grobid_client.grobid_client import GrobidClient
from langchain.chains import create_extraction_chain
from langchain.chains.question_answering import load_qa_chain
@@ -13,8 +13,6 @@
from langchain.vectorstores import Chroma
from tqdm import tqdm

from document_qa.grobid_processors import GrobidProcessor


class DocumentQAEngine:
llm = None
@@ -220,7 +218,7 @@ def get_text_from_document(self, pdf_file_path, chunk_size=-1, perc_overlap=0.1,
biblio_metadata = copy.copy(biblio)
biblio_metadata['type'] = "biblio"
biblio_metadata['section'] = "header"
-for key in ['title', 'authors', 'year']:
+for key in ['title', 'authors', 'publication_year']:
if key in biblio_metadata:
texts.append("{}: {}".format(key, biblio_metadata[key]))
metadatas.append(biblio_metadata)
Expand Down
2 changes: 1 addition & 1 deletion document_qa/grobid_processors.py
Original file line number Diff line number Diff line change
@@ -171,7 +171,7 @@ def parse_grobid_xml(self, text):
}
try:
year = dateparser.parse(doc_biblio.header.date).year
-biblio["year"] = year
+biblio["publication_year"] = year
except:
pass

Expand Down

0 comments on commit b0a0e1a

Please sign in to comment.