-
Notifications
You must be signed in to change notification settings - Fork 0
/
search.python
47 lines (38 loc) · 2.3 KB
/
search.python
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from Bio import Entrez
import pandas as pd
Entrez.email = "[email protected]" # Change this to your actual email address
def _article_to_row(record):
    """Flatten one parsed ``PubmedArticle`` record into a result row.

    Args:
        record: One element of the ``'PubmedArticle'`` list produced by
            ``Entrez.read`` on an efetch XML response.

    Returns:
        A list ``[title, year, doi, authors, abstract, journal, keywords]``.

    Raises:
        KeyError: If a mandatory part of the record (``MedlineCitation``,
            ``Article``, ``Journal``, ``PubmedData``...) is missing.
    """
    citation = record['MedlineCitation']
    article_info = citation['Article']

    title = article_info.get('ArticleTitle', '')

    # Structured abstracts are split into several AbstractText sections
    # (background, methods, ...); keep all of them, not only the first.
    abstract_parts = article_info.get('Abstract', {}).get('AbstractText', [])
    abstract = ' '.join(str(part) for part in abstract_parts)
    if not abstract:
        abstract = "No abstract available"

    # Entries without a LastName (e.g. collective/group authors) are skipped.
    authors = ', '.join(
        f"{author.get('LastName', '')}, {author.get('ForeName', '')}"
        for author in article_info.get('AuthorList', [])
        if author.get('LastName')
    )

    pub_date_info = article_info['Journal']['JournalIssue']['PubDate']
    pub_date = pub_date_info.get('Year', '')
    if not pub_date:
        # Some records carry a free-text MedlineDate such as "2019 Nov-Dec"
        # instead of a Year element; use its leading four digits.
        medline_date = str(pub_date_info.get('MedlineDate', ''))
        if medline_date[:4].isdigit():
            pub_date = medline_date[:4]

    doi = next(
        (
            str(aid)
            for aid in record['PubmedData']['ArticleIdList']
            if aid.attributes.get('IdType') == 'doi'
        ),
        '',
    )

    journal_title = article_info['Journal']['Title']

    # Keywords are stored on the MedlineCitation as a list of keyword lists.
    keywords = ', '.join(
        str(kw).strip()
        for kw_list in citation.get('KeywordList', [])
        for kw in kw_list
    )

    return [title, pub_date, doi, authors, abstract, journal_title, keywords]


def fetch_abstracts(keyword, mindate='2018', maxdate='2024'):
    """Search PubMed and return one row of metadata per matching article.

    Args:
        keyword: Search expression placed in front of the
            ``[Title/Abstract]`` field tag.
        mindate: First publication year of the date range (inclusive).
        maxdate: Last publication year of the date range (inclusive).

    Returns:
        A list of rows ``[title, year, doi, authors, abstract, journal,
        keywords]``. Articles whose record cannot be parsed are reported on
        stdout and left out of the result.

    NOTE(review): when ``keyword`` contains several OR-ed terms, the
    ``[Title/Abstract]`` tag ends up after the last term only -- confirm that
    this matches the intended search.
    NOTE(review): NCBI may cap the number of IDs returned by a single
    esearch call below the requested ``retmax`` -- confirm against the
    E-utilities documentation if complete coverage matters.
    """
    # Build the search query
    query = f"{keyword}[Title/Abstract] AND ({mindate}:{maxdate}[Date - Publication])"

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=100000)
    try:
        search_results = Entrez.read(search_handle)
    finally:
        search_handle.close()
    id_list = search_results['IdList']

    results = []
    for article_id in id_list:
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="xml")
        try:
            article = Entrez.read(handle)
        finally:
            # Release the HTTP connection even if parsing the XML fails.
            handle.close()

        # Best effort: one malformed record must not abort the whole run.
        try:
            results.append(_article_to_row(article['PubmedArticle'][0]))
        except Exception as e:
            print(f"Error processing article ID {article_id}: {e}")
    return results
def save_results_to_excel(results, filename="PubMed_Abstracts.xlsx"):
    """Write article rows to an Excel workbook and print a confirmation.

    Args:
        results: Sequence of 7-item rows ordered as Title, Year, DOI,
            Author, Abstract, Journal, Keyword.
        filename: Path of the ``.xlsx`` file to create or overwrite.
    """
    columns = ['Title', 'Year', 'DOI', 'Author', 'Abstract', 'Journal', 'Keyword']
    df = pd.DataFrame(results, columns=columns)
    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(filename, index=False, engine='openpyxl')
    print(f"摘要已保存到 {filename}")
# Search terms: the names under which polycystic kidney disease is indexed,
# combined with OR into a single PubMed expression.
keywords = (
    '"polycystic kidney diseases" '
    'OR "polycystic kidney disease, autosomal dominant" '
    'OR "autosomal dominant polycystic kidney disease" '
    'OR "ADPKD"'
)
# Run the search, then export the collected rows to the default workbook.
results = fetch_abstracts(keywords)
save_results_to_excel(results)