Skip to content

Commit

Permalink
Add AI summary for each edge.
Browse files Browse the repository at this point in the history
  • Loading branch information
yjcyxky committed Aug 10, 2024
1 parent efe64fd commit f55062f
Show file tree
Hide file tree
Showing 20 changed files with 561 additions and 151 deletions.
18 changes: 17 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ hmac = "0.12.1"
reqwest = { version = "0.12.3", features = ["json", "stream"] }
base64 = "0.22.0"
sha2 = "0.10.7"
itertools = "0.12.0"
itertools = "0.13.0"
urlencoding = "2.1.3"

# Models
openai-api-rs = "2.1.4"
Expand Down
3 changes: 3 additions & 0 deletions migrations/20240809_add_prompt_template_column.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Revert: 20240809_add_prompt_template_column.up.sql

ALTER TABLE biomedgps_relation_metadata DROP COLUMN prompt_template;
3 changes: 3 additions & 0 deletions migrations/20240809_add_prompt_template_column.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Add a prompt_template column to the biomedgps_relation_metadata table; it stores the prompt template for each relation type.
ALTER TABLE biomedgps_relation_metadata
ADD COLUMN prompt_template TEXT DEFAULT NULL;
220 changes: 198 additions & 22 deletions src/api/publication.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
use anyhow;
use log::info;
use poem_openapi::Object;
use reqwest;
use serde::{Deserialize, Serialize};
use urlencoding;

/// Path of the paper search endpoint; appended to the `GUIDESCOPER_SERVER` base URL.
const GUIDESCOPER_PUBLICATIONS_API: &str = "/api/paper_search/";
/// Path prefix of the paper details endpoint; the document id is appended directly after it.
const GUIDESCOPER_DETAILS_API: &str = "/api/papers/details/";
/// Path and query prefix of the summary endpoint; the search id is appended directly after it.
const GUIDESCOPER_SUMMARY_API: &str = "/api/summary/?search_id=";
/// Path and query prefix of the consensus (yes/no) endpoint; the search id is appended directly after it.
const GUIDESCOPER_CONSENSUS_API: &str = "/api/yes_no/?search_id=";
/// Browser-style user agent string attached to requests sent to the server.
const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36";

/// One page of publication search results together with its paging metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Object)]
pub struct PublicationRecords {
/// Publications contained in this page.
pub records: Vec<Publication>,
/// Result count reported for the query.
pub total: u64,
/// Page number this set of records belongs to.
pub page: u64,
/// Number of records requested per page.
pub page_size: u64,
/// Identifier of the search on the upstream service; it is the key expected by
/// the summary and consensus lookups. `None` when no identifier is available.
pub search_id: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Object)]
Expand All @@ -20,42 +29,61 @@ pub struct Publication {
pub title: String,
pub year: Option<u64>,
pub doc_id: String,
pub article_abstract: Option<String>,
pub doi: Option<String>,
pub provider_url: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Object)]
pub struct PublicationDetail {
pub authors: Vec<String>,
pub citation_count: Option<u64>,
pub struct PublicationsSummary {
pub summary: String,
pub journal: String,
pub title: String,
pub year: Option<u64>,
pub doc_id: String,
pub article_abstract: Option<String>,
pub doi: Option<String>,
pub provider_url: Option<String>,
pub daily_limit_reached: bool,
pub is_disputed: bool,
pub is_incomplete: bool,
pub results_analyzed_count: u64,
}

/// Yes/no consensus computed by the upstream service over the results of one search.
///
/// `Eq` is not derived because the percentage fields are floating point.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Object)]
pub struct ConsensusResult {
/// Number of results the service analyzed to produce this consensus.
pub results_analyzed_count: u64,
/// Share of results answering "YES", as reported by the service
/// (scale, 0-1 vs 0-100, is not visible here; TODO confirm).
pub yes_percent: f64,
/// Share of results answering "NO" (same scale as `yes_percent`).
pub no_percent: f64,
/// Share of results answering "POSSIBLY" (same scale as `yes_percent`).
pub possibly_percent: f64,
/// Ids of the documents whose answer is "YES".
pub yes_doc_ids: Vec<String>,
/// Ids of the documents whose answer is "NO".
pub no_doc_ids: Vec<String>,
/// Ids of the documents whose answer is "POSSIBLY".
pub possibly_doc_ids: Vec<String>,
/// Value of the service's `isIncomplete` flag.
pub is_incomplete: bool,
/// Value of the service's `isDisputed` flag.
pub is_disputed: bool,
}

impl Publication {
pub async fn fetch_publication(id: &str) -> Result<PublicationDetail, anyhow::Error> {
pub async fn fetch_publication(id: &str) -> Result<Publication, anyhow::Error> {
let api_token = match std::env::var("GUIDESCOPER_API_TOKEN") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_API_TOKEN not found"));
}
};

let detail_api = match std::env::var("GUIDESCOPER_DETAIL_API") {
let guidescoper_server = match std::env::var("GUIDESCOPER_SERVER") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_DETAIL_API not found"));
return Err(anyhow::anyhow!("GUIDESCOPER_SERVER not found"));
}
};

let detail_api = format!("{}{}", guidescoper_server, GUIDESCOPER_DETAILS_API);
info!("detail_api: {}", detail_api);

let url = format!("{}{}", detail_api, id);
let cookie = format!("_session={}", api_token);
let client = reqwest::Client::new();
let res = client.get(&url).header("Cookie", cookie).send().await?;
let res = client
.get(&url)
.header("Cookie", cookie)
.header("USER_AGENT", USER_AGENT)
.send()
.await?;

if res.status().is_success() {
let body = res.text().await?;
Expand All @@ -76,7 +104,7 @@ impl Publication {
let doi = json["doi"].as_str().map(|s| s.to_string());
let provider_url = json["provider_url"].as_str().map(|s| s.to_string());

Ok(PublicationDetail {
Ok(Publication {
authors: authors_vec,
citation_count: citation_count,
summary: summary,
Expand Down Expand Up @@ -105,31 +133,48 @@ impl Publication {
}
};

let guidescoper_api = match std::env::var("GUIDESCOPER_API") {
let guidescoper_server = match std::env::var("GUIDESCOPER_SERVER") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_API not found"));
return Err(anyhow::anyhow!("GUIDESCOPER_SERVER not found"));
}
};

let guidescoper_api = format!("{}{}", guidescoper_server, GUIDESCOPER_PUBLICATIONS_API);
info!("guidescoper_api: {}", guidescoper_api);

// We only need to fetch the top 10 results currently.
let total = 10;
let page = 0;
let page_size = 10;
let page = page.unwrap_or(1);
let page_size = page_size.unwrap_or(10);
let mut total = page_size;

let mut records = Vec::new();
let encoded_query_str = urlencoding::encode(query_str);
let url = format!(
"{}?query={}&page={}&size={}",
guidescoper_api, query_str, page, page_size
guidescoper_api, encoded_query_str, page, page_size
);
info!("Query url: {}", url);
let cookie = format!("_session={}", api_token);
let client = reqwest::Client::new();
let res = client.get(&url).header("Cookie", cookie).send().await?;
let res = client
.get(&url)
.header("Cookie", cookie)
.header("USER_AGENT", USER_AGENT)
.send()
.await?;

let mut search_id = String::new();

if res.status().is_success() {
let body = res.text().await?;
let json: serde_json::Value = serde_json::from_str(&body)?;
search_id = json["search_id"].as_str().unwrap().to_string();
total = json["numTopResults"].as_u64().unwrap();
// TODO: do we need to add the adjusted query into the response? It seems not necessary?
// let query_str = json["adjustedQuery"].as_str().unwrap().to_string();
let items = json["papers"].as_array().unwrap();

for item in items {
let authors = item["authors"].as_array().unwrap();
let mut authors_vec = Vec::new();
Expand All @@ -142,6 +187,8 @@ impl Publication {
let title = item["title"].as_str().unwrap().to_string();
let year = item["year"].as_u64();
let doc_id = item["doc_id"].as_str().unwrap().to_string();
let doi_id = item["doi"].as_str().unwrap().to_string();

records.push(Publication {
authors: authors_vec,
citation_count: citation_count,
Expand All @@ -150,15 +197,144 @@ impl Publication {
title: title,
year: year,
doc_id: doc_id,
article_abstract: None,
doi: Some(doi_id),
provider_url: None,
});
}
} else {
let err_msg = format!("Failed to fetch publications: {}", res.text().await?);
return Err(anyhow::anyhow!(err_msg));
}

Ok(PublicationRecords {
records: records,
total: total,
page: page,
page_size: page_size,
search_id: Some(search_id),
})
}

pub async fn fetch_summary(search_id: &str) -> Result<PublicationsSummary, anyhow::Error> {
let api_token = match std::env::var("GUIDESCOPER_API_TOKEN") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_API_TOKEN not found"));
}
};

let guidescoper_server = match std::env::var("GUIDESCOPER_SERVER") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_SERVER not found"));
}
};

let summary_api = format!("{}{}", guidescoper_server, GUIDESCOPER_SUMMARY_API);

let url = format!("{}{}", summary_api, search_id);
let cookie = format!("_session={}", api_token);
let client = reqwest::Client::new();
let res = client
.get(&url)
.header("Cookie", cookie)
.header("USER_AGENT", USER_AGENT)
.send()
.await?;

if res.status().is_success() {
let body = res.text().await?;
let json: serde_json::Value = serde_json::from_str(&body)?;
let summary = json["summary"].as_str().unwrap().to_string();
let daily_limit_reached = json["dailyLimitReached"].as_bool().unwrap();
let is_disputed = json["isDisputed"].as_bool().unwrap();
let is_incomplete = json["isIncomplete"].as_bool().unwrap();
let results_analyzed_count = json["resultsAnalyzedCount"].as_u64().unwrap();

Ok(PublicationsSummary {
summary: summary,
daily_limit_reached: daily_limit_reached,
is_disputed: is_disputed,
is_incomplete: is_incomplete,
results_analyzed_count: results_analyzed_count,
})
} else {
let err_msg = format!("Failed to fetch summary: {}", res.text().await?);
Err(anyhow::anyhow!(err_msg))
}
}

pub async fn fetch_consensus(search_id: &str) -> Result<ConsensusResult, anyhow::Error> {
let api_token = match std::env::var("GUIDESCOPER_API_TOKEN") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_API_TOKEN not found"));
}
};

let guidescoper_server = match std::env::var("GUIDESCOPER_SERVER") {
Ok(token) => token,
Err(_) => {
return Err(anyhow::anyhow!("GUIDESCOPER_SERVER not found"));
}
};

let consensus_api = format!("{}{}", guidescoper_server, GUIDESCOPER_CONSENSUS_API);

let url = format!("{}{}", consensus_api, search_id);
let cookie = format!("_session={}", api_token);
let client = reqwest::Client::new();
let res = client
.get(&url)
.header("Cookie", cookie)
.header("USER_AGENT", USER_AGENT)
.send()
.await?;

if res.status().is_success() {
let body = res.text().await?;
let json: serde_json::Value = serde_json::from_str(&body)?;

let results_analyzed_count = json["resultsAnalyzedCount"].as_u64().unwrap();

let yes_no_answer_percents = &json["yesNoAnswerPercents"];
let yes_percent = yes_no_answer_percents["YES"].as_f64().unwrap();
let no_percent = yes_no_answer_percents["NO"].as_f64().unwrap();
let possibly_percent = yes_no_answer_percents["POSSIBLY"].as_f64().unwrap();

let result_id_to_yes_no_answer = json["resultIdToYesNoAnswer"].as_object().unwrap();

let mut yes_doc_ids_vec = Vec::new();
let mut no_doc_ids_vec = Vec::new();
let mut possibly_doc_ids_vec = Vec::new();

for (doc_id, answer) in result_id_to_yes_no_answer {
match answer.as_str().unwrap() {
"YES" => yes_doc_ids_vec.push(doc_id.clone()),
"NO" => no_doc_ids_vec.push(doc_id.clone()),
"POSSIBLY" => possibly_doc_ids_vec.push(doc_id.clone()),
_ => {}
}
}

let is_incomplete = json["isIncomplete"].as_bool().unwrap();
let is_disputed = json["isDisputed"].as_bool().unwrap();

Ok(ConsensusResult {
results_analyzed_count: results_analyzed_count,
yes_percent: yes_percent,
no_percent: no_percent,
possibly_percent: possibly_percent,
yes_doc_ids: yes_doc_ids_vec,
no_doc_ids: no_doc_ids_vec,
possibly_doc_ids: possibly_doc_ids_vec,
is_incomplete: is_incomplete,
is_disputed: is_disputed,
})
} else {
let err_msg = format!("Failed to fetch consensus: {}", res.text().await?);
Err(anyhow::anyhow!(err_msg))
}
}
}
Loading

0 comments on commit f55062f

Please sign in to comment.