提交 c8e38fb4，作者：宋宏伟

update code

上级 cbc75fe3
......@@ -74,7 +74,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc,
null as rx_end_datetime,
a.dose as dosage_qty,
a.dose_unit_name as dosage_unit,
a.frequency_code,
a.frequency_code as frequency,
a.frequency_name,
a.qty as quantity,
null as quantity_uom,
......@@ -156,7 +156,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc,
a.end_time as rx_end_datetime,
a.dose as dosage_qty,
a.dose_unit_name as dosage_unit,
a.frequency_code,
a.frequency_code as frequency,
a.frequency_name,
a.qty as quantity,
null as quantity_uom,
......
......@@ -50,7 +50,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc
select DISTINCT * from iceberg.cdm.lab_report_result
where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180')
or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血')
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120'
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120')
or (test_item_name ~* 'HbA1c|糖化血红蛋白')
)
select DISTINCT
......@@ -125,7 +125,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc
select DISTINCT * from iceberg.cdm.lab_report_result
where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180')
or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血')
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120'
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120')
or (test_item_name ~* 'HbA1c|糖化血红蛋白')
)
select DISTINCT
......
......@@ -4,7 +4,7 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8')
library(here)
source(here("codes","preprocess","R","target_group.R"))
source(here("target_group.R"))
# 基线 -------------------------------------------------------------------------
## 人口学 ####
......
......@@ -4,8 +4,8 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8')
library(here)
source(here("codes","preprocess","R","common.R"))
source(here("codes","preprocess","R","dataset_for_analysis.R"))
source(here("common.R"))
source(here("dataset_for_analysis.R"))
# load(here("data", "clean", "dataset_for_analysis.RData"))
......
......@@ -4,9 +4,9 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8')
library(here)
source(here("codes","preprocess","R","common.R"))
source(here("common.R"))
source(here("codes","preprocess","R","wrangling.R"))
source(here("wrangling.R"))
patient_clean <- arrow::read_parquet("../../../data/preprocessed/patient_clean.parquet")
visit_clean <- arrow::read_parquet("../../../data/preprocessed/visit_clean.parquet")
......
......@@ -20,7 +20,7 @@ library(zoo)
# library(eoffice)
library(tableone)
library(plotly)
source(here("codes","preprocess","R","common.R"))
source(here("common.R"))
# 导入数据
......@@ -108,7 +108,7 @@ rx_sep <- rx_raw %>%
mutate(visit_id = as.character(visit_id)) %>%
drop_na(std_rx_desc) %>%
distinct() %>%
left_join(select(visit_clean, visit_id, patient_id, patient_type, admission_datetime, specialty = raw_specialty, provider_id)) %>%
left_join(select(visit_clean, visit_id, patient_id, patient_type, admission_datetime, specialty, provider_id)) %>%
# mutate(order_datetime = case_when(is.na(order_datetime) ~ admission_datetime,
# TRUE ~ order_datetime)) %>%
mutate(order_datetime = case_when(!is.na(rx_start_datetime) ~ rx_start_datetime,
......
#################################### 数据提取配置 ##############################################
# 机构id 传入None 则提全部机构数据 需要传入列表
# 机构id 传入None 则提全部机构数据 需要传入列表
[pv_ids]
pv_ids = ['320106426090445', '320104466002630', '320106466000838']
# pv_ids = [None]
# pv_ids = ['320106426090445', '320104466002630', '320106466000838']
# pv_ids = ['320106426090445']
pv_ids = [None]
# 数据提取范围：需要传入列表，列表中每一项为 [开始时间, 结束时间]
[date_ranges]
date_ranges = [["2021-01-01", "2021-07-01"]]
# date_ranges = [["2021-01-01", "2021-07-01"],["2021-07-01", "2022-01-01"],["2022-01-01", "2022-07-01"],["2022-07-01", "2023-01-01"],["2023-01-01", "2023-07-01"],["2023-07-01", "2024-01-01"],["2024-01-01", "2024-07-01"],["2024-07-01", "2024-10-01"]]
# date_ranges = [["2021-01-01", "2021-07-01"]]
date_ranges = [["2021-01-01", "2021-07-01"],["2021-07-01", "2022-01-01"],["2022-01-01", "2022-07-01"],["2022-07-01", "2023-01-01"],["2023-01-01", "2023-07-01"],["2023-07-01", "2024-01-01"],["2024-01-01", "2024-07-01"],["2024-07-01", "2024-11-01"]]
# 表格提取
[tables]
tables = ['patient', 'visit', 'prescribing', 'diagnosis', 'lab']
\ No newline at end of file
tables = ['patient', 'visit', 'prescribing', 'diagnosis', 'lab']
# tables = ['lab']
\ No newline at end of file
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论