Skip to content

Commit

Permalink
Added p value to Cor, Changes s.t. can run terms q
Browse files Browse the repository at this point in the history
  • Loading branch information
kataikko committed Jul 3, 2023
1 parent 87823d0 commit b525a5c
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 12 deletions.
15 changes: 11 additions & 4 deletions db/scripts/read/read_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def read_experiment():
df=pd.read_csv(file, sep="\t"), file_name=file_name.split("/")[-1], reformat=reformat
)
dataframes[index] = df
elif file_extention == ".csv":
df, index = _reformat_experiment_file(
df=pd.read_csv(file, sep=","), file_name=file_name.split("/")[-1], reformat=reformat
)
dataframes[index] = df
return dataframes

def filter_df_by_context(context: str, df: pd.DataFrame, protein: bool):
Expand Down Expand Up @@ -129,7 +134,7 @@ def post_processing(exp: list[pd.DataFrame]):

# Filter for relevant columns
tmp_or_id = relevant_info.filter(items=["id", "nearest_index"])
or_tg_corr = exp[3].filter(items=["ENSEMBL", "Correlation", "nearest_index"])
or_tg_corr = exp[3].filter(items=["ENSEMBL", "Correlation", "nearest_index", "p"])
or_tg_corr = or_tg_corr.merge(tmp_or_id, left_on="nearest_index", right_on="nearest_index", how="left")
or_tg_corr = or_tg_corr.drop(columns=["nearest_index"])

Expand Down Expand Up @@ -158,7 +163,7 @@ def _reformat_experiment_file(df: pd.DataFrame, file_name: str, reformat: bool):
print_update(update_type="Reformatting", text=file_name, color="orange")

# Filename and function pairs: same index <-> use function for file
names = ["exp_DA", "exp_DE_filter", "TF_target_cor_", "peak_target_cor_", "TF_motif_peak"]
names = ["exp_DA", "exp_DE_filter", "correlation_pval_TF_target", "corr_peak_target", "TF_motif_peak"]
functions = [_reformat_da, _reformat_de, _reformat_tf_tg, _reformat_or_tg, _reformat_motif]
index = names.index(file_name)

Expand All @@ -184,12 +189,14 @@ def _reformat_de(df: pd.DataFrame):


def _reformat_tf_tg(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep the correlation columns of a TF/target table and give them their
    normalized names.

    Only the columns listed in the mapping below are retained (any that are
    absent from ``df`` are silently skipped by ``DataFrame.filter``); every
    other column is dropped.

    Arguments:
    df: raw dataframe read from the experiment file
        (NOTE(review): presumably the "correlation_pval_TF_target" file -
        confirm against the caller)

    Returns:
    A new dataframe with the columns "ENSEMBL_TG", "ENSEMBL_TF",
    "Correlation" and "p", in that order (as far as they were present).
    """
    # Source column -> output column. The key order also fixes the column
    # order of the result, because filter(items=...) follows `items`.
    column_map = {
        "nearest_ENSEMBL_target": "ENSEMBL_TG",
        "ENSEMBL_TF": "ENSEMBL_TF",
        "korrelationskoeffizient": "Correlation",
        "p-Wert": "p",
    }
    # No rename before the filter: renaming legacy columns to "ENSEMBL" /
    # "Correlation" first would only produce columns that the filter drops
    # again, and could create duplicate labels that break the filter.
    return df.filter(items=list(column_map)).rename(columns=column_map)


def _reformat_or_tg(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep the correlation columns of a peak/target table and give them their
    normalized names.

    Only the columns listed below are retained (any that are absent from
    ``df`` are silently skipped by ``DataFrame.filter``); every other column
    is dropped. "nearest_index" is kept under its original name.

    Arguments:
    df: raw dataframe read from the experiment file
        (NOTE(review): presumably the "corr_peak_target" file - confirm
        against the caller)

    Returns:
    A new dataframe with the columns "ENSEMBL", "nearest_index",
    "Correlation" and "p", in that order (as far as they were present).
    """
    # Source column -> output column. The key order also fixes the column
    # order of the result, because filter(items=...) follows `items`.
    column_map = {
        "nearest_ENSEMBL_x": "ENSEMBL",
        "nearest_index": "nearest_index",
        "correlation_coefficient": "Correlation",
        "p-value": "p",
    }
    # No rename before the filter: renaming legacy columns to "ENSEMBL" /
    # "Correlation" first would only produce columns that the filter drops
    # again, and could create duplicate labels that break the filter.
    return df.filter(items=list(column_map)).rename(columns=column_map)


Expand Down
7 changes: 4 additions & 3 deletions db/scripts/upload/upload_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,11 @@ def create_functional(
"""
print_update(update_type="Node Creation", text="Functional Term", color="blue")

# TODO: Change "Terms" to "FT"
save_df_to_csv(file_name="ft_nodes.csv", df=ft_nodes, override_prod=True)
create_nodes(
source_file="ft_nodes.csv",
type_="FT",
type_="Terms",
id="Term",
values=["Term", "Name", "Category"],
reformat_values=[],
Expand All @@ -152,7 +153,7 @@ def create_functional(
source_file="ft_overlap.csv",
type_="OVERLAP",
between=(("Term", "source"), ("Term", "target")),
node_types=("FT", "FT"),
node_types=("Terms", "Terms"),
values=["Score"],
reformat_values=[("Score", "toFloat")],
merge=False,
Expand All @@ -166,7 +167,7 @@ def create_functional(
source_file="ft_gene.csv",
type_="LINK",
between=(("ENSEMBL", "ENSEMBL"), ("Term", "Term")),
node_types=("TG", "FT"),
node_types=("TG", "Terms"),
values=[],
reformat_values=[],
merge=False,
Expand Down
10 changes: 5 additions & 5 deletions db/scripts/upload/upload_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,10 @@ def create_correlation(
create_relationship(
source_file="tf_tg_corr.csv",
type_="CORRELATION",
between=(("SYMBOL", "TF"), ("ENSEMBL", "ENSEMBL")),
between=(("ENSEMBL", "ENSEMBL_TF"), ("ENSEMBL", "ENSEMBL_TG")),
node_types=("TF", "TG"),
values=["Correlation", "Source"],
reformat_values=[("Correlation", "toFloat"), ("Source", "toInteger")],
values=["Correlation", "p", "Source"],
reformat_values=[("Correlation", "toFloat"), ("Source", "toInteger"), ("p", "toFloat")],
merge=False,
driver=driver,
)
Expand All @@ -225,8 +225,8 @@ def create_correlation(
type_="CORRELATION",
between=(("id", "id"), ("ENSEMBL", "ENSEMBL")),
node_types=("OR", "TG"),
values=["Correlation", "Source"],
reformat_values=[("Correlation", "toFloat"), ("Source", "toInteger")],
values=["Correlation", "p", "Source"],
reformat_values=[("Correlation", "toFloat"), ("Source", "toInteger"), ("p", "toFloat")],
merge=False,
driver=driver,
)
Expand Down
Loading

0 comments on commit b525a5c

Please sign in to comment.