Arts-of-coding committed on
Commit
ee5dfd4
·
verified ·
1 Parent(s): c5b3f53

Update dash_plotly_QC_scRNA.py

Browse files

Parameterizing the dotplot condition

Files changed (1) hide show
  1. dash_plotly_QC_scRNA.py +8 -8
dash_plotly_QC_scRNA.py CHANGED
@@ -334,22 +334,22 @@ def update_graph_and_pie_chart(batch_chosen, s_chosen, g2m_chosen, condition1_ch
334
  # Melt wide format DataFrame into long format
335
  # Specify batch column as string type and gene columns as float type
336
  list_conds = condition3_chosen
337
- list_conds += ["batch"]
338
  dff_pre = dff.select(list_conds)
339
 
340
  # Melt wide format DataFrame into long format
341
- dff_long = dff_pre.melt(id_vars="batch", variable_name="Gene", value_name="Mean expression")
342
 
343
  # Calculate the mean expression levels for each gene in each region
344
- expression_means = dff_long.lazy().group_by(["batch", "Gene"]).agg(pl.mean("Mean expression")).collect()
345
 
346
  # Calculate the percentage total expressed
347
- dff_long1 = dff_pre.melt(id_vars="batch", variable_name="Gene")#.group_by(pl.all()).agg(pl.len())
348
  count = 1
349
  dff_long2 = dff_long1.with_columns(pl.lit(count).alias("len"))
350
  dff_long3 = dff_long2.filter(pl.col("value") > 0).group_by(["batch", "Gene"]).agg(pl.sum("len").alias("len"))
351
- dff_long4 = dff_long2.group_by(["batch", "Gene"]).agg(pl.sum("len").alias("total"))
352
- dff_5 = dff_long4.join(dff_long3, on=["batch","Gene"], how="outer")
353
  result = dff_5.select([
354
  pl.when((pl.col('len').is_not_null()) & (pl.col('total').is_not_null()))
355
  .then(pl.col('len') / pl.col('total')*100)
@@ -357,11 +357,11 @@ def update_graph_and_pie_chart(batch_chosen, s_chosen, g2m_chosen, condition1_ch
357
  ])
358
  result = result.with_columns(pl.col("%").fill_null(100))
359
  dff_5[["percentage"]] = result[["%"]]
360
- dff_5 = dff_5.select(pl.col("batch","Gene","percentage"))
361
 
362
  # Final part to join the percentage expressed and mean expression levels
363
  # TO DO
364
- expression_means = expression_means.join(dff_5, on=["batch","Gene"], how="inner")
365
 
366
  #expression_means = expression_means.select(["batch", "Gene", "Expression"] + condition3_chosen)
367
 
 
334
  # Melt wide format DataFrame into long format
335
  # Specify batch column as string type and gene columns as float type
336
  list_conds = condition3_chosen
337
+ list_conds += [condition1_chosen]
338
  dff_pre = dff.select(list_conds)
339
 
340
  # Melt wide format DataFrame into long format
341
+ dff_long = dff_pre.melt(id_vars=condition1_chosen, variable_name="Gene", value_name="Mean expression")
342
 
343
  # Calculate the mean expression levels for each gene in each region
344
+ expression_means = dff_long.lazy().group_by([condition1_chosen, "Gene"]).agg(pl.mean("Mean expression")).collect()
345
 
346
  # Calculate the percentage total expressed
347
+ dff_long1 = dff_pre.melt(id_vars=condition1_chosen, variable_name="Gene")#.group_by(pl.all()).agg(pl.len())
348
  count = 1
349
  dff_long2 = dff_long1.with_columns(pl.lit(count).alias("len"))
350
  dff_long3 = dff_long2.filter(pl.col("value") > 0).group_by(["batch", "Gene"]).agg(pl.sum("len").alias("len"))
351
+ dff_long4 = dff_long2.group_by([condition1_chosen, "Gene"]).agg(pl.sum("len").alias("total"))
352
+ dff_5 = dff_long4.join(dff_long3, on=[condition1_chosen,"Gene"], how="outer")
353
  result = dff_5.select([
354
  pl.when((pl.col('len').is_not_null()) & (pl.col('total').is_not_null()))
355
  .then(pl.col('len') / pl.col('total')*100)
 
357
  ])
358
  result = result.with_columns(pl.col("%").fill_null(100))
359
  dff_5[["percentage"]] = result[["%"]]
360
+ dff_5 = dff_5.select(pl.col(condition1_chosen,"Gene","percentage"))
361
 
362
  # Final part to join the percentage expressed and mean expression levels
363
  # TO DO
364
+ expression_means = expression_means.join(dff_5, on=[condition1_chosen,"Gene"], how="inner")
365
 
366
  #expression_means = expression_means.select(["batch", "Gene", "Expression"] + condition3_chosen)
367