hiyata committed on
Commit
021471b
·
verified ·
1 Parent(s): bcf9134

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -100
app.py CHANGED
@@ -777,121 +777,186 @@ def create_simple_genome_diagram(gene_results: List[Dict[str, Any]], genome_leng
777
 
778
  return img
779
 
780
def analyze_gene_features(sequence_file: str,
                          features_file: str,
                          fasta_text: str = "",
                          features_text: str = "") -> Tuple[str, Optional[str], Optional[Image.Image]]:
    """Summarize per-gene SHAP statistics for an annotated sequence.

    The whole sequence is first passed to ``analyze_sequence``; the
    ``"shap_means"`` entry of its fourth return value is then sliced once per
    gene using the coordinates returned by ``parse_location``.

    Args:
        sequence_file: Path forwarded to ``analyze_sequence``.
        features_file: Path to the gene-features file; read only when
            ``features_text`` is blank.
        fasta_text: Raw sequence text forwarded to ``analyze_sequence``.
        features_text: Raw gene-features text; takes precedence over
            ``features_file`` when non-blank.

    Returns:
        A ``(results_text, csv_path, diagram)`` tuple. On failure the first
        element is an error message and the other two are ``None``.
        ``csv_path`` is also ``None`` when the CSV could not be written.
    """
    # Function-scope import so this block does not depend on the file header.
    import csv

    # First analyze the whole sequence.
    sequence_results = analyze_sequence(sequence_file, top_kmers=10, fasta_text=fasta_text)
    if isinstance(sequence_results[0], str) and "Error" in sequence_results[0]:
        return f"Error in sequence analysis: {sequence_results[0]}", None, None

    shap_means = sequence_results[3]["shap_means"]

    # Parse gene features, preferring pasted text over the uploaded file.
    try:
        if features_text.strip():
            genes = parse_gene_features(features_text)
        else:
            with open(features_file, 'r') as f:
                genes = parse_gene_features(f.read())
    except Exception as e:
        return f"Error reading features file: {str(e)}", None, None

    # Analyze each gene; a failure on one gene must not abort the others.
    gene_results = []
    for gene in genes:
        # Captured up front so the error handler never has to re-index the
        # (possibly malformed) gene record and raise a second time.
        gene_name = 'Unknown'
        try:
            metadata = gene['metadata']
            gene_name = metadata.get('gene', 'Unknown')

            location = metadata.get('location', '')
            if not location:
                continue

            start, end = parse_location(location)
            if start is None or end is None:
                continue

            # SHAP values covering this gene's region.
            stats = compute_gene_statistics(shap_means[start:end])

            gene_results.append({
                'gene_name': gene_name,
                'location': location,
                'start': start,
                'end': end,
                'locus_tag': metadata.get('locus_tag', ''),
                'avg_shap': stats['avg_shap'],
                'median_shap': stats['median_shap'],
                'std_shap': stats['std_shap'],
                'max_shap': stats['max_shap'],
                'min_shap': stats['min_shap'],
                'pos_fraction': stats['pos_fraction'],
                'classification': 'Human' if stats['avg_shap'] > 0 else 'Non-human',
                'confidence': abs(stats['avg_shap']),
            })
        except Exception as e:
            print(f"Error processing gene {gene_name}: {str(e)}")
            continue

    if not gene_results:
        return "No valid genes could be processed", None, None

    # Most distinctive genes first (largest |average SHAP|).
    sorted_genes = sorted(gene_results, key=lambda x: abs(x['avg_shap']), reverse=True)

    human_count = sum(1 for g in gene_results if g['classification'] == 'Human')
    non_human_count = sum(1 for g in gene_results if g['classification'] == 'Non-human')

    # Build the text report.
    report_parts = [
        "Gene Analysis Results:\n\n",
        f"Total genes analyzed: {len(gene_results)}\n",
        f"Human-like genes: {human_count}\n",
        f"Non-human-like genes: {non_human_count}\n\n",
        "Top 10 most distinctive genes:\n",
    ]
    for gene in sorted_genes[:10]:
        report_parts.append(
            f"Gene: {gene['gene_name']}\n"
            f"Location: {gene['location']}\n"
            f"Classification: {gene['classification']} "
            f"(confidence: {gene['confidence']:.4f})\n"
            f"Average SHAP: {gene['avg_shap']:.4f}\n\n"
        )
    results_text = "".join(report_parts)

    # Build CSV rows. Numeric fields are pre-formatted to four decimals.
    csv_header = [
        "gene_name", "location", "avg_shap", "median_shap", "std_shap",
        "max_shap", "min_shap", "pos_fraction", "classification",
        "confidence", "locus_tag",
    ]
    csv_rows = [
        [
            gene['gene_name'],
            gene['location'],
            f"{gene['avg_shap']:.4f}",
            f"{gene['median_shap']:.4f}",
            f"{gene['std_shap']:.4f}",
            f"{gene['max_shap']:.4f}",
            f"{gene['min_shap']:.4f}",
            f"{gene['pos_fraction']:.4f}",
            gene['classification'],
            f"{gene['confidence']:.4f}",
            gene['locus_tag'],
        ]
        for gene in gene_results
    ]

    # Save the CSV to a temp file. csv.writer quotes any field containing a
    # comma or quote, so such values no longer shift the columns;
    # lineterminator keeps the plain "\n" row endings.
    try:
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, f"gene_analysis_{os.urandom(4).hex()}.csv")

        with open(temp_path, 'w', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(csv_header)
            writer.writerows(csv_rows)
    except Exception as e:
        # Best effort: the text report and diagram are still returned.
        print(f"Error saving CSV: {str(e)}")
        temp_path = None

    # Create the visualization, falling back to an image that shows the error.
    try:
        diagram_img = create_simple_genome_diagram(gene_results, len(shap_means))
    except Exception as e:
        print(f"Error creating visualization: {str(e)}")
        diagram_img = Image.new('RGB', (800, 100), color='white')
        draw = ImageDraw.Draw(diagram_img)
        draw.text((10, 40), f"Error creating visualization: {str(e)}", fill='black')

    return results_text, temp_path, diagram_img
895
 
896
  ###############################################################################
897
  # 12. DOWNLOAD FUNCTIONS
 
777
 
778
  return img
779
 
780
def _diagram_message_image(message: str):
    """Return a small white placeholder image with *message* drawn on it."""
    img = Image.new('RGB', (800, 100), color='white')
    draw = ImageDraw.Draw(img)
    draw.text((10, 40), message, fill='black')
    return img


def _shap_to_color(avg_shap: float) -> tuple:
    """Map an average SHAP value to an RGB fill: red if positive, else blue."""
    # Intensity saturates at 255, i.e. once |avg_shap| * 500 reaches 255.
    intensity = min(255, int(abs(avg_shap * 500)))
    fade = 255 - intensity
    if avg_shap > 0:
        return (255, fade, fade)  # Red
    return (fade, fade, 255)  # Blue


def _draw_shap_legend(draw, legend_x: int, legend_y: int, font) -> None:
    """Draw the four-entry SHAP colour legend anchored at (legend_x, legend_y)."""
    box_width = 20
    box_height = 20
    spacing = 15

    draw.text((legend_x, legend_y - 60), "SHAP Values:", fill='black', font=font)

    # (x offset, y offset, fill colour, caption) for each legend entry.
    entries = [
        (0, -45, (255, 0, 0), "Strong human-like signal"),
        (0, -20, (255, 200, 200), "Weak human-like signal"),
        (250, -45, (200, 200, 255), "Weak non-human-like signal"),
        (250, -20, (0, 0, 255), "Strong non-human-like signal"),
    ]
    for dx, dy, fill, caption in entries:
        left = legend_x + dx
        top = legend_y + dy
        draw.rectangle(
            [(left, top), (left + box_width, top + box_height)],
            fill=fill, outline='black',
        )
        draw.text((left + box_width + spacing, top), caption, fill='black', font=font)


def create_simple_genome_diagram(gene_results: List[Dict[str, Any]], genome_length: int) -> Image.Image:
    """Create a simple linear genome diagram using PIL.

    Each gene is drawn as a box on a horizontal genome line, coloured by its
    ``avg_shap`` value (red for positive, blue otherwise), with its name
    placed alternately above and below the line.

    Args:
        gene_results: Gene records. Each needs ``start`` and ``end`` values
            convertible to numbers; ``gene_name`` and ``avg_shap`` are
            optional and default to ``'Unknown'`` and ``0``.
        genome_length: Positive integer length of the genome, used to scale
            coordinates to pixels.

    Returns:
        The rendered 1500x600 image, or an 800x100 placeholder image carrying
        an error message when the inputs are invalid or no gene has usable
        coordinates.
    """
    # Validate inputs and ensure genome_length is an integer.
    if not gene_results or not isinstance(genome_length, int) or genome_length <= 0:
        return _diagram_message_image("Error: Invalid input data")

    # Pre-process gene coordinates and handle type conversion.
    processed_genes = []
    for gene in gene_results:
        try:
            start = int(float(gene['start']))
            end = int(float(gene['end']))

            if start < 0 or end > genome_length or start >= end:
                print(f"Warning: Skipping gene {gene.get('gene_name', 'unknown')} due to invalid coordinates: {start}-{end}")
                continue

            processed_gene = gene.copy()
            processed_gene['start'] = start
            processed_gene['end'] = end
            processed_genes.append(processed_gene)
        except (KeyError, ValueError, TypeError, OverflowError) as e:
            # KeyError: missing start/end; OverflowError: infinite coordinate.
            print(f"Warning: Error processing gene coordinates: {str(e)}")
            continue

    if not processed_genes:
        return _diagram_message_image("Error: No valid genes to display")

    # Image dimensions
    width = 1500
    height = 600
    margin = 50
    track_height = 40

    # Create image with white background
    img = Image.new('RGB', (width, height), 'white')
    draw = ImageDraw.Draw(img)

    # Try to load fonts; fall back to PIL's default (font=None) if the files
    # or FreeType support are unavailable.
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 12)
        title_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
    except (OSError, ImportError):
        font = None
        title_font = None

    # Draw title
    draw.text((margin, margin // 2), "Genome SHAP Analysis",
              fill='black', font=title_font or font)

    # Draw genome line
    line_y = height // 2
    draw.line([(margin, line_y), (width - margin, line_y)], fill='black', width=2)

    # Pixels per base
    scale = float(width - 2 * margin) / float(genome_length)

    # Draw roughly ten evenly spaced scale markers.
    for i in range(0, genome_length + 1, max(1, genome_length // 10)):
        x = int(margin + i * scale)
        draw.line([(x, line_y - 5), (x, line_y + 5)], fill='black', width=1)
        draw.text((x - 20, line_y + 10), f"{i:,}", fill='black', font=font)

    def abs_shap(gene):
        # A non-numeric avg_shap must not abort the whole diagram; such a
        # gene sorts first and is reported and skipped in the draw loop.
        try:
            return abs(float(gene.get('avg_shap', 0)))
        except (TypeError, ValueError):
            return 0.0

    # Ascending order, so the genes with the largest |SHAP| are drawn last
    # and end up on top of any overlapping boxes.
    sorted_genes = sorted(processed_genes, key=abs_shap)

    # Draw genes
    for position, gene in enumerate(sorted_genes):
        try:
            start_x = int(margin + gene['start'] * scale)
            end_x = int(margin + gene['end'] * scale)

            # Ensure minimum visible width
            if end_x - start_x < 2:
                end_x = start_x + 2

            color = _shap_to_color(float(gene.get('avg_shap', 0)))

            # Draw gene box
            box_coords = [
                start_x,
                int(line_y - track_height // 2),
                end_x,
                int(line_y + track_height // 2),
            ]
            draw.rectangle(box_coords, fill=color, outline='black')

            # Measure gene name
            label = str(gene.get('gene_name', 'Unknown'))
            if font:
                label_bbox = draw.textbbox((0, 0), label, font=font)
                # Coerced to int because it is used as an image size below.
                label_width = int(label_bbox[2] - label_bbox[0])
            else:
                label_width = len(label) * 6  # Rough estimate if no font

            # Alternate labels above and below the line by draw position.
            # enumerate() is used rather than list.index(), which returns the
            # first match for equal records.
            if position % 2 == 0:
                text_y = line_y - track_height - 15
            else:
                text_y = line_y + track_height + 5

            gene_width = end_x - start_x
            if gene_width > label_width:
                # Horizontal label centred on the box
                text_x = int(start_x + (gene_width - label_width) // 2)
                draw.text((text_x, text_y), label, fill='black', font=font)
            elif gene_width > 20:
                # Label is wider than the box: render it rotated 90 degrees
                # on a transparent tile and paste it using its alpha as mask.
                txt_img = Image.new('RGBA', (label_width, 20), (255, 255, 255, 0))
                txt_draw = ImageDraw.Draw(txt_img)
                txt_draw.text((0, 0), label, font=font, fill='black')
                txt_img = txt_img.rotate(90, expand=True)
                img.paste(txt_img, (int(start_x), text_y), txt_img)
            # Boxes of 20 px or narrower get no label.

        except Exception as e:
            print(f"Warning: Error drawing gene {gene.get('gene_name', 'unknown')}: {str(e)}")
            continue

    # Draw legend
    _draw_shap_legend(draw, margin, height - margin, font)

    return img
960
 
961
  ###############################################################################
962
  # 12. DOWNLOAD FUNCTIONS