{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from presidio_analyzer import AnalyzerEngine, PatternRecognizer\n", "from presidio_anonymizer import AnonymizerEngine\n", "from presidio_anonymizer.entities import OperatorConfig\n", "import json\n", "from presidio_analyzer import RecognizerRegistry\n", "from presidio_analyzer import Pattern\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# text = \"Karan is working in Infosys. He is from Mumbai. His appointment for renewing passport is booked on March 12 and his old Passport Number is P2096457. Also, he want to link his Aadhaar Number is 567845678987 with his Pan Number is BNZAA2318A. and has 35$\"\n", "text = \"Karan is working in Infosys.He has email id asv@gmail.com\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "registry = RecognizerRegistry()\n", "registry.load_predefined_recognizers()\n", "analyzer = AnalyzerEngine(registry=registry)\n", "anonymize=AnonymizerEngine()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = analyzer.analyze(text=text, language=\"en\", entities=[\"PERSON\",\"EMAIL_ADDRESS\"])\n", "print(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "l=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer', 'Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "l1=list(set(l))\n", "l1==l\n", "d={}\n", "for i in l:\n", " if i in d:\n", " d[i]+=1\n", " else:\n", " d[i]=1\n", "print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x1=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer', 'Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "x2=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer', 'Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "x3=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer', 'Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "x4=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer', 'Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "\n", "\n", "s=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'SpacyRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "\n", "ss=['Aadhaar_Number', 'PAN_Number', 'UsBankRecognizer', 'UsLicenseRecognizer', 'UsItinRecognizer', 'UsPassportRecognizer', 'UsSsnRecognizer', 'NhsRecognizer', 'SgFinRecognizer', 'AuAbnRecognizer', 'AuAcnRecognizer', 'AuTfnRecognizer', 'AuMedicareRecognizer', 'InPanRecognizer', 'CreditCardRecognizer', 'CryptoRecognizer', 'DateRecognizer', 'EmailRecognizer', 'IbanRecognizer', 'IpRecognizer', 'MedicalLicenseRecognizer', 'ClientListRecognizer', 'PhoneRecognizer', 'UrlRecognizer']\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "d={}\n", "for i in ss:\n", " if i in d:\n", " \n", " d[i]+=1\n", " else:\n", " d[i]=1\n", "print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "patternObj = Pattern(name=\"Currency\",\n", " regex='[1-9]*\\$',\n", " score=0.8)\n", "patternRecog = PatternRecognizer(supported_entity=\"CURRENCY\",\n", " patterns=[patternObj])\n", "registry.add_recognizer(patternRecog)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = analyzer.analyze(text=text, language=\"en\",allow_list=[\"Karan\"])\n", "print(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from faker import Faker\n", "fake=Faker()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fake.name()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "anonymized_results = anonymize.anonymize(\n", " text=text,\n", " analyzer_results=result, \n", " operators= {\"DEFAULT\": OperatorConfig(\"replace\", {\"new_value\": fake.name()})}\n", ")\n", "\n", "print(f\"text: {anonymized_results.text}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "from privacy.dao.privacy.DatabaseConnection import DB\n", "DB.connect()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import random\n", "import string\n", "\n", "def generate_value_with_ranges(pattern):\n", " value = \"\"\n", " for char in pattern:\n", " if char in \"[a-z]\":\n", " value += random.choice(string.ascii_lowercase)\n", " elif char in \"[A-Z]\":\n", " value += random.choice(string.ascii_uppercase)\n", " else:\n", " value += char\n", " return value\n", "\n", "pattern = r\"[A-Z][a-z]{2}\" # Example: AaX\n", "generated_value = generate_value_with_ranges(pattern)\n", "print(generated_value) # Output: something like Pbq\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def valueGen(pattern):\n", " if(\"|\" in pattern):\n", " pattern=pattern.split(\"|\")\n", " pattern=pattern[random.randrange(0,len(pattern))]\n", " res=\"\" \n", " while True:\n", " \n", " r=re.search(r\"(\\[[A-Za-z0-9\\-\\,]*\\](\\{[0-9\\,]*\\})?)|(\\\\s)|((\\\\d)(\\{[0-9\\,]*\\})?)|((\\\\w)(\\{[0-9\\,]*\\})?)|(\\w*)\",pattern)\n", " # print(r.group(2)) \n", " # print(pattern)\n", " # print(r.span())\n", " print(\"gp\",r.group(),r.group(4))\n", " if(r.group(1)):\n", " ptr=r.group()\n", "\n", " # print(ptr)\n", " t=re.match(r\"\\[[A-Za-z0-9\\-\\,]*\\]\",ptr).group()[1:-1].split(\",\")\n", " s=string.ascii_lowercase+string.ascii_uppercase+string.digits+string.punctuation\n", " # print(t)\n", " s1=\"\"\n", " for x in t:\n", " # print(x)\n", " l=x.split('-')\n", " # print(l)\n", " s1+=s[s.index(l[0]):s.index(l[1])+1]\n", " count=re.search(r\"\\{[0-9\\,]*\\}\",ptr)\n", " k=1\n", " if count: \n", " k=int(random.choice(count.group()[1:-1].split(',')))\n", " print(\"==\",k)\n", " # if k==0:\n", " # k=1\n", " v=\"\".join(random.choices(s1,k=k))\n", " # print(v)\n", " pattern=pattern[r.span()[1]:]\n", " res+=v\n", " # print(pattern)\n", " print(v)\n", " \n", " if(r.group()=='\\s'):\n", " print(\" a\")\n", " pattern=pattern[r.span()[1]:]\n", " res+=\" \"\n", " \n", " if(r.group()=='\\d'):\n", " print(\"b\")\n", " pattern=pattern[r.span()[1]:]\n", " res+=random.choice(string.digits)\n", " if(r.group()==r.group(4)):\n", " pattern=pattern[r.span()[1]:]\n", " res+=random.choice(string.ascii_lowercase)\n", " if(r.group(5)):\n", " print(\"tt======\",r.group())\n", " pattern=pattern[r.span()[1]:]\n", " res+=r.group()\n", " print(pattern)\n", " \n", " if(re.search(r\"\\[[A-Za-z0-9\\-\\,]*\\]\\{[0-9\\,]*\\}\",pattern)==None):\n", " break\n", " print(pattern)\n", " print(\"======\",res)\n", " print(\"===\",res) \n", " \n", " \n", " \n", "p=\"[A-Z]{2}\\s[0-9]{2}\\s[A-Z]{1,2}\\s[0-9]{4}\"\n", "# p=\"[A-Za-z]{6}\\d{2}\"\n", "valueGen(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# p=\"\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\b\"\n", "p=\"[A-Z,a-z]{2}\\s[0-9]{2}\\s[A-Z,a-z]{1,2}\\s[0-9]{4}\"\n", "valueGen(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from xeger import Xeger\n", "x = Xeger()\n", "t=x.xeger(p)\n", "print(t)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import requests\n", "payload={\"portfolio\":\"RAI\",\"account\":\"TEST\"}\n", " \n", "# payload={\"accName\":\"Infosys\",\"subAccName\":\"Impact\"}\n", "# api_url = os.getenv(\"PRIVADMIN_API\")\n", "\n", "# print(api_url)\n", "aurl=\"http://10.66.155.13:30016/api/v1/rai/admin/PrivacyDataList\"\n", "# log.debug(aurl)\n", "# log.debug(str(type(aurl)))\n", "# log.debug(\"Calling Admin Api ======\")\n", "# log.debug(\"api payload:\"+str(payload))\n", "# print(payload)\n", "response1 = requests.post(\n", " url=aurl\n", " , headers={'Content-Type': \"application/json\",\n", " 'accept': \"application/json\"}\n", " , json=payload\n", " )\n", "print(response1.json()[\"datalist\"])\n", "# response1=httpx.post(aurl, json=payload)\n", "# response1=httpx.post('http://10.66.155.13:30016/api/v1/rai/admin/PrivacyDataList', json=payload)\n", "# log.debug(\"response=\"+str(response1))\n", "# log.debug(\"response11=\"+str(response1.text))\n", "# response1=PrivacyData.getDataList(payload)\n", "entityType,datalist,preEntity,records,encryptionList,scoreTreshold=response1.json()[\"datalist\"]\n", "entityType,datalist,preEntity,records,encryptionList,scoreTreshold" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import logging\n", "from typing import Optional, List, Tuple, Set\n", "import spacy\n", "from spacy.matcher import PhraseMatcher\n", "from presidio_analyzer.predefined_recognizers.spacy_recognizer import SpacyRecognizer\n", "# from presidio_analyzer.predefined_recognizers import SpacyRecognizer\n", "from presidio_analyzer import RecognizerResult\n", "import copy\n", "\n", "\n", "\n", "\n", "from presidio_analyzer import (\n", " RecognizerResult,\n", " LocalRecognizer,\n", " AnalysisExplanation,\n", ")\n", "\n", "logger = logging.getLogger(\"presidio_analyzer\")\n", "# terms = [\"1&1 Telecommunication SE\",\"1010 data services LLC\",\"AMA\",\n", "# \"A O Smith Corporations\",\"ABBMST\",\"Addidas India\",\"CITI\",\"Cisco Systems\",\"ERICSSON\",\"Gati Ltd\",\"IBM\",\n", "# \"Infosys Ltd\",\"Intel Corporation\",\"Johnson\",\"JTC Corporation\",\"NSC Global\",\"SUZUKI MOTOR CORPORATION\",\n", "# \"Synopsys Ltd\",\"TIBCOO\", \"T-Mobile UK\",\"Toyota Systems Corporation\",\"TSB Bank\",\"UBS Bank\"\n", "# ,\"United Health Corporation\",\"Vodafone quickcom\",\"Voltas\",\"VOLVO CARS\",\"WIPRO LIMITED\",\n", "# \"Walmart\", \"CVS Health\", \"Walgreens Boots Alliance\"]\n", "# terms=[]\n", "class DataList:\n", " # def __init__(self,val) -> None:\n", " # self.Entiity=val\n", " entity=[]\n", " def setData(values):\n", " terms.extend(values)\n", " # print(terms)\n", " def resetData():\n", " terms.clear()\n", " # def setEntity(val):\n", " # DataList.Entity=val\n", " # ClientListRecognizer(supported_entities=val)\n", " # def getE():\n", " # return self.Entiity\n", " \n", "\n", "nlp = spacy.load(\"en_core_web_lg\")\n", " \n", "\n", "\n", "\n", "\n", "class TESTR(SpacyRecognizer): \n", " \"\"\"\n", " Recognize PII entities using a spaCy NLP model.\n", "\n", " Since the spaCy pipeline is ran by the AnalyzerEngine,\n", " this recognizer only extracts the entities from the NlpArtifacts\n", " and replaces their types to align with Presidio's.\n", "\n", " :param supported_language: Language this recognizer supports\n", " :param supported_entities: The entities this recognizer can detect\n", " :param ner_strength: Default confidence for NER prediction\n", " :param check_label_groups: Tuple containing Presidio entity names\n", " and spaCy entity names, for verifying that the right entity\n", " is translated into a Presidio entity.\n", " \"\"\"\n", "\n", " # ENTITIES = DataList.entity\n", " # ENTITIES =[]\n", " # terms=[]\n", "\n", " DEFAULT_EXPLANATION = \"Identified as {} by Spacy's Named Entity Recognition\"\n", "\n", " CHECK_LABEL_GROUPS = [\n", " # ({\"LOCATION\"}, {\"GPE\", \"LOC\"}),\n", " # ({\"PERSON\", \"PER\"}, {\"PERSON\", \"PER\"}),\n", " # ({\"DATE_TIME\"}, {\"DATE\", \"TIME\"}),\n", " # ({\"NRP\"}, {\"NORP\"}),\n", " # ({\"ORGANIZATION\"}, {\"ORG\"}),\n", " # ()\n", " ]\n", " \n", " \n", "\n", " \n", "\n", " def __init__(\n", " self,\n", " terms,entitie,\n", " supported_language: str = \"en\",\n", " supported_entities: Optional[List[str]] = None,\n", " ner_strength: float = 0.85,\n", " check_label_groups: Optional[Tuple[Set, Set]] = None,\n", " context: Optional[List[str]] = None,\n", " \n", " \n", " ):\n", " self.terms=terms\n", " self.ENTITIES=entitie\n", " self.ner_strength = ner_strength\n", " self.check_label_groups = (\n", " check_label_groups if check_label_groups else self.CHECK_LABEL_GROUPS\n", " )\n", " supported_entities = supported_entities if supported_entities else self.ENTITIES\n", " # print(\"=========\",supported_entities)\n", " super().__init__(\n", " supported_entities=supported_entities,\n", " supported_language=supported_language,\n", " context=context,\n", " )\n", "\n", " def load(self) -> None: # noqa D102\n", " # no need to load anything as the analyze method already receives\n", " # preprocessed nlp artifacts\n", " pass\n", " \n", " \n", " def build_spacy_explanation(\n", " self, original_score: float, explanation: str\n", " ) -> AnalysisExplanation:\n", " \"\"\"\n", " Create explanation for why this result was detected.\n", "\n", " :param original_score: Score given by this recognizer\n", " :param explanation: Explanation string\n", " :return:\n", " \"\"\"\n", " explanation = AnalysisExplanation(\n", " recognizer=self.__class__.__name__,\n", " original_score=original_score,\n", " textual_explanation=explanation,\n", " )\n", " return explanation\n", " \n", " def analyze(self, text, entities, nlp_artifacts=None): # noqa D102\n", " \n", " # print(\"=========\",self.supported_entities)\n", " \n", " # matcher = PhraseMatcher(nlp.vocab)\n", " \n", " # # Only run nlp.make_doc to speed things up\n", " # patterns = [nlp.make_doc(text) for text in terms]\n", " \n", " # matcher.add(\"TerminologyList\", patterns)\n", " # result = []\n", " \n", " matcher = PhraseMatcher(nlp.vocab)\n", " \n", " # Only run nlp.make_doc to speed things up\n", " patterns = [nlp.make_doc(text) for text in self.terms]\n", " \n", " matcher.add(\"TerminologyList\", patterns)\n", " \n", " results = []\n", " # result =[]\n", " \n", " doc = nlp(text)\n", " doc1 = str(doc)\n", " \n", " matches = matcher(doc)\n", " for match_id, start, end in matches:\n", " span = doc[start:end]\n", " \n", " if doc1.find(str(span)):\n", " doc1=doc1.replace(str(span.text),\"\")\n", " # etype=copy.deepcopy(DataList.entity) \n", " etype=self.ENTITIES \n", " spacy_result = RecognizerResult(\n", " \n", " entity_type=etype[0],\n", " start=span.start_char,\n", " end=span.end_char,\n", " score=self.ner_strength,\n", " # analysis_explanation=explanation,\n", " recognition_metadata={\n", " RecognizerResult.RECOGNIZER_NAME_KEY: self.name,\n", " RecognizerResult.RECOGNIZER_IDENTIFIER_KEY: self.id,\n", " },\n", " )\n", " \n", "\n", " results.append(spacy_result)\n", "\n", " \n", " \n", "\n", " return results\n", "\n", " @staticmethod\n", " def __check_label(\n", " entity: str, label: str, check_label_groups: Tuple[Set, Set]\n", " ) -> bool:\n", " return any(\n", " [entity in egrp and label in lgrp for egrp, lgrp in check_label_groups]\n", " )\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from presidio_analyzer import AnalyzerEngine, RecognizerRegistry\n", "from presidio_anonymizer import AnonymizerEngine\n", "\n", "yaml_file = \"recognizers.yaml\"\n", "registry = RecognizerRegistry()\n", "registry.load_predefined_recognizers()\n", "analyzer = AnalyzerEngine(registry=registry)\n", "anonymize=AnonymizerEngine()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# DataList.entity.clear()\n", "# DataList.resetData()\n", "# DataList.entity.append(\"XX\")\n", "# DataList.setData([\"alex\",\"amit\"])\n", "r=(TESTR(terms=[\"alex\",\"amit\"],entitie=[\"XX\"]))\n", "registry.add_recognizer(r)\n", "# DataList.entity.clear()\n", "# DataList.resetData()\n", "# DataList.entity.append(\"YY\")\n", "# DataList.setData([\"game\",\"race\"])\n", "r1=(TESTR(terms=[\"game\",\"race\"],entitie=[\"YY\"]))\n", "registry.add_recognizer(r1)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "txt=\"My name is alex and amit live in Pune play race game.\"\n", "results = analyzer.analyze(\n", " txt,\n", " language=\"en\",\n", " return_decision_process=True,\n", " )\n", "print(results)\n", "\n", "anonymize_text = anonymize.anonymize(text=txt,\n", " operators={},\n", " analyzer_results=results)\n", "\n", "anonymize_text\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from detectron2.utils.visualizer import Visualizer\n", "from detectron2.data import MetadataCatalog\n", "from detectron2.config import get_cfg\n", "\n", "# Replace \"TextDetectionModel\" with your chosen pre-trained model name from Detectron2 model zoo \n", "cfg = get_cfg()\n", "cfg.MODEL.ROI_HEADS.NAME = \"TextDetectionModel\"\n", "\n", "# Load model and weights (adjust paths as needed)\n", "cfg.MODEL.WEIGHTS = \"path/to/model_weights.pth\"\n", "predictor = Detectron2Demo(cfg)\n", "\n", "# Define function to extract text from video frames\n", "def extract_text_from_video(video_path):\n", " cap = cv2.VideoCapture(video_path)\n", " text_list = []\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", " \n", " # Use Detectron2 predictor to get text detections\n", " outputs = predictor(frame)\n", " \n", " # Extract text from detected regions (replace with your logic based on model outputs)\n", " for text_obj in outputs[\"instances\"].pred_boxes:\n", " x1, y1, x2, y2 = text_obj.intBounds()\n", " text_region = frame[y1:y2, x1:x2]\n", " # You might need to use OCR library like pytesseract to extract text from the region\n", " extracted_text = \"your_ocr_function(text_region)\" # Replace with actual OCR logic\n", " text_list.append(extracted_text)\n", " \n", " cap.release()\n", " return text_list\n", "\n", "# Example usage\n", "video_path = \"path/to/your/video.mp4\"\n", "extracted_text = extract_text_from_video(video_path)\n", "print(f\"Extracted text: {extracted_text}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install detectron2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import pytesseract\n", "\n", "# Function to extract text from a single frame\n", "def extract_text_from_frame(frame):\n", " # Optional image processing (adjust as needed)\n", " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", " thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n", " output_type = pytesseract.Output.DICT\n", " s=pytesseract.image_to_data(frame, output_type=output_type)\n", " s=\" \".join(s[\"text\"])\n", " print(\"======\",s)\n", " # Use OCR library (replace with your preferred OCR implementation)\n", " text = pytesseract.image_to_string(thresh, config='--psm 6') # Adjust config for better results\n", " return text\n", "\n", "# Open video and iterate through frames\n", "cap = cv2.VideoCapture(r\"C:\\Users\\amitumamaheshwar.h\\Downloads\\piivdo 1.mp4\")\n", "extracted_text = []\n", "while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " text = extract_text_from_frame(frame)\n", " \n", " print(\"==\",text)\n", " extracted_text.append(text)\n", "\n", "cap.release()\n", "\n", "# Print or process the extracted text\n", "print(f\"Extracted text: {extracted_text}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import pytesseract\n", "\n", "def extract_text_with_bounding_boxes(image_path):\n", " \"\"\"\n", " Extracts text with bounding boxes from an image using TesseractOCR.\n", "\n", " Args:\n", " image_path (str): Path to the image file.\n", "\n", " Returns:\n", " list: List of dictionaries containing extracted text and bounding box coordinates.\n", " \"\"\"\n", " img = cv2.imread(image_path)\n", "\n", " # Convert to grayscale (optional, might improve OCR accuracy)\n", " gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", "\n", " # Apply image processing techniques (optional, adjust based on your image)\n", " # thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] # Example thresholding\n", "\n", " # Use Tesseract to detect text regions (config option for bounding boxes)\n", " boxes = pytesseract.image_to_data(gray, config='--oem 1 --psm 6')\n", "# print(boxes)\n", " # Extract text and bounding box coordinates\n", " extracted_data = []\n", " for i, line in enumerate(boxes.splitlines()[1:]):\n", " line=line.split('\\t')[6::]\n", " # print(line)\n", " if line[-2] != '-1':\n", " # print(line)\n", " x, y, w, h, conf, text = line\n", " extracted_data.append({\n", " 'text': text,\n", " 'x': int(x),\n", " 'y': int(y),\n", " 'width': int(w),\n", " 'height': int(h),\n", " 'confidence': float(conf)\n", " })\n", "\n", " return extracted_data\n", "\n", "# Example usage\n", "image_path = r\"C:\\WORK\\GIT\\responsible-ai-admin\\responsible-ai-admin\\src\\rai_admin\\temp\\Karan (2).png\"\n", "extracted_text = extract_text_with_bounding_boxes(image_path)\n", "print(extracted_text)\n", "# Print extracted text and bounding box data\n", "for data in extracted_text:\n", " print(f\"Text: {data['text']}, Confidence: {data['confidence']}\")\n", " print(f\"Bounding Box: ({data['x']},{data['y']}), Width: {data['width']}, Height: {data['height']}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import pytesseract\n", "from moviepy.editor import ImageClip\n", "# Function to extract text from a single frame\n", "def extract_text_from_frame(frame):\n", " # Optional image processing (adjust as needed)\n", " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", " thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n", "# output_type = pytesseract.Output.DICT\n", "# s=pytesseract.image_to_data(frame, output_type=output_type)\n", "# s=\" \".join(s[\"text\"])\n", "# print(\"======\",s)\n", " # Use OCR library (replace with your preferred OCR implementation)\n", " # text=extract_text_with_bounding_boxes(frame)\n", "# text = pytesseract.image_to_string(thresh, config='--psm 6') # Adjust config for better results\n", "\n", " boxes = pytesseract.image_to_data(thresh, config='--oem 1 --psm 6')\n", "# print(boxes)\n", " # Extract text and bounding box coordinates\n", " extracted_data = []\n", " for i, line in enumerate(boxes.splitlines()[1:]):\n", " line=line.split('\\t')[6::]\n", " # print(line)\n", " if line[-2] != '-1':\n", " # print(line)\n", " x, y, w, h, conf, text = line\n", " x = int(x)\n", " y = int(y)\n", " w = int(w)\n", " h = int(h)\n", " cv2.rectangle(frame, (x + 1, y + 1), (x + w - 1, y + h - 1), (0, 255, 0), -1) # Adjust padding for fill\n", "\n", " # extracted_data.append({\n", " # 'text': text,\n", " # 'x': int(x),\n", " # 'y': int(y),\n", " # 'width': int(w),\n", " # 'height': int(h),\n", " # 'confidence': float(conf)\n", " # })\n", " \n", " \n", " \n", " return frame\n", " # return extracted_data\n", " # return text\n", "\n", "# Open video and iterate through frames\n", "cap = cv2.VideoCapture(r\"C:\\Users\\amitumamaheshwar.h\\Downloads\\piivdo 1.mp4\")\n", "extracted_text = []\n", "processed_frames = []\n", "while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " proc_frame = extract_text_from_frame(frame.copy())\n", " processed_frames.append(proc_frame)\n", "\n", " \n", " \n", " # print(\"==\",text)\n", " # extracted_text.append(text)\n", "\n", "\n", "cap.release()\n", "\n", "\n", "# Print or process the extracted text\n", "# print(f\"Extracted text: {extracted_text}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(processed_frames)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from moviepy.editor import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "clip = ImageClip(processed_frames).set_duration(5)\n", "clip.write_videofile(\"test.mp4\", fps=24)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "clip = ImageClip.from_array(processed_frames, fps=25)\n", "clip.write_videofile(\"output.mp4\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "height, width = processed_frames[0].shape[:2] # Get frame dimensions from the first frame\n", "fourcc = cv2.VideoWriter_fourcc(*'XVID') # Adjust codec if needed\n", "video = cv2.VideoWriter(\"test.mp4\", fourcc, 25, (width, height))\n", "for frame in processed_frames:\n", " video.write(frame)\n", "video.release()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import base64\n", "import io\n", "from typing import Tuple\n", "import cv2\n", "from PIL import Image\n", "from privacy.config.logger import request_id_var\n", "request_id_var.set(\"aa\")\n", "from privacy.service.service import PrivacyService,AttributeDict\n", "import numpy as np\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time\n", "\n", "\n", "async def videoPrivacy(payload) -> Tuple[str, str]:\n", " # upload_file = payload['video']\n", " # video_data = await upload_file.read()\n", " s=time.time()\n", " temp_file_path = r\"C:\\Users\\amitumamaheshwar.h\\Downloads\\piivdo 1.mp4\"\n", " output_file_path = \"output.mp4\"\n", "\n", " # with open(temp_file_path, \"wb\") as temp_file:\n", " # temp_file.write(video_data)\n", "\n", " video = cv2.VideoCapture(temp_file_path)\n", "\n", " # Get video properties\n", " width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n", " height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", " fps = video.get(cv2.CAP_PROP_FPS)\n", "\n", " # Define the codec and create a VideoWriter object\n", " fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", " out = cv2.VideoWriter(output_file_path, fourcc, fps, (width, height))\n", " \n", " while(video.isOpened()):\n", " ret, frame = video.read()\n", " print(ret)\n", " if ret==True:\n", " # Convert the frame to PIL Image\n", " # base64.b64encode(frame).decode()\n", " # Image.open(base64.b64encode(frame).decode())\n", " # print(type(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))\n", " imagef = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n", " imagef.save(\"test.jpg\")\n", " # image=open(\"test.jpg\",\"rb\")\n", " print(type(imagef))\n", " image={\"file\":\"test.jpg\"}\n", " image=AttributeDict(image)\n", " # ocr=None\n", " # global imageAnalyzerEngine\n", "\n", " # imageAnalyzerEngine = ImageAnalyzerEngine(analyzer_engine=analyzer,ocr=ocr) \n", " # imageRedactorEngine = ImageRedactorEngine(image_analyzer_engine=imageAnalyzerEngine)\n", " # redacted_image = imageRedactorEngine.redact(image, (255, 192, 203))\n", " payload={\"easyocr\":\"Tesseract\",\"mag_ratio\":False,\"rotationFlag\":False,\"image\":image,\"portfolio\":None,\"account\":None,\"exclusion\":None}\n", " \n", " redacted_image=PrivacyService.image_anonymize(payload)\n", " decoded_bytes = base64.b64decode(redacted_image)\n", "\n", " # Create a BytesIO object to simulate a file-like object\n", " bio = io.BytesIO(decoded_bytes)\n", "\n", " # Use OpenCV (assuming it's an image) or other libraries to load the image from the BytesIO object\n", " img = cv2.imdecode(np.fromstring(bio.getvalue(), np.uint8), cv2.IMREAD_COLOR)\n", "\n", " # Convert the PIL Image back to OpenCV frame\n", " frame = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", "\n", " # Write the frame into the file 'output.avi'\n", " out.write(frame)\n", "\n", " else:\n", " break\n", "\n", " # Release everything when job is finished\n", " video.release()\n", " out.release()\n", "\n", " # Remove temporary file\n", " # os.remove(temp_file_path)\n", "\n", " # Read the processed video file\n", " # with open(output_file_path, \"rb\") as video_file:\n", " # video_data = video_file.read()\n", "\n", " # # Convert the video to base64\n", " # video_str = base64.b64encode(video_data).decode()\n", "\n", " # Remove the output file\n", " # os.remove(output_file_path)\n", " print(\"====\",time.time()-s)\n", " return \"video_str\"\n", "\n", "s=await videoPrivacy({})\n", "print(s)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import asyncio\n", "import cv2\n", "from PIL import Image\n", "import base64\n", "import io\n", "from concurrent.futures import ThreadPoolExecutor\n", "\n", "async def video_privacy_parallel(payload) -> Tuple[str, str]:\n", " \"\"\"\n", " Processes a video, anonymizes frames in parallel using PrivacyService,\n", " and returns a tuple containing the output video and processing time.\n", "\n", " Args:\n", " payload (dict): The input payload for the video processing function.\n", "\n", " Returns:\n", " Tuple[str, str]: A tuple containing the anonymized video as a base64\n", " encoded string and the processing time in seconds.\n", " \"\"\"\n", "\n", " start_time = time.time()\n", "\n", " temp_file_path = \"piivdo 1.mp4\" # Replace with your actual video path\n", " output_file_path = \"output.mp4\"\n", "\n", " cap = cv2.VideoCapture(temp_file_path)\n", "\n", " if not cap.isOpened():\n", " print(\"Error: Could not open video file.\")\n", " return \"\", time.time() - start_time\n", "\n", " width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n", " height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", " fps = cap.get(cv2.CAP_PROP_FPS)\n", " fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", " out = cv2.VideoWriter(output_file_path, fourcc, fps, (width, height))\n", "\n", " async def process_frame(frame):\n", " \"\"\"\n", " Processes a single video frame.\n", "\n", " Args:\n", " frame (numpy.ndarray): The frame to be processed.\n", "\n", " Returns:\n", " bytes: The anonymized frame data as bytes.\n", " \"\"\"\n", "\n", " imagef = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n", " imagef.save(\"test.jpg\")\n", "\n", " image = {\"file\": \"test.jpg\"}\n", " image = AttributeDict(image)\n", "\n", " try:\n", " redacted_image_bytes = await PrivacyService.image_anonymize(payload, image)\n", " except Exception as e:\n", " print(f\"Error anonymizing frame: {e}\")\n", " return None\n", "\n", " decoded_bytes = base64.b64decode(redacted_image_bytes)\n", " bio = io.BytesIO(decoded_bytes)\n", " img = cv2.imdecode(np.fromstring(bio.getvalue(), np.uint8), cv2.IMREAD_COLOR)\n", " frame = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", "\n", " return frame\n", "\n", " async def main_loop():\n", " \"\"\"\n", " Processes frames in a loop, using a thread pool for parallelization.\n", " \"\"\"\n", "\n", " tasks = []\n", " with ThreadPoolExecutor(max_workers=4) as executor: # Adjust max_workers as needed\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " task = executor.submit(process_frame, frame.copy())\n", " tasks.append(task)\n", "\n", " processed_frames = []\n", " for task in tasks:\n", " try:\n", " processed_frame = await task\n", " if processed_frame is not None:\n", " processed_frames.append(processed_frame)\n", " except Exception as e:\n", " print(f\"Error processing frame: {e}\")\n", "\n", " for frame in processed_frames:\n", " out.write(frame)\n", "\n", " await main_loop()\n", "\n", " cap.release()\n", " out.release()\n", " # Remove temporary files (if needed)\n", "\n", " processing_time = time.time() - start_time\n", " print(f\"Processing time: {processing_time:.2f} seconds\")\n", "\n", " # Read the processed video file and convert to base64 if needed\n", " # ...\n", "\n", " return \"video_str\", processing_time\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.\n" ] } ], "source": [ "import base64\n", "import io\n", "from typing import Tuple\n", "import cv2\n", "from PIL import Image\n", "from privacy.config.logger import request_id_var\n", "request_id_var.set(\"aa\")\n", "from privacy.service.service import PrivacyService,AttributeDict\n", "import numpy as np\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "samp 9\n", "totalFrame: 117\n", "after sampling 13\n", "Entering in image_anonymize function\n", "Entering in image_anonymize function\n", "remaining: 6 / 13\n", "Time taken to duplicate image: Entering in image_anonymize function\n", " 0.01303410530090332\n", "Time taken to parse ocr kwargs: 0.0\n", "Entering in image_anonymize function\n", "Time taken to duplicate image: 0.022548913955688477\n", "Time taken to parse ocr kwargs: 0.0\n", "Entering in image_anonymize function\n", "Entering in image_anonymize function\n", "Time taken to duplicate image: Time taken to duplicate image: 0.01099252700805664\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to duplicate image: 0.008993864059448242\n", "Time taken to parse ocr kwargs: 0.0\n", " 0.019997358322143555\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to duplicate image: 0.0069980621337890625\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to perform ocr: 1.9020135402679443\n", "Time taken to threshold ocr result: 0.0010013580322265625\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to perform ocr: 1.8870036602020264\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.c02@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to perform ocr: 1.9049980640411377\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to perform ocr: 1.9230256080627441\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.000989675521850586\n", "Time taken to perform ocr: 1.9470298290252686\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to perform ocr: 1.971557378768921\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to analyze text: 0.23116564750671387\n", "Time taken to map analyzer results to bounding boxes: 0.005988597869873047\n", "Time taken to analyze image: 2.1241579055786133\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to analyze text: 0.2351548671722412\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 2.140152931213379\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "Returning from image_anonymize function\n", "Time taken to analyze text: 0.39892077445983887\n", "Time taken to map analyzer results to bounding boxes: 0.0010018348693847656\n", "Time taken to analyze image: 2.310929298400879\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to analyze text: 0.3519287109375\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 2.2759439945220947\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to analyze text: 0.34737491607666016\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 2.3199360370635986\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "Returning from image_anonymize function\n", "Time taken to analyze text: 0.38391971588134766\n", "Time taken to map analyzer results to bounding boxes: 0.000997304916381836\n", "Time taken to analyze image: 2.331946849822998\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "Returning from image_anonymize function\n", "Entering in image_anonymize function\n", "Entering in image_anonymize function\n", "Entering in image_anonymize function\n", "remaining: 12 / 13\n", "Time taken to duplicate image: 0.013000965118408203\n", "Time taken to parse ocr kwargs: 0.0\n", "Entering in image_anonymize function\n", "Entering in image_anonymize function\n", "Time taken to duplicate image: 0.013574600219726562\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to duplicate image: 0.012556791305541992\n", "Time taken to parse ocr kwargs: 0.0\n", "Entering in image_anonymize function\n", "Time taken to duplicate image: 0.003995180130004883\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to duplicate image: 0.003995180130004883\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to duplicate image: 0.00799870491027832\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to perform ocr: 1.925011157989502\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to analyze text: 0.12255573272705078\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 2.0475668907165527\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to perform ocr: 2.081622362136841\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Returning from image_anonymize function\n", "Time taken to analyze text: 0.15117764472961426\n", "Time taken to map analyzer results to bounding boxes: 0.0020067691802978516\n", "Time taken to analyze image: 2.234806776046753\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to perform ocr: 2.280395030975342\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Returning from image_anonymize function\n", "Time taken to perform ocr: 2.340979814529419\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to perform ocr: 2.366431713104248\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to analyze text: 0.16561484336853027\n", "Time taken to map analyzer results to bounding boxes: 0.0010027885437011719\n", "Time taken to analyze image: 2.4480113983154297\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to perform ocr: 2.478013277053833\n", "Time taken to threshold ocr result: 0.0\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Returning from image_anonymize function\n", "Time taken to analyze text: 0.22170615196228027\n", "Time taken to map analyzer results to bounding boxes: 0.0009951591491699219\n", "Time taken to analyze image: 2.5901355743408203\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Time taken to analyze text: 0.31073760986328125\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 2.6517174243927\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "Time taken to analyze text: 0.22667837142944336\n", "Time taken to map analyzer results to bounding boxes: 0.0009975433349609375\n", "Time taken to analyze image: 2.7056891918182373\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "Returning from image_anonymize function\n", "remaining: 13 / 13\n", "Entering in image_anonymize function\n", "Time taken to duplicate image: 0.004000663757324219\n", "Time taken to parse ocr kwargs: 0.0\n", "Time taken to perform ocr: 1.4694664478302002\n", "Time taken to threshold ocr result: 0.001005411148071289\n", " Phone : 9159236847 E-mail : krishna@gmail.com Teams-Id: Krishnakumar.cO2@infosys.com Aadhar : 7629 5476 3472 5008 Welcome to the team O-\n", "Time taken to get text from ocr dict: 0.0\n", "Time taken to analyze text: 0.0729987621307373\n", "Time taken to map analyzer results to bounding boxes: 0.0\n", "Time taken to analyze image: 1.54447340965271\n", "Time taken to draw rectangle: 0.0\n", "Time taken to redact image: 0.0\n", "Returning from image_anonymize function\n", "==== 7.518835544586182\n", "\n" ] } ], "source": [ "\n", "import os\n", "import shutil\n", "import threading\n", "import time\n", "import uuid\n", "\n", "path=\"../video/\"\n", "\n", "\n", "def frameAnonymization(frame,indx,procFrame,request_id):\n", " # request_id_var.set(request_id)\n", " id = uuid.uuid4().hex\n", " request_id_var.set(id)\n", " ipath=path+str(request_id)+\"/\"+str(indx)+\".jpg\"\n", " # print(ipath)\n", " # Convert the frame to PIL Image\n", " # base64.b64encode(frame).decode()\n", " # Image.open(base64.b64encode(frame).decode())\n", " # print(type(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))\n", " imagef = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n", " imagef.save(ipath)\n", " # image=open(\"test.jpg\",\"rb\")\n", " # print(type(imagef))\n", " image={\"file\":ipath}\n", " image=AttributeDict(image)\n", " # ocr=None\n", " # global imageAnalyzerEngine\n", "\n", " # imageAnalyzerEngine = ImageAnalyzerEngine(analyzer_engine=analyzer,ocr=ocr) \n", " # imageRedactorEngine = ImageRedactorEngine(image_analyzer_engine=imageAnalyzerEngine)\n", " # redacted_image = imageRedactorEngine.redact(image, (255, 192, 203))\n", " payload={\"easyocr\":\"Tesseract\",\"mag_ratio\":False,\"rotationFlag\":False,\"image\":image,\"portfolio\":None,\"account\":None,\"exclusion\":None}\n", " \n", " redacted_image=PrivacyService.image_anonymize(payload)\n", " decoded_bytes = base64.b64decode(redacted_image)\n", "\n", " # Create a BytesIO object to simulate a file-like object\n", " bio = io.BytesIO(decoded_bytes)\n", "\n", " # Use OpenCV (assuming it's an image) or other libraries to load the image from the BytesIO object\n", " img = cv2.imdecode(np.fromstring(bio.getvalue(), np.uint8), cv2.IMREAD_COLOR)\n", "\n", " # Convert the PIL Image back to OpenCV frame\n", " frame = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", " procFrame[indx]=frame\n", " return (frame,indx)\n", " # Write the frame into the file 'output.avi'\n", " # out.write(frame)\n", "\n", " # else:\n", " # break\n", "\n", "\n", "async def videoPrivacy(payload) -> Tuple[str, str]:\n", " # upload_file = payload['video']\n", " id = uuid.uuid4().hex\n", " request_id_var.set(id)\n", " _path=path+str(id)\n", " if(not os.path.exists(_path)):\n", " os.makedirs(_path)\n", " # video_data = await upload_file.read()\n", " s=time.time()\n", " temp_file_path = r\"C:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\temp.avi\"\n", " output_file_path = \"output3.mp4\"\n", " # with open(temp_file_path, \"wb\") as temp_file:\n", " # temp_file.write(video_data)\n", " video = cv2.VideoCapture(temp_file_path)\n", " # Get video properties\n", " width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n", " height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", " fps = video.get(cv2.CAP_PROP_FPS)\n", " sampling_rate=int(fps*0.3)\n", " # Define the codec and create a VideoWriter object\n", " fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", " out = cv2.VideoWriter(output_file_path, fourcc, fps, (width, height))\n", " frameList=[]\n", " indxList=[]\n", " first=True\n", " count=0\n", " last_frame=None\n", " print(\"samp \",sampling_rate)\n", "\n", "\n", " # audio_fps = video.get(cv2.CAP_PROP_FPS)\n", " # fourcc = int(video.get(cv2.CAP_PROP_FOURCC)) \n", " # print(\"aud\",audio_fps,fourcc)\n", " # sampling_rate=1\n", " while(video.isOpened()):\n", " ret, frame = video.read()\n", " # print(ret)\n", " if ret==True:\n", " if first:\n", " frameList.append(frame)\n", " indxList.append(count)\n", " first=False \n", " else:\n", " if count % sampling_rate == 0:\n", " frameList.append(frame)\n", " indxList.append(count)\n", " # else:\n", " # frameList.append(None)\n", " last_frame=frame\n", " count+=1 \n", " else:\n", " break\n", " if(count%sampling_rate!=0):\n", " frameList.append(last_frame)\n", " indxList.append(count)\n", " print(\"totalFrame:\",count)\n", " # print(indxList,len(indxList)) \n", " print(\"after sampling\",len(frameList))\n", " rcount=len(frameList)\n", " framecopy=frameList.copy()\n", " procFrame=[None]*(count+1)\n", " # print(len(procFrame))\n", " # indx=0\n", " while framecopy:\n", " threads = []\n", " for _ in range(min(6, len(framecopy))): # Limit calls to remaining arguments\n", " arg = framecopy.pop(0) # Get the first argument and remove it\n", " indx=indxList.pop(0)\n", " thread = threading.Thread(target=frameAnonymization, args=(arg,indx,procFrame,request_id_var.get()))\n", " thread.start()\n", " threads.append(thread)\n", " # print(thread)\n", " indx+=1\n", " # Wait for all threads in the current set to finish\n", "\n", " print(\"remaining:\",rcount-len(framecopy),\"/\",rcount)\n", " for thread in threads:\n", " thread.join() \n", " # print(\"===\",procFrame) \n", " # Release everything when job is finished\n", " # print(procFrame)\n", " lstFrame=None\n", " for frm in procFrame:\n", " # print(frm,frm.any())\n", " # print(frm,frm.all())\n", " if(lstFrame is None):\n", " lstFrame=frm\n", " if(frm is not None):\n", " lstFrame=frm \n", " else:\n", " frm=lstFrame\n", " out.write(frm)\n", " video.release()\n", " out.release()\n", " # Remove temporary file\n", " # os.remove(temp_file_path)\n", " # Read the processed video file\n", " with open(output_file_path, \"rb\") as video_file:\n", " video_data = video_file.read()\n", " # Convert the video to base64\n", " video_str = base64.b64encode(video_data).decode()\n", " # Remove the output file\n", " # os.remove(output_file_path)\n", " shutil.rmtree(_path)\n", " print(\"====\",time.time()-s)\n", " del procFrame\n", " del indxList\n", " del frameList\n", " return video_str\n", "\n", "s=await videoPrivacy({})\n", "print(s)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lstFrame=None\n", "for frm in procFrame:\n", " # print(frm,frm.any())\n", " # print(frm,frm.all())\n", " if(lstFrame is None):\n", " lstFrame=frm\n", " if(frm is not None):\n", " lstFrame=frm \n", " else:\n", " frm=lstFrame\n", " out.write(frm)\n", "video.release()\n", "out.release()\n", "# Remove temporary file\n", "# os.remove(temp_file_path)\n", "# Read the processed video file\n", "# with open(output_file_path, \"rb\") as video_file:\n", "# video_data = video_file.read()\n", "# # Convert the video to base64\n", "# video_str = base64.b64encode(video_data).decode()\n", "# Remove the output file\n", "# os.remove(output_file_path)\n", "shutil.rmtree(_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import os\n", "def sample_frames(video_path, output_dir=\"sampled_frames\"):\n", " \"\"\"\n", " Samples frames from a video at a specified interval and saves them to a directory.\n", "\n", " Args:\n", " video_path (str): Path to the input video.\n", " sampling_rate (int, optional): Interval between frames to sample. Defaults to 10.\n", " output_dir (str, optional): Directory to save the sampled frames. Defaults to \"sampled_frames\".\n", " \"\"\"\n", "\n", " cap = cv2.VideoCapture(video_path)\n", " fps = cap.get(cv2.CAP_PROP_FPS) # Get the video's frame rate (informational)\n", " sampling_rate=int(fps*0.3)\n", " print(sampling_rate)\n", " # print(fps)\n", " count = 0\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " if count % sampling_rate == 0:\n", " # Create output directory if it doesn't exist\n", " if not os.path.exists(output_dir):\n", " os.makedirs(output_dir)\n", "\n", " # Generate frame filename with frame number\n", " filename = f\"{output_dir}/frame_{count}.jpg\"\n", " cv2.imwrite(filename, frame)\n", "\n", " count += 1\n", "\n", " cap.release()\n", "\n", " print(f\"Sampled frames at a rate of 1 frame every {sampling_rate / fps:.2f} seconds (based on video FPS).\")\n", "\n", "if __name__ == \"__main__\":\n", " temp_file_path = r\"C:\\WORK\\GIT\\responsible-ai-admin\\responsible-ai-admin\\src\\rai_admin\\temp\\Recording 2024-05-28 181908.mp4\"\n", " # Replace with your video path\n", " sample_frames(temp_file_path, sampling_rate=10)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://infyartifactory.ad.infosys.com/artifactory/api/pypi/pypi-remote/simple, https://infyartifactory.ad.infosys.com/artifactory/api/pypi/pypi-remote/simple\n", "Requirement already satisfied: moviepy in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (1.0.3)\n", "Requirement already satisfied: decorator<5.0,>=4.0.2 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (4.4.2)\n", "Requirement already satisfied: imageio<3.0,>=2.5 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (2.33.0)\n", "Requirement already satisfied: imageio-ffmpeg>=0.2.0 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (0.4.9)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (1.26.2)\n", "Requirement already satisfied: proglog<=1.0.0 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (0.1.10)\n", "Requirement already satisfied: tqdm<5.0,>=4.11.2 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (4.66.1)\n", "Requirement already satisfied: requests<3.0,>=2.8.1 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from moviepy) (2.31.0)\n", "Requirement already satisfied: pillow>=8.3.2 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from imageio<3.0,>=2.5->moviepy) (10.1.0)\n", "Requirement already satisfied: setuptools in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from imageio-ffmpeg>=0.2.0->moviepy) (65.5.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (2.1.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (2023.11.17)\n", "Requirement already satisfied: colorama in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from tqdm<5.0,>=4.11.2->moviepy) (0.4.6)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "WARNING: Ignoring invalid distribution -ensorflow-intel (c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages)\n", "\n", "[notice] A new release of pip is available: 23.0.1 -> 24.0\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "ename": "error", "evalue": "OpenCV(4.8.1) :-1: error: (-5:Bad argument) in function 'VideoCapture'\n> Overload resolution failed:\n> - Can't convert object to 'str' for 'filename'\n> - VideoCapture() missing required argument 'apiPreference' (pos 2)\n> - Argument 'index' is required to be an integer\n> - VideoCapture() missing required argument 'apiPreference' (pos 2)\n", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31merror\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[5], line 12\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[38;5;66;03m# print(len(clip))\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# clipping of the video \u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m# getting video for only starting 10 seconds \u001b[39;00m\n\u001b[0;32m 11\u001b[0m clip \u001b[38;5;241m=\u001b[39m clip\u001b[38;5;241m.\u001b[39msubclip(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m10\u001b[39m) \n\u001b[1;32m---> 12\u001b[0m cap \u001b[38;5;241m=\u001b[39m \u001b[43mcv2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mVideoCapture\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclip\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# rotating video by 180 degree \u001b[39;00m\n", "\u001b[1;31merror\u001b[0m: OpenCV(4.8.1) :-1: error: (-5:Bad argument) in function 'VideoCapture'\n> Overload resolution failed:\n> - Can't convert object to 'str' for 'filename'\n> - VideoCapture() missing required argument 'apiPreference' (pos 2)\n> - Argument 'index' is required to be an integer\n> - VideoCapture() missing required argument 'apiPreference' (pos 2)\n" ] } ], "source": [ "# Import everything needed to edit video clips \n", "from moviepy.editor import *\n", "import cv2\n", "temp_file_path = r\"C:\\Users\\amitumamaheshwar.h\\Downloads\\OCS - Bulk Upload 2.mp4\"\n", "# loading video dsa gfg intro video \n", "clip = VideoFileClip(temp_file_path) \n", "\n", "# print(len(clip))\n", "# clipping of the video \n", "# getting video for only starting 10 seconds \n", "clip = clip.subclip(0, 10) \n", "# cap = cv2.VideoCapture(clip)\n", "# rotating video by 180 degree \n", "print(clip)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import cv2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "error", "evalue": "OpenCV(4.8.1) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\core\\include\\opencv2/core/private.cuda.hpp:106: error: (-216:No CUDA support) The library is compiled without CUDA support in function 'throw_no_cuda'\n", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31merror\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[8], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mcv2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetDevice\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[1;31merror\u001b[0m: OpenCV(4.8.1) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\core\\include\\opencv2/core/private.cuda.hpp:106: error: (-216:No CUDA support) The library is compiled without CUDA support in function 'throw_no_cuda'\n" ] } ], "source": [ "cv2.cuda.getDevice()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "async def videoPrivacy(payload) -> Tuple[str, str]:\n", " payload=AttributeDict(payload)\n", " upload_file = payload.video\n", " video_data = await upload_file.read()\n", "\n", " temp_file_path = \"temp.avi\"\n", " output_file_path = \"output.avi\"\n", "\n", " with open(temp_file_path, \"wb\") as temp_file:\n", " temp_file.write(video_data)\n", "\n", " video = cv2.VideoCapture(temp_file_path)\n", "\n", " # Get video properties\n", " width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n", " height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", " fps = video.get(cv2.CAP_PROP_FPS)\n", "\n", " # Define the codec and create a VideoWriter object\n", " fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", " out = cv2.VideoWriter(output_file_path, fourcc, fps, (width, height))\n", "\n", " while(video.isOpened()):\n", " ret, frame = video.read()\n", " if ret==True:\n", " # Convert the frame to PIL Image\n", " imagef = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n", " imagef.save(\"videoframe.jpg\")\n", " # image=open(\"test.jpg\",\"rb\")\n", " # print(type(imagef))\n", " image={\"file\":\"videoframe.jpg\"}\n", " image=AttributeDict(image)\n", " # ocr=None\n", " # global imageAnalyzerEngine\n", "\n", " # imageAnalyzerEngine = ImageAnalyzerEngine(analyzer_engine=analyzer,ocr=ocr) \n", " # imageRedactorEngine = ImageRedactorEngine(image_analyzer_engine=imageAnalyzerEngine)\n", " # redacted_image = imageRedactorEngine.redact(image, (255, 192, 203))\n", " payload[\"image\"]=image\n", " redacted_image=PrivacyService.image_anonymize(payload)\n", " decoded_bytes = base64.b64decode(redacted_image)\n", "\n", " # Create a BytesIO object to simulate a file-like object\n", " bio = io.BytesIO(decoded_bytes)\n", "\n", " # Use OpenCV (assuming it's an image) or other libraries to load the image from the BytesIO object\n", " img = cv2.imdecode(np.fromstring(bio.getvalue(), np.uint8), cv2.IMREAD_COLOR)\n", "\n", " # Convert the PIL Image back to OpenCV frame\n", " frame = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n", "\n", " # Write the frame into the file 'output.avi'\n", " out.write(frame)\n", "\n", " else:\n", " break\n", " \n", " # Release everything when job is finished\n", " video.release()\n", " out.release()\n", "\n", " # Remove temporary file\n", " # os.remove(temp_file_path)\n", "\n", " # Read the processed video file\n", " with open(output_file_path, \"rb\") as video_file:\n", " video_data = video_file.read()\n", "\n", " # Convert the video to base64\n", " video_str = base64.b64encode(video_data).decode()\n", "\n", " # Remove the output file\n", " # os.remove(output_file_path)\n", "\n", " return video_str" ] } ], "metadata": { "kernelspec": { "display_name": "myenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }