diff --git "a/questions/MLS-C01-January.json" "b/questions/MLS-C01-January.json" --- "a/questions/MLS-C01-January.json" +++ "b/questions/MLS-C01-January.json" @@ -1,6 +1,6 @@ [ { - "question": "QUESTION 1 A Machine Learning Specialist is working with multi ple data sources containing billions of records that need to be joined. What feature engineering an d model development approach should the Specialist take with a dataset this large?", + "question": "A Machine Learning Specialist is working with multi ple data sources containing billions of records that need to be joined. What feature engineering an d model development approach should the Specialist take with a dataset this large?", "options": [ "Use an Amazon SageMaker notebook for both feature engineering and model development", "Use an Amazon SageMaker notebook for feature engi neering and Amazon ML for model development", @@ -12,7 +12,7 @@ "references": "" }, { - "question": "QUESTION 2 A Machine Learning Specialist has completed a proof of concept for a company using a small data sample and now the Specialist is ready to implement an end-to-end solution in AWS using Amazon SageMaker The historical training data is stored in Amazon RDS Which approach should the Specialist use for traini ng a model using that data?", + "question": "A Machine Learning Specialist has completed a proof of concept for a company using a small data sample and now the Specialist is ready to implement an end-to-end solution in AWS using Amazon SageMaker The historical training data is stored in Amazon RDS Which approach should the Specialist use for traini ng a model using that data?", "options": [ "Write a direct connection to the SQL database wit hin the notebook and pull data in", "Push the data from Microsoft SQL Server to Amazon S3 using an AWS Data Pipeline and providethe S3 location within the notebook.", @@ -24,7 +24,7 @@ "references": "" }, { - "question": "QUESTION 3 Which of the following metrics should a Machine Lea rning Specialist generally use to compare/evaluate machine learning classification mo dels against each other? A. Recall", + "question": "Which of the following metrics should a Machine Lea rning Specialist generally use to compare/evaluate machine learning classification mo dels against each other? A. Recall", "options": [ "Misclassification rate", "Mean absolute percentage error (MAPE)", @@ -35,7 +35,7 @@ "references": "" }, { - "question": "QUESTION 4 A Machine Learning Specialist is using Amazon Sage Maker to host a model for a highly available customer-facing application. The Specialist has trained a new version of the mod el, validated it with historical data, and now wants to deploy it to production To limit any risk of a negative customer experience, the Specialist wants to be able to monitor the model and roll it b ack, if needed What is the SIMPLEST approach with the LEAST risk t o deploy the model and roll it back, if needed?", + "question": "A Machine Learning Specialist is using Amazon Sage Maker to host a model for a highly available customer-facing application. The Specialist has trained a new version of the mod el, validated it with historical data, and now wants to deploy it to production To limit any risk of a negative customer experience, the Specialist wants to be able to monitor the model and roll it b ack, if needed What is the SIMPLEST approach with the LEAST risk t o deploy the model and roll it back, if needed?", "options": [ "Create a SageMaker endpoint and configuration for the new model version. 
       "Create a SageMaker endpoint and configuration for the new model version. Redirect production traffic to the new endpoint by updating the client configuration. Revert traffic to the last version if the model does not perform as expected.",
       "Create a SageMaker endpoint and configuration for the new model version. Redirect production traffic to the new endpoint by using a load balancer. Revert traffic to the last version if the model does not perform as expected.",
@@ -47,7 +47,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 6 A manufacturing company has structured and unstructured data stored in an Amazon S3 bucket. A Machine Learning Specialist wants to use SQL to run queries on this data. Which solution requires the LEAST effort to be able to query this data?",
+    "question": "A manufacturing company has structured and unstructured data stored in an Amazon S3 bucket. A Machine Learning Specialist wants to use SQL to run queries on this data. Which solution requires the LEAST effort to be able to query this data?",
     "options": [
       "Use AWS Lambda to transform the data and Amazon Kinesis Data Analytics to run queries",
       "Use AWS Data Pipeline to transform the data and Amazon RDS to run queries.",
@@ -59,7 +59,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 7 A Machine Learning Specialist is packaging a custom ResNet model into a Docker container so the company can leverage Amazon SageMaker for training. The Specialist is using Amazon EC2 P3 instances to train the model and needs to properly configure the Docker container to leverage the NVIDIA GPUs. What does the Specialist need to do? A. Bundle the NVIDIA drivers with the Docker image",
+    "question": "A Machine Learning Specialist is packaging a custom ResNet model into a Docker container so the company can leverage Amazon SageMaker for training. The Specialist is using Amazon EC2 P3 instances to train the model and needs to properly configure the Docker container to leverage the NVIDIA GPUs. What does the Specialist need to do? A. Bundle the NVIDIA drivers with the Docker image",
     "options": [
       "Build the Docker container to be NVIDIA-Docker compatible",
       "Organize the Docker container's file structure to execute on GPU instances.",
@@ -70,7 +70,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 8 A large JSON dataset for a project has been uploaded to a private Amazon S3 bucket. The Machine Learning Specialist wants to securely access and explore the data from an Amazon SageMaker notebook instance. A new VPC was created and assigned to the Specialist. How can the privacy and integrity of the data stored in Amazon S3 be maintained while granting access to the Specialist for analysis?",
+    "question": "A large JSON dataset for a project has been uploaded to a private Amazon S3 bucket. The Machine Learning Specialist wants to securely access and explore the data from an Amazon SageMaker notebook instance. A new VPC was created and assigned to the Specialist. How can the privacy and integrity of the data stored in Amazon S3 be maintained while granting access to the Specialist for analysis?",
     "options": [
       "Launch the SageMaker notebook instance within the VPC with SageMaker-provided internet access enabled. Use an S3 ACL to open read privileges to the everyone group",
       "Launch the SageMaker notebook instance within the VPC and create an S3 VPC endpoint for the notebook to access the data. Copy the JSON dataset from Amazon S3 into the ML storage volume on the SageMaker notebook instance and work against the local dataset",
@@ -82,7 +82,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 9 Given the following confusion matrix for a movie classification model, what is the true class frequency for Romance and the predicted class frequency for Adventure? A. The true class frequency for Romance is 77.56% and the predicted class frequency for Adventure is 20.85%",
+    "question": "Given the following confusion matrix for a movie classification model, what is the true class frequency for Romance and the predicted class frequency for Adventure? A. The true class frequency for Romance is 77.56% and the predicted class frequency for Adventure is 20.85%",
     "options": [
       "The true class frequency for Romance is 57.92% and the predicted class frequency for Adventure is 13.12%",
       "The true class frequency for Romance is 0.78 and the predicted class frequency for Adventure is (0.47 - 0.32).",
@@ -93,7 +93,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 10 A Machine Learning Specialist is building a supervised model that will evaluate customers' satisfaction with their mobile phone service based on recent usage. The model's output should infer whether or not a customer is likely to switch to a competitor in the next 30 days. Which of the following modeling techniques should the Specialist use?",
+    "question": "A Machine Learning Specialist is building a supervised model that will evaluate customers' satisfaction with their mobile phone service based on recent usage. The model's output should infer whether or not a customer is likely to switch to a competitor in the next 30 days. Which of the following modeling techniques should the Specialist use?",
     "options": [
       "Time-series prediction",
       "Anomaly detection",
@@ -105,7 +105,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 11 A web-based company wants to improve its conversion rate on its landing page. Using a large historical dataset of customer visits, the company has repeatedly trained a multi-class deep learning network algorithm on Amazon SageMaker. However, there is an overfitting problem: training data shows 90% accuracy in predictions, while test data shows 70% accuracy only. The company needs to boost the generalization of its model before deploying it into production to maximize conversions of visits to purchases. Which action is recommended to provide the HIGHEST accuracy model for the company's test and validation data?",
+    "question": "A web-based company wants to improve its conversion rate on its landing page. Using a large historical dataset of customer visits, the company has repeatedly trained a multi-class deep learning network algorithm on Amazon SageMaker. However, there is an overfitting problem: training data shows 90% accuracy in predictions, while test data shows 70% accuracy only. The company needs to boost the generalization of its model before deploying it into production to maximize conversions of visits to purchases. Which action is recommended to provide the HIGHEST accuracy model for the company's test and validation data?",
     "options": [
       "Increase the randomization of training data in the mini-batches used in training.",
       "Allocate a higher proportion of the overall data to the training dataset",
@@ -117,7 +117,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 12 A Machine Learning Specialist was given a dataset consisting of unlabeled data. The Specialist must create a model that can help the team classify the data into different buckets. What model should be used to complete this work?",
+    "question": "A Machine Learning Specialist was given a dataset consisting of unlabeled data. The Specialist must create a model that can help the team classify the data into different buckets. What model should be used to complete this work?",
     "options": [
       "K-means clustering",
XGBoost", @@ -128,7 +128,7 @@ "references": "" }, { - "question": "QUESTION 13 A retail company intends to use machine learning to categorize new products A labeled dataset of current products was provided to the Data Science t eam The dataset includes 1 200 products The labeled dataset has 15 features for each product su ch as title dimensions, weight, and price Each product is labeled as belonging to one of six categ ories such as books, games, electronics, and movies. Which model should be used for categorizing new pro ducts using the provided dataset for training?", + "question": "A retail company intends to use machine learning to categorize new products A labeled dataset of current products was provided to the Data Science t eam The dataset includes 1 200 products The labeled dataset has 15 features for each product su ch as title dimensions, weight, and price Each product is labeled as belonging to one of six categ ories such as books, games, electronics, and movies. Which model should be used for categorizing new pro ducts using the provided dataset for training?", "options": [ "An XGBoost model where the objective parameter is set to multi: softmax", "A deep convolutional neural network (CNN) with a softmax activation function for the last layer", @@ -140,7 +140,7 @@ "references": "" }, { - "question": "QUESTION 14 A Machine Learning Specialist is building a model t o predict future employment rates based on a wide range of economic factors While exploring the data, the Specialist notices that the magnitude of the input features vary greatly The Specialist does not want variables with a larger magnitude to dominate the model What should the Specialist do to prepare the data f or model training'? A. Apply quantile binning to group the data into cat egorical bins to keep any relationships in the data by replacing the magnitude with distribution", + "question": "A Machine Learning Specialist is building a model t o predict future employment rates based on a wide range of economic factors While exploring the data, the Specialist notices that the magnitude of the input features vary greatly The Specialist does not want variables with a larger magnitude to dominate the model What should the Specialist do to prepare the data f or model training'? A. 
+    "question": "A Machine Learning Specialist is building a model to predict future employment rates based on a wide range of economic factors. While exploring the data, the Specialist notices that the magnitude of the input features varies greatly. The Specialist does not want variables with a larger magnitude to dominate the model. What should the Specialist do to prepare the data for model training? A. Apply quantile binning to group the data into categorical bins to keep any relationships in the data by replacing the magnitude with distribution",
     "options": [
       "Apply the Cartesian product transformation to create new combinations of fields that are independent of the magnitude",
       "Apply normalization to ensure each field will have a mean of 0 and a variance of 1 to remove any significant magnitude",
@@ -151,7 +151,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 15 A Machine Learning Specialist prepared the following graph displaying the results of k-means for k = [1:10]. Considering the graph, what is a reasonable selection for the optimal choice of k?",
+    "question": "A Machine Learning Specialist prepared the following graph displaying the results of k-means for k = [1:10]. Considering the graph, what is a reasonable selection for the optimal choice of k?",
     "options": [
       "1",
       "4",
@@ -163,7 +163,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 16 A company is using Amazon Polly to translate plaintext documents to speech for automated company announcements. However, company acronyms are being mispronounced in the current documents. How should a Machine Learning Specialist address this issue for future documents?",
+    "question": "A company is using Amazon Polly to translate plaintext documents to speech for automated company announcements. However, company acronyms are being mispronounced in the current documents. How should a Machine Learning Specialist address this issue for future documents?",
     "options": [
       "Convert current documents to SSML with pronunciation tags",
       "Create an appropriate pronunciation lexicon.",
@@ -175,7 +175,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 17 A Machine Learning Specialist is using Apache Spark for pre-processing training data. As part of the Spark pipeline, the Specialist wants to use Amazon SageMaker for training a model and hosting it. Which of the following would the Specialist do to integrate the Spark application with SageMaker? (Select THREE)",
+    "question": "A Machine Learning Specialist is using Apache Spark for pre-processing training data. As part of the Spark pipeline, the Specialist wants to use Amazon SageMaker for training a model and hosting it. Which of the following would the Specialist do to integrate the Spark application with SageMaker? (Select THREE)",
     "options": [
       "Download the AWS SDK for the Spark environment",
       "Install the SageMaker Spark library in the Spark environment. C. Use the appropriate estimator from the SageMaker Spark library to train a model.",
@@ -186,7 +186,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 18 A Machine Learning Specialist is working with a large cybersecurity company that manages security events in real time for companies around the world. The cybersecurity company wants to design a solution that will allow it to use machine learning to score malicious events as anomalies on the data as it is being ingested. The company also wants to be able to save the results in its data lake for later processing and analysis. What is the MOST efficient way to accomplish these tasks? A. Ingest the data using Amazon Kinesis Data Firehose, and use Amazon Kinesis Data Analytics Random Cut Forest (RCF) for anomaly detection. Then use Kinesis Data Firehose to stream the results to Amazon S3",
+    "question": "A Machine Learning Specialist is working with a large cybersecurity company that manages security events in real time for companies around the world. The cybersecurity company wants to design a solution that will allow it to use machine learning to score malicious events as anomalies on the data as it is being ingested. The company also wants to be able to save the results in its data lake for later processing and analysis. What is the MOST efficient way to accomplish these tasks? A. Ingest the data using Amazon Kinesis Data Firehose, and use Amazon Kinesis Data Analytics Random Cut Forest (RCF) for anomaly detection. Then use Kinesis Data Firehose to stream the results to Amazon S3",
     "options": [
       "Ingest the data into Apache Spark Streaming using Amazon EMR, and use Spark MLlib with k-means to perform anomaly detection. Then store the results in an Apache Hadoop Distributed File System (HDFS) using Amazon EMR with a replication factor of three as the data lake",
       "Ingest the data and store it in Amazon S3. Use AWS Batch along with the AWS Deep Learning AMIs to train a k-means model using TensorFlow on the data in Amazon S3.",
@@ -197,7 +197,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 19 A Machine Learning Specialist works for a credit card processing company and needs to predict which transactions may be fraudulent in near-real time. Specifically, the Specialist must train a model that returns the probability that a given transaction may be fraudulent. How should the Specialist frame this business problem?",
+    "question": "A Machine Learning Specialist works for a credit card processing company and needs to predict which transactions may be fraudulent in near-real time. Specifically, the Specialist must train a model that returns the probability that a given transaction may be fraudulent. How should the Specialist frame this business problem?",
     "options": [
       "Streaming classification",
       "Binary classification",
@@ -209,7 +209,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 20 Amazon Connect has recently been rolled out across a company as a contact call center. The solution has been configured to store voice call recordings on Amazon S3. The content of the voice calls is being analyzed for the incidents being discussed by the call operators. Amazon Transcribe is being used to convert the audio to text, and the output is stored on Amazon S3. Which approach will provide the information required for further analysis?",
+    "question": "Amazon Connect has recently been rolled out across a company as a contact call center. The solution has been configured to store voice call recordings on Amazon S3. The content of the voice calls is being analyzed for the incidents being discussed by the call operators. Amazon Transcribe is being used to convert the audio to text, and the output is stored on Amazon S3. Which approach will provide the information required for further analysis?",
     "options": [
       "Use Amazon Comprehend with the transcribed files to build the key topics",
       "Use Amazon Translate with the transcribed files to train and build a model for the key topics",
@@ -221,7 +221,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 21 A Machine Learning Specialist is building a prediction model for a large number of features using linear models, such as linear regression and logistic regression. During exploratory data analysis, the Specialist observes that many features are highly correlated with each other. This may make the model unstable. What should be done to reduce the impact of having such a large number of features?",
+    "question": "A Machine Learning Specialist is building a prediction model for a large number of features using linear models, such as linear regression and logistic regression. During exploratory data analysis, the Specialist observes that many features are highly correlated with each other. This may make the model unstable. What should be done to reduce the impact of having such a large number of features?",
     "options": [
       "Perform one-hot encoding on highly correlated features",
       "Use matrix multiplication on highly correlated features.",
@@ -233,7 +233,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 22 A Machine Learning Specialist wants to determine the appropriate SageMakerVariantInvocationsPerInstance setting for an endpoint automatic scaling configuration. The Specialist has performed a load test on a single instance and determined that peak requests per second (RPS) without service degradation is about 20 RPS. As this is the first deployment, the Specialist intends to set the invocation safety factor to 0.5. Based on the stated parameters and given that the invocations per instance setting is measured on a per-minute basis, what should the Specialist set as the SageMakerVariantInvocationsPerInstance setting?",
+    "question": "A Machine Learning Specialist wants to determine the appropriate SageMakerVariantInvocationsPerInstance setting for an endpoint automatic scaling configuration. The Specialist has performed a load test on a single instance and determined that peak requests per second (RPS) without service degradation is about 20 RPS. As this is the first deployment, the Specialist intends to set the invocation safety factor to 0.5. Based on the stated parameters and given that the invocations per instance setting is measured on a per-minute basis, what should the Specialist set as the SageMakerVariantInvocationsPerInstance setting?",
     "options": [
       "10",
       "30",
@@ -245,7 +245,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 23 A Machine Learning Specialist deployed a model that provides product recommendations on a company's website. Initially, the model was performing very well and resulted in customers buying more products on average. However, within the past few months the Specialist has noticed that the effect of product recommendations has diminished and customers are starting to return to their original habits of spending less. The Specialist is unsure of what happened, as the model has not changed from its initial deployment over a year ago. Which method should the Specialist try to improve model performance?",
+    "question": "A Machine Learning Specialist deployed a model that provides product recommendations on a company's website. Initially, the model was performing very well and resulted in customers buying more products on average. However, within the past few months the Specialist has noticed that the effect of product recommendations has diminished and customers are starting to return to their original habits of spending less. The Specialist is unsure of what happened, as the model has not changed from its initial deployment over a year ago. Which method should the Specialist try to improve model performance?",
     "options": [
       "The model needs to be completely re-engineered because it is unable to handle product inventory changes",
       "The model's hyperparameters should be periodically updated to prevent drift",
@@ -257,7 +257,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 24 A manufacturer of car engines collects data from cars as they are being driven. The data collected includes timestamp, engine temperature, rotations per minute (RPM), and other sensor readings. The company wants to predict when an engine is going to have a problem, so it can notify drivers in advance to get engine maintenance. The engine data is loaded into a data lake for training. Which is the MOST suitable predictive model that can be deployed into production?",
+    "question": "A manufacturer of car engines collects data from cars as they are being driven. The data collected includes timestamp, engine temperature, rotations per minute (RPM), and other sensor readings. The company wants to predict when an engine is going to have a problem, so it can notify drivers in advance to get engine maintenance. The engine data is loaded into a data lake for training. Which is the MOST suitable predictive model that can be deployed into production?",
     "options": [
       "Add labels over time to indicate which engine faults occur at what time in the future to turn this into a supervised learning problem. Use a recurrent neural network (RNN) to train the model to recognize when an engine might need maintenance for a certain fault.",
       "This data requires an unsupervised learning algorithm. Use Amazon SageMaker k-means to cluster the data",
@@ -269,7 +269,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 26 A Machine Learning Specialist is developing a recommendation engine for a photography blog. Given a picture, the recommendation engine should show a picture that captures similar objects. The Specialist would like to create a numerical representation feature to perform nearest-neighbor searches. What actions would allow the Specialist to get relevant numerical representations?",
+    "question": "A Machine Learning Specialist is developing a recommendation engine for a photography blog. Given a picture, the recommendation engine should show a picture that captures similar objects. The Specialist would like to create a numerical representation feature to perform nearest-neighbor searches. What actions would allow the Specialist to get relevant numerical representations?",
     "options": [
       "Reduce image resolution and use reduced resolution pixel values as features",
       "Use Amazon Mechanical Turk to label image content and create a one-hot representation indicating the presence of specific labels",
@@ -281,7 +281,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 27 A gaming company has launched an online game where people can start playing for free, but they need to pay if they choose to use certain features. The company needs to build an automated system to predict whether or not a new user will become a paid user within 1 year. The company has gathered a labeled dataset from 1 million users. The training dataset consists of 1,000 positive samples (from users who ended up paying within 1 year) and 999,000 negative samples (from users who did not use any paid features). Each data sample consists of 200 features including user age, device, location, and play patterns. Using this dataset for training, the Data Science team trained a random forest model that converged with over 99% accuracy on the training set. However, the prediction results on a test dataset were not satisfactory. Which of the following approaches should the Data Science team take to mitigate this issue? (Select TWO.)",
+    "question": "A gaming company has launched an online game where people can start playing for free, but they need to pay if they choose to use certain features. The company needs to build an automated system to predict whether or not a new user will become a paid user within 1 year. The company has gathered a labeled dataset from 1 million users. The training dataset consists of 1,000 positive samples (from users who ended up paying within 1 year) and 999,000 negative samples (from users who did not use any paid features). Each data sample consists of 200 features including user age, device, location, and play patterns. Using this dataset for training, the Data Science team trained a random forest model that converged with over 99% accuracy on the training set. However, the prediction results on a test dataset were not satisfactory. Which of the following approaches should the Data Science team take to mitigate this issue? (Select TWO.)",
     "options": [
       "Add more deep trees to the random forest to enable the model to learn more features.",
       "Include a copy of the samples in the test dataset in the training dataset",
@@ -293,7 +293,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 28 While reviewing the histogram for residuals on regression evaluation data, a Machine Learning Specialist notices that the residuals do not form a zero-centered bell shape as shown. What does this mean?",
+    "question": "While reviewing the histogram for residuals on regression evaluation data, a Machine Learning Specialist notices that the residuals do not form a zero-centered bell shape as shown. What does this mean?",
     "options": [
       "The model might have prediction errors over a range of target values.",
       "The dataset cannot be accurately represented using the regression model",
@@ -305,7 +305,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 29 During mini-batch training of a neural network for a classification problem, a Data Scientist notices that training accuracy oscillates. What is the MOST likely cause of this issue? A. The class distribution in the dataset is imbalanced",
+    "question": "During mini-batch training of a neural network for a classification problem, a Data Scientist notices that training accuracy oscillates. What is the MOST likely cause of this issue? A. The class distribution in the dataset is imbalanced",
     "options": [
       "Dataset shuffling is disabled",
      "The batch size is too big",
@@ -316,7 +316,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 30 A Machine Learning Specialist observes several performance problems with the training portion of a machine learning solution on Amazon SageMaker. The solution uses a large training dataset, 2 TB in size, and is using the SageMaker k-means algorithm. The observed issues include the unacceptable length of time it takes before the training job launches and poor I/O throughput while training the model. What should the Specialist do to address the performance issues with the current solution?",
+    "question": "A Machine Learning Specialist observes several performance problems with the training portion of a machine learning solution on Amazon SageMaker. The solution uses a large training dataset, 2 TB in size, and is using the SageMaker k-means algorithm. The observed issues include the unacceptable length of time it takes before the training job launches and poor I/O throughput while training the model. What should the Specialist do to address the performance issues with the current solution?",
     "options": [
       "Use the SageMaker batch transform feature",
       "Compress the training data into Apache Parquet format.",
@@ -328,7 +328,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 32 A Machine Learning Specialist is building a model that will perform time series forecasting using Amazon SageMaker. The Specialist has finished training the model and is now planning to perform load testing on the endpoint so they can configure Auto Scaling for the model variant. Which approach will allow the Specialist to review the latency, memory utilization, and CPU utilization during the load test?",
+    "question": "A Machine Learning Specialist is building a model that will perform time series forecasting using Amazon SageMaker. The Specialist has finished training the model and is now planning to perform load testing on the endpoint so they can configure Auto Scaling for the model variant. Which approach will allow the Specialist to review the latency, memory utilization, and CPU utilization during the load test?",
     "options": [
       "Review SageMaker logs that have been written to Amazon S3 by leveraging Amazon Athena and Amazon QuickSight to visualize logs as they are being produced",
       "Generate an Amazon CloudWatch dashboard to create a single view for the latency, memory utilization, and CPU utilization metrics that are outputted by Amazon SageMaker",
@@ -340,7 +340,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 33 An Amazon SageMaker notebook instance is launched into Amazon VPC. The SageMaker notebook references data contained in an Amazon S3 bucket in another account. The bucket is encrypted using SSE-KMS. The instance returns an access denied error when trying to access data in Amazon S3. Which of the following are required to access the bucket and avoid the access denied error? (Select THREE)",
+    "question": "An Amazon SageMaker notebook instance is launched into Amazon VPC. The SageMaker notebook references data contained in an Amazon S3 bucket in another account. The bucket is encrypted using SSE-KMS. The instance returns an access denied error when trying to access data in Amazon S3. Which of the following are required to access the bucket and avoid the access denied error? (Select THREE)",
     "options": [
       "An AWS KMS key policy that allows access to the customer master key (CMK)",
       "A SageMaker notebook security group that allows access to Amazon S3",
@@ -352,7 +352,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 35 A Machine Learning Specialist needs to create a data repository to hold a large amount of time-based training data for a new model. In the source system, new files are added every hour. Throughout a single 24-hour period, the volume of hourly updates will change significantly. The Specialist always wants to train on the last 24 hours of the data. Which type of data repository is the MOST cost-effective solution?",
+    "question": "A Machine Learning Specialist needs to create a data repository to hold a large amount of time-based training data for a new model. In the source system, new files are added every hour. Throughout a single 24-hour period, the volume of hourly updates will change significantly. The Specialist always wants to train on the last 24 hours of the data. Which type of data repository is the MOST cost-effective solution?",
     "options": [
       "An Amazon EBS-backed Amazon EC2 instance with hourly directories",
       "An Amazon RDS database with hourly table partitions",
@@ -364,7 +364,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 36 A retail chain has been ingesting purchasing records from its network of 20,000 stores to Amazon S3 using Amazon Kinesis Data Firehose. To support training an improved machine learning model, training records will require new but simple transformations, and some attributes will be combined. The model needs to be retrained daily. Given the large number of stores and the legacy data ingestion, which change will require the LEAST amount of development effort?",
+    "question": "A retail chain has been ingesting purchasing records from its network of 20,000 stores to Amazon S3 using Amazon Kinesis Data Firehose. To support training an improved machine learning model, training records will require new but simple transformations, and some attributes will be combined. The model needs to be retrained daily. Given the large number of stores and the legacy data ingestion, which change will require the LEAST amount of development effort?",
     "options": [
       "Require that the stores switch to capturing their data locally on AWS Storage Gateway for loading into Amazon S3, then use AWS Glue to do the transformation",
       "Deploy an Amazon EMR cluster running Apache Spark with the transformation logic, and have the cluster run each day on the accumulating records in Amazon S3, outputting new/transformed records to Amazon S3",
@@ -376,7 +376,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 37 A city wants to monitor its air quality to address the consequences of air pollution. A Machine Learning Specialist needs to forecast the air quality in parts per million of contaminants for the next 2 days in the city. As this is a prototype, only daily data from the last year is available. Which model is MOST likely to provide the best results in Amazon SageMaker?",
+    "question": "A city wants to monitor its air quality to address the consequences of air pollution. A Machine Learning Specialist needs to forecast the air quality in parts per million of contaminants for the next 2 days in the city. As this is a prototype, only daily data from the last year is available. Which model is MOST likely to provide the best results in Amazon SageMaker?",
     "options": [
       "Use the Amazon SageMaker k-Nearest-Neighbors (kNN) algorithm on the single time series consisting of the full year of data with a predictor_type of regressor.",
       "Use Amazon SageMaker Random Cut Forest (RCF) on the single time series consisting of the full year of data.",
@@ -388,7 +388,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 38 For the given confusion matrix, what is the recall and precision of the model?",
+    "question": "For the given confusion matrix, what is the recall and precision of the model?",
     "options": [
       "Recall = 0.92 Precision = 0.84",
       "Recall = 0.84 Precision = 0.8",
@@ -400,7 +400,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 39 A Machine Learning Specialist is working with a media company to perform classification on popular articles from the company's website. The company is using random forests to classify how popular an article will be before it is published. A sample of the data being used is below. Given the dataset, the Specialist wants to convert the Day-Of_Week column to binary values. What technique should be used to convert this column to binary values?",
+    "question": "A Machine Learning Specialist is working with a media company to perform classification on popular articles from the company's website. The company is using random forests to classify how popular an article will be before it is published. A sample of the data being used is below. Given the dataset, the Specialist wants to convert the Day-Of_Week column to binary values. What technique should be used to convert this column to binary values?",
     "options": [
       "Binarization",
       "One-hot encoding",
@@ -412,7 +412,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 40 A company has raw user and transaction data stored in Amazon S3, a MySQL database, and Amazon Redshift. A Data Scientist needs to perform an analysis by joining the three datasets from Amazon S3, MySQL, and Amazon Redshift, and then calculating the average of a few selected columns from the joined data. Which AWS service should the Data Scientist use?",
+    "question": "A company has raw user and transaction data stored in Amazon S3, a MySQL database, and Amazon Redshift. A Data Scientist needs to perform an analysis by joining the three datasets from Amazon S3, MySQL, and Amazon Redshift, and then calculating the average of a few selected columns from the joined data. Which AWS service should the Data Scientist use?",
     "options": [
       "Amazon Athena",
       "Amazon Redshift Spectrum",
@@ -424,7 +424,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 41 A Mobile Network Operator is building an analytics platform to analyze and optimize a company's operations using Amazon Athena and Amazon S3. The source systems send data in CSV format in real time. The Data Engineering team wants to transform the data to the Apache Parquet format before storing it on Amazon S3. Which solution takes the LEAST effort to implement?",
+    "question": "A Mobile Network Operator is building an analytics platform to analyze and optimize a company's operations using Amazon Athena and Amazon S3. The source systems send data in CSV format in real time. The Data Engineering team wants to transform the data to the Apache Parquet format before storing it on Amazon S3. Which solution takes the LEAST effort to implement?",
     "options": [
       "Ingest .CSV data using Apache Kafka Streams on Amazon EC2 instances and use Kafka Connect S3 to serialize data as Parquet",
       "Ingest .CSV data from Amazon Kinesis Data Streams and use AWS Glue to convert data into Parquet.",
@@ -436,7 +436,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 43 A Marketing Manager at a pet insurance company plans to launch a targeted marketing campaign on social media to acquire new customers. Currently, the company has the following data in Amazon Aurora: profiles for all past and existing customers; profiles for all past and existing insured pets; policy-level information; premiums received; claims paid. What steps should be taken to implement a machine learning model to identify potential new customers on social media?",
+    "question": "A Marketing Manager at a pet insurance company plans to launch a targeted marketing campaign on social media to acquire new customers. Currently, the company has the following data in Amazon Aurora: profiles for all past and existing customers; profiles for all past and existing insured pets; policy-level information; premiums received; claims paid. What steps should be taken to implement a machine learning model to identify potential new customers on social media?",
     "options": [
       "Use regression on customer profile data to understand key characteristics of consumer segments. Find similar profiles on social media.",
       "Use clustering on customer profile data to understand key characteristics of consumer segments. Find similar profiles on social media.",
@@ -448,7 +448,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 44 A company is running an Amazon SageMaker training job that will access data stored in its Amazon S3 bucket. A compliance policy requires that the data never be transmitted across the internet. How should the company set up the job?",
+    "question": "A company is running an Amazon SageMaker training job that will access data stored in its Amazon S3 bucket. A compliance policy requires that the data never be transmitted across the internet. How should the company set up the job?",
     "options": [
       "Launch the notebook instances in a public subnet and access the data through the public S3 endpoint",
       "Launch the notebook instances in a private subnet and access the data through a NAT gateway",
@@ -460,7 +460,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 45 A Machine Learning Specialist is preparing data for training on Amazon SageMaker. The data is transformed into a numpy.array, which appears to be negatively affecting the speed of the training. What should the Specialist do to optimize the data for training on SageMaker?",
+    "question": "A Machine Learning Specialist is preparing data for training on Amazon SageMaker. The data is transformed into a numpy.array, which appears to be negatively affecting the speed of the training. What should the Specialist do to optimize the data for training on SageMaker?",
     "options": [
       "Use the SageMaker batch transform feature to transform the training data into a DataFrame",
       "Use AWS Glue to compress the data into the Apache Parquet format C. Transform the dataset into the RecordIO protobuf format",
@@ -471,7 +471,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 46 A Machine Learning Specialist is training a model to identify the make and model of vehicles in images. The Specialist wants to use transfer learning and an existing model trained on images of general objects. The Specialist collated a large custom dataset of pictures containing different vehicle makes and models. What should the Specialist do to initialize the model to re-train it with the custom data?",
+    "question": "A Machine Learning Specialist is training a model to identify the make and model of vehicles in images. The Specialist wants to use transfer learning and an existing model trained on images of general objects. The Specialist collated a large custom dataset of pictures containing different vehicle makes and models. What should the Specialist do to initialize the model to re-train it with the custom data?",
     "options": [
       "Initialize the model with random weights in all layers including the last fully connected layer",
       "Initialize the model with pre-trained weights in all layers and replace the last fully connected layer.",
@@ -483,7 +483,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 47 A Machine Learning Specialist is developing a custom video recommendation model for an application. The dataset used to train this model is very large, with millions of data points, and is hosted in an Amazon S3 bucket. The Specialist wants to avoid loading all of this data onto an Amazon SageMaker notebook instance because it would take hours to move and will exceed the attached 5 GB Amazon EBS volume on the notebook instance. Which approach allows the Specialist to use all the data to train the model? A. Load a smaller subset of the data into the SageMaker notebook and train locally. Confirm that the training code is executing and the model parameters seem reasonable. Initiate a SageMaker training job using the full dataset from the S3 bucket using Pipe input mode.",
+    "question": "A Machine Learning Specialist is developing a custom video recommendation model for an application. The dataset used to train this model is very large, with millions of data points, and is hosted in an Amazon S3 bucket. The Specialist wants to avoid loading all of this data onto an Amazon SageMaker notebook instance because it would take hours to move and will exceed the attached 5 GB Amazon EBS volume on the notebook instance. Which approach allows the Specialist to use all the data to train the model? A. Load a smaller subset of the data into the SageMaker notebook and train locally. Confirm that the training code is executing and the model parameters seem reasonable. Initiate a SageMaker training job using the full dataset from the S3 bucket using Pipe input mode.",
     "options": [
       "Launch an Amazon EC2 instance with an AWS Deep Learning AMI and attach the S3 bucket to the instance. Train on a small amount of the data to verify the training code and hyperparameters. Go back to Amazon SageMaker and train using the full dataset",
       "Use AWS Glue to train a model using a small subset of the data to confirm that the data will be compatible with Amazon SageMaker. Initiate a SageMaker training job using the full dataset from the S3 bucket using Pipe input mode.",
@@ -494,7 +494,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 48 A Machine Learning Specialist is creating a new natural language processing application that processes a dataset comprised of 1 million sentences. The aim is to then run Word2Vec to generate embeddings of the sentences and enable different types of predictions. Here is an example from the dataset: \"The quick BROWN FOX jumps over the lazy dog.\" Which of the following are the operations the Specialist needs to perform to correctly sanitize and prepare the data in a repeatable manner? (Select THREE)",
+    "question": "A Machine Learning Specialist is creating a new natural language processing application that processes a dataset comprised of 1 million sentences. The aim is to then run Word2Vec to generate embeddings of the sentences and enable different types of predictions. Here is an example from the dataset: \"The quick BROWN FOX jumps over the lazy dog.\" Which of the following are the operations the Specialist needs to perform to correctly sanitize and prepare the data in a repeatable manner? (Select THREE)",
     "options": [
       "Perform part-of-speech tagging and keep the action verb and the nouns only",
       "Normalize all words by making the sentence lowercase",
@@ -506,7 +506,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 49 This graph shows the training and validation loss against the epochs for a neural network. The network being trained is as follows: two dense layers, one output neuron, 100 neurons in each layer, 100 epochs, random initialization of weights. Which technique can be used to improve model performance in terms of accuracy in the validation set?",
+    "question": "This graph shows the training and validation loss against the epochs for a neural network. The network being trained is as follows: two dense layers, one output neuron, 100 neurons in each layer, 100 epochs, random initialization of weights. Which technique can be used to improve model performance in terms of accuracy in the validation set?",
     "options": [
       "Early stopping",
       "Random initialization of weights with appropriate seed",
@@ -518,7 +518,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 50 A manufacturing company asks its Machine Learning Specialist to develop a model that classifies defective parts into one of eight defect types. The company has provided roughly 100,000 images per defect type for training. During the initial training of the image classification model, the Specialist notices that the validation accuracy is 80%, while the training accuracy is 90%. It is known that human-level performance for this type of image classification is around 90%. What should the Specialist consider to fix this issue?",
+    "question": "A manufacturing company asks its Machine Learning Specialist to develop a model that classifies defective parts into one of eight defect types. The company has provided roughly 100,000 images per defect type for training. During the initial training of the image classification model, the Specialist notices that the validation accuracy is 80%, while the training accuracy is 90%. It is known that human-level performance for this type of image classification is around 90%. What should the Specialist consider to fix this issue?",
     "options": [
       "A longer training time",
       "Making the network larger",
@@ -530,7 +530,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 51 Example Corp has an annual sale event from October to December. The company has sequential sales data from the past 15 years and wants to use Amazon ML to predict the sales for this year's upcoming event. Which method should Example Corp use to split the data into a training dataset and evaluation dataset?",
+    "question": "Example Corp has an annual sale event from October to December. The company has sequential sales data from the past 15 years and wants to use Amazon ML to predict the sales for this year's upcoming event. Which method should Example Corp use to split the data into a training dataset and evaluation dataset?",
     "options": [
       "Pre-split the data before uploading to Amazon S3",
       "Have Amazon ML split the data randomly.",
@@ -542,7 +542,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 52 A company is running a machine learning prediction service that generates 100 TB of predictions every day. A Machine Learning Specialist must generate a visualization of the daily precision-recall curve from the predictions, and forward a read-only version to the Business team. Which solution requires the LEAST coding effort?",
+    "question": "A company is running a machine learning prediction service that generates 100 TB of predictions every day. A Machine Learning Specialist must generate a visualization of the daily precision-recall curve from the predictions, and forward a read-only version to the Business team. Which solution requires the LEAST coding effort?",
     "options": [
       "Run a daily Amazon EMR workflow to generate precision-recall data, and save the results in Amazon S3. Give the Business team read-only access to S3",
       "Generate daily precision-recall data in Amazon QuickSight, and publish the results in a dashboard shared with the Business team",
@@ -554,7 +554,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 53 A Machine Learning Specialist has built a model using Amazon SageMaker built-in algorithms and is not getting the expected accurate results. The Specialist wants to use hyperparameter optimization to increase the model's accuracy. Which method is the MOST repeatable and requires the LEAST amount of effort to achieve this?",
+    "question": "A Machine Learning Specialist has built a model using Amazon SageMaker built-in algorithms and is not getting the expected accurate results. The Specialist wants to use hyperparameter optimization to increase the model's accuracy. Which method is the MOST repeatable and requires the LEAST amount of effort to achieve this?",
     "options": [
       "Launch multiple training jobs in parallel with different hyperparameters",
       "Create an AWS Step Functions workflow that monitors the accuracy in Amazon CloudWatch Logs and relaunches the training job with a defined list of hyperparameters",
@@ -566,7 +566,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 54 IT leadership wants to transition a company's existing machine learning data storage environment to AWS as a temporary ad hoc solution. The company currently uses a custom software process that heavily leverages SQL as a query language and exclusively stores generated csv documents for machine learning. The ideal state for the company would be a solution that allows it to continue to use the current workforce of SQL experts. The solution must also support the storage of csv and JSON files, and be able to query over semi-structured data. The following are high priorities for the company: solution simplicity, fast development time, low cost, and high flexibility. What technologies meet the company's requirements?",
+    "question": "IT leadership wants to transition a company's existing machine learning data storage environment to AWS as a temporary ad hoc solution. The company currently uses a custom software process that heavily leverages SQL as a query language and exclusively stores generated csv documents for machine learning. The ideal state for the company would be a solution that allows it to continue to use the current workforce of SQL experts. The solution must also support the storage of csv and JSON files, and be able to query over semi-structured data. The following are high priorities for the company: solution simplicity, fast development time, low cost, and high flexibility. What technologies meet the company's requirements?",
     "options": [
       "Amazon S3 and Amazon Athena",
       "Amazon Redshift and AWS Glue",
@@ -578,7 +578,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 55 A Machine Learning Specialist is working for a credit card processing company and receives an unbalanced dataset containing credit card transactions. It contains 99,000 valid transactions and 1,000 fraudulent transactions. The Specialist is asked to score a model that was run against the dataset. The Specialist has been advised that identifying valid transactions is equally as important as identifying fraudulent transactions. What metric is BEST suited to score the model?",
+    "question": "A Machine Learning Specialist is working for a credit card processing company and receives an unbalanced dataset containing credit card transactions. It contains 99,000 valid transactions and 1,000 fraudulent transactions. The Specialist is asked to score a model that was run against the dataset. The Specialist has been advised that identifying valid transactions is equally as important as identifying fraudulent transactions. What metric is BEST suited to score the model?",
     "options": [
       "Precision",
       "Recall",
@@ -590,7 +590,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 56 A bank's Machine Learning team is developing an approach for credit card fraud detection. The company has a large dataset of historical data labeled as fraudulent. The goal is to build a model to take the information from new transactions and predict whether each transaction is fraudulent or not. Which built-in Amazon SageMaker machine learning algorithm should be used for modeling this problem?",
+    "question": "A bank's Machine Learning team is developing an approach for credit card fraud detection. The company has a large dataset of historical data labeled as fraudulent. The goal is to build a model to take the information from new transactions and predict whether each transaction is fraudulent or not. Which built-in Amazon SageMaker machine learning algorithm should be used for modeling this problem?",
     "options": [
       "Seq2seq",
       "XGBoost",
@@ -602,7 +602,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 57 While working on a neural network project, a Machine Learning Specialist discovers that some features in the data have very high magnitude, resulting in this data being weighted more in the cost function. What should the Specialist do to ensure better convergence during backpropagation?",
+    "question": "While working on a neural network project, a Machine Learning Specialist discovers that some features in the data have very high magnitude, resulting in this data being weighted more in the cost function. What should the Specialist do to ensure better convergence during backpropagation?",
     "options": [
       "Dimensionality reduction",
       "Data normalization",
@@ -614,7 +614,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 58 An online reseller has a large, multi-column dataset with one column missing 30% of its data. A Machine Learning Specialist believes that certain columns in the dataset could be used to reconstruct the missing data. Which reconstruction approach should the Specialist use to preserve the integrity of the dataset?",
+    "question": "An online reseller has a large, multi-column dataset with one column missing 30% of its data. A Machine Learning Specialist believes that certain columns in the dataset could be used to reconstruct the missing data. Which reconstruction approach should the Specialist use to preserve the integrity of the dataset?",
     "options": [
       "Listwise deletion",
       "Last observation carried forward",
@@ -626,7 +626,7 @@
     "references": ""
   },
   {
{ -    "question": "QUESTION 59 A Machine Learning Specialist discover the followin g statistics while experimenting on a model. &#13;What can the Specialist from the experiments?", +    "question": "A Machine Learning Specialist discovers the following statistics while experimenting on a model. What can the Specialist conclude from the experiments?", "options": [ "The model in Experiment 1 had a high variance error that was reduced in Experiment 3 by regularization. Experiment 2 shows that there is minimal bias error in Experiment 1", "The model in Experiment 1 had a high bias error that was reduced in Experiment 3 by regularization. Experiment 2 shows that there is minimal variance error in Experiment 1", @@ -638,7 +638,7 @@ "references": "" }, { -    "question": "QUESTION 60 A Machine Learning Specialist needs to be able to i ngest streaming data and store it in Apache Parquet files for exploration and analysis. Which o f the following services would both ingest and store this data in the correct format?", +    "question": "A Machine Learning Specialist needs to be able to ingest streaming data and store it in Apache Parquet files for exploration and analysis. Which of the following services would both ingest and store this data in the correct format?", "options": [ "AWS DMS", "Amazon Kinesis Data Streams", @@ -650,7 +650,7 @@ "references": "" }, { -    "question": "QUESTION 61 A Machine Learning Specialist needs to move and tra nsform data in preparation for training Some of the data needs to be processed in near-real time an d other data can be moved hourly There are existing Amazon EMR MapReduce jobs to clean and fea ture engineering to perform on the data Which of the following services can feed data to th e MapReduce jobs? (Select TWO )", +    "question": "A Machine Learning Specialist needs to move and transform data in preparation for training. Some of the data needs to be processed in near-real time and other data can be moved hourly. There are existing Amazon EMR MapReduce jobs for the cleaning and feature engineering to perform on the data. Which of the following services can feed data to the MapReduce jobs? (Select TWO.)", "options": [ "AWS DMS", "Amazon Kinesis", @@ -662,7 +662,7 @@ "references": "" }, { -    "question": "QUESTION 62 An insurance company is developing a new device for vehicles that uses a camera to observe drivers' behavior and alert them when they appear distracted The company created approximately 10,000 training images in a controlled environment that a Machine Learning Specialist will use to train and evaluate machine learning models During the model evaluation the Specialist notices that the training error rate diminishes faster as the number of epochs increases and the model is not accurately inferring on the unseen test images Which of the following should be used to resolve th is issue? (Select TWO)", +    "question": "An insurance company is developing a new device for vehicles that uses a camera to observe drivers' behavior and alert them when they appear distracted. The company created approximately 10,000 training images in a controlled environment that a Machine Learning Specialist will use to train and evaluate machine learning models. During the model evaluation, the Specialist notices that the training error rate diminishes faster as the number of epochs increases and the model is not accurately inferring on the unseen test images. Which of the following should be used to resolve this issue?
(Select TWO.)", "options": [ "Add vanishing gradient to the model", "Perform data augmentation on the training data", @@ -674,7 +674,7 @@ "references": "" }, { -    "question": "QUESTION 63 The Chief Editor for a product catalog wants the Re search and Development team to build a machine learning system that can be used to detect whether or not individuals in a collection of images are wearing the company's retail brand The team has a s et of training data Which machine learning algorithm should the researc hers use that BEST meets their requirements?", +    "question": "The Chief Editor for a product catalog wants the Research and Development team to build a machine learning system that can be used to detect whether or not individuals in a collection of images are wearing the company's retail brand. The team has a set of training data. Which machine learning algorithm should the researchers use that BEST meets their requirements?", "options": [ "Latent Dirichlet Allocation (LDA)", "Recurrent neural network (RNN)", @@ -686,7 +686,7 @@ "references": "" }, { -    "question": "QUESTION 64 A Machine Learning Specialist kicks off a hyperpara meter tuning job for a tree-based ensemble model using Amazon SageMaker with Area Under the RO C Curve (AUC) as the objective metric This workflow will eventually be deployed in a pipeline that retrains and tunes hyperparameters each night to model click-through on data that goes stal e every 24 hours With the goal of decreasing the amount of time it t akes to train these models, and ultimately to decrease costs, the Specialist wants to reconfigure the input hyperparameter range(s) Which visualization will accomplish this?", +    "question": "A Machine Learning Specialist kicks off a hyperparameter tuning job for a tree-based ensemble model using Amazon SageMaker with Area Under the ROC Curve (AUC) as the objective metric. This workflow will eventually be deployed in a pipeline that retrains and tunes hyperparameters each night to model click-through on data that goes stale every 24 hours. With the goal of decreasing the amount of time it takes to train these models, and ultimately to decrease costs, the Specialist wants to reconfigure the input hyperparameter range(s). Which visualization will accomplish this?", "options": [ "A histogram showing whether the most important input feature is Gaussian.", "A scatter plot with points colored by target variable that uses t-Distributed Stochastic Neighbor Embedding (t-SNE) to visualize the large number of input variables in an easier-to-read dimension.", @@ -698,7 +698,7 @@ "references": "" }, { -    "question": "QUESTION 65 A Machine Learning Specialist is configuring automa tic model tuning in Amazon SageMaker When using the hyperparameter optimization feature, which of the following guidelines should be followed to improve optimization? Choose the maximum number of hyperparameters suppor ted by", +    "question": "A Machine Learning Specialist is configuring automatic model tuning in Amazon SageMaker. When using the hyperparameter optimization feature, which of the following guidelines should be followed to improve optimization?", "options": [ "Choose the maximum number of hyperparameters supported by Amazon SageMaker to search the largest number of combinations possible", "Specify a very large hyperparameter range to allow Amazon SageMaker to cover every possible value.", @@ -710,7 +710,7 @@ "references": "" },
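QUESTION 53 and QUESTION 65 both point at SageMaker automatic model tuning: a managed Bayesian search over a small number of well-chosen (often log-scaled) ranges. A sketch with the SageMaker Python SDK and the built-in XGBoost image, under stated assumptions: the role ARN and S3 paths are placeholders, and the ranges are illustrative:

```python
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

session = sagemaker.Session()
image = sagemaker.image_uris.retrieve("xgboost", session.boto_region_name, version="1.7-1")

estimator = Estimator(
    image_uri=image,
    role="arn:aws:iam::123456789012:role/MySageMakerRole",  # placeholder role
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://my-bucket/output/",                    # placeholder bucket
    sagemaker_session=session,
)
estimator.set_hyperparameters(objective="binary:logistic", eval_metric="auc", num_round=200)

# Few, log-scaled ranges help the Bayesian search converge in fewer jobs,
# which is the guideline these two questions are testing.
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name="validation:auc",
    hyperparameter_ranges={
        "eta": ContinuousParameter(0.01, 0.5, scaling_type="Logarithmic"),
        "alpha": ContinuousParameter(1e-4, 10, scaling_type="Logarithmic"),
    },
    objective_type="Maximize",
    max_jobs=20,
    max_parallel_jobs=2,
)
tuner.fit({"train": "s3://my-bucket/train/", "validation": "s3://my-bucket/val/"})
```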
{ -    "question": "QUESTION 67 A Machine Learning Specialist is designing a system for improving sales for a company. &#13;The objective is to use the large amount of information the compa ny has on users' behavior and product preferences to predict which products users would l ike based on the users' similarity to other users. What should the Specialist do to meet this objectiv e?", +    "question": "A Machine Learning Specialist is designing a system for improving sales for a company. The objective is to use the large amount of information the company has on users' behavior and product preferences to predict which products users would like based on the users' similarity to other users. What should the Specialist do to meet this objective?", "options": [ "Build a content-based filtering recommendation engine with Apache Spark ML on Amazon EMR.", "Build a collaborative filtering recommendation engine with Apache Spark ML on Amazon EMR.", @@ -722,7 +722,7 @@ "references": "" }, { -    "question": "QUESTION 68 A Data Engineer needs to build a model using a data set containing customer credit card information. How can the Data Engineer ensure the data remains e ncrypted and the credit card information is secure?", +    "question": "A Data Engineer needs to build a model using a dataset containing customer credit card information. How can the Data Engineer ensure the data remains encrypted and the credit card information is secure?", "options": [ "Use a custom encryption algorithm to encrypt the data and store the data on an Amazon SageMaker instance in a VPC. Use the SageMaker DeepAR algorithm to randomize the credit card numbers.", "Use an IAM policy to encrypt the data on the Amazon S3 bucket and Amazon Kinesis to automatically discard credit card numbers and insert fake credit card numbers.", @@ -734,7 +734,7 @@ "references": "" }, { -    "question": "QUESTION 69 A Machine Learning Specialist is using an Amazon Sa geMaker notebook instance in a private subnet of a corporate VPC. The ML Specialist has important data stored on the Amazon SageMaker notebook instance's Amazon EBS volume, and needs to take a s napshot of that EBS volume. However the ML Specialist cannot find the Amazon SageMaker noteboo k instance's EBS volume or Amazon EC2 instance within the VPC. Why is the ML Specialist not seeing the instance vi sible in the VPC?", +    "question": "A Machine Learning Specialist is using an Amazon SageMaker notebook instance in a private subnet of a corporate VPC. The ML Specialist has important data stored on the Amazon SageMaker notebook instance's Amazon EBS volume, and needs to take a snapshot of that EBS volume. However, the ML Specialist cannot find the Amazon SageMaker notebook instance's EBS volume or Amazon EC2 instance within the VPC. Why is the ML Specialist not able to see the instance in the VPC?", "options": [ "Amazon SageMaker notebook instances are based on the EC2 instances within the customer account, but they run outside of VPCs.", "Amazon SageMaker notebook instances are based on the Amazon ECS service within customer accounts.", @@ -746,7 +746,7 @@ "references": "" }, { -    "question": "QUESTION 70 A manufacturing company has structured and unstruct ured data stored in an Amazon S3 bucket. A Machine Learning Specialist wants to use SQL to run queries on this data. Which solution requires the LEAST effort to be able to query this data?", +    "question": "A manufacturing company has structured and unstructured data stored in an Amazon S3 bucket. A Machine Learning Specialist wants to use SQL to run queries on this data.
Which solution requires the LEAST effort to be able to query this data?", "options": [ "Use AWS Data Pipeline to transform the data and Amazon RDS to run queries.", "Use AWS Glue to catalogue the data and Amazon Athena to run queries.", "Use AWS Batch to run ETL on the data and Amazon Aurora to run the queries.", @@ -757,7 +757,7 @@ "references": "" }, { -    "question": "QUESTION 71 A Machine Learning Specialist receives customer dat a for an online shopping website. The data includes demographics, past visits, and locality in formation. The Specialist must develop a machine learning approach to identify the customer shopping patterns, preferences and trends to enhance the website for better service and smart recommenda tions. Which solution should the Specialist recommend?", +    "question": "A Machine Learning Specialist receives customer data for an online shopping website. The data includes demographics, past visits, and locality information. The Specialist must develop a machine learning approach to identify the customer shopping patterns, preferences, and trends to enhance the website for better service and smart recommendations. Which solution should the Specialist recommend?", "options": [ "Latent Dirichlet Allocation (LDA) for the given collection of discrete data to identify patterns in the customer database.", "A neural network with a minimum of three layers and random initial weights to identify patterns in the customer database", @@ -769,7 +769,7 @@ "references": "" }, { -    "question": "QUESTION 72 A Machine Learning Specialist is working with a lar ge company to leverage machine learning within its products. The company wants to group its custom ers into categories based on which customers will and will not churn within the next 6 months. T he company has labeled the data available to the Specialist. Which machine learning model type should the Specia list use to accomplish this task?", +    "question": "A Machine Learning Specialist is working with a large company to leverage machine learning within its products. The company wants to group its customers into categories based on which customers will and will not churn within the next 6 months. The company has labeled the data available to the Specialist. Which machine learning model type should the Specialist use to accomplish this task?", "options": [ "Linear regression", "Classification", @@ -781,7 +781,7 @@ "references": "" }, { -    "question": "QUESTION 73 The displayed graph is from a foresting model for t esting a time series. Considering the graph only, which conclusion should a Machine Learning Specialist make about the behavior of the model?", +    "question": "The displayed graph is from a forecasting model for testing a time series. Considering the graph only, which conclusion should a Machine Learning Specialist make about the behavior of the model?", "options": [ "The model predicts both the trend and the seasonality well.", "The model predicts the trend well, but not the seasonality.", @@ -793,7 +793,7 @@ "references": "" }, { -    "question": "QUESTION 74 A company wants to classify user behavior as either fraudulent or normal. Based on internal research, a Machine Learning Specialist would like to build a binary classifier based on two features: age of account and transaction month. The class dis tribution for these features is illustrated in the figure provided. &#13;
Based on this information which model would have th e HIGHEST accuracy?", +    "question": "A company wants to classify user behavior as either fraudulent or normal. Based on internal research, a Machine Learning Specialist would like to build a binary classifier based on two features: age of account and transaction month. The class distribution for these features is illustrated in the figure provided. Based on this information, which model would have the HIGHEST accuracy?", "options": [ "Long short-term memory (LSTM) model with scaled exponential linear unit (SELU)", "Logistic regression", @@ -805,7 +805,7 @@ "references": "" }, { -    "question": "QUESTION 75 A Machine Learning Specialist at a company sensitiv e to security is preparing a dataset for model training. The dataset is stored in Amazon S3 and co ntains Personally Identifiable Information (Pll). The dataset: * Must be accessible from a VPC only. * Must not traverse the public internet. How can these requirements be satisfied? A. Create a VPC endpoint and apply a bucket access p olicy that restricts access to the given VPC endpoint and the VPC.", +    "question": "A Machine Learning Specialist at a company sensitive to security is preparing a dataset for model training. The dataset is stored in Amazon S3 and contains Personally Identifiable Information (PII). The dataset: * Must be accessible from a VPC only. * Must not traverse the public internet. How can these requirements be satisfied?", "options": [ "Create a VPC endpoint and apply a bucket access policy that restricts access to the given VPC endpoint and the VPC.", "Create a VPC endpoint and apply a bucket access policy that allows access from the given VPC endpoint and an Amazon EC2 instance.", "Create a VPC endpoint and use Network Access Control Lists (NACLs) to allow traffic between only the given VPC endpoint and an Amazon EC2 instance.", @@ -816,7 +816,7 @@ "references": "" }, { -    "question": "QUESTION 76 An employee found a video clip with audio on a comp any's social media feed. The language used in the video is Spanish. English is the employee's fir st language, and they do not understand Spanish. The employee wants to do a sentiment analysis. What combination of services is the MOST efficient to accomplish the task? A. Amazon Transcribe, Amazon Translate, and Amazon C omprehend", +    "question": "An employee found a video clip with audio on a company's social media feed. The language used in the video is Spanish. English is the employee's first language, and they do not understand Spanish. The employee wants to do a sentiment analysis. What combination of services is the MOST efficient to accomplish the task?", "options": [ "Amazon Transcribe, Amazon Translate, and Amazon Comprehend", "Amazon Transcribe, Amazon Comprehend, and Amazon SageMaker seq2seq", "Amazon Transcribe, Amazon Translate, and Amazon SageMaker Neural Topic Model (NTM)", @@ -827,7 +827,7 @@ "references": "" },
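QUESTION 76's Transcribe-Translate-Comprehend chain is worth seeing end to end. A boto3 sketch under stated assumptions: the job name and S3 media location are hypothetical, and the polling is simplified:

```python
import json
import time
import urllib.request

import boto3

transcribe = boto3.client("transcribe")
translate = boto3.client("translate")
comprehend = boto3.client("comprehend")

# 1) Speech-to-text in the source language (Spanish).
transcribe.start_transcription_job(
    TranscriptionJobName="social-clip-demo",            # hypothetical job name
    Media={"MediaFileUri": "s3://my-bucket/clip.mp4"},  # hypothetical location
    MediaFormat="mp4",
    LanguageCode="es-ES",
)
while True:
    job = transcribe.get_transcription_job(TranscriptionJobName="social-clip-demo")
    if job["TranscriptionJob"]["TranscriptionJobStatus"] in ("COMPLETED", "FAILED"):
        break
    time.sleep(5)

uri = job["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]
with urllib.request.urlopen(uri) as f:
    spanish_text = json.load(f)["results"]["transcripts"][0]["transcript"]

# 2) Translate Spanish to English.
english_text = translate.translate_text(
    Text=spanish_text, SourceLanguageCode="es", TargetLanguageCode="en"
)["TranslatedText"]

# 3) Sentiment analysis on the English text.
print(comprehend.detect_sentiment(Text=english_text, LanguageCode="en")["Sentiment"])
```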
{ -    "question": "QUESTION 77 A Machine Learning Specialist is packaging a custom ResNet model into a Docker container so the company can leverage Amazon SageMaker for training. The Specialist is using Amazon EC2 P3 instances to train the model and needs to properly configure the Docker container to leverage the NVIDIA GPUs. What does the Specialist need to do?", +    "question": "A Machine Learning Specialist is packaging a custom ResNet model into a Docker container so the company can leverage Amazon SageMaker for training. The Specialist is using Amazon EC2 P3 instances to train the model and needs to properly configure the Docker container to leverage the NVIDIA GPUs. What does the Specialist need to do?", "options": [ "Bundle the NVIDIA drivers with the Docker image.", "Build the Docker container to be NVIDIA-Docker compatible.", @@ -839,7 +839,7 @@ "references": "" }, { -    "question": "QUESTION 78 A Machine Learning Specialist is building a logisti c regression model that will predict whether or not a person will order a pizz", +    "question": "A Machine Learning Specialist is building a logistic regression model that will predict whether or not a person will order a pizza. The Specialist is trying to build the optimal model with an ideal classification threshold. What model evaluation technique should the Specialist use to understand how different classification thresholds will impact the model's performance?", "options": [ "Receiver operating characteristic (ROC) curve", @@ -851,7 +851,7 @@ "references": "" }, { -    "question": "QUESTION 79 An interactive online dictionary wants to add a wid get that displays words used in similar contexts. A Machine Learning Specialist is asked to provide wor d features for the downstream nearest neighbor model powering the widget. What should the Specialist do to meet these require ments?", +    "question": "An interactive online dictionary wants to add a widget that displays words used in similar contexts. A Machine Learning Specialist is asked to provide word features for the downstream nearest neighbor model powering the widget. What should the Specialist do to meet these requirements?", "options": [ "Create one-hot word encoding vectors.", "Produce a set of synonyms for every word using Amazon Mechanical Turk.", @@ -863,7 +863,7 @@ "references": "" }, { -    "question": "QUESTION 80 A Machine Learning Specialist is configuring Amazon SageMaker so multiple Data Scientists can access notebooks, train models, and deploy endpoint s. To ensure the best operational performance, the Specialist needs to be able to track how often the Scientists are deploying models, GPU and CPU utilization on the deployed SageMaker endpoints, an d all errors that are generated when an endpoint is invoked. Which services are integrated with Amazon SageMaker to track this information? (Select TWO.)", +    "question": "A Machine Learning Specialist is configuring Amazon SageMaker so multiple Data Scientists can access notebooks, train models, and deploy endpoints. To ensure the best operational performance, the Specialist needs to be able to track how often the Scientists are deploying models, GPU and CPU utilization on the deployed SageMaker endpoints, and all errors that are generated when an endpoint is invoked. Which services are integrated with Amazon SageMaker to track this information? (Select TWO.)", "options": [ "AWS CloudTrail", "AWS Health", @@ -875,7 +875,7 @@ "references": "" },
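QUESTION 78's answer hinges on what a ROC curve actually sweeps: every possible classification threshold. A small, self-contained scikit-learn sketch on synthetic data:

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

model = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
scores = model.predict_proba(X_te)[:, 1]

# roc_curve evaluates the TPR/FPR trade-off at every threshold implied by
# the scores, which is exactly how an "ideal classification threshold" is chosen.
fpr, tpr, thresholds = roc_curve(y_te, scores)
best = np.argmax(tpr - fpr)  # Youden's J statistic, one common choice
print(f"AUC={roc_auc_score(y_te, scores):.3f}, best threshold={thresholds[best]:.3f}")
```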
{ -    "question": "QUESTION 81 A Machine Learning Specialist trained a regression model, but the first iteration needs optimizing. The Specialist needs to understand whether the mode l is more frequently overestimating or underestimating the target. &#13;What option can the Specialist use to determine whe ther it is overestimating or underestimating the target value?", +    "question": "A Machine Learning Specialist trained a regression model, but the first iteration needs optimizing. The Specialist needs to understand whether the model is more frequently overestimating or underestimating the target. What option can the Specialist use to determine whether it is overestimating or underestimating the target value?", "options": [ "Root Mean Square Error (RMSE)", "Residual plots", @@ -887,7 +887,7 @@ "references": "" }, { -    "question": "QUESTION 83 When submitting Amazon SageMaker training jobs usin g one of the built-in algorithms, which common parameters MUST be specified? (Select THREE. )", +    "question": "When submitting Amazon SageMaker training jobs using one of the built-in algorithms, which common parameters MUST be specified? (Select THREE.)", "options": [ "The training channel identifying the location of training data on an Amazon S3 bucket.", "The validation channel identifying the location of validation data on an Amazon S3 bucket.", @@ -899,7 +899,7 @@ "references": "" }, { -    "question": "QUESTION 84 A Data Scientist is developing a machine learning m odel to predict future patient outcomes based on information collected about each patient and their treatment plans. The model should output a continuous value as its prediction. The data availa ble includes labeled outcomes for a set of 4,000 patients. The study was conducted on a group of ind ividuals over the age of 65 who have a particular disease that is known to worsen with age. Initial models have performed poorly. While reviewi ng the underlying data, the Data Scientist notices that, out of 4,000 patient observations, there are 450 where the patient age has been input as 0. The other features for these observations appear normal compared to the rest of the sample population. How should the Data Scientist correct this issue?", +    "question": "A Data Scientist is developing a machine learning model to predict future patient outcomes based on information collected about each patient and their treatment plans. The model should output a continuous value as its prediction. The data available includes labeled outcomes for a set of 4,000 patients. The study was conducted on a group of individuals over the age of 65 who have a particular disease that is known to worsen with age. Initial models have performed poorly. While reviewing the underlying data, the Data Scientist notices that, out of 4,000 patient observations, there are 450 where the patient age has been input as 0. The other features for these observations appear normal compared to the rest of the sample population. How should the Data Scientist correct this issue?", "options": [ "Drop all records from the dataset where age has been set to 0.", "Replace the age field value for records with a value of 0 with the mean or median value from the dataset.", @@ -911,7 +911,7 @@ "references": "" }, { -    "question": "QUESTION 85 A Data Science team is designing a dataset reposito ry where it will store a large amount of training data commonly used in its machine learning models. As Data Scientists may create an arbitrary number of new datasets every day the solution has t o scale automatically and be cost-effective. Also, it must be possible to explore the data using SQL. &#13;
Which storage scheme is MOST adapted to this scenar io?", +    "question": "A Data Science team is designing a dataset repository where it will store a large amount of training data commonly used in its machine learning models. As Data Scientists may create an arbitrary number of new datasets every day, the solution has to scale automatically and be cost-effective. Also, it must be possible to explore the data using SQL. Which storage scheme is MOST adapted to this scenario?", "options": [ "Store datasets as files in Amazon S3.", "Store datasets as files in an Amazon EBS volume attached to an Amazon EC2 instance.", @@ -923,7 +923,7 @@ "references": "" }, { -    "question": "QUESTION 86 A Machine Learning Specialist working for an online fashion company wants to build a data ingestion solution for the company's Amazon S3-based data lak e. The Specialist wants to create a set of ingestion m echanisms that will enable future capabilities comprised of: Real-time analytics Interactive analytics of historical data Clickstream analytics Product recommendations Which services should the Specialist use?", +    "question": "A Machine Learning Specialist working for an online fashion company wants to build a data ingestion solution for the company's Amazon S3-based data lake. The Specialist wants to create a set of ingestion mechanisms that will enable future capabilities comprising: real-time analytics, interactive analytics of historical data, clickstream analytics, and product recommendations. Which services should the Specialist use?", "options": [ "AWS Glue as the data catalog; Amazon Kinesis Data Streams and Amazon Kinesis Data Analytics for real-time data insights; Amazon Kinesis Data Firehose for delivery to Amazon ES for clickstream analytics; Amazon EMR to generate personalized product recommendations", "Amazon Athena as the data catalog; Amazon Kinesis Data Streams and Amazon Kinesis Data Analytics for near-real-time data insights; Amazon Kinesis Data Firehose for clickstream analytics; AWS Glue to generate personalized product recommendations", "AWS Glue as the data catalog; Amazon Kinesis Data Streams and Amazon Kinesis Data Analytics for historical data insights; Amazon Kinesis Data Firehose for delivery to Amazon ES for clickstream analytics; Amazon EMR to generate personalized product recommendations", @@ -934,7 +934,7 @@ "references": "" }, { -    "question": "QUESTION 87 A company is observing low accuracy while training on the default built-in image classification algorithm in Amazon SageMaker. The Data Science tea m wants to use an Inception neural network architecture instead of a ResNet architecture. Which of the following will accomplish this? (Selec t TWO.)", +    "question": "A company is observing low accuracy while training on the default built-in image classification algorithm in Amazon SageMaker. The Data Science team wants to use an Inception neural network architecture instead of a ResNet architecture. Which of the following will accomplish this? (Select TWO.)", "options": [ "Customize the built-in image classification algorithm to use Inception and use this for model training.", "Create a support case with the SageMaker team to change the default image classification algorithm to Inception.", @@ -946,7 +946,7 @@ "references": "" }, { -    "question": "QUESTION 88 A Machine Learning Specialist built an image classi fication deep learning model. However the Specialist ran into an overfitting problem in which the training and testing accuracies were 99% and 75%r respectively. How should the Specialist address this issue and wh at is the reason behind it?", +    "question": "A Machine Learning Specialist built an image classification deep learning model. However, the Specialist ran into an overfitting problem in which the training and testing accuracies were 99% and 75%, respectively.
How should the Specialist address this issue and what is the reason behind it?", "options": [ "The learning rate should be increased because the optimization process was trapped at a local minimum.", "The dropout rate at the flatten layer should be increased because the model is not generalized enough.", @@ -958,7 +958,7 @@ "references": "" }, { -    "question": "QUESTION 89 A Machine Learning team uses Amazon SageMaker to tr ain an Apache MXNet handwritten digit classifier model using a research dataset. The team wants to receive a notification when the model is overfitting. Auditors want to view the Amazon SageM aker log activity report to ensure there are no unauthorized API calls. What should the Machine Learning team do to address the requirements with the least amount of code and fewest steps?", +    "question": "A Machine Learning team uses Amazon SageMaker to train an Apache MXNet handwritten digit classifier model using a research dataset. The team wants to receive a notification when the model is overfitting. Auditors want to view the Amazon SageMaker log activity report to ensure there are no unauthorized API calls. What should the Machine Learning team do to address the requirements with the least amount of code and fewest steps?", "options": [ "Implement an AWS Lambda function to log Amazon SageMaker API calls to Amazon S3. Add code to push a custom metric to Amazon CloudWatch. Create an alarm in CloudWatch with Amazon SNS to receive a notification when the model is overfitting.", "Use AWS CloudTrail to log Amazon SageMaker API calls to Amazon S3. Add code to push a custom metric to Amazon CloudWatch. Create an alarm in CloudWatch with Amazon SNS to receive a notification when the model is overfitting.", @@ -970,7 +970,7 @@ "references": "" }, { -    "question": "QUESTION 90 A Machine Learning Specialist is implementing a ful l Bayesian network on a dataset that describes public transit in New York City. One of the random variables is discrete, and represents the number of minutes New Yorkers wait for a bus given that the b uses cycle every 10 minutes, with a mean of 3 minutes. Which prior probability distribution should the ML Specialist use for this variable?", +    "question": "A Machine Learning Specialist is implementing a full Bayesian network on a dataset that describes public transit in New York City. One of the random variables is discrete, and represents the number of minutes New Yorkers wait for a bus given that the buses cycle every 10 minutes, with a mean of 3 minutes. Which prior probability distribution should the ML Specialist use for this variable?", "options": [ "Poisson distribution", "Uniform distribution", @@ -982,7 +982,7 @@ "references": "" }, { -    "question": "QUESTION 91 A Data Science team within a large company uses Ama zon SageMaker notebooks to access data stored in Amazon S3 buckets. The IT Security team i s concerned that internet-enabled notebook instances create a security vulnerability where mal icious code running on the instances could compromise data privacy. The company mandates that all instances stay within a secured VPC with no internet access, and data communication traffic must stay within the AWS network. How should the Data Science team configure the note book instance placement to meet these requirements? A. Associate the Amazon SageMaker notebook with a pr ivate subnet in a VPC. &#13;
Place the Amazon SageMaker endpoint and S3 buckets within the same V PC.", +    "question": "A Data Science team within a large company uses Amazon SageMaker notebooks to access data stored in Amazon S3 buckets. The IT Security team is concerned that internet-enabled notebook instances create a security vulnerability where malicious code running on the instances could compromise data privacy. The company mandates that all instances stay within a secured VPC with no internet access, and data communication traffic must stay within the AWS network. How should the Data Science team configure the notebook instance placement to meet these requirements?", "options": [ "Associate the Amazon SageMaker notebook with a private subnet in a VPC. Place the Amazon SageMaker endpoint and S3 buckets within the same VPC.", "Associate the Amazon SageMaker notebook with a private subnet in a VPC. Use IAM policies to grant access to Amazon S3 and Amazon SageMaker.", "Associate the Amazon SageMaker notebook with a private subnet in a VPC. Ensure the VPC has S3 VPC endpoints and Amazon SageMaker VPC endpoints attached to it.", @@ -993,7 +993,7 @@ "references": "" }, { -    "question": "QUESTION 92 A Machine Learning Specialist has created a deep le arning neural network model that performs well on the training data but performs poorly on the tes t data. Which of the following methods should the Specialis t consider using to correct this? (Select THREE.)", +    "question": "A Machine Learning Specialist has created a deep learning neural network model that performs well on the training data but performs poorly on the test data. Which of the following methods should the Specialist consider using to correct this? (Select THREE.)", "options": [ "Decrease regularization.", "Increase regularization.", @@ -1005,7 +1005,7 @@ "references": "" }, { -    "question": "QUESTION 93 A Data Scientist needs to create a serverless inges tion and analytics solution for high-velocity, real time streaming data. The ingestion process must buffer and convert incom ing records from JSON to a query-optimized, columnar format without data loss. The output datas tore must be highly available, and Analysts must be able to run SQL queries against the data and con nect to existing business intelligence dashboards. Which solution should the Data Scientist build to s atisfy the requirements?", +    "question": "A Data Scientist needs to create a serverless ingestion and analytics solution for high-velocity, real-time streaming data. The ingestion process must buffer and convert incoming records from JSON to a query-optimized, columnar format without data loss. The output datastore must be highly available, and Analysts must be able to run SQL queries against the data and connect to existing business intelligence dashboards. Which solution should the Data Scientist build to satisfy the requirements?", "options": [ "Create a schema in the AWS Glue Data Catalog of the incoming data format. Use an Amazon Kinesis Data Firehose delivery stream to stream the data and transform the data to Apache Parquet or ORC format using the AWS Glue Data Catalog before delivering to Amazon S3. Have the Analysts query the data directly from Amazon S3 using Amazon Athena, and connect to BI tools using the Athena Java Database Connectivity (JDBC) connector.", "Write each JSON record to a staging location in Amazon S3.
Use the S3 Put event to trigger an AWS Lambda function that transforms the data into Apache Parquet or ORC format and writes the data to a processed data location in Amazon S3. Have the Analysts query the data directly from Amazon S3 using Amazon Athena, and connect to BI tools using the Athena Java Database Connectivity (JDBC) connector.", @@ -1017,7 +1017,7 @@ "references": "" }, { -    "question": "QUESTION 94 A company is setting up an Amazon SageMaker environ ment. The corporate data security policy does not allow communication over the internet. How can the company enable the Amazon SageMaker ser vice without enabling direct internet access to Amazon SageMaker notebook instances?", +    "question": "A company is setting up an Amazon SageMaker environment. The corporate data security policy does not allow communication over the internet. How can the company enable the Amazon SageMaker service without enabling direct internet access to Amazon SageMaker notebook instances?", "options": [ "Create a NAT gateway within the corporate VPC.", "Route Amazon SageMaker traffic through an on-premises network.", @@ -1029,7 +1029,7 @@ "references": "" }, { -    "question": "QUESTION 95 An office security agency conducted a successful pi lot using 100 cameras installed at key locations within the main office. Images from the cameras wer e uploaded to Amazon S3 and tagged using Amazon Rekognition, and the results were stored in Amazon ES. The agency is now looking to expand the pilot into a full production system using thous ands of video cameras in its office locations globally. The goal is to identify activities perfor med by non-employees in real time. Which solution should the agency consider?", +    "question": "An office security agency conducted a successful pilot using 100 cameras installed at key locations within the main office. Images from the cameras were uploaded to Amazon S3 and tagged using Amazon Rekognition, and the results were stored in Amazon ES. The agency is now looking to expand the pilot into a full production system using thousands of video cameras in its office locations globally. The goal is to identify activities performed by non-employees in real time. Which solution should the agency consider?", "options": [ "Use a proxy server at each local office and for each camera, and stream the RTSP feed to a unique Amazon Kinesis Video Streams video stream. On each stream, use Amazon Rekognition Video and create a stream processor to detect faces from a collection of known employees, and alert when non-employees are detected.", "Use a proxy server at each local office and for each camera, and stream the RTSP feed to a unique Amazon Kinesis Video Streams video stream. On each stream, use Amazon Rekognition Image to detect faces from a collection of known employees and alert when non-employees are detected.", "Install AWS DeepLens cameras and use the DeepLens_Kinesis_Video module to stream video to Amazon Kinesis Video Streams for each camera. On each stream, use Amazon Rekognition Video and create a stream processor to detect faces from a collection on each stream, and alert when non-employees are detected.", @@ -1040,7 +1040,7 @@ "references": "" },
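QUESTION 93's correct pattern, Kinesis Data Firehose converting JSON to Parquet/ORC via a Glue schema before landing in S3, can be wired up entirely in configuration. A sketch with boto3; the stream name, role ARN, bucket, and Glue database/table names are all placeholders, and the exact request shape should be checked against the current Firehose API reference:

```python
import boto3

firehose = boto3.client("firehose")

# Firehose buffers incoming JSON records and rewrites them as Parquet
# using the column types registered in the Glue Data Catalog.
firehose.create_delivery_stream(
    DeliveryStreamName="events-to-parquet",  # placeholder name
    DeliveryStreamType="DirectPut",
    ExtendedS3DestinationConfiguration={
        "RoleARN": "arn:aws:iam::123456789012:role/FirehoseRole",  # placeholder
        "BucketARN": "arn:aws:s3:::my-analytics-bucket",           # placeholder
        "Prefix": "events/",
        # Format conversion requires a buffer of at least 64 MB.
        "BufferingHints": {"SizeInMBs": 64, "IntervalInSeconds": 300},
        "DataFormatConversionConfiguration": {
            "Enabled": True,
            "InputFormatConfiguration": {"Deserializer": {"OpenXJsonSerDe": {}}},
            "OutputFormatConfiguration": {"Serializer": {"ParquetSerDe": {}}},
            "SchemaConfiguration": {
                "RoleARN": "arn:aws:iam::123456789012:role/FirehoseRole",  # placeholder
                "DatabaseName": "events_db",  # placeholder Glue database
                "TableName": "events",        # placeholder Glue table
                "Region": "us-east-1",
            },
        },
    },
)
```

Once delivered, the Parquet files are immediately queryable with Athena, completing the serverless path the question describes.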
{ -    "question": "QUESTION 96 A financial services company is building a robust s erverless data lake on Amazon S3. The data lake should be flexible and meet the following requireme nts: * Support querying old and new data on Amazon S3 th rough Amazon Athena and Amazon Redshift Spectrum. * Support event-driven ETL pipelines. &#13;* Provide a quick and easy way to understand metada ta. Which approach meets trfese requirements?", +    "question": "A financial services company is building a robust serverless data lake on Amazon S3. The data lake should be flexible and meet the following requirements: * Support querying old and new data on Amazon S3 through Amazon Athena and Amazon Redshift Spectrum. * Support event-driven ETL pipelines. * Provide a quick and easy way to understand metadata. Which approach meets these requirements?", "options": [ "Use an AWS Glue crawler to crawl S3 data, an AWS Lambda function to trigger an AWS Glue ETL job, and an AWS Glue Data Catalog to search and discover metadata.", "Use an AWS Glue crawler to crawl S3 data, an AWS Lambda function to trigger an AWS Batch job, and an external Apache Hive metastore to search and discover metadata.", @@ -1052,7 +1052,7 @@ "references": "" }, { -    "question": "QUESTION 97 A company's Machine Learning Specialist needs to im prove the training speed of a time-series forecasting model using TensorFlow. The training is currently implemented on a single-GPU machine and takes approximately 23 hours to complete. The t raining needs to be run daily. The model accuracy js acceptable, but the company a nticipates a continuous increase in the size of the training data and a need to update the model on an hourly, rather than a daily, basis. The company also wants to minimize coding effort and in frastructure changes What should the Machine Learning Specialist do to t he training solution to allow it to scale for futur e demand?", +    "question": "A company's Machine Learning Specialist needs to improve the training speed of a time-series forecasting model using TensorFlow. The training is currently implemented on a single-GPU machine and takes approximately 23 hours to complete. The training needs to be run daily. The model accuracy is acceptable, but the company anticipates a continuous increase in the size of the training data and a need to update the model on an hourly, rather than a daily, basis. The company also wants to minimize coding effort and infrastructure changes. What should the Machine Learning Specialist do to the training solution to allow it to scale for future demand?", "options": [ "Do not change the TensorFlow code. Change the machine to one with a more powerful GPU to speed up the training.", "Change the TensorFlow code to implement a Horovod distributed framework supported by Amazon SageMaker. Parallelize the training to as many machines as needed to achieve the business goals.", @@ -1064,7 +1064,7 @@ "references": "" }, { -    "question": "QUESTION 98 A Machine Learning Specialist is required to build a supervised image-recognition model to identify a cat. The ML Specialist performs some tests and reco rds the following results for a neural networkbased image classifier: Total number of images available = 1,000 Test set i mages = 100 (constant test set) The ML Specialist notices that, in over 75% of the misclassified images, the cats were held upside down by their owners. Which techniques can be used by the ML Specialist t o improve this specific test error?", +    "question": "A Machine Learning Specialist is required to build a supervised image-recognition model to identify a cat.
The ML Specialist performs some tests and records the following results for a neural network-based image classifier: Total number of images available = 1,000; Test set images = 100 (constant test set). The ML Specialist notices that, in over 75% of the misclassified images, the cats were held upside down by their owners. Which techniques can be used by the ML Specialist to improve this specific test error?", "options": [ "Increase the training data by adding variation in rotation for training images.", "Increase the number of epochs for model training.", @@ -1076,7 +1076,7 @@ "references": "" }, { -    "question": "QUESTION 99 A Data Scientist is developing a machine learning m odel to classify whether a financial transaction is fraudulent. The labeled data available for training consists of 100,000 non-fraudulent observations and 1,000 fraudulent observations. The Data Scientist applies the XGBoost algorithm to the data, resulting in the following confusion matrix when the trained model is applied to a previ ously unseen validation dataset. The accuracy of the model is 99.1%, but the Data Scientist has been asked to reduce the number of false negatives. Which combination of steps should the Data Scientis t take to reduce the number of false positive predictions by the model? (Select TWO.)", +    "question": "A Data Scientist is developing a machine learning model to classify whether a financial transaction is fraudulent. The labeled data available for training consists of 100,000 non-fraudulent observations and 1,000 fraudulent observations. The Data Scientist applies the XGBoost algorithm to the data, resulting in the following confusion matrix when the trained model is applied to a previously unseen validation dataset. The accuracy of the model is 99.1%, but the Data Scientist has been asked to reduce the number of false negatives. Which combination of steps should the Data Scientist take to reduce the number of false positive predictions by the model? (Select TWO.)", "options": [ "Change the XGBoost eval_metric parameter to optimize based on rmse instead of error.", "Increase the XGBoost scale_pos_weight parameter to adjust the balance of positive and negative weights.", @@ -1088,7 +1088,7 @@ "references": "" }, { -    "question": "QUESTION 100 A Machine Learning Specialist is assigned a TensorF low project using Amazon SageMaker for training, and needs to continue working for an extended perio d with no Wi-Fi access. Which approach should the Specialist use to continu e working?", +    "question": "A Machine Learning Specialist is assigned a TensorFlow project using Amazon SageMaker for training, and needs to continue working for an extended period with no Wi-Fi access. Which approach should the Specialist use to continue working?", "options": [ "Install Python 3 and boto3 on their laptop and continue the code development using that environment.", "Download the TensorFlow Docker container used in Amazon SageMaker from GitHub to their local environment, and use the Amazon SageMaker Python SDK to test the code.", @@ -1099,7 +1099,7 @@ "references": "" },
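QUESTION 99's `scale_pos_weight` fix is easy to demonstrate. A minimal xgboost sketch on synthetic data with roughly the question's 100:1 imbalance; the dataset is invented for illustration:

```python
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# ~99% negative / ~1% positive, echoing 100,000 vs 1,000 observations.
X, y = make_classification(n_samples=20000, weights=[0.99], flip_y=0, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

# scale_pos_weight ~ (#negatives / #positives) up-weights the minority class,
# trading some extra false positives for far fewer false negatives.
ratio = (y_tr == 0).sum() / (y_tr == 1).sum()
model = xgb.XGBClassifier(scale_pos_weight=ratio, eval_metric="auc")
model.fit(X_tr, y_tr)
print(confusion_matrix(y_te, model.predict(X_te)))
```

Lowering the decision threshold on `predict_proba` is the complementary lever the same question's answer set usually pairs with this.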
{ -    "question": "QUESTION 101 A Data Scientist wants to gain real-time insights i nto a data stream of GZIP files. &#13;Which solution would allow the use of SQL to query the stream with the LEAST latency?", +    "question": "A Data Scientist wants to gain real-time insights into a data stream of GZIP files. Which solution would allow the use of SQL to query the stream with the LEAST latency?", "options": [ "Amazon Kinesis Data Analytics with an AWS Lambda function to transform the data.", "AWS Glue with a custom ETL script to transform the data.", @@ -1111,7 +1111,7 @@ "references": "" }, { -    "question": "QUESTION 102 A Machine Learning Specialist must build out a proc ess to query a dataset on Amazon S3 using Amazon Athena The dataset contains more than 800.00 0 records stored as plaintext CSV files Each record contains 200 columns and is approximately 1 5 MB in size Most queries will span 5 to 10 columns only How should the Machine Learning Specialist transfor m the dataset to minimize query runtime?", +    "question": "A Machine Learning Specialist must build out a process to query a dataset on Amazon S3 using Amazon Athena. The dataset contains more than 800,000 records stored as plaintext CSV files. Each record contains 200 columns and is approximately 1.5 MB in size. Most queries will span 5 to 10 columns only. How should the Machine Learning Specialist transform the dataset to minimize query runtime?", "options": [ "Convert the records to Apache Parquet format", "Convert the records to JSON format", @@ -1123,7 +1123,7 @@ "references": "" }, { -    "question": "QUESTION 103 A Machine Learning Specialist is developing a daily ETL workflow containing multiple ETL jobs The workflow consists of the following processes * Start the workflow as soon as data is uploaded to Amazon S3 * When all the datasets are available in Amazon S3, start an ETL job to join the uploaded datasets with multiple terabyte-sized datasets already store d in Amazon S3 * Store the results of joining datasets in Amazon S 3 * If one of the jobs fails, send a notification to the Administrator Which configuration will meet these requirements?", +    "question": "A Machine Learning Specialist is developing a daily ETL workflow containing multiple ETL jobs. The workflow consists of the following processes: * Start the workflow as soon as data is uploaded to Amazon S3 * When all the datasets are available in Amazon S3, start an ETL job to join the uploaded datasets with multiple terabyte-sized datasets already stored in Amazon S3 * Store the results of joining datasets in Amazon S3 * If one of the jobs fails, send a notification to the Administrator. Which configuration will meet these requirements?", "options": [ "Use AWS Lambda to trigger an AWS Step Functions workflow to wait for dataset uploads to complete in Amazon S3. Use AWS Glue to join the datasets. Use an Amazon CloudWatch alarm to send an SNS notification to the Administrator in the case of a failure", "Develop the ETL workflow using AWS Lambda to start an Amazon SageMaker notebook instance. Use a lifecycle configuration script to join the datasets and persist the results in Amazon S3. Use an Amazon CloudWatch alarm to send an SNS notification to the Administrator in the case of a failure", @@ -1135,7 +1135,7 @@ "references": "" },
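QUESTION 102's answer (convert the CSV records to Parquet) is a one-step transformation in practice. A minimal pandas sketch; the S3 paths are hypothetical and reading/writing `s3://` URLs assumes the `s3fs` package is installed:

```python
import pandas as pd

# Columnar Parquet lets Athena scan only the 5-10 queried columns
# instead of all 200, cutting both runtime and cost.
df = pd.read_csv("s3://my-bucket/raw/records.csv")  # hypothetical path; needs s3fs
df.to_parquet(
    "s3://my-bucket/curated/records.parquet",       # hypothetical path
    engine="pyarrow",
    compression="snappy",
)

# Downstream reads can then prune columns explicitly as well.
subset = pd.read_parquet(
    "s3://my-bucket/curated/records.parquet", columns=["col_1", "col_7"]
)
```

At the question's scale this would normally run as a Glue or EMR job rather than on one machine, but the format choice is the point being tested.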
{ -    "question": "QUESTION 104 An agency collects census information within a coun try to determine healthcare and social program needs by province and city. &#13;The census form collect s responses for approximately 500 questions from each citizen Which combination of algorithms would provide the a ppropriate insights? (Select TWO ) A. The factorization machines (FM) algorithm", +    "question": "An agency collects census information within a country to determine healthcare and social program needs by province and city. The census form collects responses for approximately 500 questions from each citizen. Which combination of algorithms would provide the appropriate insights? (Select TWO.)", "options": [ "The factorization machines (FM) algorithm", "The Latent Dirichlet Allocation (LDA) algorithm", "The principal component analysis (PCA) algorithm", @@ -1146,7 +1146,7 @@ "references": "" }, { -    "question": "QUESTION 105 A large consumer goods manufacturer has the followi ng products on sale 34 different toothpaste variants 48 different toothbrush variants 43 different mouthwash variants The entire sales history of all these products is a vailable in Amazon S3 Currently, the company is using custom-built autoregressive integrated moving average (ARIMA) models to forecast demand for these products The company wants to predict the demand for a new product that will soon be launched Which solution should a Machine Learning Specialist apply?", +    "question": "A large consumer goods manufacturer has the following products on sale: 34 different toothpaste variants, 48 different toothbrush variants, and 43 different mouthwash variants. The entire sales history of all these products is available in Amazon S3. Currently, the company is using custom-built autoregressive integrated moving average (ARIMA) models to forecast demand for these products. The company wants to predict the demand for a new product that will soon be launched. Which solution should a Machine Learning Specialist apply?", "options": [ "Train a custom ARIMA model to forecast demand for the new product.", "Train an Amazon SageMaker DeepAR algorithm to forecast demand for the new product", @@ -1158,7 +1158,7 @@ "references": "" }, { -    "question": "QUESTION 106 A Data Scientist needs to migrate an existing on-pr emises ETL process to the cloud The current process runs at regular time intervals and uses PyS park to combine and format multiple large data sources into a single consolidated output for downs tream processing The Data Scientist has been given the following req uirements for the cloud solution * Combine multiple data sources * Reuse existing PySpark logic * Run the solution on the existing schedule * Minimize the number of servers that will need to be managed Which architecture should the Data Scientist use to build this solution?", +    "question": "A Data Scientist needs to migrate an existing on-premises ETL process to the cloud. The current process runs at regular time intervals and uses PySpark to combine and format multiple large data sources into a single consolidated output for downstream processing. The Data Scientist has been given the following requirements for the cloud solution: * Combine multiple data sources * Reuse existing PySpark logic * Run the solution on the existing schedule * Minimize the number of servers that will need to be managed. Which architecture should the Data Scientist use to build this solution?", "options": [ "Write the raw data to Amazon S3. Schedule an AWS Lambda function to submit a Spark step to a persistent Amazon EMR cluster based on the existing schedule. Use the existing PySpark logic to run the ETL job on the EMR cluster. Output the results to a \"processed\" location in Amazon S3 that is accessible for downstream use", "Write the raw data to Amazon S3. Create an AWS Glue ETL job to perform the ETL processing against the input data. Write the ETL job in PySpark to leverage the existing logic. Create a new AWS Glue trigger to trigger the ETL job based on the existing schedule. Configure the output target of the ETL
job to write to a \"processed\" location in Amazon S3 that is accessible for downstream use.", @@ -1170,7 +1170,7 @@ "references": "" }, { -    "question": "QUESTION 107 A large company has developed a B1 application that generates reports and dashboards using data collected from various operational metrics The comp any wants to provide executives with an enhanced experience so they can use natural languag e to get data from the reports The company wants the executives to be able ask questions using written and spoken interlaces Which combination of services can be used to build this conversational interface? (Select THREE)", +    "question": "A large company has developed a BI application that generates reports and dashboards using data collected from various operational metrics. The company wants to provide executives with an enhanced experience so they can use natural language to get data from the reports. The company wants the executives to be able to ask questions using written and spoken interfaces. Which combination of services can be used to build this conversational interface? (Select THREE.)", "options": [ "Alexa for Business", "Amazon Connect", @@ -1182,7 +1182,7 @@ "references": "" }, { -    "question": "QUESTION 108 A Machine Learning Specialist is applying a linear least squares regression model to a dataset with 1 000 records and 50 features Prior to training, the ML Specialist notices that two features are perfect ly linearly dependent Why could this be an issue for the linear least squ ares regression model?", +    "question": "A Machine Learning Specialist is applying a linear least squares regression model to a dataset with 1,000 records and 50 features. Prior to training, the ML Specialist notices that two features are perfectly linearly dependent. Why could this be an issue for the linear least squares regression model?", "options": [ "It could cause the backpropagation algorithm to fail during training", "It could create a singular matrix during optimization, which fails to define a unique solution", @@ -1194,7 +1194,7 @@ "references": "" }, { -    "question": "QUESTION 109 A Machine Learning Specialist uploads a dataset to an Amazon S3 bucket protected with server-side encryption using AWS KMS. How should the ML Specialist define the Amazon Sage Maker notebook instance so it can read the same dataset from Amazon S3?", +    "question": "A Machine Learning Specialist uploads a dataset to an Amazon S3 bucket protected with server-side encryption using AWS KMS. How should the ML Specialist define the Amazon SageMaker notebook instance so it can read the same dataset from Amazon S3?", "options": [ "Define security group(s) to allow all HTTP inbound/outbound traffic and assign those security group(s) to the Amazon SageMaker notebook instance.", "Configure the Amazon SageMaker notebook instance to have access to the VPC. Grant permission in the KMS key policy to the notebook's KMS role.", @@ -1206,7 +1206,7 @@ "references": "" },
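QUESTION 108's correct option, a singular matrix with no unique solution, can be verified in a few lines of numpy. A sketch on invented data with one feature that is an exact multiple of another:

```python
import numpy as np

# Two perfectly collinear features, as in the question.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 3))
X = np.column_stack([X, 2 * X[:, 0]])  # fourth feature = 2 x first feature
y = X[:, :3] @ np.array([1.0, -2.0, 0.5]) + rng.normal(size=1000)

# The normal-equations matrix X'X loses rank, so ordinary least squares
# cannot pin down a unique coefficient vector.
print("rank of X'X:", np.linalg.matrix_rank(X.T @ X), "out of", X.shape[1])

try:
    np.linalg.solve(X.T @ X, X.T @ y)
except np.linalg.LinAlgError as err:
    print("solve failed:", err)

# lstsq still returns an answer because it picks the minimum-norm solution
# among the infinitely many coefficient vectors that fit equally well.
coef, _, rank, _ = np.linalg.lstsq(X, y, rcond=None)
print("lstsq rank:", rank, "coefficients:", np.round(coef, 3))
```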
{ -    "question": "QUESTION 110 A Data Scientist is building a model to predict cus tomer churn using a dataset of 100 continuous numerical features. The Marketing team has not prov ided any insight about which features are relevant for churn prediction. The Marketing team wants to inter pret the model and see the direct impact of relevan t features on the model outcome. While training a log istic regression model, the Data Scientist observes that there is a wide gap between the training and validation s et accuracy. &#13;Which methods can the Data Scientist use to improve the model performance and satisfy the Marketing teams needs? (Choose two.)", +    "question": "A Data Scientist is building a model to predict customer churn using a dataset of 100 continuous numerical features. The Marketing team has not provided any insight about which features are relevant for churn prediction. The Marketing team wants to interpret the model and see the direct impact of relevant features on the model outcome. While training a logistic regression model, the Data Scientist observes that there is a wide gap between the training and validation set accuracy. Which methods can the Data Scientist use to improve the model performance and satisfy the Marketing team's needs? (Choose two.)", "options": [ "Add L1 regularization to the classifier", "Add features to the dataset", @@ -1218,7 +1218,7 @@ "references": "" }, { -    "question": "QUESTION 111 An aircraft engine manufacturing company is measuri ng 200 performance metrics in a time-series. Engineers want to detect critical manufacturing defects in ne ar-real time during testing. All of the data needs to be stored for offline analysis. What approach would be the MOST effective to perfor m near-real time defect detection?", +    "question": "An aircraft engine manufacturing company is measuring 200 performance metrics in a time-series. Engineers want to detect critical manufacturing defects in near-real time during testing. All of the data needs to be stored for offline analysis. What approach would be the MOST effective to perform near-real time defect detection?", "options": [ "Use AWS IoT Analytics for ingestion, storage, and further analysis. Use Jupyter notebooks from within AWS IoT Analytics to carry out analysis for anomalies.", "Use Amazon S3 for ingestion, storage, and further analysis. Use an Amazon EMR cluster to carry out Apache Spark ML k-means clustering to determine anomalies.", @@ -1230,7 +1230,7 @@ "references": "" }, { -    "question": "QUESTION 113 A company uses a long short-term memory (LSTM) mode l to evaluate the risk factors of a particular energy sector. The model reviews multi-page text do cuments to analyze each sentence of the text and categorize it as either a potential risk or no risk . The model is not performing well, even though the Data Scientist has experimented with many different network struct ures and tuned the corresponding hyperparameters. Which approach will provide the MAXIMUM performance boost?", +    "question": "A company uses a long short-term memory (LSTM) model to evaluate the risk factors of a particular energy sector. The model reviews multi-page text documents to analyze each sentence of the text and categorize it as either a potential risk or no risk. The model is not performing well, even though the Data Scientist has experimented with many different network structures and tuned the corresponding hyperparameters.
Which approach will provide the MAXIMUM performance boost?", "options": [ "Initialize the words by term frequency-inverse document frequency (TF-IDF) vectors pretrained on a large collection of news articles related to the energy sector.", "Use gated recurrent units (GRUs) instead of LSTM and run the training process until the validation loss stops decreasing.", @@ -1242,7 +1242,7 @@ "references": "" }, { - "question": "QUESTION 114 A Machine Learning Specialist previously trained a logistic regression model using scikit-learn on a local machine, and the Specialist now wants to deploy it to production for inference only. What steps should be taken to ensure Amazon SageMaker can host a model that was trained locally?", + "question": "A Machine Learning Specialist previously trained a logistic regression model using scikit-learn on a local machine, and the Specialist now wants to deploy it to production for inference only. What steps should be taken to ensure Amazon SageMaker can host a model that was trained locally?", "options": [ "Build the Docker image with the inference code. Tag the Docker image with the registry hostname and upload it to Amazon ECR.", "Serialize the trained model so the format is compressed for deployment. Tag the Docker image with the registry hostname and upload it to Amazon S3.", @@ -1254,7 +1254,7 @@ "references": "" }, { - "question": "QUESTION 115 A trucking company is collecting live image data from its fleet of trucks across the globe. The data is growing rapidly and approximately 100 GB of new data is generated every day. The company wants to explore machine learning use cases while ensuring the data is only accessible to specific IAM users. Which storage option provides the most processing flexibility and will allow access control with IAM?", + "question": "A trucking company is collecting live image data from its fleet of trucks across the globe. The data is growing rapidly and approximately 100 GB of new data is generated every day. The company wants to explore machine learning use cases while ensuring the data is only accessible to specific IAM users. Which storage option provides the most processing flexibility and will allow access control with IAM?", "options": [ "Use a database, such as Amazon DynamoDB, to store the images, and set the IAM policies to restrict access to only the desired IAM users.", "Use an Amazon S3-backed data lake to store the raw images, and set up the permissions using bucket policies.", @@ -1266,7 +1266,7 @@ "references": "" }, { - "question": "QUESTION 116 A credit card company wants to build a credit scoring model to help predict whether a new credit card applicant will default on a credit card payment. The company has collected data from a large number of sources with thousands of raw attributes.
Early experiments to train a classification model revealed that many attributes are highly correlated, the large number of features slows down the training speed significantly, and that there are some overfitting issues. The Data Scientist on this project would like to speed up the model training time without losing a lot of information from the original dataset. Which feature engineering technique should the Data Scientist use to meet the objectives?", + "question": "A credit card company wants to build a credit scoring model to help predict whether a new credit card applicant will default on a credit card payment. The company has collected data from a large number of sources with thousands of raw attributes. Early experiments to train a classification model revealed that many attributes are highly correlated, the large number of features slows down the training speed significantly, and that there are some overfitting issues. The Data Scientist on this project would like to speed up the model training time without losing a lot of information from the original dataset. Which feature engineering technique should the Data Scientist use to meet the objectives?", "options": [ "Run self-correlation on all features and remove highly correlated features", "Normalize all numerical values to be between 0 and 1", @@ -1278,7 +1278,7 @@ "references": "" }, { - "question": "QUESTION 117 A Data Scientist is training a multilayer perceptron (MLP) on a dataset with multiple classes. The target class of interest is unique compared to the other classes within the dataset, but it does not achieve an acceptable recall metric. The Data Scientist has already tried varying the number and size of the MLP's hidden layers, which has not significantly improved the results. A solution to improve recall must be implemented as quickly as possible. Which techniques should be used to meet these requirements?", + "question": "A Data Scientist is training a multilayer perceptron (MLP) on a dataset with multiple classes. The target class of interest is unique compared to the other classes within the dataset, but it does not achieve an acceptable recall metric. The Data Scientist has already tried varying the number and size of the MLP's hidden layers, which has not significantly improved the results. A solution to improve recall must be implemented as quickly as possible. Which techniques should be used to meet these requirements?", "options": [ "Gather more data using Amazon Mechanical Turk and then retrain", "Train an anomaly detection model instead of an MLP", @@ -1290,7 +1290,7 @@ "references": "" }, { - "question": "QUESTION 118 A Machine Learning Specialist works for a credit card processing company and needs to predict which transactions may be fraudulent in near-real time. Specifically, the Specialist must train a model that returns the probability that a given transaction may be fraudulent. How should the Specialist frame this business problem?", + "question": "A Machine Learning Specialist works for a credit card processing company and needs to predict which transactions may be fraudulent in near-real time. Specifically, the Specialist must train a model that returns the probability that a given transaction may be fraudulent. How should the Specialist frame this business problem?", "options": [ "Streaming classification", "Binary classification", @@ -1302,7 +1302,7 @@ "references": "" }, { - "question": "QUESTION 119 A real estate company wants to create a machine learning model for predicting housing prices based on a historical dataset. The dataset contains 32 features. Which model will meet the business requirement?", + "question": "A real estate company wants to create a machine learning model for predicting housing prices based on a historical dataset. The dataset contains 32 features. Which model will meet the business requirement?", "options": [ "Logistic regression", "Linear regression", @@ -1314,7 +1314,7 @@ "references": "" }, { - "question": "QUESTION 121 A Data Scientist needs to analyze employment data. The dataset contains approximately 10 million observations on people across 10 different features.
During the preliminary analysis, the Data Scientist notices that income and age distributions are not normal. While income levels show a right skew as expected, with fewer individuals having a higher income, the age distribution also shows a right skew, with fewer older individuals participating in the workforce. Which feature transformations can the Data Scientist apply to fix the incorrectly skewed data? (Choose two.)", + "question": "A Data Scientist needs to analyze employment data. The dataset contains approximately 10 million observations on people across 10 different features. During the preliminary analysis, the Data Scientist notices that income and age distributions are not normal. While income levels show a right skew as expected, with fewer individuals having a higher income, the age distribution also shows a right skew, with fewer older individuals participating in the workforce. Which feature transformations can the Data Scientist apply to fix the incorrectly skewed data? (Choose two.)", "options": [ "Cross-validation", "Numerical value binning", @@ -1326,7 +1326,7 @@ "references": "" }, { - "question": "QUESTION 122 A Machine Learning Specialist is given a structured dataset on the shopping habits of a company's customer base. The dataset contains thousands of columns of data and hundreds of numerical columns for each customer. The Specialist wants to identify whether there are natural groupings for these columns across all customers and visualize the results as quickly as possible. What approach should the Specialist take to accomplish these tasks?", + "question": "A Machine Learning Specialist is given a structured dataset on the shopping habits of a company's customer base. The dataset contains thousands of columns of data and hundreds of numerical columns for each customer. The Specialist wants to identify whether there are natural groupings for these columns across all customers and visualize the results as quickly as possible. What approach should the Specialist take to accomplish these tasks?", "options": [ "Embed the numerical features using the t-distributed stochastic neighbor embedding (t-SNE) algorithm and create a scatter plot.", "Run k-means using the Euclidean distance measure for different values of k and create an elbow plot.", @@ -1338,7 +1338,7 @@ "references": "" }, { - "question": "QUESTION 123 A Machine Learning Specialist is planning to create a long-running Amazon EMR cluster. The EMR cluster will have 1 master node, 10 core nodes, and 20 task nodes. To save on costs, the Specialist will use Spot Instances in the EMR cluster. Which nodes should the Specialist launch on Spot Instances?", + "question": "A Machine Learning Specialist is planning to create a long-running Amazon EMR cluster. The EMR cluster will have 1 master node, 10 core nodes, and 20 task nodes. To save on costs, the Specialist will use Spot Instances in the EMR cluster. Which nodes should the Specialist launch on Spot Instances?", "options": [ "Master node", "Any of the core nodes", @@ -1350,7 +1350,7 @@ "references": "" }, { - "question": "QUESTION 124 A company wants to predict the sale prices of houses based on available historical sales data. The target variable in the company's dataset is the sale price. The features include parameters such as the lot size, living area measurements, non-living area measurements, number of bedrooms, number of bathrooms, year built, and postal code.
The company wants to use multi-variable linear regression to predict house sale prices. Which step should a machine learning specialist take to remove features that are irrelevant for the analysis and reduce the model's complexity?", + "question": "A company wants to predict the sale prices of houses based on available historical sales data. The target variable in the company's dataset is the sale price. The features include parameters such as the lot size, living area measurements, non-living area measurements, number of bedrooms, number of bathrooms, year built, and postal code. The company wants to use multi-variable linear regression to predict house sale prices. Which step should a machine learning specialist take to remove features that are irrelevant for the analysis and reduce the model's complexity?", "options": [ "Plot a histogram of the features and compute their standard deviation. Remove features with high variance.", "Plot a histogram of the features and compute their standard deviation. Remove features with low variance.", @@ -1362,7 +1362,7 @@ "references": "" }, { - "question": "QUESTION 125 A health care company is planning to use neural networks to classify their X-ray images into normal and abnormal classes. The labeled data is divided into a training set of 1,000 images and a test set of 200 images. The initial training of a neural network model with 50 hidden layers yielded 99% accuracy on the training set, but only 55% accuracy on the test set. What changes should the Specialist consider to solve this issue? (Choose three.)", + "question": "A health care company is planning to use neural networks to classify their X-ray images into normal and abnormal classes. The labeled data is divided into a training set of 1,000 images and a test set of 200 images. The initial training of a neural network model with 50 hidden layers yielded 99% accuracy on the training set, but only 55% accuracy on the test set. What changes should the Specialist consider to solve this issue? (Choose three.)", "options": [ "Choose a higher number of layers", "Choose a lower number of layers", @@ -1374,7 +1374,7 @@ "references": "" }, { - "question": "QUESTION 126 A Machine Learning Specialist is attempting to build a linear regression model. Given the displayed residual plot only, what is the MOST likely problem with the model?", + "question": "A Machine Learning Specialist is attempting to build a linear regression model. Given the displayed residual plot only, what is the MOST likely problem with the model?", "options": [ "Linear regression is inappropriate. The residuals do not have constant variance.", "Linear regression is inappropriate. The underlying data has outliers.", @@ -1386,7 +1386,7 @@ "references": "" }, { - "question": "QUESTION 127 A machine learning specialist works for a fruit processing company and needs to build a system that categorizes apples into three types. The specialist has collected a dataset that contains 150 images for each type of apple and applied transfer learning on a neural network that was pretrained on ImageNet with this dataset. The company requires at least 85% accuracy to make use of the model.
After an exhaustive grid search, the optimal hyperparameters produced the following: 68% accuracy on the training set and 67% accuracy on the validation set. What can the machine learning specialist do to improve the system's accuracy?", + "question": "A machine learning specialist works for a fruit processing company and needs to build a system that categorizes apples into three types. The specialist has collected a dataset that contains 150 images for each type of apple and applied transfer learning on a neural network that was pretrained on ImageNet with this dataset. The company requires at least 85% accuracy to make use of the model. After an exhaustive grid search, the optimal hyperparameters produced the following: 68% accuracy on the training set and 67% accuracy on the validation set. What can the machine learning specialist do to improve the system's accuracy?", "options": [ "Upload the model to an Amazon SageMaker notebook instance and use the Amazon SageMaker HPO feature to optimize the model's hyperparameters.", "Add more data to the training set and retrain the model using transfer learning to reduce the bias.", @@ -1398,7 +1398,7 @@ "references": "" }, { - "question": "QUESTION 128 A company uses camera images of the tops of items displayed on store shelves to determine which items were removed and which ones still remain. After several hours of data labeling, the company has a total of 1,000 hand-labeled images covering 10 distinct items. The training results were poor. Which machine learning approach fulfills the company's long-term needs?", + "question": "A company uses camera images of the tops of items displayed on store shelves to determine which items were removed and which ones still remain. After several hours of data labeling, the company has a total of 1,000 hand-labeled images covering 10 distinct items. The training results were poor. Which machine learning approach fulfills the company's long-term needs?", "options": [ "Convert the images to grayscale and retrain the model", "Reduce the number of distinct items from 10 to 2, build the model, and iterate", @@ -1410,7 +1410,7 @@ "references": "" }, { - "question": "QUESTION 129 A Data Scientist is developing a binary classifier to predict whether a patient has a particular disease on a series of test results. The Data Scientist has data on 400 patients randomly selected from the population. The disease is seen in 3% of the population. Which cross-validation strategy should the Data Scientist adopt?", + "question": "A Data Scientist is developing a binary classifier to predict whether a patient has a particular disease on a series of test results. The Data Scientist has data on 400 patients randomly selected from the population. The disease is seen in 3% of the population. Which cross-validation strategy should the Data Scientist adopt?", "options": [ "A k-fold cross-validation strategy with k=5", "A stratified k-fold cross-validation strategy with k=5", @@ -1422,7 +1422,7 @@ "references": "" }, { - "question": "QUESTION 130 A technology startup is using complex deep neural networks and GPU compute to recommend the company's products to its existing customers based upon each customer's habits and interactions. The solution currently pulls each dataset from an Amazon S3 bucket before loading the data into a TensorFlow model pulled from the company's Git repository that runs locally. This job then runs for several hours while continually outputting its progress to the same S3 bucket.
The job can be paused, restarted, and continued at any time in the event of a failure, and is run from a central queue. Senior managers are concerned about the complexity of the solution's resource management and the costs involved in repeating the process regularly. They ask for the workload to be automated so it runs once a week, starting Monday and completing by the close of business Friday. Which architecture should be used to scale the solution at the lowest cost?", + "question": "A technology startup is using complex deep neural networks and GPU compute to recommend the company's products to its existing customers based upon each customer's habits and interactions. The solution currently pulls each dataset from an Amazon S3 bucket before loading the data into a TensorFlow model pulled from the company's Git repository that runs locally. This job then runs for several hours while continually outputting its progress to the same S3 bucket. The job can be paused, restarted, and continued at any time in the event of a failure, and is run from a central queue. Senior managers are concerned about the complexity of the solution's resource management and the costs involved in repeating the process regularly. They ask for the workload to be automated so it runs once a week, starting Monday and completing by the close of business Friday. Which architecture should be used to scale the solution at the lowest cost?", "options": [ "Implement the solution using AWS Deep Learning Containers and run the container as a job using AWS Batch on a GPU-compatible Spot Instance", "Implement the solution using a low-cost GPU-compatible Amazon EC2 instance and use the AWS Instance Scheduler to schedule the task", @@ -1434,7 +1434,7 @@ "references": "" }, { - "question": "QUESTION 131 A media company with a very large archive of unlabeled images, text, audio, and video footage wishes to index its assets to allow rapid identification of relevant content by the Research team. The company wants to use machine learning to accelerate the efforts of its in-house researchers who have limited machine learning expertise. Which is the FASTEST route to index the assets?", + "question": "A media company with a very large archive of unlabeled images, text, audio, and video footage wishes to index its assets to allow rapid identification of relevant content by the Research team. The company wants to use machine learning to accelerate the efforts of its in-house researchers who have limited machine learning expertise. Which is the FASTEST route to index the assets?", "options": [ "Use Amazon Rekognition, Amazon Comprehend, and Amazon Transcribe to tag data into distinct categories/classes.", "Create a set of Amazon Mechanical Turk Human Intelligence Tasks to label all footage.", @@ -1446,7 +1446,7 @@ "references": "" }, { - "question": "QUESTION 132 A Machine Learning Specialist is working for an online retailer that wants to run analytics on every customer visit, processed through a machine learning pipeline. The data needs to be ingested by Amazon Kinesis Data Streams at up to 100 transactions per second, and the JSON data blob is 100 KB in size. What is the MINIMUM number of shards in Kinesis Data Streams the Specialist should use to successfully ingest this data?", + "question": "A Machine Learning Specialist is working for an online retailer that wants to run analytics on every customer visit, processed through a machine learning pipeline.
The data needs to be ingested by Amazon Kinesis Data Streams at up to 100 transactions per second, and the JSON data blob is 100 KB in size. What is the MINIMUM number of shards in Kinesis Data Streams the Specialist should use to successfully ingest this data?", "options": [ "1 shard", "10 shards", @@ -1458,7 +1458,7 @@ "references": "" }, { - "question": "QUESTION 133 A Machine Learning Specialist is deciding between building a naive Bayesian model or a full Bayesian network for a classification problem. The Specialist computes the Pearson correlation coefficients between each feature and finds that their absolute values range from 0.1 to 0.95. Which model describes the underlying data in this situation?", + "question": "A Machine Learning Specialist is deciding between building a naive Bayesian model or a full Bayesian network for a classification problem. The Specialist computes the Pearson correlation coefficients between each feature and finds that their absolute values range from 0.1 to 0.95. Which model describes the underlying data in this situation?", "options": [ "A naive Bayesian model, since the features are all conditionally independent.", "A full Bayesian network, since the features are all conditionally independent.", @@ -1470,7 +1470,7 @@ "references": "" }, { - "question": "QUESTION 134 A Data Scientist is building a linear regression model and will use resulting p-values to evaluate the statistical significance of each coefficient. Upon inspection of the dataset, the Data Scientist discovers that most of the features are normally distributed. The plot of one feature in the dataset is shown in the graphic. What transformation should the Data Scientist apply to satisfy the statistical assumptions of the linear regression model?", + "question": "A Data Scientist is building a linear regression model and will use resulting p-values to evaluate the statistical significance of each coefficient. Upon inspection of the dataset, the Data Scientist discovers that most of the features are normally distributed. The plot of one feature in the dataset is shown in the graphic. What transformation should the Data Scientist apply to satisfy the statistical assumptions of the linear regression model?", "options": [ "Exponential transformation", "Logarithmic transformation", @@ -1482,7 +1482,7 @@ "references": "" }, { - "question": "QUESTION 135 A Machine Learning Specialist is assigned to a Fraud Detection team and must tune an XGBoost model, which is working appropriately for test data. However, with unknown data, it is not working as expected. The existing parameters are provided as follows. Which parameter tuning guidelines should the Specialist follow to avoid overfitting?", + "question": "A Machine Learning Specialist is assigned to a Fraud Detection team and must tune an XGBoost model, which is working appropriately for test data. However, with unknown data, it is not working as expected. The existing parameters are provided as follows. Which parameter tuning guidelines should the Specialist follow to avoid overfitting?", "options": [ "Lower the min_child_weight parameter value.", "Increase the max_depth parameter value.", @@ -1494,7 +1494,7 @@ "references": "" }, { - "question": "QUESTION 136 A data scientist is developing a pipeline to ingest streaming web traffic data. The data scientist needs to implement a process to identify unusual web traffic patterns as part of the pipeline. The patterns will be used downstream for alerting and incident response.
The data scientist has access to unlabeled historic data to use, if needed. The solution needs to do the following: Calculate an anomaly score for each web traffic entry. Adapt unusual event identification to changing web patterns over time. Which approach should the data scientist implement to meet these requirements?", + "question": "A data scientist is developing a pipeline to ingest streaming web traffic data. The data scientist needs to implement a process to identify unusual web traffic patterns as part of the pipeline. The patterns will be used downstream for alerting and incident response. The data scientist has access to unlabeled historic data to use, if needed. The solution needs to do the following: Calculate an anomaly score for each web traffic entry. Adapt unusual event identification to changing web patterns over time. Which approach should the data scientist implement to meet these requirements?", "options": [ "Use historic web traffic data to train an anomaly detection model using the Amazon SageMaker Random Cut Forest (RCF) built-in model. Use an Amazon Kinesis Data Stream to process the incoming web traffic data. Attach a preprocessing AWS Lambda function to perform data enrichment by calling the RCF model to calculate the anomaly score for each record.", "Use historic web traffic data to train an anomaly detection model using the Amazon SageMaker built-in XGBoost model. Use an Amazon Kinesis Data Stream to process the incoming web traffic data. Attach a preprocessing AWS Lambda function to perform data enrichment by calling the XGBoost model to calculate the anomaly score for each record.", "Collect the streaming data using Amazon Kinesis Data Firehose. Map the delivery stream as an input source for Amazon Kinesis Data Analytics. Write a SQL query to run in real time against the streaming data with the k-Nearest Neighbors (kNN) SQL extension to calculate anomaly scores for each record using a tumbling window.", @@ -1505,7 +1505,7 @@ "references": "" }, { - "question": "QUESTION 137 A Data Scientist received a set of insurance records, each consisting of a record ID, the final outcome among 200 categories, and the date of the final outcome. Some partial information on claim contents is also provided, but only for a few of the 200 categories. For each outcome category, there are hundreds of records distributed over the past 3 years. The Data Scientist wants to predict how many claims to expect in each category from month to month, a few months in advance. What type of machine learning model should be used?", + "question": "A Data Scientist received a set of insurance records, each consisting of a record ID, the final outcome among 200 categories, and the date of the final outcome. Some partial information on claim contents is also provided, but only for a few of the 200 categories. For each outcome category, there are hundreds of records distributed over the past 3 years. The Data Scientist wants to predict how many claims to expect in each category from month to month, a few months in advance.
What type of machine learning model should be used?", "options": [ "Classification month-to-month using supervised learning of the 200 categories based on claim contents.", "Reinforcement learning using claim IDs and timestamps where the agent will identify how many claims in each category to expect from month to month.", @@ -1517,7 +1517,7 @@ "references": "" }, { - "question": "QUESTION 138 A company that promotes healthy sleep patterns by providing cloud-connected devices currently hosts a sleep tracking application on AWS. The application collects device usage information from device users. The company's Data Science team is building a machine learning model to predict if and when a user will stop utilizing the company's devices. Predictions from this model are used by a downstream application that determines the best approach for contacting users. The Data Science team is building multiple versions of the machine learning model to evaluate each version against the company's business goals. To measure long-term effectiveness, the team wants to run multiple versions of the model in parallel for long periods of time, with the ability to control the portion of inferences served by the models. Which solution satisfies these requirements with MINIMAL effort?", + "question": "A company that promotes healthy sleep patterns by providing cloud-connected devices currently hosts a sleep tracking application on AWS. The application collects device usage information from device users. The company's Data Science team is building a machine learning model to predict if and when a user will stop utilizing the company's devices. Predictions from this model are used by a downstream application that determines the best approach for contacting users. The Data Science team is building multiple versions of the machine learning model to evaluate each version against the company's business goals. To measure long-term effectiveness, the team wants to run multiple versions of the model in parallel for long periods of time, with the ability to control the portion of inferences served by the models. Which solution satisfies these requirements with MINIMAL effort?", "options": [ "Build and host multiple models in Amazon SageMaker. Create multiple Amazon SageMaker endpoints, one for each model. Programmatically control invoking different models for inference at the application layer.", "Build and host multiple models in Amazon SageMaker. Create an Amazon SageMaker endpoint configuration with multiple production variants. Programmatically control the portion of the inferences served by the multiple models by updating the endpoint configuration.", @@ -1529,7 +1529,7 @@ "references": "" }, { - "question": "QUESTION 139 An agricultural company is interested in using machine learning to detect specific types of weeds in a 100-acre grassland field. Currently, the company uses tractor-mounted cameras to capture multiple images of the field as 10 \u00d7 10 grids. The company also has a large training dataset that consists of annotated images of popular weed classes like broadleaf and non-broadleaf docks. The company wants to build a weed detection model that will detect specific types of weeds and the location of each type within the field. Once the model is ready, it will be hosted on Amazon SageMaker endpoints. The model will perform real-time inferencing using the images captured by the cameras.
Which approach should a Machine Learning Specialist take to obtain accurate predictions?", + "question": "An agricultural company is interested in using machine learning to detect specific types of weeds in a 100-acre grassland field. Currently, the company uses tractor-mounted cameras to capture multiple images of the field as 10 \u00d7 10 grids. The company also has a large training dataset that consists of annotated images of popular weed classes like broadleaf and non-broadleaf docks. The company wants to build a weed detection model that will detect specific types of weeds and the location of each type within the field. Once the model is ready, it will be hosted on Amazon SageMaker endpoints. The model will perform real-time inferencing using the images captured by the cameras. Which approach should a Machine Learning Specialist take to obtain accurate predictions?", "options": [ "Prepare the images in RecordIO format and upload them to Amazon S3. Use Amazon SageMaker to train, test, and validate the model using an image classification algorithm to categorize images into various weed classes.", "Prepare the images in Apache Parquet format and upload them to Amazon S3. Use Amazon SageMaker to train, test, and validate the model using an object-detection single-shot multibox detector (SSD) algorithm.", @@ -1541,7 +1541,7 @@ "references": "" }, { - "question": "QUESTION 140 A manufacturer is operating a large number of factories with a complex supply chain relationship where unexpected downtime of a machine can cause production to stop at several factories. A data scientist wants to analyze sensor data from the factories to identify equipment in need of preemptive maintenance and then dispatch a service team to prevent unplanned downtime. The sensor readings from a single machine can include up to 200 data points including temperatures, voltages, vibrations, RPMs, and pressure readings. To collect this sensor data, the manufacturer deployed Wi-Fi and LANs across the factories. Even though many factory locations do not have reliable or high-speed internet connectivity, the manufacturer would like to maintain near-real-time inference capabilities. Which deployment architecture for the model will address these business requirements?", + "question": "A manufacturer is operating a large number of factories with a complex supply chain relationship where unexpected downtime of a machine can cause production to stop at several factories. A data scientist wants to analyze sensor data from the factories to identify equipment in need of preemptive maintenance and then dispatch a service team to prevent unplanned downtime. The sensor readings from a single machine can include up to 200 data points including temperatures, voltages, vibrations, RPMs, and pressure readings. To collect this sensor data, the manufacturer deployed Wi-Fi and LANs across the factories. Even though many factory locations do not have reliable or high-speed internet connectivity, the manufacturer would like to maintain near-real-time inference capabilities. Which deployment architecture for the model will address these business requirements?", "options": [ "Deploy the model in Amazon SageMaker. Run sensor data through this model to predict which machines need maintenance.", "Deploy the model on AWS IoT Greengrass in each factory.
Run sensor data through this model to infer which machines need maintenance.", @@ -1553,7 +1553,7 @@ "references": "" }, { - "question": "QUESTION 141 A Machine Learning Specialist is designing a scalable data storage solution for Amazon SageMaker. There is an existing TensorFlow-based model implemented as a train.py script that relies on static training data that is currently stored as TFRecords. Which method of providing training data to Amazon SageMaker would meet the business requirements with the LEAST development overhead?", + "question": "A Machine Learning Specialist is designing a scalable data storage solution for Amazon SageMaker. There is an existing TensorFlow-based model implemented as a train.py script that relies on static training data that is currently stored as TFRecords. Which method of providing training data to Amazon SageMaker would meet the business requirements with the LEAST development overhead?", "options": [ "Use Amazon SageMaker script mode and use train.py unchanged. Point the Amazon SageMaker training invocation to the local path of the data without reformatting the training data.", "Use Amazon SageMaker script mode and use train.py unchanged. Put the TFRecord data into an Amazon S3 bucket. Point the Amazon SageMaker training invocation to the S3 bucket without reformatting the training data.", @@ -1565,7 +1565,7 @@ "references": "" }, { - "question": "QUESTION 142 The chief editor for a product catalog wants the research and development team to build a machine learning system that can be used to detect whether or not individuals in a collection of images are wearing the company's retail brand. The team has a set of training data. Which machine learning algorithm should the researchers use that BEST meets their requirements?", + "question": "The chief editor for a product catalog wants the research and development team to build a machine learning system that can be used to detect whether or not individuals in a collection of images are wearing the company's retail brand. The team has a set of training data. Which machine learning algorithm should the researchers use that BEST meets their requirements?", "options": [ "Latent Dirichlet Allocation (LDA)", "Recurrent neural network (RNN)", "K-means", @@ -1576,7 +1576,7 @@ "references": "" }, { - "question": "QUESTION 143 A retail company is using Amazon Personalize to provide personalized product recommendations for its customers during a marketing campaign. The company sees a significant increase in sales of recommended items to existing customers immediately after deploying a new solution version, but these sales decrease a short time after deployment. Only historical data from before the marketing campaign is available for training. How should a data scientist adjust the solution?", + "question": "A retail company is using Amazon Personalize to provide personalized product recommendations for its customers during a marketing campaign. The company sees a significant increase in sales of recommended items to existing customers immediately after deploying a new solution version, but these sales decrease a short time after deployment. Only historical data from before the marketing campaign is available for training.
How should a data scientist adjust the solution?", "options": [ "Use the event tracker in Amazon Personalize to include real-time user interactions.", "Add user metadata and use the HRNN-Metadata recipe in Amazon Personalize.", @@ -1588,7 +1588,7 @@ "references": "" }, { - "question": "QUESTION 144 A machine learning (ML) specialist wants to secure calls to the Amazon SageMaker Service API. The specialist has configured Amazon VPC with a VPC interface endpoint for the Amazon SageMaker Service API and is attempting to secure traffic from specific sets of instances and IAM users. The VPC is configured with a single public subnet. Which combination of steps should the ML specialist take to secure the traffic? (Choose two.)", + "question": "A machine learning (ML) specialist wants to secure calls to the Amazon SageMaker Service API. The specialist has configured Amazon VPC with a VPC interface endpoint for the Amazon SageMaker Service API and is attempting to secure traffic from specific sets of instances and IAM users. The VPC is configured with a single public subnet. Which combination of steps should the ML specialist take to secure the traffic? (Choose two.)", "options": [ "Add a VPC endpoint policy to allow access to the IAM users.", "Modify the users' IAM policy to allow access to Amazon SageMaker Service API calls only.", @@ -1600,7 +1600,7 @@ "references": "" }, { - "question": "QUESTION 145 An ecommerce company wants to launch a new cloud-based product recommendation feature for its web application. Due to data localization regulations, any sensitive data must not leave its on-premises data center, and the product recommendation model must be trained and tested using nonsensitive data only. Data transfer to the cloud must use IPsec. The web application is hosted on premises with a PostgreSQL database that contains all the data. The company wants the data to be uploaded securely to Amazon S3 each day for model retraining. How should a machine learning specialist meet these requirements?", + "question": "An ecommerce company wants to launch a new cloud-based product recommendation feature for its web application. Due to data localization regulations, any sensitive data must not leave its on-premises data center, and the product recommendation model must be trained and tested using nonsensitive data only. Data transfer to the cloud must use IPsec. The web application is hosted on premises with a PostgreSQL database that contains all the data. The company wants the data to be uploaded securely to Amazon S3 each day for model retraining. How should a machine learning specialist meet these requirements?", "options": [ "Create an AWS Glue job to connect to the PostgreSQL DB instance. Ingest tables without sensitive data through an AWS Site-to-Site VPN connection directly into Amazon S3.", "Create an AWS Glue job to connect to the PostgreSQL DB instance. Ingest all data through an AWS Site-to-Site VPN connection into Amazon S3 while removing sensitive data using a PySpark job.", "Use AWS Database Migration Service (AWS DMS) with table mapping to select PostgreSQL tables with no sensitive data through an SSL connection.
Replicate data directly into Amazon S3.", @@ -1611,7 +1611,7 @@ "references": "" }, { - "question": "QUESTION 146 A logistics company needs a forecast model to predict next month's inventory requirements for a single item in 10 warehouses. A machine learning specialist uses Amazon Forecast to develop a forecast model from 3 years of monthly data. There is no missing data. The specialist selects the DeepAR+ algorithm to train a predictor. The predictor's mean absolute percentage error (MAPE) is much larger than the MAPE produced by the current human forecasters. Which changes to the CreatePredictor API call could improve the MAPE? (Choose two.)", + "question": "A logistics company needs a forecast model to predict next month's inventory requirements for a single item in 10 warehouses. A machine learning specialist uses Amazon Forecast to develop a forecast model from 3 years of monthly data. There is no missing data. The specialist selects the DeepAR+ algorithm to train a predictor. The predictor's mean absolute percentage error (MAPE) is much larger than the MAPE produced by the current human forecasters. Which changes to the CreatePredictor API call could improve the MAPE? (Choose two.)", "options": [ "Set FeaturizationMethodName to filling.", "Set PerformAutoML to true.", @@ -1623,7 +1623,7 @@ "references": "" }, { - "question": "QUESTION 147 A data scientist wants to use Amazon Forecast to build a forecasting model for inventory demand for a retail company. The company has provided a dataset of historic inventory demand for its products as a .csv file stored in an Amazon S3 bucket. The table below shows a sample of the dataset. How should the data scientist transform the data?", + "question": "A data scientist wants to use Amazon Forecast to build a forecasting model for inventory demand for a retail company. The company has provided a dataset of historic inventory demand for its products as a .csv file stored in an Amazon S3 bucket. The table below shows a sample of the dataset. How should the data scientist transform the data?", "options": [ "Use ETL jobs in AWS Glue to separate the dataset into a target time series dataset and an item metadata dataset. Upload both datasets as .csv files to Amazon S3.", "Use a Jupyter notebook in Amazon SageMaker to separate the dataset into a related time series dataset and an item metadata dataset. Upload both datasets as tables in Amazon Aurora.", @@ -1635,7 +1635,7 @@ "references": "" }, { - "question": "QUESTION 148 A machine learning specialist is running an Amazon SageMaker endpoint using the built-in object detection algorithm on a P3 instance for real-time predictions in a company's production application. When evaluating the model's resource utilization, the specialist notices that the model is using only a fraction of the GPU. Which architecture changes would ensure that provisioned resources are being utilized effectively?", + "question": "A machine learning specialist is running an Amazon SageMaker endpoint using the built-in object detection algorithm on a P3 instance for real-time predictions in a company's production application. When evaluating the model's resource utilization, the specialist notices that the model is using only a fraction of the GPU. Which architecture changes would ensure that provisioned resources are being utilized effectively?", "options": [ "Redeploy the model as a batch transform job on an M5 instance.", "Redeploy the model on an M5 instance.
Attach Amazon Elastic Inference to the instance.", @@ -1647,7 +1647,7 @@ "references": "" }, { - "question": "QUESTION 149 A data scientist uses an Amazon SageMaker notebook instance to conduct data exploration and analysis. This requires certain Python packages that are not natively available on Amazon SageMaker to be installed on the notebook instance. How can a machine learning specialist ensure that required packages are automatically available on the notebook instance for the data scientist to use?", + "question": "A data scientist uses an Amazon SageMaker notebook instance to conduct data exploration and analysis. This requires certain Python packages that are not natively available on Amazon SageMaker to be installed on the notebook instance. How can a machine learning specialist ensure that required packages are automatically available on the notebook instance for the data scientist to use?", "options": [ "Install AWS Systems Manager Agent on the underlying Amazon EC2 instance and use Systems Manager Automation to execute the package installation commands.", "Create a Jupyter notebook file (.ipynb) with cells containing the package installation commands to execute and place the file under the /etc/init directory of each Amazon SageMaker notebook instance.", @@ -1659,7 +1659,7 @@ "references": "" }, { - "question": "QUESTION 150 A data scientist needs to identify fraudulent user accounts for a company's ecommerce platform. The company wants the ability to determine if a newly created account is associated with a previously known fraudulent user. The data scientist is using AWS Glue to cleanse the company's application logs during ingestion. Which strategy will allow the data scientist to identify fraudulent accounts?", + "question": "A data scientist needs to identify fraudulent user accounts for a company's ecommerce platform. The company wants the ability to determine if a newly created account is associated with a previously known fraudulent user. The data scientist is using AWS Glue to cleanse the company's application logs during ingestion. Which strategy will allow the data scientist to identify fraudulent accounts?", "options": [ "Execute the built-in FindDuplicates Amazon Athena query.", "Create a FindMatches machine learning transform in AWS Glue.", @@ -1671,7 +1671,7 @@ "references": "" }, { - "question": "QUESTION 151 A Data Scientist is developing a machine learning model to classify whether a financial transaction is fraudulent. The labeled data available for training consists of 100,000 non-fraudulent observations and 1,000 fraudulent observations. The Data Scientist applies the XGBoost algorithm to the data, resulting in the following confusion matrix when the trained model is applied to a previously unseen validation dataset.
The accuracy of the model is 99.1%, but the Data Scientist needs to reduce the number of false negatives. Which combination of steps should the Data Scientist take to reduce the number of false negative predictions by the model? (Choose two.)", + "question": "A Data Scientist is developing a machine learning model to classify whether a financial transaction is fraudulent. The labeled data available for training consists of 100,000 non-fraudulent observations and 1,000 fraudulent observations. The Data Scientist applies the XGBoost algorithm to the data, resulting in the following confusion matrix when the trained model is applied to a previously unseen validation dataset. The accuracy of the model is 99.1%, but the Data Scientist needs to reduce the number of false negatives. Which combination of steps should the Data Scientist take to reduce the number of false negative predictions by the model? (Choose two.)", "options": [ "Change the XGBoost eval_metric parameter to optimize based on Root Mean Square Error (RMSE).", "Increase the XGBoost scale_pos_weight parameter to adjust the balance of positive and negative weights.", @@ -1683,7 +1683,7 @@ "references": "" }, { - "question": "QUESTION 152 A data scientist has developed a machine learning translation model for English to Japanese by using Amazon SageMaker's built-in seq2seq algorithm with 500,000 aligned sentence pairs. While testing with sample sentences, the data scientist finds that the translation quality is reasonable for an example as short as five words. However, the quality becomes unacceptable if the sentence is 100 words long. Which action will resolve the problem?", + "question": "A data scientist has developed a machine learning translation model for English to Japanese by using Amazon SageMaker's built-in seq2seq algorithm with 500,000 aligned sentence pairs. While testing with sample sentences, the data scientist finds that the translation quality is reasonable for an example as short as five words. However, the quality becomes unacceptable if the sentence is 100 words long. Which action will resolve the problem?", "options": [ "Change preprocessing to use n-grams.", "Add more nodes to the recurrent neural network (RNN) than the largest sentence's word count.", @@ -1695,7 +1695,7 @@ "references": "" }, { - "question": "QUESTION 153 A financial company is trying to detect credit card fraud. The company observed that, on average, 2% of credit card transactions were fraudulent. A data scientist trained a classifier on a year's worth of credit card transactions data. The model needs to identify the fraudulent transactions (positives) from the regular ones (negatives). The company's goal is to accurately capture as many positives as possible. Which metrics should the data scientist use to optimize the model? (Choose two.)", + "question": "A financial company is trying to detect credit card fraud. The company observed that, on average, 2% of credit card transactions were fraudulent. A data scientist trained a classifier on a year's worth of credit card transactions data. The model needs to identify the fraudulent transactions (positives) from the regular ones (negatives). The company's goal is to accurately capture as many positives as possible. Which metrics should the data scientist use to optimize the model? (Choose two.)", "options": [ "True positive rate", "Specificity", @@ -1707,7 +1707,7 @@ "references": "" }, { - "question": "QUESTION 154 A machine learning specialist is developing a proof of concept for government users whose primary concern is security. The specialist is using Amazon SageMaker to train a convolutional neural network (CNN) model for a photo classifier application.
The specialist wants to protect the data so that it cannot be accessed and transferred to a remote host by malicious code accidentally installed on the training container. Which action will provide the MOST secure protection?", + "question": "A machine learning specialist is developing a proof of concept for government users whose primary concern is security. The specialist is using Amazon SageMaker to train a convolutional neural network (CNN) model for a photo classifier application. The specialist wants to protect the data so that it cannot be accessed and transferred to a remote host by malicious code accidentally installed on the training container. Which action will provide the MOST secure protection?", "options": [ "Remove Amazon S3 access permissions from the SageMaker execution role.", "Encrypt the weights of the CNN model.", @@ -1719,7 +1719,7 @@ "references": "" }, { - "question": "QUESTION 156 A company is using Amazon Textract to extract textual data from thousands of scanned text-heavy legal documents daily. The company uses this information to process loan applications automatically. Some of the documents fail business validation and are returned to human reviewers, who investigate the errors. This activity increases the time to process the loan applications. What should the company do to reduce the processing time of loan applications?", + "question": "A company is using Amazon Textract to extract textual data from thousands of scanned text-heavy legal documents daily. The company uses this information to process loan applications automatically. Some of the documents fail business validation and are returned to human reviewers, who investigate the errors. This activity increases the time to process the loan applications. What should the company do to reduce the processing time of loan applications?", "options": [ "Configure Amazon Textract to route low-confidence predictions to Amazon SageMaker Ground Truth. Perform a manual review on those words before performing a business validation.", "Use an Amazon Textract synchronous operation instead of an asynchronous operation.", @@ -1731,7 +1731,7 @@ "references": "" }, { - "question": "QUESTION 157 A company ingests machine learning (ML) data from web advertising clicks into an Amazon S3 data lake. Click data is added to an Amazon Kinesis data stream by using the Kinesis Producer Library (KPL). The data is loaded into the S3 data lake from the data stream by using an Amazon Kinesis Data Firehose delivery stream. As the data volume increases, an ML specialist notices that the rate of data ingested into Amazon S3 is relatively constant. There also is an increasing backlog of data for Kinesis Data Streams and Kinesis Data Firehose to ingest. Which next step is MOST likely to improve the data ingestion rate into Amazon S3?", + "question": "A company ingests machine learning (ML) data from web advertising clicks into an Amazon S3 data lake. Click data is added to an Amazon Kinesis data stream by using the Kinesis Producer Library (KPL). The data is loaded into the S3 data lake from the data stream by using an Amazon Kinesis Data Firehose delivery stream. As the data volume increases, an ML specialist notices that the rate of data ingested into Amazon S3 is relatively constant. There also is an increasing backlog of data for Kinesis Data Streams and Kinesis Data Firehose to ingest. Which next step is MOST likely to improve the data ingestion rate into Amazon S3?", "options": [ "Increase the number of S3 prefixes for the delivery stream to write to.", "Decrease the retention period for the data stream.", @@ -1743,7 +1743,7 @@ "references": "" }, { - "question": "QUESTION 158 A data scientist must build a custom recommendation model in Amazon SageMaker for an online retail company. Due to the nature of the company's products, customers buy only 4-5 products every 5-10 years. So, the company relies on a steady stream of new customers.
When a new customer signs up, the company collects data on the customer's preferences. Below is a sample of the data available to the data scientist. How should the data scientist split the dataset into a training and test set for this use case?", + "question": "A data scientist must build a custom recommendation model in Amazon SageMaker for an online retail company. Due to the nature of the company's products, customers buy only 4-5 products every 5-10 years. So, the company relies on a steady stream of new customers. When a new customer signs up, the company collects data on the customer's preferences. Below is a sample of the data available to the data scientist. How should the data scientist split the dataset into a training and test set for this use case?", "options": [ "Shuffle all interaction data. Split off the last 10% of the interaction data for the test set.", "Identify the most recent 10% of interactions for each user. Split off these interactions for the test set.", @@ -1755,7 +1755,7 @@ "references": "" }, { - "question": "QUESTION 159 A financial services company wants to adopt Amazon SageMaker as its default data science environment. The company's data scientists run machine learning (ML) models on confidential financial data. The company is worried about data egress and wants an ML engineer to secure the environment. Which mechanisms can the ML engineer use to control data egress from SageMaker? (Choose three.)", + "question": "A financial services company wants to adopt Amazon SageMaker as its default data science environment. The company's data scientists run machine learning (ML) models on confidential financial data. The company is worried about data egress and wants an ML engineer to secure the environment. Which mechanisms can the ML engineer use to control data egress from SageMaker? (Choose three.)", "options": [ "Protect data with encryption at rest and in transit. Use AWS Key Management Service (AWS KMS) to manage encryption keys.", "Connect to SageMaker by using a VPC interface endpoint powered by AWS PrivateLink.", @@ -1767,7 +1767,7 @@ "references": "" }, { - "question": "QUESTION 160 A company needs to quickly make sense of a large amount of data and gain insight from it. The data is in different formats, the schemas change frequently, and new data sources are added regularly. The company wants to use AWS services to explore multiple data sources, suggest schemas, and enrich and transform the data. The solution should require the least possible coding effort for the data flows and the least possible infrastructure management.
Which combination of AWS services will meet these r equirements?", "options": [ "AWS Data Pipeline for data transferAWS Step Functions for orchestrating AWS Lambda job s for data discovery, enrichment, andtransformationAmazon Athena for querying and analyzing the result s in Amazon S3 using standard SQLAmazon QuickSight for reporting and getting insight s", "Amazon EMR for data discovery, enrichment, and tr ansformationAmazon Athena for querying and analyzing the result s in Amazon S3 using standard SQLAmazon QuickSight for reporting and getting insight s", @@ -1779,7 +1779,7 @@ "references": "" }, { - "question": "QUESTION 161 A company is converting a large number of unstructu red paper receipts into images. The company wants to create a model based on natural language p rocessing (NLP) to find relevant entities such as date, location, and notes, as well as some custom e ntities such as receipt numbers. The company is using optical character recognition (OCR) to extract text for data labeling. However, documents are in different structures and formats, and the company is facing challenges with setting up the manual workflows for each document type. Add itionally, the company trained a named entity recognition (NER) model for custom entity detection using a small sample size. This model has a very low confidence score and will require retraining wi th a large dataset. Which solution for text extraction and entity detec tion will require the LEAST amount of effort?", + "question": "A company is converting a large number of unstructu red paper receipts into images. The company wants to create a model based on natural language p rocessing (NLP) to find relevant entities such as date, location, and notes, as well as some custom e ntities such as receipt numbers. The company is using optical character recognition (OCR) to extract text for data labeling. However, documents are in different structures and formats, and the company is facing challenges with setting up the manual workflows for each document type. Add itionally, the company trained a named entity recognition (NER) model for custom entity detection using a small sample size. This model has a very low confidence score and will require retraining wi th a large dataset. Which solution for text extraction and entity detec tion will require the LEAST amount of effort?", "options": [ "Extract text from receipt images by using Amazon Textract. Use the Amazon SageMakerBlazingText algorithm to train on the text for enti ties and custom entities.", "Extract text from receipt images by using a deep learning OCR model from the AWS Marketplace.Use the NER deep learning model to extract entities . C. Extract text from receipt images by using Amazon Textract. Use Amazon Comprehend for entitydetection, and use Amazon Comprehend custom entity recognition for custom entity detection.", @@ -1790,7 +1790,7 @@ "references": "" }, { - "question": "QUESTION 162 A company is building a predictive maintenance mode l based on machine learning (ML). The data is stored in a fully private Amazon S3 bucket that is encrypted at rest with AWS Key Management Service (AWS KMS) CMKs. An ML specialist must run d ata preprocessing by using an Amazon SageMaker Processing job that is triggered from cod e in an Amazon SageMaker notebook. The job should read data from Amazon S3, process it, and up load it back to the same S3 bucket. The preprocessing code is stored in a container image i n Amazon Elastic Container Registry (Amazon ECR). 
The ML specialist needs to grant permissions to ensure a smooth data preprocessing workflow. Which set of actions should the ML specialist take to meet these requirements?", + "question": "A company is building a predictive maintenance mode l based on machine learning (ML). The data is stored in a fully private Amazon S3 bucket that is encrypted at rest with AWS Key Management Service (AWS KMS) CMKs. An ML specialist must run d ata preprocessing by using an Amazon SageMaker Processing job that is triggered from cod e in an Amazon SageMaker notebook. The job should read data from Amazon S3, process it, and up load it back to the same S3 bucket. The preprocessing code is stored in a container image i n Amazon Elastic Container Registry (Amazon ECR). The ML specialist needs to grant permissions to ensure a smooth data preprocessing workflow. Which set of actions should the ML specialist take to meet these requirements?", "options": [ "Create an IAM role that has permissions to create Amazon SageMaker Processing jobs, S3 readand write access to the relevant S3 bucket, and app ropriate KMS and ECR permissions. Attach therole to the SageMaker notebook instance. Create an Amazon SageMaker Processing job from thenotebook.", "Create an IAM role that has permissions to create Amazon SageMaker Processing jobs. Attach therole to the SageMaker notebook instance. Create an Amazon SageMaker Processing job with an IAMrole that has read and write permissions to the rel evant S3 bucket, and appropriate KMS and ECRpermissions.", @@ -1802,7 +1802,7 @@ "references": "" }, { - "question": "QUESTION 163 A data scientist has been running an Amazon SageMak er notebook instance for a few weeks. During this time, a new version of Jupyter Notebook was re leased along with additional software updates. The security team mandates that all running SageMak er notebook instances use the latest security and software updates provided by SageMaker. How can the data scientist meet these requirements?", + "question": "A data scientist has been running an Amazon SageMak er notebook instance for a few weeks. During this time, a new version of Jupyter Notebook was re leased along with additional software updates. The security team mandates that all running SageMak er notebook instances use the latest security and software updates provided by SageMaker. How can the data scientist meet these requirements?", "options": [ "Call the CreateNotebookInstanceLifecycleConfig AP I operation", "Create a new SageMaker notebook instance and moun t the Amazon Elastic Block Store (AmazonEBS) volume from the original instance", @@ -1814,7 +1814,7 @@ "references": "" }, { - "question": "QUESTION 164 A library is developing an automatic book-borrowing system that uses Amazon Rekognition. Images of library members faces are stored in an Amazon S3 bucket. When members borrow books, the Amazon Rekognition CompareFaces API operation compa res real faces against the stored faces in Amazon S3. The library needs to improve security by making sur e that images are encrypted at rest. Also, when the images are used with Amazon Rekognition. they n eed to be encrypted in transit. The library also must ensure that the images are not used to improve Amazon Rekognition as a service. How should a machine learning specialist architect the solution to satisfy these requirements?", + "question": "A library is developing an automatic book-borrowing system that uses Amazon Rekognition. Images of library members faces are stored in an Amazon S3 bucket. 
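As an aside, the stop-and-restart pattern that the notebook-update question above is probing can be scripted directly with boto3; stopping and restarting a notebook instance makes SageMaker re-provision it with the latest security patches and software updates. A minimal sketch; the instance name is a placeholder.

```python
import boto3

sm = boto3.client("sagemaker")

# "my-notebook" is a placeholder name.
sm.stop_notebook_instance(NotebookInstanceName="my-notebook")
waiter = sm.get_waiter("notebook_instance_stopped")
waiter.wait(NotebookInstanceName="my-notebook")

# Restarting picks up the latest updates provided by SageMaker.
sm.start_notebook_instance(NotebookInstanceName="my-notebook")
```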
When members borrow books, the Amazon Rekognition CompareFaces API operation compares real faces against the stored faces in Amazon S3. The library needs to improve security by making sure that images are encrypted at rest. Also, when the images are used with Amazon Rekognition, they need to be encrypted in transit. The library also must ensure that the images are not used to improve Amazon Rekognition as a service. How should a machine learning specialist architect the solution to satisfy these requirements?", "options": [ "Enable server-side encryption on the S3 bucket. Submit an AWS Support ticket to opt out of allowing images to be used for improving the service, and follow the process provided by AWS Support.", "Switch to using an Amazon Rekognition collection to store the images. Use the IndexFaces and SearchFacesByImage API operations instead of the CompareFaces API operation.", @@ -1826,7 +1826,7 @@ "references": "" }, { - "question": "QUESTION 165 A company is building a line-counting application for use in a quick-service restaurant. The company wants to use video cameras pointed at the line of customers at a given register to measure how many people are in line and deliver notifications to managers if the line grows too long. The restaurant locations have limited bandwidth for connections to external services and cannot accommodate multiple video streams without impacting other operations. Which solution should a machine learning specialist implement to meet these requirements?", + "question": "A company is building a line-counting application for use in a quick-service restaurant. The company wants to use video cameras pointed at the line of customers at a given register to measure how many people are in line and deliver notifications to managers if the line grows too long. The restaurant locations have limited bandwidth for connections to external services and cannot accommodate multiple video streams without impacting other operations. Which solution should a machine learning specialist implement to meet these requirements?", "options": [ "Install cameras compatible with Amazon Kinesis Video Streams to stream the data to AWS over the restaurant's existing internet connection. Write an AWS Lambda function to take an image and send it to Amazon Rekognition to count the number of faces in the image. Send an Amazon Simple Notification Service (Amazon SNS) notification if the line is too long.", "Deploy AWS DeepLens cameras in the restaurant to capture video. Enable Amazon Rekognition on the AWS DeepLens device, and use it to trigger a local AWS Lambda function when a person is recognized. Use the Lambda function to send an Amazon Simple Notification Service (Amazon SNS) notification if the line is too long.", @@ -1838,7 +1838,7 @@ "references": "" }, { - "question": "QUESTION 166 A company has set up and deployed its machine learning (ML) model into production with an endpoint using Amazon SageMaker hosting services. The ML team has configured automatic scaling for its SageMaker instances to support workload changes. During testing, the team notices that additional instances are being launched before the new instances are ready. This behavior needs to change as soon as possible. How can the ML team solve this issue?", + "question": "A company has set up and deployed its machine learning (ML) model into production with an endpoint using Amazon SageMaker hosting services. 
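For the encryption requirements in the book-borrowing question above, the relevant calls are worth seeing concretely: Rekognition API traffic is HTTPS (encryption in transit), and default bucket encryption covers encryption at rest. A sketch with boto3; the bucket and object names are placeholders.

```python
import boto3

s3 = boto3.client("s3")
rekognition = boto3.client("rekognition")  # API calls go over HTTPS, so images are encrypted in transit

# Enforce encryption at rest for the image bucket (names are placeholders).
s3.put_bucket_encryption(
    Bucket="member-faces-bucket",
    ServerSideEncryptionConfiguration={
        "Rules": [{"ApplyServerSideEncryptionByDefault": {"SSEAlgorithm": "aws:kms"}}]
    },
)

response = rekognition.compare_faces(
    SourceImage={"S3Object": {"Bucket": "member-faces-bucket", "Name": "stored/member-123.jpg"}},
    TargetImage={"S3Object": {"Bucket": "member-faces-bucket", "Name": "captured/checkout.jpg"}},
    SimilarityThreshold=90,
)
```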
T he ML team has configured automatic scaling for its SageMaker instances to support workload cha nges. During testing, the team notices that additional instances are being launched before the new instances are ready. This behavior needs to change as soon as possible. How can the ML team solve this issue?", "options": [ "Decrease the cooldown period for the scale-in act ivity. Increase the configured maximum capacityof instances.", "Replace the current endpoint with a multi-model e ndpoint using SageMaker.", @@ -1850,7 +1850,7 @@ "references": "" }, { - "question": "QUESTION 167 A telecommunications company is developing a mobile app for its customers. The company is using an Amazon SageMaker hosted endpoint for machine lea rning model inferences. Developers want to introduce a new version of the m odel for a limited number of users who subscribed to a preview feature of the app. After t he new version of the model is tested as a preview, developers will evaluate its accuracy. If a new ver sion of the model has better accuracy, developers need to be able to gradually release the new versio n for all users over a fixed period of time. How can the company implement the testing model wit h the LEAST amount of operational overhead?", + "question": "A telecommunications company is developing a mobile app for its customers. The company is using an Amazon SageMaker hosted endpoint for machine lea rning model inferences. Developers want to introduce a new version of the m odel for a limited number of users who subscribed to a preview feature of the app. After t he new version of the model is tested as a preview, developers will evaluate its accuracy. If a new ver sion of the model has better accuracy, developers need to be able to gradually release the new versio n for all users over a fixed period of time. How can the company implement the testing model wit h the LEAST amount of operational overhead?", "options": [ "Update the ProductionVariant data type with the n ew version of the model by using theCreateEndpointConfig operation with the InitialVari antWeight parameter set to 0. Specify theTargetVariant parameter for InvokeEndpoint calls fo r users who subscribed to the preview feature.When the new version of the model is ready for rele ase, gradually increase InitialVariantWeight untilall users have the updated version.", "Configure two SageMaker hosted endpoints that ser ve the different versions of the model. Createan Application Load Balancer (ALB) to route traffic to both endpoints based on the TargetVariantquery string parameter. Reconfigure the app to send the TargetVariant query string parameter forusers who subscribed to the preview feature. When t he new version of the model is ready forrelease, change the ALB's routing algorithm to weig hted until all users have the updated version.", @@ -1862,7 +1862,7 @@ "references": "" }, { - "question": "QUESTION 168 A company offers an online shopping service to its customers. The company wants to enhance the sites security by requesting additional information when customers access the site from locations that are different from their normal location. The company wants to update the process to call a machine learning (ML) model to determine when addit ional information should be requested. The company has several terabytes of data from its existing ecommerce web servers containing the source IP addresses for each request made to the we b server. For authenticated requests, the records also contain the login name of the requesting user. 
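The gradual-rollout mechanics referenced in the preview-feature question above map to two API calls: InvokeEndpoint with TargetVariant for preview users, and UpdateEndpointWeightsAndCapacities to shift traffic over time. A minimal boto3 sketch; the endpoint and variant names are placeholders.

```python
import boto3

sm = boto3.client("sagemaker")
runtime = boto3.client("sagemaker-runtime")

# Preview users are routed explicitly to the new variant.
runtime.invoke_endpoint(
    EndpointName="mobile-app-endpoint",
    TargetVariant="variant-new",
    ContentType="application/json",
    Body=b'{"features": [1, 2, 3]}',
)

# Gradual release: shift traffic weight toward the new variant.
sm.update_endpoint_weights_and_capacities(
    EndpointName="mobile-app-endpoint",
    DesiredWeightsAndCapacities=[
        {"VariantName": "variant-old", "DesiredWeight": 0.8},
        {"VariantName": "variant-new", "DesiredWeight": 0.2},
    ],
)
```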
Which approach should an ML specialist take to impl ement the new security feature in the web application?", + "question": "A company offers an online shopping service to its customers. The company wants to enhance the sites security by requesting additional information when customers access the site from locations that are different from their normal location. The company wants to update the process to call a machine learning (ML) model to determine when addit ional information should be requested. The company has several terabytes of data from its existing ecommerce web servers containing the source IP addresses for each request made to the we b server. For authenticated requests, the records also contain the login name of the requesting user. Which approach should an ML specialist take to impl ement the new security feature in the web application?", "options": [ "Use Amazon SageMaker Ground Truth to label each r ecord as either a successful or failed accessattempt. Use Amazon SageMaker to train a binary cla ssification model using the factorizationmachines (FM) algorithm.", "Use Amazon SageMaker to train a model using the I P Insights algorithm. Schedule updates andretraining of the model using new log data nightly.", @@ -1874,7 +1874,7 @@ "references": "" }, { - "question": "QUESTION 169 A retail company wants to combine its customer orde rs with the product description data from its product catalog. The structure and format of the re cords in each dataset is different. A data analyst tried to use a spreadsheet to combine the datasets, but the effort resulted in duplicate records and records that were not properly combined. The compan y needs a solution that it can use to combine similar records from the two datasets and remove an y duplicates. Which solution will meet these requirements?", + "question": "A retail company wants to combine its customer orde rs with the product description data from its product catalog. The structure and format of the re cords in each dataset is different. A data analyst tried to use a spreadsheet to combine the datasets, but the effort resulted in duplicate records and records that were not properly combined. The compan y needs a solution that it can use to combine similar records from the two datasets and remove an y duplicates. Which solution will meet these requirements?", "options": [ "Use an AWS Lambda function to process the data. U se two arrays to compare equal strings in thefields from the two datasets and remove any duplica tes.", "Create AWS Glue crawlers for reading and populati ng the AWS Glue Data Catalog. Call the AWSGlue SearchTables API operation to perform a fuzzy- matching search on the two datasets, andcleanse the data accordingly.", @@ -1886,7 +1886,7 @@ "references": "" }, { - "question": "QUESTION 170 A company provisions Amazon SageMaker notebook inst ances for its data science team and creates Amazon VPC interface endpoints to ensure communicat ion between the VPC and the notebook instances. All connections to the Amazon SageMaker API are contained entirely and securely using the AWS network. However, the data science team rea lizes that individuals outside the VPC can still connect to the notebook instances across the intern et. Which set of actions should the data science team t ake to fix the issue?", + "question": "A company provisions Amazon SageMaker notebook inst ances for its data science team and creates Amazon VPC interface endpoints to ensure communicat ion between the VPC and the notebook instances. 
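On the record-matching question above: AWS Glue's FindMatches ML transform is the managed fuzzy-deduplication feature being tested. A hedged sketch of creating one with boto3; the database, table, role, and key column are placeholders, and the exact parameter set should be checked against the current Glue API.

```python
import boto3

glue = boto3.client("glue")

# All names, the role ARN, and the primary key column are placeholders.
glue.create_ml_transform(
    Name="dedupe-orders-catalog",
    Role="arn:aws:iam::123456789012:role/GlueServiceRole",
    InputRecordTables=[
        {"DatabaseName": "retail", "TableName": "combined_records"}
    ],
    Parameters={
        "TransformType": "FIND_MATCHES",
        "FindMatchesParameters": {
            "PrimaryKeyColumnName": "record_id",
            "PrecisionRecallTradeoff": 0.9,  # favor precision to avoid false merges
        },
    },
    GlueVersion="2.0",
)
```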
All connections to the Amazon SageMaker API are contained entirely and securely using the AWS network. However, the data science team realizes that individuals outside the VPC can still connect to the notebook instances across the internet. Which set of actions should the data science team take to fix the issue?", "options": [ "Modify the notebook instances' security group to allow traffic only from the CIDR ranges of the VPC. Apply this security group to all of the notebook instances' VPC interfaces.", "Create an IAM policy that allows the sagemaker:CreatePresignedNotebookInstanceUrl and sagemaker:DescribeNotebookInstance actions from only the VPC endpoints. Apply this policy to all IAM users, groups, and roles used to access the notebook instances.", @@ -1898,7 +1898,7 @@ "references": "" }, { - "question": "QUESTION 171 A company will use Amazon SageMaker to train and host a machine learning (ML) model for a marketing campaign. The majority of data is sensitive customer data. The data must be encrypted at rest. The company wants AWS to maintain the root of trust for the master keys and wants encryption key usage to be logged. Which implementation will meet these requirements?", + "question": "A company will use Amazon SageMaker to train and host a machine learning (ML) model for a marketing campaign. The majority of data is sensitive customer data. The data must be encrypted at rest. The company wants AWS to maintain the root of trust for the master keys and wants encryption key usage to be logged. Which implementation will meet these requirements?", "options": [ "Use encryption keys that are stored in AWS CloudHSM to encrypt the ML data volumes, and to encrypt the model artifacts and data in Amazon S3.", "Use SageMaker built-in transient keys to encrypt the ML data volumes. Enable default encryption for new Amazon Elastic Block Store (Amazon EBS) volumes.", @@ -1910,7 +1910,7 @@ "references": "" }, { - "question": "QUESTION 172 A machine learning specialist stores IoT soil sensor data in an Amazon DynamoDB table and stores weather event data as JSON files in Amazon S3. The dataset in DynamoDB is 10 GB in size and the dataset in Amazon S3 is 5 GB in size. The specialist wants to train a model on this data to help predict soil moisture levels as a function of weather events using Amazon SageMaker. Which solution will accomplish the necessary transformation to train the Amazon SageMaker model with the LEAST amount of administrative overhead?", + "question": "A machine learning specialist stores IoT soil sensor data in an Amazon DynamoDB table and stores weather event data as JSON files in Amazon S3. The dataset in DynamoDB is 10 GB in size and the dataset in Amazon S3 is 5 GB in size. The specialist wants to train a model on this data to help predict soil moisture levels as a function of weather events using Amazon SageMaker. Which solution will accomplish the necessary transformation to train the Amazon SageMaker model with the LEAST amount of administrative overhead?", "options": [ "Launch an Amazon EMR cluster. Create an Apache Hive external table for the DynamoDB table and S3 data. Join the Hive tables and write the results out to Amazon S3.", "Crawl the data using AWS Glue crawlers. 
Write an AWS Glue ETL job that merges the two tables and writes the output to an Amazon Redshift cluster.", @@ -1922,7 +1922,7 @@ "references": "" }, { - "question": "QUESTION 173 A company sells thousands of products on a public website and wants to automatically identify products with potential durability problems. The company has 1,000 reviews with date, star rating, review text, review summary, and customer email fields, but many reviews are incomplete and have empty fields. Each review has already been labeled with the correct durability result. A machine learning specialist must train a model to identify reviews expressing concerns over product durability. The first model needs to be trained and ready to review in 2 days. What is the MOST direct approach to solve this problem within 2 days?", + "question": "A company sells thousands of products on a public website and wants to automatically identify products with potential durability problems. The company has 1,000 reviews with date, star rating, review text, review summary, and customer email fields, but many reviews are incomplete and have empty fields. Each review has already been labeled with the correct durability result. A machine learning specialist must train a model to identify reviews expressing concerns over product durability. The first model needs to be trained and ready to review in 2 days. What is the MOST direct approach to solve this problem within 2 days?", "options": [ "Train a custom classifier by using Amazon Comprehend.", "Build a recurrent neural network (RNN) in Amazon SageMaker by using Gluon and Apache MXNet.", @@ -1934,7 +1934,7 @@ "references": "" }, { - "question": "QUESTION 174 A company that runs an online library is implementing a chatbot using Amazon Lex to provide book recommendations based on category. This intent is fulfilled by an AWS Lambda function that queries an Amazon DynamoDB table for a list of book titles, given a particular category. For testing, there are only three categories implemented as the custom slot types: \"comedy,\" \"adventure,\" and \"documentary.\" A machine learning (ML) specialist notices that sometimes the request cannot be fulfilled because Amazon Lex cannot understand the category spoken by users with utterances such as \"funny,\" \"fun,\" and \"humor.\" The ML specialist needs to fix the problem without changing the Lambda code or data in DynamoDB. How should the ML specialist fix the problem?", + "question": "A company that runs an online library is implementing a chatbot using Amazon Lex to provide book recommendations based on category. This intent is fulfilled by an AWS Lambda function that queries an Amazon DynamoDB table for a list of book titles, given a particular category. For testing, there are only three categories implemented as the custom slot types: \"comedy,\" \"adventure,\" and \"documentary.\" A machine learning (ML) specialist notices that sometimes the request cannot be fulfilled because Amazon Lex cannot understand the category spoken by users with utterances such as \"funny,\" \"fun,\" and \"humor.\" The ML specialist needs to fix the problem without changing the Lambda code or data in DynamoDB. 
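For the durability-review question above, training a Comprehend custom classifier is a single API call once the labeled CSV is in S3, which is why it fits a 2-day deadline. A minimal boto3 sketch; the classifier name, role ARN, and S3 URI are placeholders, and the input is assumed to be a two-column CSV of label and review text.

```python
import boto3

comprehend = boto3.client("comprehend")

# Names, role, and S3 location are placeholders.
comprehend.create_document_classifier(
    DocumentClassifierName="durability-concerns",
    DataAccessRoleArn="arn:aws:iam::123456789012:role/ComprehendDataAccessRole",
    InputDataConfig={"S3Uri": "s3://reviews-bucket/labeled-reviews.csv"},
    LanguageCode="en",
)
```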
How should the ML specialist fix the problem?", "options": [ "Add the unrecognized words in the enumeration val ues list as new values in the slot type.", "Create a new custom slot type, add the unrecogniz ed words to this slot type as enumerationvalues, and use this slot type for the slot.", @@ -1946,7 +1946,7 @@ "references": "" }, { - "question": "QUESTION 175 A manufacturing company uses machine learning (ML) models to detect quality issues. The models use images that are taken of the company's product at the end of each production step. The company has thousands of machines at the production site th at generate one image per second on average. The company ran a successful pilot with a single ma nufacturing machine. For the pilot, ML specialists used an industrial PC that ran AWS IoT Greengrass w ith a long-running AWS Lambda function that uploaded the images to Amazon S3. The uploaded imag es invoked a Lambda function that was written in Python to perform inference by using an Amazon SageMaker endpoint that ran a custom model. The inference results were forwarded back to a web service that was hosted at the production site to prevent faulty products from bei ng shipped. The company scaled the solution out to all manufact uring machines by installing similarly configured industrial PCs on each production machine. However, latency for predictions increased beyond acceptable limits. Analysis shows that the internet connection is at its capacity limit. How can the company resolve this issue MOST cost-ef fectively?", + "question": "A manufacturing company uses machine learning (ML) models to detect quality issues. The models use images that are taken of the company's product at the end of each production step. The company has thousands of machines at the production site th at generate one image per second on average. The company ran a successful pilot with a single ma nufacturing machine. For the pilot, ML specialists used an industrial PC that ran AWS IoT Greengrass w ith a long-running AWS Lambda function that uploaded the images to Amazon S3. The uploaded imag es invoked a Lambda function that was written in Python to perform inference by using an Amazon SageMaker endpoint that ran a custom model. The inference results were forwarded back to a web service that was hosted at the production site to prevent faulty products from bei ng shipped. The company scaled the solution out to all manufact uring machines by installing similarly configured industrial PCs on each production machine. However, latency for predictions increased beyond acceptable limits. Analysis shows that the internet connection is at its capacity limit. How can the company resolve this issue MOST cost-ef fectively?", "options": [ "Set up a 10 Gbps AWS Direct Connect connection be tween the production site and the nearestAWS Region. Use the Direct Connect connection to up load the images. Increase the size of theinstances and the number of instances that are used by the SageMaker endpoint.", "Extend the long-running Lambda function that runs on AWS IoT Greengrass to compress theimages and upload the compressed files to Amazon S3 . Decompress the files by using a separateLambda function that invokes the existing Lambda fu nction to run the inference pipeline.", @@ -1958,7 +1958,7 @@ "references": "" }, { - "question": "QUESTION 176 A data scientist is using an Amazon SageMaker noteb ook instance and needs to securely access data stored in a specific Amazon S3 bucket. 
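The synonym fix described in the Lex question above can be applied through the Lex V1 model-building API. A sketch with boto3, assuming the slot type already exists (updates require the current checksum); the slot type name is a placeholder.

```python
import boto3

lex = boto3.client("lex-models")  # Amazon Lex V1 model-building API

# Fetch the current checksum, then republish the slot type with synonyms.
current = lex.get_slot_type(name="BookCategory", version="$LATEST")  # name is a placeholder
lex.put_slot_type(
    name="BookCategory",
    checksum=current["checksum"],
    valueSelectionStrategy="TOP_RESOLUTION",  # resolve synonyms to their canonical value
    enumerationValues=[
        {"value": "comedy", "synonyms": ["funny", "fun", "humor"]},
        {"value": "adventure", "synonyms": []},
        {"value": "documentary", "synonyms": []},
    ],
)
```

With TOP_RESOLUTION, an utterance of "funny" resolves to the slot value "comedy", so the Lambda code and DynamoDB data stay unchanged.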
How should the data scientist accomplish this?", + "question": "A data scientist is using an Amazon SageMaker noteb ook instance and needs to securely access data stored in a specific Amazon S3 bucket. How should the data scientist accomplish this?", "options": [ "Add an S3 bucket policy allowing GetObject, PutOb ject, and ListBucket permissions to the AmazonSageMaker notebook ARN as principal.", "Encrypt the objects in the S3 bucket with a custo m AWS Key Management Service (AWS KMS) keythat only the notebook owner has access to.", @@ -1970,7 +1970,7 @@ "references": "" }, { - "question": "QUESTION 177 A company is launching a new product and needs to b uild a mechanism to monitor comments about the company and its new product on social media. Th e company needs to be able to evaluate the sentimen t expressed in social media posts, and visualize trends and configure alarms based on vari ous thresholds. The company needs to implement this solution quickl y, and wants to minimize the infrastructure and data science resources needed to evaluate the messa ges. The company already has a solution in place to collect posts and store them within an Ama zon S3 bucket. What services should the data science team use to d eliver this solution?", + "question": "A company is launching a new product and needs to b uild a mechanism to monitor comments about the company and its new product on social media. Th e company needs to be able to evaluate the sentimen t expressed in social media posts, and visualize trends and configure alarms based on vari ous thresholds. The company needs to implement this solution quickl y, and wants to minimize the infrastructure and data science resources needed to evaluate the messa ges. The company already has a solution in place to collect posts and store them within an Ama zon S3 bucket. What services should the data science team use to d eliver this solution?", "options": [ "Train a model in Amazon SageMaker by using the Bl azingText algorithm to detect sentiment in thecorpus of social media posts. Expose an endpoint th at can be called by AWS Lambda. Trigger aLambda function when posts are added to the S3 buck et to invoke the endpoint and record thesentiment in an Amazon DynamoDB table and in a cust om Amazon CloudWatch metric. UseCloudWatch alarms to notify analysts of trends.", "Train a model in Amazon SageMaker by using the se mantic segmentation algorithm to model thesemantic content in the corpus of social media post s. Expose an endpoint that can be called by AWSLambda. Trigger a Lambda function when objects are added to the S3 bucket to invoke the endpointand record the sentiment in an Amazon DynamoDB tabl e. Schedule a second Lambda function toquery recently added records and send an Amazon Sim ple Notification Service (Amazon SNS)notification to notify analysts of trends.", @@ -1982,7 +1982,7 @@ "references": "" }, { - "question": "QUESTION 178 A bank wants to launch a low-rate credit promotion. The bank is located in a town that recently experienced economic hardship. Only some of the ban k's customers were affected by the crisis, so the bank's credit team must identify which customer s to target with the promotion. However, the credit team wants to make sure that loyal customers ' full credit history is considered when the decision is made. The bank's data science team developed a model that classifies account transactions and understands credit eligibility. The data science te am used the XGBoost algorithm to train the model. 
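For the social media monitoring question above, the core of the Lambda-based option is two calls: Comprehend sentiment detection and a custom CloudWatch metric. A minimal sketch of a handler; the bucket layout, namespace, and metric naming are assumptions.

```python
import boto3

comprehend = boto3.client("comprehend")
cloudwatch = boto3.client("cloudwatch")
s3 = boto3.client("s3")

def lambda_handler(event, context):
    # Assumes the S3 event points at a UTF-8 text post.
    record = event["Records"][0]["s3"]
    obj = s3.get_object(Bucket=record["bucket"]["name"], Key=record["object"]["key"])
    post = obj["Body"].read().decode("utf-8")

    # detect_sentiment accepts up to 5,000 bytes of text.
    sentiment = comprehend.detect_sentiment(Text=post[:5000], LanguageCode="en")

    # Publish a count per sentiment label; CloudWatch alarms can then watch trends.
    cloudwatch.put_metric_data(
        Namespace="SocialMedia",  # hypothetical namespace
        MetricData=[{
            "MetricName": f"{sentiment['Sentiment']}Posts",  # e.g., POSITIVEPosts
            "Value": 1,
            "Unit": "Count",
        }],
    )
```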
The team used 7 years of bank transaction historical data for training and hyperparameter tuning over the course of several days. The accuracy of the model is sufficient, but the credit team is struggling to explain accurately why the model denies credit to some customers. The credit team has almost no skill in data science. What should the data science team do to address this issue in the MOST operationally efficient manner?", + "question": "A bank wants to launch a low-rate credit promotion. The bank is located in a town that recently experienced economic hardship. Only some of the bank's customers were affected by the crisis, so the bank's credit team must identify which customers to target with the promotion. However, the credit team wants to make sure that loyal customers' full credit history is considered when the decision is made. The bank's data science team developed a model that classifies account transactions and understands credit eligibility. The data science team used the XGBoost algorithm to train the model. The team used 7 years of bank transaction historical data for training and hyperparameter tuning over the course of several days. The accuracy of the model is sufficient, but the credit team is struggling to explain accurately why the model denies credit to some customers. The credit team has almost no skill in data science. What should the data science team do to address this issue in the MOST operationally efficient manner?", "options": [ "Use Amazon SageMaker Studio to rebuild the model. Create a notebook that uses the XGBoost training container to perform model training. Deploy the model at an endpoint. Enable Amazon SageMaker Model Monitor to store inferences. Use the inferences to create Shapley values that help explain model behavior. Create a chart that shows features and SHapley Additive exPlanations (SHAP) values to explain to the credit team how the features affect the model outcomes.", "Use Amazon SageMaker Studio to rebuild the model. Create a notebook that uses the XGBoost training container to perform model training. Activate Amazon SageMaker Debugger, and configure it to calculate and collect Shapley values. Create a chart that shows features and SHapley Additive exPlanations (SHAP) values to explain to the credit team how the features affect the model outcomes.", @@ -1994,7 +1994,7 @@ "references": "" }, { - "question": "QUESTION 179 A data science team is planning to build a natural language processing (NLP) application. The application's text preprocessing stage will include part-of-speech tagging and key phrase extraction. The preprocessed text will be input to a custom classification algorithm that the data science team has already written and trained using Apache MXNet. Which solution can the team build MOST quickly to meet these requirements?", + "question": "A data science team is planning to build a natural language processing (NLP) application. The application's text preprocessing stage will include part-of-speech tagging and key phrase extraction. The preprocessed text will be input to a custom classification algorithm that the data science team has already written and trained using Apache MXNet. Which solution can the team build MOST quickly to meet these requirements?", "options": [ "Use Amazon Comprehend for the part-of-speech tagging, key phrase extraction, and classification tasks.", "Use an NLP library in Amazon SageMaker for the part-of-speech tagging. Use Amazon Comprehend for the key phrase extraction. 
Use AWS Deep Learning Containers with Amazon SageMaker to build the custom classifier.", @@ -2006,7 +2006,7 @@ "references": "" }, { - "question": "QUESTION 180 A machine learning (ML) specialist must develop a classification model for a financial services company. A domain expert provides the dataset, which is tabular with 10,000 rows and 1,020 features. During exploratory data analysis, the specialist finds no missing values and a small percentage of duplicate rows. There are correlation scores of > 0.9 for 200 feature pairs. The mean value of each feature is similar to its 50th percentile. Which feature engineering strategy should the ML specialist use with Amazon SageMaker?", + "question": "A machine learning (ML) specialist must develop a classification model for a financial services company. A domain expert provides the dataset, which is tabular with 10,000 rows and 1,020 features. During exploratory data analysis, the specialist finds no missing values and a small percentage of duplicate rows. There are correlation scores of > 0.9 for 200 feature pairs. The mean value of each feature is similar to its 50th percentile. Which feature engineering strategy should the ML specialist use with Amazon SageMaker?", "options": [ "Apply dimensionality reduction by using the principal component analysis (PCA) algorithm.", "Drop the features with low correlation scores by using a Jupyter notebook.", @@ -2018,7 +2018,7 @@ "references": "" }, { - "question": "QUESTION 181 A machine learning specialist needs to analyze comments on a news website with users across the globe. The specialist must find the most discussed topics in the comments that are in either English or Spanish. What steps could be used to accomplish this task? (Choose two.)", + "question": "A machine learning specialist needs to analyze comments on a news website with users across the globe. The specialist must find the most discussed topics in the comments that are in either English or Spanish. What steps could be used to accomplish this task? (Choose two.)", "options": [ "Use an Amazon SageMaker BlazingText algorithm to find the topics independently from language. Proceed with the analysis.", "Use an Amazon SageMaker seq2seq algorithm to translate from Spanish to English, if necessary. Use a SageMaker Latent Dirichlet Allocation (LDA) algorithm to find the topics.", @@ -2029,7 +2029,7 @@ "references": "" }, { - "question": "QUESTION 182 A machine learning (ML) specialist is administering a production Amazon SageMaker endpoint with model monitoring configured. Amazon SageMaker Model Monitor detects violations on the SageMaker endpoint, so the ML specialist retrains the model with the latest dataset. This dataset is statistically representative of the current production traffic. The ML specialist notices that even after deploying the new SageMaker model and running the first monitoring job, the SageMaker endpoint still has violations. What should the ML specialist do to resolve the violations?", + "question": "A machine learning (ML) specialist is administering a production Amazon SageMaker endpoint with model monitoring configured. Amazon SageMaker Model Monitor detects violations on the SageMaker endpoint, so the ML specialist retrains the model with the latest dataset. This dataset is statistically representative of the current production traffic. The ML specialist notices that even after deploying the new SageMaker model and running the first monitoring job, the SageMaker endpoint still has violations. 
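On the 1,020-feature question above: PCA is the standard response to many highly correlated features, and the idea is easy to demonstrate outside SageMaker as well. A minimal scikit-learn sketch with synthetic data.

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Synthetic stand-in for the 10,000 x 1,020 feature matrix.
X = np.random.rand(10_000, 1_020)

X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=0.95)  # keep enough components for 95% of the variance
X_reduced = pca.fit_transform(X_scaled)
print(X_reduced.shape)  # far fewer than 1,020 columns when features are highly correlated
```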
What should the ML specialist do to resolve the vio lations?", "options": [ "Manually trigger the monitoring job to re-evaluat e the SageMaker endpoint traffic sample.", "Run the Model Monitor baseline job again on the n ew training set. Configure Model Monitor touse the new baseline. C. Delete the endpoint and recreate it with the origin al configuration.", @@ -2040,7 +2040,7 @@ "references": "" }, { - "question": "QUESTION 183 A company supplies wholesale clothing to thousands of retail stores. A data scientist must create a model that predicts the daily sales volume for each item for each store. The data scientist discovers that more than half of the stores have been in busi ness for less than 6 months. Sales data is highly consistent from week to week. Daily data from the d atabase has been aggregated weekly, and weeks with no sales are omitted from the current dataset. Five years (100 MB) of sales data is available in Amazon S3. Which factors will adversely impact the performance of the forecast model to be developed, and which actions should the data scientist take to mit igate them? (Choose two.)", + "question": "A company supplies wholesale clothing to thousands of retail stores. A data scientist must create a model that predicts the daily sales volume for each item for each store. The data scientist discovers that more than half of the stores have been in busi ness for less than 6 months. Sales data is highly consistent from week to week. Daily data from the d atabase has been aggregated weekly, and weeks with no sales are omitted from the current dataset. Five years (100 MB) of sales data is available in Amazon S3. Which factors will adversely impact the performance of the forecast model to be developed, and which actions should the data scientist take to mit igate them? (Choose two.)", "options": [ "Detecting seasonality for the majority of stores will be an issue. Request categorical data to relat enew stores with similar stores that have more histo rical data.", "The sales data does not have enough variance. Req uest external sales data from other industriesto improve the model's ability to generalize.", @@ -2052,7 +2052,7 @@ "references": "" }, { - "question": "QUESTION 184 An ecommerce company is automating the categorizati on of its products based on images. A data scientist has trained a computer vision model using the Amazon SageMaker image classification algorithm. The images for each product are classifi ed according to specific product lines. The accuracy of the model is too low when categorizing new products. All of the product images have the same dimensions and are stored within an Amazon S3 bucket. The company wants to improve the model so it can be used for new products as soon as possible. Which steps would improve the accuracy of the solut ion? (Choose three.)", + "question": "An ecommerce company is automating the categorizati on of its products based on images. A data scientist has trained a computer vision model using the Amazon SageMaker image classification algorithm. The images for each product are classifi ed according to specific product lines. The accuracy of the model is too low when categorizing new products. All of the product images have the same dimensions and are stored within an Amazon S3 bucket. The company wants to improve the model so it can be used for new products as soon as possible. Which steps would improve the accuracy of the solut ion? 
(Choose three.)", "options": [ "Use the SageMaker semantic segmentation algorithm to train a new model to achieve improved accuracy.", "Use the Amazon Rekognition DetectLabels API to classify the products in the dataset.", @@ -2064,7 +2064,7 @@ "references": "" }, { - "question": "QUESTION 185 A data scientist is training a text classification model by using the Amazon SageMaker built-in BlazingText algorithm. There are 5 classes in the dataset, with 300 samples for category A, 292 samples for category B, 240 samples for category C, 258 samples for category D, and 310 samples for category E. The data scientist shuffles the data and splits off 10% for testing. After training the model, the data scientist generates confusion matrices for the training and test sets. What could the data scientist conclude from these results?", + "question": "A data scientist is training a text classification model by using the Amazon SageMaker built-in BlazingText algorithm. There are 5 classes in the dataset, with 300 samples for category A, 292 samples for category B, 240 samples for category C, 258 samples for category D, and 310 samples for category E. The data scientist shuffles the data and splits off 10% for testing. After training the model, the data scientist generates confusion matrices for the training and test sets. What could the data scientist conclude from these results?", "options": [ "Classes C and D are too similar.", "The dataset is too small for holdout cross-validation.", @@ -2076,7 +2076,7 @@ "references": "" }, { - "question": "QUESTION 186 A company that manufactures mobile devices wants to determine and calibrate the appropriate sales price for its devices. The company is collecting the relevant data and is determining data features that it can use to train machine learning (ML) models. There are more than 1,000 features, and the company wants to determine the primary features that contribute to the sales price. Which techniques should the company use for feature selection? (Choose three.)", + "question": "A company that manufactures mobile devices wants to determine and calibrate the appropriate sales price for its devices. The company is collecting the relevant data and is determining data features that it can use to train machine learning (ML) models. There are more than 1,000 features, and the company wants to determine the primary features that contribute to the sales price. Which techniques should the company use for feature selection? (Choose three.)", "options": [ "Data scaling with standardization and normalization", "Correlation plot with heat maps", @@ -2088,7 +2088,7 @@ "references": "" }, { - "question": "QUESTION 187 A power company wants to forecast future energy consumption for its customers in residential properties and commercial business properties. Historical power consumption data for the last 10 years is available. A team of data scientists who performed the initial data analysis and feature selection will include the historical power consumption data and data such as weather, number of individuals on the property, and public holidays. The data scientists are using Amazon Forecast to generate the forecasts. Which algorithm in Forecast should the data scientists use to meet these requirements?", + "question": "A power company wants to forecast future energy consumption for its customers in residential properties and commercial business properties. Historical power consumption data for the last 10 years is available. 
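For the feature selection question above, a correlation matrix does double duty: it surfaces features related to the target and redundant feature pairs. A minimal pandas sketch with a synthetic stand-in for the device dataset; the column names are hypothetical.

```python
import numpy as np
import pandas as pd

# Synthetic stand-in; the real frame would have 1,000+ columns.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "screen_size": rng.normal(6, 0.5, 200),
    "battery_mah": rng.normal(4000, 300, 200),
})
df["weight_g"] = df["screen_size"] * 30 + rng.normal(0, 1, 200)  # deliberately correlated feature
df["sales_price"] = df["screen_size"] * 80 + df["battery_mah"] * 0.05 + rng.normal(0, 5, 200)

corr = df.corr()
# Candidate predictors: features most correlated with the target.
print(corr["sales_price"].abs().sort_values(ascending=False))
# Redundant predictors: feature pairs highly correlated with each other.
pairs = corr.abs().unstack()
print(pairs[(pairs > 0.9) & (pairs < 1.0)].drop_duplicates())
```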
A team of data scientists who performed the initial data analysis and feature selection will include the historical power consumption data and data such as weather, number of individuals on the property, and public holidays. The data scientists are using Amazon Forecast to generate the forecasts. Which algorithm in Forecast should the data scientists use to meet these requirements?", "options": [ "Autoregressive Integrated Moving Average (ARIMA)", "Exponential Smoothing (ETS)", @@ -2100,7 +2100,7 @@ "references": "" }, { - "question": "QUESTION 188 A company wants to use automatic speech recognition (ASR) to transcribe messages that are less than 60 seconds long from a voicemail-style application. The company requires the correct identification of 200 unique product names, some of which have unique spellings or pronunciations. The company has 4,000 words of Amazon SageMaker Ground Truth voicemail transcripts it can use to customize the chosen ASR model. The company needs to ensure that everyone can update their customizations multiple times each hour. Which approach will maximize transcription accuracy during the development phase?", + "question": "A company wants to use automatic speech recognition (ASR) to transcribe messages that are less than 60 seconds long from a voicemail-style application. The company requires the correct identification of 200 unique product names, some of which have unique spellings or pronunciations. The company has 4,000 words of Amazon SageMaker Ground Truth voicemail transcripts it can use to customize the chosen ASR model. The company needs to ensure that everyone can update their customizations multiple times each hour. Which approach will maximize transcription accuracy during the development phase?", "options": [ "Use a voice-driven Amazon Lex bot to perform the ASR customization. Create custom slots within the bot that specifically identify each of the required product names. Use the Amazon Lex synonym mechanism to provide additional variations of each product name as mis-transcriptions are identified in development.", "Use Amazon Transcribe to perform the ASR customization. Analyze the word confidence scores in the transcript, and automatically create or update a custom vocabulary file with any word that has a confidence score below an acceptable threshold value. Use this updated custom vocabulary file in all future transcription tasks.", @@ -2112,7 +2112,7 @@ "references": "" }, { - "question": "QUESTION 189 A company is building a demand forecasting model based on machine learning (ML). In the development stage, an ML specialist uses an Amazon SageMaker notebook to perform feature engineering during work hours that consumes low amounts of CPU and memory resources. A data engineer uses the same notebook to perform data preprocessing once a day on average that requires very high memory and completes in only 2 hours. The data preprocessing is not configured to use GPU. All the processes are running well on an ml.m5.4xlarge notebook instance. The company receives an AWS Budgets alert that the billing for this month exceeds the allocated budget. Which solution will result in the MOST cost savings?", + "question": "A company is building a demand forecasting model based on machine learning (ML). In the development stage, an ML specialist uses an Amazon SageMaker notebook to perform feature engineering during work hours that consumes low amounts of CPU and memory resources. 
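The custom vocabulary approach in the transcription question above looks like this in boto3. A minimal sketch; the vocabulary name, job name, S3 URI, and product phrases are placeholders, and service quotas on vocabulary updates would need checking against the hourly-update requirement.

```python
import boto3

transcribe = boto3.client("transcribe")

# Vocabulary and phrases are placeholders built from the product list.
transcribe.create_vocabulary(
    VocabularyName="product-names",
    LanguageCode="en-US",
    Phrases=["Example-Widget-Pro", "Acme-Hydro-Flask"],  # hypothetical product names
)

# Reference the vocabulary when transcribing each voicemail.
transcribe.start_transcription_job(
    TranscriptionJobName="voicemail-0001",
    LanguageCode="en-US",
    Media={"MediaFileUri": "s3://voicemail-bucket/message-0001.wav"},
    Settings={"VocabularyName": "product-names"},
)
```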
A data engineer uses the same notebook to perform data pre processing once a day on average that requires very high memory and completes in only 2 hours. The data preprocessing is not configured to use GPU. All the processes are running well on an ml.m5 .4xlarge notebook instance. The company receives an AWS Budgets alert that the billing for this month exceeds the allocated budget. Which solution will result in the MOST cost savings ?", "options": [ "Change the notebook instance type to a memory opt imized instance with the same vCPU numberas the ml.m5.4xlarge instance has. Stop the noteboo k when it is not in use. Run both datapreprocessing and feature engineering development o n that instance.", "Keep the notebook instance type and size the same . Stop the notebook when it is not in use. Rundata preprocessing on a P3 instance type with the s ame memory as the ml.m5.4xlarge instance byusing Amazon SageMaker Processing.", @@ -2124,7 +2124,7 @@ "references": "" }, { - "question": "QUESTION 190 A machine learning specialist is developing a regre ssion model to predict rental rates from rental listings. A variable named Wall_Color represents th e most prominent exterior wall color of the property. The following is the sample data, excludi ng all other variables: The specialist chose a model that needs numerical i nput data. Which feature engineering approaches should the spe cialist use to allow the regression model to learn from the Wall_Color data? (Choose two.)", + "question": "A machine learning specialist is developing a regre ssion model to predict rental rates from rental listings. A variable named Wall_Color represents th e most prominent exterior wall color of the property. The following is the sample data, excludi ng all other variables: The specialist chose a model that needs numerical i nput data. Which feature engineering approaches should the spe cialist use to allow the regression model to learn from the Wall_Color data? (Choose two.)", "options": [ "Apply integer transformation and set Red = 1, Whi te = 5, and Green = 10.", "Add new columns that store one-hot representation of colors.", @@ -2136,7 +2136,7 @@ "references": "" }, { - "question": "QUESTION 191 A data scientist is working on a public sector proj ect for an urban traffic system. While studying the traffic patterns, it is clear to the data scientist that the traffic behavior at each light is correla ted, subject to a small stochastic error term. The data scientist must model the traffic behavior to analyz e the traffic patterns and reduce congestion. How will the data scientist MOST effectively model the problem?", + "question": "A data scientist is working on a public sector proj ect for an urban traffic system. While studying the traffic patterns, it is clear to the data scientist that the traffic behavior at each light is correla ted, subject to a small stochastic error term. The data scientist must model the traffic behavior to analyz e the traffic patterns and reduce congestion. 
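On the Wall_Color question above, one-hot encoding is a one-liner in pandas and avoids imposing a false ordering on the colors. A minimal sketch:

```python
import pandas as pd

df = pd.DataFrame({"Wall_Color": ["Red", "White", "Green", "White"]})
one_hot = pd.get_dummies(df, columns=["Wall_Color"], dtype=int)
print(one_hot)
#    Wall_Color_Green  Wall_Color_Red  Wall_Color_White
# 0                 0               1                 0
# 1                 0               0                 1
# ...
```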
How will the data scientist MOST effectively model the problem?", "options": [ "The data scientist should obtain a correlated equilibrium policy by formulating this problem as a multi-agent reinforcement learning problem.", "The data scientist should obtain the optimal equilibrium policy by formulating this problem as a single-agent reinforcement learning problem.", @@ -2148,7 +2148,7 @@ "references": "" }, { - "question": "QUESTION 192 A data scientist is using the Amazon SageMaker Neural Topic Model (NTM) algorithm to build a model that recommends tags from blog posts. The raw blog post data is stored in an Amazon S3 bucket in JSON format. During model evaluation, the data scientist discovered that the model recommends certain stopwords such as \"a,\" \"an,\" and \"the\" as tags to certain blog posts, along with a few rare words that are present only in certain blog entries. After a few iterations of tag review with the content team, the data scientist notices that the rare words are unusual but feasible. The data scientist also must ensure that the tag recommendations of the generated model do not include the stopwords. What should the data scientist do to meet these requirements?", + "question": "A data scientist is using the Amazon SageMaker Neural Topic Model (NTM) algorithm to build a model that recommends tags from blog posts. The raw blog post data is stored in an Amazon S3 bucket in JSON format. During model evaluation, the data scientist discovered that the model recommends certain stopwords such as \"a,\" \"an,\" and \"the\" as tags to certain blog posts, along with a few rare words that are present only in certain blog entries. After a few iterations of tag review with the content team, the data scientist notices that the rare words are unusual but feasible. The data scientist also must ensure that the tag recommendations of the generated model do not include the stopwords. What should the data scientist do to meet these requirements?", "options": [ "Use the Amazon Comprehend entity recognition API operations. Remove the detected words from the blog post data. Replace the blog post data source in the S3 bucket.", "Run the SageMaker built-in principal component analysis (PCA) algorithm with the blog post data from the S3 bucket as the data source. Replace the blog post data in the S3 bucket with the results of the training job.", @@ -2160,7 +2160,7 @@ "references": "" }, { - "question": "QUESTION 193 A company wants to create a data repository in the AWS Cloud for machine learning (ML) projects. The company wants to use AWS to perform complete ML lifecycles and wants to use Amazon S3 for the data storage. All of the company's data currently resides on premises and is 40 TB in size. The company wants a solution that can transfer and automatically update data between the on-premises object storage and Amazon S3. The solution must support encryption, scheduling, monitoring, and data integrity validation. Which solution meets these requirements?", + "question": "A company wants to create a data repository in the AWS Cloud for machine learning (ML) projects. The company wants to use AWS to perform complete ML lifecycles and wants to use Amazon S3 for the data storage. All of the company's data currently resides on premises and is 40 TB in size. The company wants a solution that can transfer and automatically update data between the on-premises object storage and Amazon S3. 
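For the NTM stopword question above, the usual remedy is to strip stopwords from the corpus before retraining. A minimal sketch; the stopword list is illustrative, and a fuller list (for example, NLTK's) would normally be used.

```python
# Illustrative stopword list only; use a complete list in practice.
STOPWORDS = {"a", "an", "the", "and", "of", "to", "in"}

def remove_stopwords(text: str) -> str:
    # Keep every token that is not a stopword, preserving order.
    return " ".join(w for w in text.split() if w.lower() not in STOPWORDS)

print(remove_stopwords("An update to the library of the blog"))
# -> "update library blog"
```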
The solution must sup port encryption, scheduling, monitoring, and data integrity validation. Which solution meets these requirements?", "options": [ "Use the S3 sync command to compare the source S3 bucket and the destination S3 bucket.Determine which source files do not exist in the de stination S3 bucket and which source files weremodified.", "Use AWS Transfer for FTPS to transfer the files f rom the on-premises storage to Amazon S3.", @@ -2172,7 +2172,7 @@ "references": "" }, { - "question": "QUESTION 194 A company has video feeds and images of a subway tr ain station. The company wants to create a deep learning model that will alert the station man ager if any passenger crosses the yellow safety line when there is no train in the station. The ale rt will be based on the video feeds. The company wants the model to detect the yellow line, the pass engers who cross the yellow line, and the trains in the video feeds. This task requires labeling. The v ideo data must remain confidential. A data scientist creates a bounding box to label th e sample data and uses an object detection model. However, the object detection model cannot clearly demarcate the yellow line, the passengers who cross the yellow line, and the trains. Which labeling approach will help the company impro ve this model?", + "question": "A company has video feeds and images of a subway tr ain station. The company wants to create a deep learning model that will alert the station man ager if any passenger crosses the yellow safety line when there is no train in the station. The ale rt will be based on the video feeds. The company wants the model to detect the yellow line, the pass engers who cross the yellow line, and the trains in the video feeds. This task requires labeling. The v ideo data must remain confidential. A data scientist creates a bounding box to label th e sample data and uses an object detection model. However, the object detection model cannot clearly demarcate the yellow line, the passengers who cross the yellow line, and the trains. Which labeling approach will help the company impro ve this model?", "options": [ "Use Amazon Rekognition Custom Labels to label the dataset and create a custom AmazonRekognition object detection model. Create a privat e workforce. Use Amazon Augmented AI(Amazon A2I) to review the low-confidence predictio ns and retrain the custom Amazon Rekognitionmodel.", "Use an Amazon SageMaker Ground Truth object detec tion labeling task. Use Amazon MechanicalTurk as the labeling workforce.", @@ -2184,7 +2184,7 @@ "references": "" }, { - "question": "QUESTION 195 A data engineer at a bank is evaluating a new tabul ar dataset that includes customer data. The data engineer will use the customer data to cre ate a new model to predict customer behavior. After creating a correlation matrix for the variabl es, the data engineer notices that many of the 100 features are highly correlated with each other. Which steps should the data engineer take to addres s this issue? (Choose two.)", + "question": "A data engineer at a bank is evaluating a new tabul ar dataset that includes customer data. The data engineer will use the customer data to cre ate a new model to predict customer behavior. After creating a correlation matrix for the variabl es, the data engineer notices that many of the 100 features are highly correlated with each other. Which steps should the data engineer take to addres s this issue? 
(Choose two.)", "options": [ "Apply one-hot encoding category-based variables.", "Use a linear-based algorithm to train the model.", @@ -2196,7 +2196,7 @@ "references": "" }, { - "question": "QUESTION 196 A company is building a new version of a recommenda tion engine. Machine learning (ML) specialists need to keep adding new data from users to improve personalized recommendations. The ML specialists gather data from the users interactions on the platform and from sources such as external websites and social media. The pipeline cleans, transforms, enriches, and comp resses terabytes of data daily, and this data is stored in Amazon S3. A set of Python scripts was co ded to do the job and is stored in a large Amazon EC2 instance. The whole process takes more than 20 hours to finish, with each script taking at least an hour. The company wants to move the scripts out of Amazon EC2 into a more managed solution that will eliminate the need to maintain servers. Which approach will address all of these requiremen ts with the LEAST development effort?", + "question": "A company is building a new version of a recommenda tion engine. Machine learning (ML) specialists need to keep adding new data from users to improve personalized recommendations. The ML specialists gather data from the users interactions on the platform and from sources such as external websites and social media. The pipeline cleans, transforms, enriches, and comp resses terabytes of data daily, and this data is stored in Amazon S3. A set of Python scripts was co ded to do the job and is stored in a large Amazon EC2 instance. The whole process takes more than 20 hours to finish, with each script taking at least an hour. The company wants to move the scripts out of Amazon EC2 into a more managed solution that will eliminate the need to maintain servers. Which approach will address all of these requiremen ts with the LEAST development effort?", "options": [ "Load the data into an Amazon Redshift cluster. Ex ecute the pipeline by using SQL. Store the resultsin Amazon S3.", "Load the data into Amazon DynamoDB. Convert the s cripts to an AWS Lambda function. Executethe pipeline by triggering Lambda executions. Store the results in Amazon S3.", @@ -2208,7 +2208,7 @@ "references": "" }, { - "question": "QUESTION 197 A retail company is selling products through a glob al online marketplace. The company wants to use machine learning (ML) to analyze customer feedback and identify specific areas for improvement. A developer has built a tool that collects customer r eviews from the online marketplace and stores them in an Amazon S3 bucket. This process yields a dataset of 40 reviews. A data scientist building the ML models must identify additional sources of d ata to increase the size of the dataset. Which data sources should the data scientist use to augment the dataset of reviews? (Choose three.)", + "question": "A retail company is selling products through a glob al online marketplace. The company wants to use machine learning (ML) to analyze customer feedback and identify specific areas for improvement. A developer has built a tool that collects customer r eviews from the online marketplace and stores them in an Amazon S3 bucket. This process yields a dataset of 40 reviews. A data scientist building the ML models must identify additional sources of d ata to increase the size of the dataset. Which data sources should the data scientist use to augment the dataset of reviews? 
(Choose three.)", "options": [ "Emails exchanged by customers and the companys cu stomer service agents", "Social media posts containing the name of the com pany or its products", @@ -2220,7 +2220,7 @@ "references": "" }, { - "question": "QUESTION 198 A machine learning (ML) specialist wants to create a data preparation job that uses a PySpark script with complex window aggregation operations to creat e data for training and testing. The ML specialist needs to evaluate the impact of the numb er of features and the sample count on model performance. Which approach should the ML specialist use to dete rmine the ideal data transformations for the model?", + "question": "A machine learning (ML) specialist wants to create a data preparation job that uses a PySpark script with complex window aggregation operations to creat e data for training and testing. The ML specialist needs to evaluate the impact of the numb er of features and the sample count on model performance. Which approach should the ML specialist use to dete rmine the ideal data transformations for the model?", "options": [ "Add an Amazon SageMaker Debugger hook to the scri pt to capture key metrics. Run the script asan AWS Glue job.", "Add an Amazon SageMaker Experiments tracker to th e script to capture key metrics. Run the scriptas an AWS Glue job.", @@ -2232,7 +2232,7 @@ "references": "" }, { - "question": "QUESTION 199 A data scientist has a dataset of machine part imag es stored in Amazon Elastic File System (Amazon EFS). The data scientist needs to use Amazon SageMa ker to create and train an image classification machine learning model based on this dataset. Becau se of budget and time constraints, management wants the data scientist to create and t rain a model with the least number of steps and integration work required. How should the data scientist meet these requiremen ts?", + "question": "A data scientist has a dataset of machine part imag es stored in Amazon Elastic File System (Amazon EFS). The data scientist needs to use Amazon SageMa ker to create and train an image classification machine learning model based on this dataset. Becau se of budget and time constraints, management wants the data scientist to create and t rain a model with the least number of steps and integration work required. How should the data scientist meet these requiremen ts?", "options": [ "Mount the EFS file system to a SageMaker notebook and run a script that copies the data to anAmazon FSx for Lustre file system. Run the SageMake r training job with the FSx for Lustre file systemas the data source.", "Launch a transient Amazon EMR cluster. Configure steps to mount the EFS file system and copy the data to an Amazon S3 bucket by using S3DistCp. Run the SageMaker training job with Amazon S3 asthe data source.", @@ -2244,7 +2244,7 @@ "references": "" }, { - "question": "QUESTION 200 A retail company uses a machine learning (ML) model for daily sales forecasting. The companys brand manager reports that the model has provided i naccurate results for the past 3 weeks. At the end of each day, an AWS Glue job consolidate s the input data that is used for the forecasting with the actual daily sales data and the prediction s of the model. The AWS Glue job stores the data in Amazon S3. The companys ML team is using an Amazon SageMaker Studio notebook to gain an understanding about the source of the model's inacc uracies. 
What should the ML team do on the SageMaker Studio notebook to visualize the model's degradation MOST accurately?", + "question": "A retail company uses a machine learning (ML) model for daily sales forecasting. The company's brand manager reports that the model has provided inaccurate results for the past 3 weeks. At the end of each day, an AWS Glue job consolidates the input data that is used for the forecasting with the actual daily sales data and the predictions of the model. The AWS Glue job stores the data in Amazon S3. The company's ML team is using an Amazon SageMaker Studio notebook to gain an understanding about the source of the model's inaccuracies. What should the ML team do on the SageMaker Studio notebook to visualize the model's degradation MOST accurately?", "options": [ "Create a histogram of the daily sales over the la st 3 weeks. In addition, create a histogram of thedaily sales from before that period.", "Create a histogram of the model errors over the l ast 3 weeks. In addition, create a histogram ofthe model errors from before that period.", @@ -2256,7 +2256,7 @@ "references": "" }, { - "question": "QUESTION 201 An ecommerce company sends a weekly email newslette r to all of its customers. Management has hired a team of writers to create additional target ed content. A data scientist needs to identify five customer segments based on age, income, and locatio n. The customers current segmentation is unknown. The data scientist previously built an XGB oost model to predict the likelihood of a customer responding to an email based on age, incom e, and location. Why does the XGBoost model NOT meet the current req uirements, and how can this be fixed?", + "question": "An ecommerce company sends a weekly email newsletter to all of its customers. Management has hired a team of writers to create additional targeted content. A data scientist needs to identify five customer segments based on age, income, and location. The customers' current segmentation is unknown. The data scientist previously built an XGBoost model to predict the likelihood of a customer responding to an email based on age, income, and location. Why does the XGBoost model NOT meet the current requirements, and how can this be fixed?", "options": [ "The XGBoost model provides a true/false binary ou tput. Apply principal component analysis (PCA)with five feature dimensions to predict a segment.", "The XGBoost model provides a true/false binary ou tput. Increase the number of classes theXGBoost model predicts to five classes to predict a segment.", @@ -2268,7 +2268,7 @@ "references": "" }, { - "question": "QUESTION 202 A global financial company is using machine learnin g to automate its loan approval process. The company has a dataset of customer information. The dataset contains some categorical fields, such as customer location by city and housing status. Th e dataset also includes financial fields in differe nt units, such as account balances in US dollars and m onthly interest in US cents. The companys data scientists are using a gradient b oosting regression model to infer the credit score for each customer. The model has a training accurac y of 99% and a testing accuracy of 75%. The data scientists want to improve the models testing accur acy. Which process will improve the testing accuracy the MOST?", + "question": "A global financial company is using machine learning to automate its loan approval process. The company has a dataset of customer information.
The dataset contains some categorical fields, such as customer location by city and housing status. The dataset also includes financial fields in different units, such as account balances in US dollars and monthly interest in US cents. The company's data scientists are using a gradient boosting regression model to infer the credit score for each customer. The model has a training accuracy of 99% and a testing accuracy of 75%. The data scientists want to improve the model's testing accuracy. Which process will improve the testing accuracy the MOST?", "options": [ "Use a one-hot encoder for the categorical fields in the dataset. Perform standardization on thefinancial fields in the dataset. Apply L1 regulariz ation to the data.", "Use tokenization of the categorical fields in the dataset. Perform binning on the financial fields i nthe dataset. Remove the outliers in the data by usi ng the z-score.", @@ -2280,7 +2280,7 @@ "references": "" }, { - "question": "QUESTION 203 A machine learning (ML) specialist needs to extract embedding vectors from a text series. The goal is to provide a ready-to-ingest feature space for a da ta scientist to develop downstream ML predictive models. The text consists of curated sentences in E nglish. Many sentences use similar words but in different contexts. There are among the sentences, and the embedding space must differentiate between them. Which options can produce the required embedding ve ctors that capture word context and sequential QA information? (Choose two.)", + "question": "A machine learning (ML) specialist needs to extract embedding vectors from a text series. The goal is to provide a ready-to-ingest feature space for a data scientist to develop downstream ML predictive models. The text consists of curated sentences in English. Many sentences use similar words but in different contexts. There are among the sentences, and the embedding space must differentiate between them. Which options can produce the required embedding vectors that capture word context and sequential QA information? (Choose two.)", "options": [ "Amazon SageMaker seq2seq algorithm", "Amazon SageMaker BlazingText algorithm in Skip-gr am mode", @@ -2292,7 +2292,7 @@ "references": "" }, { - "question": "QUESTION 204 A retail company wants to update its customer suppo rt system. The company wants to implement automatic routing of customer claims to different q ueues to prioritize the claims by category. Currently, an operator manually performs the catego ry assignment and routing. After the operator classifies and routes the claim, the company stores the claims record in a central database. The claims record includes the claims category. The company has no data science team or experience in the field of machine learning (ML). The companys small development team needs a solution th at requires no ML expertise. Which solution meets these requirements?", + "question": "A retail company wants to update its customer support system. The company wants to implement automatic routing of customer claims to different queues to prioritize the claims by category. Currently, an operator manually performs the category assignment and routing. After the operator classifies and routes the claim, the company stores the claim's record in a central database. The claim's record includes the claim's category. The company has no data science team or experience in the field of machine learning (ML). The company's small development team needs a solution that requires no ML expertise.
Which solution meets these requirements?", "options": [ "Export the database to a .csv file with two colum ns: claim_label and claim_text. Use the AmazonSageMaker Object2Vec algorithm and the .csv file to train a model. Use SageMaker to deploy themodel to an inference endpoint. Develop a service i n the application to use the inference endpoint toprocess incoming claims, predict the labels, and ro ute the claims to the appropriate queue.", "Export the database to a .csv file with one colum n: claim_text. Use the Amazon SageMaker LatentDirichlet Allocation (LDA) algorithm and the .csv f ile to train a model. Use the LDA algorithm todetect labels automatically. Use SageMaker to deplo y the model to an inference endpoint. Develop aservice in the application to use the inference end point to process incoming claims, predict thelabels, and route the claims to the appropriate que ue.", @@ -2304,7 +2304,7 @@ "references": "" }, { - "question": "QUESTION 205 A machine learning (ML) specialist is using Amazon SageMaker hyperparameter optimization (HPO) to improve a models accuracy. The learning rate par ameter is specified in the following HPO configuration: During the results analysis, the ML specialist dete rmines that most of the training jobs had a learnin g rate between 0.01 and 0.1. The best result had a le arning rate of less than 0.01. Training jobs need t o run regularly over a changing dataset. The ML speci alist needs to find a tuning mechanism that uses different learning rates more evenly from the provi ded range between MinValue and MaxValue. Which solution provides the MOST accurate result?", + "question": "A machine learning (ML) specialist is using Amazon SageMaker hyperparameter optimization (HPO) to improve a models accuracy. The learning rate par ameter is specified in the following HPO configuration: During the results analysis, the ML specialist dete rmines that most of the training jobs had a learnin g rate between 0.01 and 0.1. The best result had a le arning rate of less than 0.01. Training jobs need t o run regularly over a changing dataset. The ML speci alist needs to find a tuning mechanism that uses different learning rates more evenly from the provi ded range between MinValue and MaxValue. Which solution provides the MOST accurate result?", "options": [ "Modify the HPO configuration as follows:Select the most accurate hyperparameter configurati on form this HPO job.", "Run three different HPO jobs that use different l earning rates form the following intervals forMinValue and MaxValue while using the same number o f training jobs for each HPO job:[0.01, 0.1][0.001, 0.01][0.0001, 0.001]Select the most accurate hyperparameter configurati on form these three HPO jobs. C. Modify the HPO configuration as follows:Select the most accurate hyperparameter configurati on form this training job.", @@ -2315,7 +2315,7 @@ "references": "" }, { - "question": "QUESTION 206 A manufacturing company wants to use machine learni ng (ML) to automate quality control in its facilities. The facilities are in remote locations and have limited internet connectivity. The company has 20 \u00d0\u00a2\u00d0' of training data that consists of label ed images of defective product parts. The training data is in the corporate on-premises data center. The company will use this data to train a model for real-time defect detection in new parts as the parts move on a conveyor belt in the facilities. 
Th e company needs a solution that minimizes costs for compute infrastructure and that maximizes the s calability of resources for training. The solution also must facilitate the companys use of an ML mode l in the low-connectivity environments. Which solution will meet these requirements?", + "question": "A manufacturing company wants to use machine learning (ML) to automate quality control in its facilities. The facilities are in remote locations and have limited internet connectivity. The company has 20 TB of training data that consists of labeled images of defective product parts. The training data is in the corporate on-premises data center. The company will use this data to train a model for real-time defect detection in new parts as the parts move on a conveyor belt in the facilities. The company needs a solution that minimizes costs for compute infrastructure and that maximizes the scalability of resources for training. The solution also must facilitate the company's use of an ML model in the low-connectivity environments. Which solution will meet these requirements?", "options": [ "Move the training data to an Amazon S3 bucket. Tr ain and evaluate the model by using AmazonSageMaker. Optimize the model by using SageMaker Ne o. Deploy the model on a SageMaker hostingservices endpoint.", "Train and evaluate the model on premises. Upload the model to an Amazon S3 bucket. Deploy themodel on an Amazon SageMaker hosting services endpo int.", @@ -2327,7 +2327,7 @@ "references": "" }, { - "question": "QUESTION 207 A company has an ecommerce website with a product r ecommendation engine built in TensorFlow. The recommendation engine endpoint is hosted by Ama zon SageMaker. Three compute-optimized instances support the expected peak load of the web site. Response times on the product recommendation page a re increasing at the beginning of each month. Some users are encountering errors. The webs ite receives the majority of its traffic between 8 AM and 6 PM on weekdays in a single time zone. Which of the following options are the MOST effecti ve in solving the issue while keeping costs to a minimum? (Choose two.)", + "question": "A company has an ecommerce website with a product recommendation engine built in TensorFlow. The recommendation engine endpoint is hosted by Amazon SageMaker. Three compute-optimized instances support the expected peak load of the website. Response times on the product recommendation page are increasing at the beginning of each month. Some users are encountering errors. The website receives the majority of its traffic between 8 AM and 6 PM on weekdays in a single time zone. Which of the following options are the MOST effective in solving the issue while keeping costs to a minimum? (Choose two.)", "options": [ "Configure the endpoint to use Amazon Elastic Infe rence (EI) accelerators.", "Create a new endpoint configuration with two prod uction variants.", @@ -2339,7 +2339,7 @@ "references": "" }, { - "question": "QUESTION 208 A real-estate company is launching a new product th at predicts the prices of new houses. The historical data for the properties and prices is st ored in .csv format in an Amazon S3 bucket. The dat a has a header, some categorical fields, and some mis sing values. The companys data scientists have used Python with a common open-source library to fi ll the missing values with zeros.
The data scientists have dropped all of the categorical fiel ds and have trained a model by using the opensource linear regression algorithm with the default parame ters. The accuracy of the predictions with the current mo del is below 50%. The company wants to improve the model performance and launch the new product as soon as possible. Which solution will meet these requirements with th e LEAST operational overhead?", + "question": "A real-estate company is launching a new product that predicts the prices of new houses. The historical data for the properties and prices is stored in .csv format in an Amazon S3 bucket. The data has a header, some categorical fields, and some missing values. The company's data scientists have used Python with a common open-source library to fill the missing values with zeros. The data scientists have dropped all of the categorical fields and have trained a model by using the open-source linear regression algorithm with the default parameters. The accuracy of the predictions with the current model is below 50%. The company wants to improve the model performance and launch the new product as soon as possible. Which solution will meet these requirements with the LEAST operational overhead?", "options": [ "Create a service-linked role for Amazon Elastic C ontainer Service (Amazon ECS) with access to theS3 bucket. Create an ECS cluster that is based on a n AWS Deep Learning Containers image. Write thecode to perform the feature engineering. Train a lo gistic regression model for predicting the price,pointing to the bucket with the dataset. Wait for t he training job to complete. Perform theinferences.", "Create an Amazon SageMaker notebook with a new IA M role that is associated with the notebook.Pull the dataset from the S3 bucket. Explore differ ent combinations of feature engineering transformations, regression algorithms, and hyperpa rameters. Compare all the results in thenotebook, and deploy the most accurate configuratio n in an endpoint for predictions.", @@ -2351,7 +2351,7 @@ "references": "" }, { - "question": "QUESTION 209 A data scientist is training a large PyTorch model by using Amazon SageMaker. It takes 10 hours on average to train the model on GPU instances. The da ta scientist suspects that training is not converging and that resource utilization is not optimal. What should the data scientist do to identify and a ddress training issues with the LEAST development effort? A. Use CPU utilization metrics that are captured in Amazon CloudWatch. Configure a CloudWatch alarm to stop the training job early if low CPU uti lization occurs.", + "question": "A data scientist is training a large PyTorch model by using Amazon SageMaker. It takes 10 hours on average to train the model on GPU instances. The data scientist suspects that training is not converging and that resource utilization is not optimal. What should the data scientist do to identify and address training issues with the LEAST development effort? A. Use CPU utilization metrics that are captured in Amazon CloudWatch. Configure a CloudWatch alarm to stop the training job early if low CPU utilization occurs.", "options": [ "Use high-resolution custom metrics that are captu red in Amazon CloudWatch.
Configure an AWSLambda function to analyze the metrics and to stop the training job early if issues are detected.", "Use the SageMaker Debugger vanishing_gradient and LowGPUUtilization built-in rules to detectissues and to launch the StopTrainingJob action if issues are detected.", @@ -2362,7 +2362,7 @@ "references": "" }, { - "question": "QUESTION 210 A company needs to deploy a chatbot to answer commo n questions from customers. The chatbot must base its answers on company documentation. Which solution will meet these requirements with th e LEAST development effort? A. Index company documents by using Amazon Kendra. I ntegrate the chatbot with Amazon Kendra by using the Amazon Kendra Query API operation to a nswer customer questions.", + "question": "A company needs to deploy a chatbot to answer common questions from customers. The chatbot must base its answers on company documentation. Which solution will meet these requirements with the LEAST development effort? A. Index company documents by using Amazon Kendra. Integrate the chatbot with Amazon Kendra by using the Amazon Kendra Query API operation to answer customer questions.", "options": [ "Train a Bidirectional Attention Flow (BiDAF) netw ork based on past customer questions andcompany documents. Deploy the model as a real-time Amazon SageMaker endpoint. Integrate themodel with the chatbot by using the SageMaker Runti me InvokeEndpoint API operation to answercustomer questions.", "Train an Amazon SageMaker BlazingText model based on past customer questions and companydocuments. Deploy the model as a real-time SageMake r endpoint. Integrate the model with thechatbot by using the SageMaker Runtime InvokeEndpoi nt API operation to answer customerquestions.", @@ -2373,7 +2373,7 @@ "references": "" }, { - "question": "QUESTION 211 A company ingests machine learning (ML) data from w eb advertising clicks into an Amazon S3 data lake. Click data is added to an Amazon Kinesis data stream by using the Kinesis Producer Library (KPL). The data is loaded into the S3 data lake fro m the data stream by using an Amazon Kinesis Data Firehose delivery stream. As the data volume increa ses, an ML specialist notices that the rate of dataingested into Amazon S3 is relatively constant. The re also is an increasing backlog of data for Kinesi s Data Streams and Kinesis Data Firehose to ingest. Which next step is MOST likely to improve the data ingestion rate into Amazon S3?", + "question": "A company ingests machine learning (ML) data from web advertising clicks into an Amazon S3 data lake. Click data is added to an Amazon Kinesis data stream by using the Kinesis Producer Library (KPL). The data is loaded into the S3 data lake from the data stream by using an Amazon Kinesis Data Firehose delivery stream. As the data volume increases, an ML specialist notices that the rate of data ingested into Amazon S3 is relatively constant. There also is an increasing backlog of data for Kinesis Data Streams and Kinesis Data Firehose to ingest. Which next step is MOST likely to improve the data ingestion rate into Amazon S3?", "options": [ "Increase the number of S3 prefixes for the delive ry stream to write to.", "Decrease the retention period for the data stream .", @@ -2385,7 +2385,7 @@ "references": "" }, { - "question": "QUESTION 212 A manufacturing company has a production line with sensors that collect hundreds of quality metrics. The company has stored sensor data and man ual inspection results in a data lake for several months.
To automate quality control, the machine le arning team must build an automated mechanism that determines whether the produced good s are good quality, replacement market quality, or scrap quality based on the manual inspe ction results. Which modeling approach will deliver the MOST accur ate prediction of product quality?", + "question": "A manufacturing company has a production line with sensors that collect hundreds of quality metrics. The company has stored sensor data and manual inspection results in a data lake for several months. To automate quality control, the machine learning team must build an automated mechanism that determines whether the produced goods are good quality, replacement market quality, or scrap quality based on the manual inspection results. Which modeling approach will deliver the MOST accurate prediction of product quality?", "options": [ "Amazon SageMaker DeepAR forecasting algorithm", "Amazon SageMaker XGBoost algorithm", @@ -2397,7 +2397,7 @@ "references": "" }, { - "question": "QUESTION 213 A media company wants to create a solution that ide ntifies celebrities in pictures that users upload. The company also wants to identify the IP address a nd the timestamp details from the users so the company can prevent users from uploading pictures f rom unauthorized locations. Which solution will meet these requirements with LE AST development effort?", + "question": "A media company wants to create a solution that identifies celebrities in pictures that users upload. The company also wants to identify the IP address and the timestamp details from the users so the company can prevent users from uploading pictures from unauthorized locations. Which solution will meet these requirements with LEAST development effort?", "options": [ "Use AWS Panorama to identify celebrities in the p ictures. Use AWS CloudTrail to capture IPaddress and timestamp details.", "Use AWS Panorama to identify celebrities in the p ictures. Make calls to the AWS Panorama DeviceSDK to capture IP address and timestamp details.", @@ -2409,7 +2409,7 @@ "references": "" }, { - "question": "QUESTION 214 A retail company is ingesting purchasing records fr om its network of 20,000 stores to Amazon S3 by using Amazon Kinesis Data Firehose. The company use s a small, server-based application in each store to send the data to AWS over the internet. Th e company uses this data to train a machine learning model that is retrained each day. The comp any's data science team has identified existing attributes on these records that could be combined to create an improved model. Which change will create the required transformed r ecords with the LEAST operational overhead?", + "question": "A retail company is ingesting purchasing records from its network of 20,000 stores to Amazon S3 by using Amazon Kinesis Data Firehose. The company uses a small, server-based application in each store to send the data to AWS over the internet. The company uses this data to train a machine learning model that is retrained each day. The company's data science team has identified existing attributes on these records that could be combined to create an improved model. Which change will create the required transformed records with the LEAST operational overhead?", "options": [ "Create an AWS Lambda function that can transform the incoming records. Enable datatransformation on the ingestion Kinesis Data Fireho se delivery stream.
Use the Lambda function asthe invocation target.", "Deploy an Amazon EMR cluster that runs Apache Spa rk and includes the transformation logic. UseAmazon EventBridge (Amazon CloudWatch Events) to sc hedule an AWS Lambda function to launchthe cluster each day and transform the records that accumulate in Amazon S3. Deliver thetransformed records to Amazon S3.", @@ -2421,7 +2421,7 @@ "references": "" }, { - "question": "QUESTION 215 A company wants to segment a large group of custome rs into subgroups based on shared characteristics. The companys data scientist is pla nning to use the Amazon SageMaker built-in kmeans clustering algorithm for this task. The data scient ist needs to determine the optimal number of subgroups (k) to use. Which data visualization approach will MOST accurat ely determine the optimal value of k?", + "question": "A company wants to segment a large group of customers into subgroups based on shared characteristics. The company's data scientist is planning to use the Amazon SageMaker built-in k-means clustering algorithm for this task. The data scientist needs to determine the optimal number of subgroups (k) to use. Which data visualization approach will MOST accurately determine the optimal value of k?", "options": [ "Calculate the principal component analysis (PCA) components. Run the k-means clusteringalgorithm for a range of k by using only the first two PCA components. For each value of k, create ascatter plot with a different color for each cluste r. The optimal value of k is the value where theclusters start to look reasonably separated.", "Calculate the principal component analysis (PCA) components. Create a line plot of the number ofcomponents against the explained variance. The opti mal value of k is the number of PCAcomponents after which the curve starts decreasing in a linear fashion.", @@ -2433,7 +2433,7 @@ "references": "" }, { - "question": "QUESTION 216 A car company is developing a machine learning solu tion to detect whether a car is present in an image. The image dataset consists of one million im ages. Each image in the dataset is 200 pixels in height by 200 pixels in width. Each image is labele d as either having a car or not having a car. Which architecture is MOST likely to produce a mode l that detects whether a car is present in an image with the highest accuracy?", + "question": "A car company is developing a machine learning solution to detect whether a car is present in an image. The image dataset consists of one million images. Each image in the dataset is 200 pixels in height by 200 pixels in width. Each image is labeled as either having a car or not having a car. Which architecture is MOST likely to produce a model that detects whether a car is present in an image with the highest accuracy?", "options": [ "Use a deep convolutional neural network (CNN) cla ssifier with the images as input. Include alinear output layer that outputs the probability th at an image contains a car.", "Use a deep convolutional neural network (CNN) cla ssifier with the images as input. Include asoftmax output layer that outputs the probability t hat an image contains a car.", @@ -2445,7 +2445,7 @@ "references": "" }, { - "question": "QUESTION 217 A data science team is working with a tabular datas et that the team stores in Amazon S3. The team wants to experiment with different feature transfor mations such as categorical feature encoding. Then the team wants to visualize the resulting dist ribution of the dataset.
After the team finds an appropriate set of feature transformations, the tea m wants to automate the workflow for feature transformations. Which solution will meet these requirements with th e MOST operational efficiency?", + "question": "A data science team is working with a tabular dataset that the team stores in Amazon S3. The team wants to experiment with different feature transformations such as categorical feature encoding. Then the team wants to visualize the resulting distribution of the dataset. After the team finds an appropriate set of feature transformations, the team wants to automate the workflow for feature transformations. Which solution will meet these requirements with the MOST operational efficiency?", "options": [ "Use Amazon SageMaker Data Wrangler preconfigured transformations to explore featuretransformations. Use SageMaker Data Wrangler templa tes for visualization. Export the featureprocessing workflow to a SageMaker pipeline for aut omation.", "Use an Amazon SageMaker notebook instance to expe riment with different featuretransformations. Save the transformations to Amazon S3. Use Amazon QuickSight for visualization.Package the feature processing steps into an AWS La mbda function for automation.", @@ -2457,7 +2457,7 @@ "references": "" }, { - "question": "QUESTION 218 A company wants to conduct targeted marketing to se ll solar panels to homeowners. The company wants to use machine learning (ML) technologies to identify which houses already have solar panels. The company has collected 8,000 satellite images as training data and will use Amazon SageMaker Ground Truth to label the data. The company has a small internal team that is worki ng on the project. The internal team has no ML expertise and no ML experience. Which solution will meet these requirements with th e LEAST amount of effort from the internal team?", + "question": "A company wants to conduct targeted marketing to sell solar panels to homeowners. The company wants to use machine learning (ML) technologies to identify which houses already have solar panels. The company has collected 8,000 satellite images as training data and will use Amazon SageMaker Ground Truth to label the data. The company has a small internal team that is working on the project. The internal team has no ML expertise and no ML experience. Which solution will meet these requirements with the LEAST amount of effort from the internal team?", "options": [ "Set up a private workforce that consists of the i nternal team. Use the private workforce and theSageMaker Ground Truth active learning feature to l abel the data. Use Amazon Rekognition CustomLabels for model training and hosting.", "Set up a private workforce that consists of the i nternal team. Use the private workforce to labelthe data. Use Amazon Rekognition Custom Labels for model training and hosting.", @@ -2469,7 +2469,7 @@ "references": "" }, { - "question": "QUESTION 219 A media company is building a computer vision model to analyze images that are on social media. The model consists of CNNs that the company trained by using images that the company stores in Amazon S3. The company used an Amazon SageMaker tra ining job in File mode with a single Amazon EC2 On-Demand Instance. Every day, the company updates the model by using a bout 10,000 images that the company has collected in the last 24 hours. The company configu res training with only one epoch. The company wants to speed up training and lower costs without the need to make any code changes.
Which solution will meet these requirements?", + "question": "A media company is building a computer vision model to analyze images that are on social media. The model consists of CNNs that the company trained by using images that the company stores in Amazon S3. The company used an Amazon SageMaker tra ining job in File mode with a single Amazon EC2 On-Demand Instance. Every day, the company updates the model by using a bout 10,000 images that the company has collected in the last 24 hours. The company configu res training with only one epoch. The company wants to speed up training and lower costs without the need to make any code changes. Which solution will meet these requirements?", "options": [ "Instead of File mode, configure the SageMaker tra ining job to use Pipe mode. Ingest the data froma pipe.", "Instead Of File mode, configure the SageMaker tra ining job to use FastFile mode with no Otherchanges.", @@ -2481,7 +2481,7 @@ "references": "" }, { - "question": "QUESTION 220 A data scientist is working on a forecast problem b y using a dataset that consists of .csv files that are stored in Amazon S3. The files contain a timestamp variable in the following format: March 1st, 2020, 08:14pm - There is a hypothesis about seasonal differences in the dependent variable. This number could be higher or lower for weekdays because some days and hours present varying values, so the day of the week, month, or hour could be an important factor. As a result, the data scientist needs to transform the timestamp into weekdays, month, and day as thre e separate variables to conduct an analysis. Which solution requires the LEAST operational overh ead to create a new dataset with the added features?", + "question": "A data scientist is working on a forecast problem b y using a dataset that consists of .csv files that are stored in Amazon S3. The files contain a timestamp variable in the following format: March 1st, 2020, 08:14pm - There is a hypothesis about seasonal differences in the dependent variable. This number could be higher or lower for weekdays because some days and hours present varying values, so the day of the week, month, or hour could be an important factor. As a result, the data scientist needs to transform the timestamp into weekdays, month, and day as thre e separate variables to conduct an analysis. Which solution requires the LEAST operational overh ead to create a new dataset with the added features?", "options": [ "Create an Amazon EMR cluster. Develop PySpark cod e that can read the timestamp variable as astring, transform and create the new variables, and save the dataset as a new file in Amazon S3.", "Create a processing job in Amazon SageMaker. Deve lop Python code that can read the timestampvariable as a string, transform and create the new variables, and save the dataset as a new file inAmazon S3.", @@ -2493,7 +2493,7 @@ "references": "" }, { - "question": "QUESTION 221 An automotive company uses computer vision in its a utonomous cars. The company trained its object detection models successfully by using trans fer learning from a convolutional neural network (CNN). The company trained the models by using PyTo rch through the Amazon SageMaker SDK. The vehicles have limited hardware and compute powe r. The company wants to optimize the model to reduce memory, battery, and hardware consumption without a significant sacrifice in accuracy. 
Which solution will improve the computational effic iency of the models?", + "question": "An automotive company uses computer vision in its autonomous cars. The company trained its object detection models successfully by using transfer learning from a convolutional neural network (CNN). The company trained the models by using PyTorch through the Amazon SageMaker SDK. The vehicles have limited hardware and compute power. The company wants to optimize the model to reduce memory, battery, and hardware consumption without a significant sacrifice in accuracy. Which solution will improve the computational efficiency of the models?", "options": [ "Use Amazon CloudWatch metrics to gain visibility into the SageMaker training weights, gradients,biases, and activation outputs. Compute the filter ranks based on the training information. Applypruning to remove the low-ranking filters. Set new weights based on the pruned set of filters. Run anew training job with the pruned model.", "Use Amazon SageMaker Ground Truth to build and ru n data labeling workflows. Collect a largerlabeled dataset with the labelling workflows. Run a new training job that uses the new labeled datawith previous training data.", @@ -2505,7 +2505,7 @@ "references": "" }, { - "question": "QUESTION 222 A chemical company has developed several machine le arning (ML) solutions to identify chemical process abnormalities. The time series values of in dependent variables and the labels are available for the past 2 years and are sufficient to accurate ly model the problem. The regular operation label is marked as 0. The abn ormal operation label is marked as 1 . Process abnormalities have a significant negative effect on the companys profits. The company must avoid these abnormalities. Which metrics will indicate an ML solution that wil l provide the GREATEST probability of detecting an abnormality?", + "question": "A chemical company has developed several machine learning (ML) solutions to identify chemical process abnormalities. The time series values of independent variables and the labels are available for the past 2 years and are sufficient to accurately model the problem. The regular operation label is marked as 0. The abnormal operation label is marked as 1. Process abnormalities have a significant negative effect on the company's profits. The company must avoid these abnormalities. Which metrics will indicate an ML solution that will provide the GREATEST probability of detecting an abnormality?", "options": [ "Precision = 0.91Recall = 0.6", "Precision = 0.61Recall = 0.98", @@ -2517,7 +2517,7 @@ "references": "" }, { - "question": "QUESTION 223 A pharmaceutical company performs periodic audits o f clinical trial sites to quickly resolve critical findings. The company stores audit documents in tex t format. Auditors have requested help from a data science team to quickly analyze the documents. The auditors need to discover the 10 main topics within the documents to prioritize and distr ibute the review work among the auditing team members. Documents that describe adverse events mus t receive the highest priority. A data scientist will use statistical modeling to d iscover abstract topics and to provide a list of th e top words for each category to help the auditors assess the relevance of the topic. Which algorithms are best suited to this scenario? (Choose two.)", + "question": "A pharmaceutical company performs periodic audits of clinical trial sites to quickly resolve critical findings.
The company stores audit documents in text format. Auditors have requested help from a data science team to quickly analyze the documents. The auditors need to discover the 10 main topics within the documents to prioritize and distribute the review work among the auditing team members. Documents that describe adverse events must receive the highest priority. A data scientist will use statistical modeling to discover abstract topics and to provide a list of the top words for each category to help the auditors assess the relevance of the topic. Which algorithms are best suited to this scenario? (Choose two.)", "options": [ "Latent Dirichlet allocation (LDA)", "Random Forest classifier", @@ -2529,7 +2529,7 @@ "references": "" }, { - "question": "QUESTION 224 A company wants to predict the classification of do cuments that are created from an application. New documents are saved to an Amazon S3 bucket ever y 3 seconds. The company has developed three versions of a machine learning (ML) model wit hin Amazon SageMaker to classify document text. The company wants to deploy these three versi ons to predict the classification of each document. Which approach will meet these requirements with th e LEAST operational overhead?", + "question": "A company wants to predict the classification of documents that are created from an application. New documents are saved to an Amazon S3 bucket every 3 seconds. The company has developed three versions of a machine learning (ML) model within Amazon SageMaker to classify document text. The company wants to deploy these three versions to predict the classification of each document. Which approach will meet these requirements with the LEAST operational overhead?", "options": [ "Configure an S3 event notification that invokes a n AWS Lambda function when new documentsare created. Configure the Lambda function to creat e three SageMaker batch transform jobs, onebatch transform job for each model for each documen t.", "Deploy all the models to a single SageMaker endpo int. Treat each model as a production variant.Configure an S3 event notification that invokes an AWS Lambda function when new documents arecreated. Configure the Lambda function to call each production variant and return the results of eachmodel.", @@ -2540,7 +2540,7 @@ "references": "" }, { - "question": "QUESTION 225 A company wants to detect credit card fraud. The co mpany has observed that an average of 2% of credit card transactions are fraudulent. A data sci entist trains a classifier on a year's worth of cre dit card transaction data. The classifier needs to iden tify the fraudulent transactions. The company wants to accurately capture as many fraudulent transactions as possible . Which metrics should the data scientist use to opti mize the classifier? (Select TWO.)", + "question": "A company wants to detect credit card fraud. The company has observed that an average of 2% of credit card transactions are fraudulent. A data scientist trains a classifier on a year's worth of credit card transaction data. The classifier needs to identify the fraudulent transactions. The company wants to accurately capture as many fraudulent transactions as possible. Which metrics should the data scientist use to optimize the classifier? (Select TWO.)", "options": [ "True positive rate", "Specificity", @@ -2552,7 +2552,7 @@ "references": "" }, { - "question": "QUESTION 226 Each morning, a data scientist at a rental car comp any creates insights about the previous days rental car reservation demands.
The company needs t o automate this process by streaming the data to Amazon S3 in near real time. The solution must d etect high-demand rental cars at each of the companys locations. The solution also must create a visualization dashboard that automatically refreshes with the most recent data. Which solution will meet these requirements with th e LEAST development time?", + "question": "Each morning, a data scientist at a rental car company creates insights about the previous day's rental car reservation demands. The company needs to automate this process by streaming the data to Amazon S3 in near real time. The solution must detect high-demand rental cars at each of the company's locations. The solution also must create a visualization dashboard that automatically refreshes with the most recent data. Which solution will meet these requirements with the LEAST development time?", "options": [ "Use Amazon Kinesis Data Firehose to stream the re servation data directly to Amazon S3. Detecthigh-demand outliers by using Amazon QuickSight ML Insights. Visualize the data in QuickSight.", "Use Amazon Kinesis Data Streams to stream the res ervation data directly to Amazon S3. Detecthigh-demand outliers by using the Random Cut Forest (RCF) trained model in Amazon SageMaker.Visualize the data in Amazon QuickSight.", @@ -2564,7 +2564,7 @@ "references": "" }, { - "question": "QUESTION 227 A network security vendor needs to ingest telemetry data from thousands of endpoints that run all over the world. The data is transmitted every 30 se conds in the form of records that contain 50 fields . Each record is up to 1 KB in size. The security ven dor uses Amazon Kinesis Data Streams to ingest the data. The vendor requires hourly summaries of the r ecords that Kinesis Data Streams ingests. The vendor will use Amazon Athena to query the records and to generate the summaries. The Athena queries will target 7 to 12 of the available data f ields. Which solution will meet these requirements with th e LEAST amount of customization to transform and store the ingested data?", + "question": "A network security vendor needs to ingest telemetry data from thousands of endpoints that run all over the world. The data is transmitted every 30 seconds in the form of records that contain 50 fields. Each record is up to 1 KB in size. The security vendor uses Amazon Kinesis Data Streams to ingest the data. The vendor requires hourly summaries of the records that Kinesis Data Streams ingests. The vendor will use Amazon Athena to query the records and to generate the summaries. The Athena queries will target 7 to 12 of the available data fields. Which solution will meet these requirements with the LEAST amount of customization to transform and store the ingested data?", "options": [ "Use AWS Lambda to read and aggregate the data hou rly. Transform the data and store it inAmazon S3 by using Amazon Kinesis Data Firehose.", "Use Amazon Kinesis Data Firehose to read and aggr egate the data hourly. Transform the data andstore it in Amazon S3 by using a short-lived Amazon EMR cluster.", @@ -2576,7 +2576,7 @@ "references": "" }, { - "question": "QUESTION 228 A machine learning (ML) specialist uploads 5 TB of data to an Amazon SageMaker Studio environment. The ML specialist performs initial dat a cleansing. Before the ML specialist begins to train a model, the ML specialist needs to create an d view an analysis report that details potential bi as in the uploaded data.
Which combination of actions will meet these requir ements with the LEAST operational overhead? (Choose two.)", + "question": "A machine learning (ML) specialist uploads 5 TB of data to an Amazon SageMaker Studio environment. The ML specialist performs initial data cleansing. Before the ML specialist begins to train a model, the ML specialist needs to create and view an analysis report that details potential bias in the uploaded data. Which combination of actions will meet these requirements with the LEAST operational overhead? (Choose two.)", "options": [ "Use SageMaker Clarify to automatically detect dat a bias", "Turn on the bias detection option in SageMaker Gr ound Truth to automatically analyze datafeatures.", @@ -2588,7 +2588,7 @@ "references": "" }, { - "question": "QUESTION 229 A medical device company is building a machine lear ning (ML) model to predict the likelihood of device recall based on customer data that the compa ny collects from a plain text survey. One of the survey questions asks which medications the custome r is taking. The data for this field contains the names of medications that customers enter manually. Customers misspell some of the medication names. The column that contains the medication name data gives a categorical feature with high cardinality but redundancy. What is the MOST effective way to encode this categ orical feature into a numeric feature?", + "question": "A medical device company is building a machine learning (ML) model to predict the likelihood of device recall based on customer data that the company collects from a plain text survey. One of the survey questions asks which medications the customer is taking. The data for this field contains the names of medications that customers enter manually. Customers misspell some of the medication names. The column that contains the medication name data gives a categorical feature with high cardinality but redundancy. What is the MOST effective way to encode this categorical feature into a numeric feature?", "options": [ "Spell check the column. Use Amazon SageMaker one- hot encoding on the column to transform acategorical feature to a numerical feature.", "Fix the spelling in the column by using char-RNN. Use Amazon SageMaker Data Wrangler one-hotencoding to transform a categorical feature to a nu merical feature.", @@ -2600,7 +2600,7 @@ "references": "" }, { - "question": "QUESTION 230 A manufacturing company wants to create a machine l earning (ML) model to predict when equipment is likely to fail. A data science team al ready constructed a deep learning model by using TensorFlow and a custom Python script in a local en vironment. The company wants to use Amazon SageMaker to train the model. Which TensorFlow estimator configuration will train the model MOST cost-effectively?", + "question": "A manufacturing company wants to create a machine learning (ML) model to predict when equipment is likely to fail. A data science team already constructed a deep learning model by using TensorFlow and a custom Python script in a local environment. The company wants to use Amazon SageMaker to train the model. Which TensorFlow estimator configuration will train the model MOST cost-effectively?", "options": [ "Turn on SageMaker Training Compiler by adding com piler_config=TrainingCompilerConfig() as aparameter. Pass the script to the estimator in the call to the TensorFlow fit() method.", "Turn on SageMaker Training Compiler by adding com piler_config=TrainingCompilerConfig() as aparameter.
Turn on managed spot training by setting the use_spot_instances parameter to True. Passthe script to the estimator in the call to the Tens orFlow fit() method.", @@ -2612,7 +2612,7 @@ "references": "" }, { - "question": "QUESTION 231 A company is creating an application to identify, c ount, and classify animal images that are uploaded to the companys website. The company is using the A mazon SageMaker image classification algorithm with an ImageNetV2 convolutional neural n etwork (CNN). The solution works well for most animal images but does not recognize many anim al species that are less common. The company obtains 10,000 labeled images of less c ommon animal species and stores the images in Amazon S3. A machine learning (ML) engineer needs t o incorporate the images into the model by using Pipe mode in SageMaker. Which combination of steps should the ML engineer t ake to train the model? (Choose two.)", + "question": "A company is creating an application to identify, count, and classify animal images that are uploaded to the company's website. The company is using the Amazon SageMaker image classification algorithm with an ImageNetV2 convolutional neural network (CNN). The solution works well for most animal images but does not recognize many animal species that are less common. The company obtains 10,000 labeled images of less common animal species and stores the images in Amazon S3. A machine learning (ML) engineer needs to incorporate the images into the model by using Pipe mode in SageMaker. Which combination of steps should the ML engineer take to train the model? (Choose two.)", "options": [ "Use a ResNet model. Initiate full training mode b y initializing the network with random weights.", "Use an Inception model that is available with the SageMaker image classification algorithm.", @@ -2624,7 +2624,7 @@ "references": "" }, { - "question": "QUESTION 232 A credit card company wants to identify fraudulent transactions in real time. A data scientist builds a machine learning model for this purpose. The transa ctional data is captured and stored in Amazon S3. The historic data is already labeled with two c lasses: fraud (positive) and fair transactions (negative). The data scientist removes all the miss ing data and builds a classifier by using the XGBoost algorithm in Amazon SageMaker. The model pr oduces the following results: True positive rate (TPR): 0.700 False negative rate (FNR): 0.300 True negative rate (TNR): 0.977 False positive rate (FPR): 0.023 Overall accuracy: 0.949 Which solution should the data scientist use to imp rove the performance of the model?", + "question": "A credit card company wants to identify fraudulent transactions in real time. A data scientist builds a machine learning model for this purpose. The transactional data is captured and stored in Amazon S3. The historic data is already labeled with two classes: fraud (positive) and fair transactions (negative). The data scientist removes all the missing data and builds a classifier by using the XGBoost algorithm in Amazon SageMaker. The model produces the following results: True positive rate (TPR): 0.700 False negative rate (FNR): 0.300 True negative rate (TNR): 0.977 False positive rate (FPR): 0.023 Overall accuracy: 0.949 Which solution should the data scientist use to improve the performance of the model?", "options": [ "Apply the Synthetic Minority Oversampling Techniq ue (SMOTE) on the minority class in thetraining dataset.
@@ -2636,7 +2636,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 233 A company's data scientist has trained a new machine learning model that performs better on test data than the company's existing model performs in the production environment. The data scientist wants to replace the existing model that runs on an Amazon SageMaker endpoint in the production environment. However, the company is concerned that the new model might not work well on the production environment data. The data scientist needs to perform A/B testing in the production environment to evaluate whether the new model performs well on production environment data. Which combination of steps must the data scientist take to perform the A/B testing? (Choose two.)",
+    "question": "A company's data scientist has trained a new machine learning model that performs better on test data than the company's existing model performs in the production environment. The data scientist wants to replace the existing model that runs on an Amazon SageMaker endpoint in the production environment. However, the company is concerned that the new model might not work well on the production environment data. The data scientist needs to perform A/B testing in the production environment to evaluate whether the new model performs well on production environment data. Which combination of steps must the data scientist take to perform the A/B testing? (Choose two.)",
    "options": [
      "Create a new endpoint configuration that includes a production variant for each of the two models.",
      "Create a new endpoint configuration that includes two target variants that point to different endpoints.",
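The production-variant option above maps to a single endpoint configuration that carries both models. A hedged boto3 sketch; the model names, endpoint names, instance type, and weights are placeholders:

```python
# Sketch: one endpoint configuration with two production variants for A/B testing.
import boto3

sm = boto3.client("sagemaker")
sm.create_endpoint_config(
    EndpointConfigName="ab-test-config",
    ProductionVariants=[
        {
            "VariantName": "existing-model",
            "ModelName": "model-v1",
            "InstanceType": "ml.m5.large",
            "InitialInstanceCount": 1,
            "InitialVariantWeight": 0.9,  # 90% of traffic stays on the old model
        },
        {
            "VariantName": "new-model",
            "ModelName": "model-v2",
            "InstanceType": "ml.m5.large",
            "InitialInstanceCount": 1,
            "InitialVariantWeight": 0.1,  # 10% goes to the candidate
        },
    ],
)
sm.update_endpoint(EndpointName="prod-endpoint", EndpointConfigName="ab-test-config")
```

Weights can later be shifted in place with update_endpoint_weights_and_capacities, so rollout or rollback needs no redeployment.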
@@ -2648,7 +2648,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 234 An online store is predicting future book sales by using a linear regression model that is based on past sales data. The data includes duration, a numerical feature that represents the number of days that a book has been listed in the online store. A data scientist performs an exploratory data analysis and discovers that the relationship between book sales and duration is skewed and non-linear. Which data transformation step should the data scientist take to improve the predictions of the model?",
+    "question": "An online store is predicting future book sales by using a linear regression model that is based on past sales data. The data includes duration, a numerical feature that represents the number of days that a book has been listed in the online store. A data scientist performs an exploratory data analysis and discovers that the relationship between book sales and duration is skewed and non-linear. Which data transformation step should the data scientist take to improve the predictions of the model?",
    "options": [
      "One-hot encoding",
      "Cartesian product transformation",
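Since the duration feature is skewed and non-linear, the standard remedy is a log transformation. A tiny sketch with illustrative values; log1p handles zeros safely:

```python
# Sketch: log-transform a skewed, non-negative numerical feature.
import numpy as np
import pandas as pd

df = pd.DataFrame({"duration": [1, 3, 10, 45, 300, 1200]})  # days listed (illustrative)
df["duration_log"] = np.log1p(df["duration"])  # compresses the long right tail
print(df)
```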
Use SageMaker Studio to analyze themetadata.", @@ -2696,7 +2696,7 @@ "references": "" }, { - "question": "QUESTION 238 A machine learning (ML) engineer has created a feat ure repository in Amazon SageMaker Feature Store for the company. The company has AWS accounts for development, integration, and production. The company hosts a feature store in th e development account. The company uses Amazon S3 buckets to store feature values offline. The company wants to share features and to allow the integration account and the production account to reuse the features that are in the feature repository. Which combination of steps will meet these requirem ents? (Select TWO.)", + "question": "A machine learning (ML) engineer has created a feat ure repository in Amazon SageMaker Feature Store for the company. The company has AWS accounts for development, integration, and production. The company hosts a feature store in th e development account. The company uses Amazon S3 buckets to store feature values offline. The company wants to share features and to allow the integration account and the production account to reuse the features that are in the feature repository. Which combination of steps will meet these requirem ents? (Select TWO.)", "options": [ "Create an IAM role in the development account tha t the integration account and productionaccount can assume. Attach IAM policies to the role that allow access to the feature repository andthe S3 buckets.", "Share the feature repository that is associated t he S3 buckets from the development account tothe integration account and the production account by using AWS Resource Access Manager (AWSRAM).", @@ -2708,7 +2708,7 @@ "references": "" }, { - "question": "QUESTION 239 A company processes millions of orders every day. T he company uses Amazon DynamoDB tables to store order information. When customers submit new orders, the new orders are immediately added to the DynamoDB tables. New orders arrive in the Dy namoDB tables continuously. A data scientist must build a peak-time prediction solution. The data scientist must also create an Amazon OuickSight dashboard to display near real-li me order insights. The data scientist needs to build a solution that will give QuickSight access t o the data as soon as new order information arrives . Which solution will meet these requirements with th e LEAST delay between when a new order is processed and when QuickSight can access the new or der information? A. Use AWS Glue to export the data from Amazon Dynam oDB to Amazon S3. Configure OuickSight to access the data in Amazon S3.", + "question": "A company processes millions of orders every day. T he company uses Amazon DynamoDB tables to store order information. When customers submit new orders, the new orders are immediately added to the DynamoDB tables. New orders arrive in the Dy namoDB tables continuously. A data scientist must build a peak-time prediction solution. The data scientist must also create an Amazon OuickSight dashboard to display near real-li me order insights. The data scientist needs to build a solution that will give QuickSight access t o the data as soon as new order information arrives . Which solution will meet these requirements with th e LEAST delay between when a new order is processed and when QuickSight can access the new or der information? A. Use AWS Glue to export the data from Amazon Dynam oDB to Amazon S3. 
@@ -2696,7 +2696,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 238 A machine learning (ML) engineer has created a feature repository in Amazon SageMaker Feature Store for the company. The company has AWS accounts for development, integration, and production. The company hosts a feature store in the development account. The company uses Amazon S3 buckets to store feature values offline. The company wants to share features and to allow the integration account and the production account to reuse the features that are in the feature repository. Which combination of steps will meet these requirements? (Select TWO.)",
+    "question": "A machine learning (ML) engineer has created a feature repository in Amazon SageMaker Feature Store for the company. The company has AWS accounts for development, integration, and production. The company hosts a feature store in the development account. The company uses Amazon S3 buckets to store feature values offline. The company wants to share features and to allow the integration account and the production account to reuse the features that are in the feature repository. Which combination of steps will meet these requirements? (Select TWO.)",
    "options": [
      "Create an IAM role in the development account that the integration account and production account can assume. Attach IAM policies to the role that allow access to the feature repository and the S3 buckets.",
      "Share the feature repository that is associated with the S3 buckets from the development account to the integration account and the production account by using AWS Resource Access Manager (AWS RAM).",
@@ -2708,7 +2708,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 239 A company processes millions of orders every day. The company uses Amazon DynamoDB tables to store order information. When customers submit new orders, the new orders are immediately added to the DynamoDB tables. New orders arrive in the DynamoDB tables continuously. A data scientist must build a peak-time prediction solution. The data scientist must also create an Amazon QuickSight dashboard to display near real-time order insights. The data scientist needs to build a solution that will give QuickSight access to the data as soon as new order information arrives. Which solution will meet these requirements with the LEAST delay between when a new order is processed and when QuickSight can access the new order information? A. Use AWS Glue to export the data from Amazon DynamoDB to Amazon S3. Configure QuickSight to access the data in Amazon S3.",
+    "question": "A company processes millions of orders every day. The company uses Amazon DynamoDB tables to store order information. When customers submit new orders, the new orders are immediately added to the DynamoDB tables. New orders arrive in the DynamoDB tables continuously. A data scientist must build a peak-time prediction solution. The data scientist must also create an Amazon QuickSight dashboard to display near real-time order insights. The data scientist needs to build a solution that will give QuickSight access to the data as soon as new order information arrives. Which solution will meet these requirements with the LEAST delay between when a new order is processed and when QuickSight can access the new order information? A. Use AWS Glue to export the data from Amazon DynamoDB to Amazon S3. Configure QuickSight to access the data in Amazon S3.",
    "options": [
      "Use Amazon Kinesis Data Streams to export the data from Amazon DynamoDB to Amazon S3. Configure QuickSight to access the data in Amazon S3.",
      "Use an API call from QuickSight to access the data that is in Amazon DynamoDB directly",
@@ -2719,7 +2719,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 240 A retail company wants to build a recommendation system for the company's website. The system needs to provide recommendations for existing users and needs to base those recommendations on each user's past browsing history. The system also must filter out any items that the user previously purchased. Which solution will meet these requirements with the LEAST development effort?",
+    "question": "A retail company wants to build a recommendation system for the company's website. The system needs to provide recommendations for existing users and needs to base those recommendations on each user's past browsing history. The system also must filter out any items that the user previously purchased. Which solution will meet these requirements with the LEAST development effort?",
    "options": [
      "Train a model by using a user-based collaborative filtering algorithm on Amazon SageMaker. Host the model on a SageMaker real-time endpoint. Configure an Amazon API Gateway API and an AWS Lambda function to handle real-time inference requests that the web application sends. Exclude the items that the user previously purchased from the results before sending the results back to the web application.",
      "Use an Amazon Personalize PERSONALIZED_RANKING recipe to train a model. Create a real-time filter to exclude items that the user previously purchased. Create and deploy a campaign on Amazon Personalize. Use the GetPersonalizedRanking API operation to get the real-time recommendations.",
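The Personalize option above hinges on a filter expression that excludes previously purchased items at recommendation time. A sketch with a placeholder dataset group ARN and an assumed "Purchase" event type in the interactions dataset:

```python
# Sketch: an Amazon Personalize filter that removes already-purchased items.
import boto3

personalize = boto3.client("personalize")
personalize.create_filter(
    name="exclude-purchased-items",
    datasetGroupArn="arn:aws:personalize:us-east-1:111122223333:dataset-group/retail",
    filterExpression='EXCLUDE ItemID WHERE Interactions.EVENT_TYPE IN ("Purchase")',
)
```

The filter is then referenced when requesting recommendations, so no exclusion logic has to be hand-written in application code.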
@@ -2731,7 +2731,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 241 A data engineer is preparing a dataset that a retail company will use to predict the number of visitors to stores. The data engineer created an Amazon S3 bucket. The engineer subscribed the S3 bucket to an AWS Data Exchange data product for general economic indicators. The data engineer wants to join the economic indicator data to an existing table in Amazon Athena to merge with the business data. All these transformations must finish running in 30-60 minutes. Which solution will meet these requirements MOST cost-effectively?",
+    "question": "A data engineer is preparing a dataset that a retail company will use to predict the number of visitors to stores. The data engineer created an Amazon S3 bucket. The engineer subscribed the S3 bucket to an AWS Data Exchange data product for general economic indicators. The data engineer wants to join the economic indicator data to an existing table in Amazon Athena to merge with the business data. All these transformations must finish running in 30-60 minutes. Which solution will meet these requirements MOST cost-effectively?",
    "options": [
      "Configure the AWS Data Exchange product as a producer for an Amazon Kinesis data stream. Use an Amazon Kinesis Data Firehose delivery stream to transfer the data to Amazon S3. Run an AWS Glue job that will merge the existing business data with the Athena table. Write the result set back to Amazon S3.",
      "Use an S3 event on the AWS Data Exchange S3 bucket to invoke an AWS Lambda function. Program the Lambda function to use Amazon SageMaker Data Wrangler to merge the existing business data with the Athena table. Write the result set back to Amazon S3.",
@@ -2743,7 +2743,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 242 A social media company wants to develop a machine learning (ML) model to detect inappropriate or offensive content in images. The company has collected a large dataset of labeled images and plans to use the built-in Amazon SageMaker image classification algorithm to train the model. The company also intends to use SageMaker pipe mode to speed up the training. The company splits the dataset into training, validation, and testing datasets. The company stores the training and validation images in folders that are named Training and Validation, respectively. The folders contain subfolders that correspond to the names of the dataset classes. The company resizes the images to the same size and generates two input manifest files named training.lst and validation.lst, for the training dataset and the validation dataset, respectively. Finally, the company creates two separate Amazon S3 buckets for uploads of the training dataset and the validation dataset. Which additional data preparation steps should the company take before uploading the files to Amazon S3?",
+    "question": "A social media company wants to develop a machine learning (ML) model to detect inappropriate or offensive content in images. The company has collected a large dataset of labeled images and plans to use the built-in Amazon SageMaker image classification algorithm to train the model. The company also intends to use SageMaker pipe mode to speed up the training. The company splits the dataset into training, validation, and testing datasets. The company stores the training and validation images in folders that are named Training and Validation, respectively. The folders contain subfolders that correspond to the names of the dataset classes. The company resizes the images to the same size and generates two input manifest files named training.lst and validation.lst, for the training dataset and the validation dataset, respectively. Finally, the company creates two separate Amazon S3 buckets for uploads of the training dataset and the validation dataset. Which additional data preparation steps should the company take before uploading the files to Amazon S3?",
    "options": [
      "Generate two Apache Parquet files, training.parquet and validation.parquet, by reading the images into a Pandas data frame and storing the data frame as a Parquet file. Upload the Parquet files to the training S3 bucket",
      "Compress the training and validation directories by using the Snappy compression library. Upload the manifest and compressed files to the training S3 bucket",
@@ -2755,7 +2755,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 243 A company operates large cranes at a busy port. The company plans to use machine learning (ML) for predictive maintenance of the cranes to avoid unexpected breakdowns and to improve productivity. The company already uses sensor data from each crane to monitor the health of the cranes in real time. The sensor data includes rotation speed, tension, energy consumption, vibration, pressure, and temperature for each crane. The company contracts AWS ML experts to implement an ML solution. Which potential findings would indicate that an ML-based solution is suitable for this scenario? (Select TWO.)",
+    "question": "A company operates large cranes at a busy port. The company plans to use machine learning (ML) for predictive maintenance of the cranes to avoid unexpected breakdowns and to improve productivity. The company already uses sensor data from each crane to monitor the health of the cranes in real time. The sensor data includes rotation speed, tension, energy consumption, vibration, pressure, and temperature for each crane. The company contracts AWS ML experts to implement an ML solution. Which potential findings would indicate that an ML-based solution is suitable for this scenario? (Select TWO.)",
    "options": [
      "The historical sensor data does not include a significant number of data points and attributes for certain time periods.",
      "The historical sensor data shows that simple rule-based thresholds can predict crane failures.",
@@ -2766,7 +2766,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 244 A company wants to create an artificial intelligence (AI) yoga instructor that can lead large classes of students. The company needs to create a feature that can accurately count the number of students who are in a class. The company also needs a feature that can differentiate students who are performing a yoga stretch correctly from students who are performing a stretch incorrectly. To determine whether students are performing a stretch correctly, the solution needs to measure the location and angle of each student's arms and legs. A data scientist must use Amazon SageMaker to process video footage of a yoga class by extracting image frames and applying computer vision models. Which combination of models will meet these requirements with the LEAST effort? (Select TWO.)",
+    "question": "A company wants to create an artificial intelligence (AI) yoga instructor that can lead large classes of students. The company needs to create a feature that can accurately count the number of students who are in a class. The company also needs a feature that can differentiate students who are performing a yoga stretch correctly from students who are performing a stretch incorrectly. To determine whether students are performing a stretch correctly, the solution needs to measure the location and angle of each student's arms and legs. A data scientist must use Amazon SageMaker to process video footage of a yoga class by extracting image frames and applying computer vision models. Which combination of models will meet these requirements with the LEAST effort? (Select TWO.)",
    "options": [
      "Image Classification",
      "Optical Character Recognition (OCR)",
@@ -2778,7 +2778,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 245 A wildlife research company has a set of images of lions and cheetahs. The company created a dataset of the images. The company labeled each image with a binary label that indicates whether an image contains a lion or cheetah. The company wants to train a model to identify whether new images contain a lion or cheetah. Which Amazon SageMaker algorithm will meet this requirement?",
+    "question": "A wildlife research company has a set of images of lions and cheetahs. The company created a dataset of the images. The company labeled each image with a binary label that indicates whether an image contains a lion or cheetah. The company wants to train a model to identify whether new images contain a lion or cheetah. Which Amazon SageMaker algorithm will meet this requirement?",
    "options": [
      "XGBoost",
      "Image Classification - TensorFlow",
@@ -2790,7 +2790,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 246 An ecommerce company has used Amazon SageMaker to deploy a factorization machines (FM) model to suggest products for customers. The company's data science team has developed two new models by using the TensorFlow and PyTorch deep learning frameworks. The company needs to use A/B testing to evaluate the new models against the deployed model. The required A/B testing setup is as follows: Send 70% of traffic to the FM model, 15% of traffic to the TensorFlow model, and 15% of traffic to the PyTorch model. For customers who are from Europe, send all traffic to the TensorFlow model. Which architecture can the company use to implement the required A/B testing setup?",
+    "question": "An ecommerce company has used Amazon SageMaker to deploy a factorization machines (FM) model to suggest products for customers. The company's data science team has developed two new models by using the TensorFlow and PyTorch deep learning frameworks. The company needs to use A/B testing to evaluate the new models against the deployed model. The required A/B testing setup is as follows: Send 70% of traffic to the FM model, 15% of traffic to the TensorFlow model, and 15% of traffic to the PyTorch model. For customers who are from Europe, send all traffic to the TensorFlow model. Which architecture can the company use to implement the required A/B testing setup?",
    "options": [
      "Create two new SageMaker endpoints for the TensorFlow and PyTorch models in addition to the existing SageMaker endpoint. Create an Application Load Balancer. Create a target group for each endpoint. Configure listener rules and add weight to the target groups. To send traffic to the TensorFlow model for customers who are from Europe, create an additional listener rule to forward traffic to the TensorFlow target group.",
      "Create two production variants for the TensorFlow and PyTorch models. Create an auto scaling policy and configure the desired A/B weights to direct traffic to each production variant. Update the existing SageMaker endpoint with the auto scaling policy. To send traffic to the TensorFlow model for customers who are from Europe, set the TargetVariant header in the request to point to the variant name of the TensorFlow model.",
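The TargetVariant mechanism referenced in these options lets a caller pin a single request to one variant of a multi-variant endpoint, bypassing the weighted split. A sketch with invented endpoint and variant names:

```python
# Sketch: route one request to a specific production variant.
import boto3

runtime = boto3.client("sagemaker-runtime")
response = runtime.invoke_endpoint(
    EndpointName="product-recommender",
    TargetVariant="tensorflow-variant",  # e.g., force European traffic here
    ContentType="application/json",
    Body=b'{"user_id": "123"}',
)
print(response["Body"].read())
```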
@@ -2802,7 +2802,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 247 A data scientist stores financial datasets in Amazon S3. The data scientist uses Amazon Athena to query the datasets by using SQL. The data scientist uses Amazon SageMaker to deploy a machine learning (ML) model. The data scientist wants to obtain inferences from the model at the SageMaker endpoint. However, when the data scientist attempts to invoke the SageMaker endpoint, the data scientist receives SQL statement failures. The data scientist's IAM user is currently unable to invoke the SageMaker endpoint. Which combination of actions will give the data scientist's IAM user the ability to invoke the SageMaker endpoint? (Select THREE.)",
+    "question": "A data scientist stores financial datasets in Amazon S3. The data scientist uses Amazon Athena to query the datasets by using SQL. The data scientist uses Amazon SageMaker to deploy a machine learning (ML) model. The data scientist wants to obtain inferences from the model at the SageMaker endpoint. However, when the data scientist attempts to invoke the SageMaker endpoint, the data scientist receives SQL statement failures. The data scientist's IAM user is currently unable to invoke the SageMaker endpoint. Which combination of actions will give the data scientist's IAM user the ability to invoke the SageMaker endpoint? (Select THREE.)",
    "options": [
      "Attach the AmazonAthenaFullAccess AWS managed policy to the user identity.",
      "Include a policy statement for the data scientist's IAM user that allows the IAM user to perform the sagemaker:InvokeEndpoint action.",
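The InvokeEndpoint permission named in the options above is an ordinary identity-based IAM statement. A sketch that creates such a policy; the account ID, endpoint ARN, and policy name are placeholders:

```python
# Sketch: a minimal policy allowing InvokeEndpoint on one SageMaker endpoint.
import json
import boto3

policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": "sagemaker:InvokeEndpoint",
            "Resource": "arn:aws:sagemaker:us-east-1:111122223333:endpoint/financial-model",
        }
    ],
}
iam = boto3.client("iam")
iam.create_policy(
    PolicyName="InvokeFinancialModelEndpoint",
    PolicyDocument=json.dumps(policy),
)
```

Attaching this policy to the data scientist's user (or a group the user belongs to) grants exactly the invoke permission and nothing broader.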
@@ -2814,7 +2814,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 248 A company is using Amazon SageMaker to build a machine learning (ML) model to predict customer churn based on customer call transcripts. Audio files from customer calls are located in an on-premises VoIP system that has petabytes of recorded calls. The on-premises infrastructure has high-velocity networking and connects to the company's AWS infrastructure through a VPN connection over a 100 Mbps connection. The company has an algorithm for transcribing customer calls that requires GPUs for inference. The company wants to store these transcriptions in an Amazon S3 bucket in the AWS Cloud for model development. Which solution should an ML specialist use to deliver the transcriptions to the S3 bucket as quickly as possible?",
+    "question": "A company is using Amazon SageMaker to build a machine learning (ML) model to predict customer churn based on customer call transcripts. Audio files from customer calls are located in an on-premises VoIP system that has petabytes of recorded calls. The on-premises infrastructure has high-velocity networking and connects to the company's AWS infrastructure through a VPN connection over a 100 Mbps connection. The company has an algorithm for transcribing customer calls that requires GPUs for inference. The company wants to store these transcriptions in an Amazon S3 bucket in the AWS Cloud for model development. Which solution should an ML specialist use to deliver the transcriptions to the S3 bucket as quickly as possible?",
    "options": [
      "Order and use an AWS Snowball Edge Compute Optimized device with an NVIDIA Tesla module to run the transcription algorithm. Use AWS DataSync to send the resulting transcriptions to the transcription S3 bucket.",
      "Order and use an AWS Snowcone device with Amazon EC2 Inf1 instances to run the transcription algorithm. Use AWS DataSync to send the resulting transcriptions to the transcription S3 bucket",
@@ -2826,7 +2826,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 249 A data scientist is building a linear regression model. The scientist inspects the dataset and notices that the mode of the distribution is lower than the median, and the median is lower than the mean. Which data transformation will give the data scientist the ability to apply a linear regression model?",
+    "question": "A data scientist is building a linear regression model. The scientist inspects the dataset and notices that the mode of the distribution is lower than the median, and the median is lower than the mean. Which data transformation will give the data scientist the ability to apply a linear regression model?",
    "options": [
      "Exponential transformation",
      "Logarithmic transformation",
@@ -2838,7 +2838,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 250 A company is planning a marketing campaign to promote a new product to existing customers. The company has data for past promotions that are similar. The company decides to try an experiment to send a more expensive marketing package to a smaller number of customers. The company wants to target the marketing campaign to customers who are most likely to buy the new product. The experiment requires that at least 90% of the customers who are likely to purchase the new product receive the marketing materials. The company trains a model by using the linear learner algorithm in Amazon SageMaker. The model has a recall score of 80% and a precision of 75%. How should the company retrain the model to meet these requirements?",
+    "question": "A company is planning a marketing campaign to promote a new product to existing customers. The company has data for past promotions that are similar. The company decides to try an experiment to send a more expensive marketing package to a smaller number of customers. The company wants to target the marketing campaign to customers who are most likely to buy the new product. The experiment requires that at least 90% of the customers who are likely to purchase the new product receive the marketing materials. The company trains a model by using the linear learner algorithm in Amazon SageMaker. The model has a recall score of 80% and a precision of 75%. How should the company retrain the model to meet these requirements?",
    "options": [
      "Set the target_recall hyperparameter to 90%. Set the binary_classifier_model_selection_criteria hyperparameter to recall_at_target_precision.",
      "Set the target_precision hyperparameter to 90%. Set the binary_classifier_model_selection_criteria hyperparameter to precision_at_target_recall.",
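The linear learner hyperparameters named in these options can be set on the built-in algorithm roughly as follows. The role ARN and S3 paths are placeholders, and the exact criteria values should be checked against the current linear learner documentation:

```python
# Sketch: train the built-in linear learner with a 90% recall floor.
import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator

session = sagemaker.Session()
image = image_uris.retrieve("linear-learner", session.boto_region_name)
estimator = Estimator(
    image_uri=image,
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=session,
)
estimator.set_hyperparameters(
    predictor_type="binary_classifier",
    target_recall=0.9,  # reach at least 90% of likely buyers
    binary_classifier_model_selection_criteria="precision_at_target_recall",
)
estimator.fit({"train": "s3://example-bucket/train/"})
```

With this criteria setting, model selection maximizes precision subject to the recall constraint, which matches the campaign requirement.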
Which solution will meet these requirements?", "options": [ "Perform classification every month by using super vised learning of the 20X3 outcome categoriesbased on claim contents.", "Perform reinforcement learning by using claim IDs and dates Instruct the insurance agents whosubmit the claim records to estimate the expected n umber of claims in each outcome category everymonth", @@ -2862,7 +2862,7 @@ "references": "" }, { - "question": "QUESTION 252 A retail company stores 100 GB of daily transaction al data in Amazon S3 at periodic intervals. The company wants to identify the schema of the transac tional data. The company also wants to perform transformations o n the transactional data that is in Amazon S3. The company wants to use a machine learning (ML) ap proach to detect fraud in the transformed data. Which combination of solutions will meet these requ irements with the LEAST operational overhead? {Select THREE.)", + "question": "A retail company stores 100 GB of daily transaction al data in Amazon S3 at periodic intervals. The company wants to identify the schema of the transac tional data. The company also wants to perform transformations o n the transactional data that is in Amazon S3. The company wants to use a machine learning (ML) ap proach to detect fraud in the transformed data. Which combination of solutions will meet these requ irements with the LEAST operational overhead? {Select THREE.)", "options": [ "Use Amazon Athena to scan the data and identify t he schema.", "Use AWS Glue crawlers to scan the data and identi fy the schema.", @@ -2874,7 +2874,7 @@ "references": "" }, { - "question": "QUESTION 253 A data scientist uses Amazon SageMaker Data Wrangle r to define and perform transformations and feature engineering on historical data. The data sc ientist saves the transformations to SageMaker Feat ure Store. The historical data is periodically uploaded to an Amazon S3 bucket. The data scientist needs to transform the new historic data and add it to the o nline feature store The data scientist needs to prepare the .....historic data for training and inf erence by using native integrations. Which solution will meet these requirements with th e LEAST development effort?", + "question": "A data scientist uses Amazon SageMaker Data Wrangle r to define and perform transformations and feature engineering on historical data. The data sc ientist saves the transformations to SageMaker Feat ure Store. The historical data is periodically uploaded to an Amazon S3 bucket. The data scientist needs to transform the new historic data and add it to the o nline feature store The data scientist needs to prepare the .....historic data for training and inf erence by using native integrations. Which solution will meet these requirements with th e LEAST development effort?", "options": [ "Use AWS Lambda to run a predefined SageMaker pipe line to perform the transformations on eachnew dataset that arrives in the S3 bucket.", "Run an AWS Step Functions step and a predefined S ageMaker pipeline to perform thetransformations on each new dalaset that arrives in the S3 bucket", @@ -2886,7 +2886,7 @@ "references": "" }, { - "question": "QUESTION 254 A data scientist at a financial services company us ed Amazon SageMaker to train and deploy a model that predicts loan defaults. The model analyzes new loan applications and predicts the risk of loan default. To train the model, the data scientist man ually extracted loan data from a database. 
@@ -2874,7 +2874,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 253 A data scientist uses Amazon SageMaker Data Wrangler to define and perform transformations and feature engineering on historical data. The data scientist saves the transformations to SageMaker Feature Store. The historical data is periodically uploaded to an Amazon S3 bucket. The data scientist needs to transform the new historic data and add it to the online feature store. The data scientist needs to prepare the new historic data for training and inference by using native integrations. Which solution will meet these requirements with the LEAST development effort?",
+    "question": "A data scientist uses Amazon SageMaker Data Wrangler to define and perform transformations and feature engineering on historical data. The data scientist saves the transformations to SageMaker Feature Store. The historical data is periodically uploaded to an Amazon S3 bucket. The data scientist needs to transform the new historic data and add it to the online feature store. The data scientist needs to prepare the new historic data for training and inference by using native integrations. Which solution will meet these requirements with the LEAST development effort?",
    "options": [
      "Use AWS Lambda to run a predefined SageMaker pipeline to perform the transformations on each new dataset that arrives in the S3 bucket.",
      "Run an AWS Step Functions step and a predefined SageMaker pipeline to perform the transformations on each new dataset that arrives in the S3 bucket",
@@ -2886,7 +2886,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 254 A data scientist at a financial services company used Amazon SageMaker to train and deploy a model that predicts loan defaults. The model analyzes new loan applications and predicts the risk of loan default. To train the model, the data scientist manually extracted loan data from a database. The data scientist performed the model training and deployment steps in a Jupyter notebook that is hosted on SageMaker Studio notebooks. The model's prediction accuracy is decreasing over time. Which combination of steps is the MOST operationally efficient way for the data scientist to maintain the model's accuracy? (Select TWO.)",
+    "question": "A data scientist at a financial services company used Amazon SageMaker to train and deploy a model that predicts loan defaults. The model analyzes new loan applications and predicts the risk of loan default. To train the model, the data scientist manually extracted loan data from a database. The data scientist performed the model training and deployment steps in a Jupyter notebook that is hosted on SageMaker Studio notebooks. The model's prediction accuracy is decreasing over time. Which combination of steps is the MOST operationally efficient way for the data scientist to maintain the model's accuracy? (Select TWO.)",
    "options": [
      "Use SageMaker Pipelines to create an automated workflow that extracts fresh data, trains the model, and deploys a new version of the model.",
      "Configure SageMaker Model Monitor with an accuracy threshold to check for model drift. Initiate an Amazon CloudWatch alarm when the threshold is exceeded. Connect the workflow in SageMaker Pipelines with the CloudWatch alarm to automatically initiate retraining.",
@@ -2898,7 +2898,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 255 An insurance company developed a new experimental machine learning (ML) model to replace an existing model that is in production. The company must validate the quality of predictions from the new experimental model in a production environment before the company uses the new experimental model to serve general user requests. Only one model can serve user requests at a time. The company must measure the performance of the new experimental model without affecting the current live traffic. Which solution will meet these requirements?",
+    "question": "An insurance company developed a new experimental machine learning (ML) model to replace an existing model that is in production. The company must validate the quality of predictions from the new experimental model in a production environment before the company uses the new experimental model to serve general user requests. Only one model can serve user requests at a time. The company must measure the performance of the new experimental model without affecting the current live traffic. Which solution will meet these requirements?",
    "options": [
      "A/B testing",
      "Canary release",
@@ -2910,7 +2910,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 256 An ecommerce company wants to use machine learning (ML) to monitor fraudulent transactions on its website. The company is using Amazon SageMaker to research, train, deploy, and monitor the ML models. The historical transactions data is in a .csv file that is stored in Amazon S3. The data contains features such as the user's IP address, navigation time, average time on each page, and the number of clicks for each session. There is no label in the data to indicate if a transaction is anomalous. Which models should the company use in combination to detect anomalous transactions? (Select TWO.)",
+    "question": "An ecommerce company wants to use machine learning (ML) to monitor fraudulent transactions on its website. The company is using Amazon SageMaker to research, train, deploy, and monitor the ML models. The historical transactions data is in a .csv file that is stored in Amazon S3. The data contains features such as the user's IP address, navigation time, average time on each page, and the number of clicks for each session. There is no label in the data to indicate if a transaction is anomalous. Which models should the company use in combination to detect anomalous transactions? (Select TWO.)",
    "options": [
      "IP Insights",
      "K-nearest neighbors (k-NN)",
@@ -2922,7 +2922,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 257 A finance company needs to forecast the price of a commodity. The company has compiled a dataset of historical daily prices. A data scientist must train various forecasting models on 80% of the dataset and must validate the efficacy of those models on the remaining 20% of the dataset. How should the data scientist split the dataset into a training dataset and a validation dataset to compare model performance?",
+    "question": "A finance company needs to forecast the price of a commodity. The company has compiled a dataset of historical daily prices. A data scientist must train various forecasting models on 80% of the dataset and must validate the efficacy of those models on the remaining 20% of the dataset. How should the data scientist split the dataset into a training dataset and a validation dataset to compare model performance?",
    "options": [
      "Pick a date so that 80% of the data points precede the date. Assign that group of data points as the training dataset. Assign all the remaining data points to the validation dataset.",
      "Pick a date so that 80% of the data points occur after the date. Assign that group of data points as the training dataset. Assign all the remaining data points to the validation dataset.",
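For the 80/20 forecasting split above, a chronological cut (rather than a random shuffle) keeps future prices out of the training data. A pandas sketch on synthetic daily prices:

```python
# Sketch: time-ordered 80/20 train/validation split for a price series.
import numpy as np
import pandas as pd

dates = pd.date_range("2020-01-01", periods=1000, freq="D")
prices = pd.DataFrame({"price": np.random.default_rng(0).random(1000)}, index=dates)

cutoff = prices.index[int(len(prices) * 0.8)]     # the date with 80% of points before it
train = prices.loc[prices.index < cutoff]
validation = prices.loc[prices.index >= cutoff]   # the most recent 20%
```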
@@ -2934,7 +2934,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 258 A manufacturing company needs to identify returned smartphones that have been damaged by moisture. The company has an automated process that produces 2,000 diagnostic values for each phone. The database contains more than five million phone evaluations. The evaluation process is consistent, and there are no missing values in the data. A machine learning (ML) specialist has trained an Amazon SageMaker linear learner ML model to classify phones as moisture damaged or not moisture damaged by using all available features. The model's F1 score is 0.6. What changes in model training would MOST likely improve the model's F1 score? (Select TWO.)",
+    "question": "A manufacturing company needs to identify returned smartphones that have been damaged by moisture. The company has an automated process that produces 2,000 diagnostic values for each phone. The database contains more than five million phone evaluations. The evaluation process is consistent, and there are no missing values in the data. A machine learning (ML) specialist has trained an Amazon SageMaker linear learner ML model to classify phones as moisture damaged or not moisture damaged by using all available features. The model's F1 score is 0.6. What changes in model training would MOST likely improve the model's F1 score? (Select TWO.)",
    "options": [
      "Continue to use the SageMaker linear learner algorithm. Reduce the number of features with the SageMaker principal component analysis (PCA) algorithm.",
      "Continue to use the SageMaker linear learner algorithm. Reduce the number of features with the scikit-learn multi-dimensional scaling (MDS) algorithm.",
@@ -2946,7 +2946,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 259 A company deployed a machine learning (ML) model on the company website to predict real estate prices. Several months after deployment, an ML engineer notices that the accuracy of the model has gradually decreased. The ML engineer needs to improve the accuracy of the model. The engineer also needs to receive notifications for any future performance issues. Which solution will meet these requirements?",
+    "question": "A company deployed a machine learning (ML) model on the company website to predict real estate prices. Several months after deployment, an ML engineer notices that the accuracy of the model has gradually decreased. The ML engineer needs to improve the accuracy of the model. The engineer also needs to receive notifications for any future performance issues. Which solution will meet these requirements?",
    "options": [
      "Perform incremental training to update the model. Activate Amazon SageMaker Model Monitor to detect model performance issues and to send notifications.",
      "Use Amazon SageMaker Model Governance. Configure Model Governance to automatically adjust model hyperparameters. Create a performance threshold alarm in Amazon CloudWatch to send notifications.",
@@ -2958,7 +2958,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 260 A university wants to develop a targeted recruitment strategy to increase new student enrollment. A data scientist gathers information about the academic performance history of students. The data scientist wants to use the data to build student profiles. The university will use the profiles to direct resources to recruit students who are likely to enroll in the university. Which combination of steps should the data scientist take to predict whether a particular student applicant is likely to enroll in the university? (Select TWO.)",
+    "question": "A university wants to develop a targeted recruitment strategy to increase new student enrollment. A data scientist gathers information about the academic performance history of students. The data scientist wants to use the data to build student profiles. The university will use the profiles to direct resources to recruit students who are likely to enroll in the university. Which combination of steps should the data scientist take to predict whether a particular student applicant is likely to enroll in the university? (Select TWO.)",
    "options": [
      "Use Amazon SageMaker Ground Truth to sort the data into two groups named \"enrolled\" or \"not enrolled.\"",
      "Use a forecasting algorithm to run predictions.",
@@ -2970,7 +2970,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 261 A company's machine learning (ML) specialist is building a computer vision model to classify 10 different traffic signs. The company has stored 100 images of each class in Amazon S3, and the company has another 10,000 unlabeled images. All the images come from dash cameras and are a size of 224 pixels x 224 pixels. After several training runs, the model is overfitting on the training data. Which actions should the ML specialist take to address this problem? (Select TWO.)",
+    "question": "A company's machine learning (ML) specialist is building a computer vision model to classify 10 different traffic signs. The company has stored 100 images of each class in Amazon S3, and the company has another 10,000 unlabeled images. All the images come from dash cameras and are a size of 224 pixels x 224 pixels. After several training runs, the model is overfitting on the training data. Which actions should the ML specialist take to address this problem? (Select TWO.)",
    "options": [
      "Use Amazon SageMaker Ground Truth to label the unlabeled images",
      "Use image preprocessing to transform the images into grayscale images.",
@@ -2982,7 +2982,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 262 A machine learning (ML) specialist is using the Amazon SageMaker DeepAR forecasting algorithm to train a model on CPU-based Amazon EC2 On-Demand instances. The model currently takes multiple hours to train. The ML specialist wants to decrease the training time of the model. Which approaches will meet this requirement? (SELECT TWO.)",
+    "question": "A machine learning (ML) specialist is using the Amazon SageMaker DeepAR forecasting algorithm to train a model on CPU-based Amazon EC2 On-Demand instances. The model currently takes multiple hours to train. The ML specialist wants to decrease the training time of the model. Which approaches will meet this requirement? (SELECT TWO.)",
    "options": [
      "Replace On-Demand Instances with Spot Instances",
      "Configure model auto scaling dynamically to adjust the number of instances automatically.",
@@ -2994,7 +2994,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 263 An engraving company wants to automate its quality control process for plaques. The company performs the process before mailing each customized plaque to a customer. The company has created an Amazon S3 bucket that contains images of defects that should cause a plaque to be rejected. Low-confidence predictions must be sent to an internal team of reviewers who are using Amazon Augmented AI (Amazon A2I). Which solution will meet these requirements?",
+    "question": "An engraving company wants to automate its quality control process for plaques. The company performs the process before mailing each customized plaque to a customer. The company has created an Amazon S3 bucket that contains images of defects that should cause a plaque to be rejected. Low-confidence predictions must be sent to an internal team of reviewers who are using Amazon Augmented AI (Amazon A2I). Which solution will meet these requirements?",
    "options": [
      "Use Amazon Textract for automatic processing. Use Amazon A2I with Amazon Mechanical Turk for manual review.",
      "Use Amazon Rekognition for automatic processing. Use Amazon A2I with a private workforce option for manual review.",
@@ -3006,7 +3006,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 264 An online delivery company wants to choose the fastest courier for each delivery at the moment an order is placed. The company wants to implement this feature for existing users and new users of its application. Data scientists have trained separate models with XGBoost for this purpose, and the models are stored in Amazon S3. There is one model for each city where the company operates. The engineers are hosting these models in Amazon EC2 for responding to the web client requests, with one instance for each model, but the instances have only a 5% utilization in CPU and memory. The operations engineers want to avoid managing unnecessary resources. Which solution will enable the company to achieve its goal with the LEAST operational overhead?",
+    "question": "An online delivery company wants to choose the fastest courier for each delivery at the moment an order is placed. The company wants to implement this feature for existing users and new users of its application. Data scientists have trained separate models with XGBoost for this purpose, and the models are stored in Amazon S3. There is one model for each city where the company operates. The engineers are hosting these models in Amazon EC2 for responding to the web client requests, with one instance for each model, but the instances have only a 5% utilization in CPU and memory. The operations engineers want to avoid managing unnecessary resources. Which solution will enable the company to achieve its goal with the LEAST operational overhead?",
    "options": [
      "Create an Amazon SageMaker notebook instance for pulling all the models from Amazon S3 using the boto3 library. Remove the existing instances and use the notebook to perform a SageMaker batch transform for performing inferences offline for all the possible users in all the cities. Store the results in different files in Amazon S3. Point the web client to the files.",
      "Prepare an Amazon SageMaker Docker container based on the open-source multi-model server. Remove the existing instances and create a multi-model endpoint in SageMaker instead, pointing to the S3 bucket containing all the models. Invoke the endpoint from the web client at runtime, specifying the TargetModel parameter according to the city of each request.",
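The multi-model-endpoint option above serves many artifacts from one endpoint and selects the artifact per request with TargetModel. A sketch with invented endpoint and artifact names:

```python
# Sketch: invoke a SageMaker multi-model endpoint, choosing a per-city model.
import boto3

runtime = boto3.client("sagemaker-runtime")
response = runtime.invoke_endpoint(
    EndpointName="courier-ranking",
    TargetModel="paris.tar.gz",  # artifact key under the endpoint's S3 model prefix
    ContentType="text/csv",
    Body=b"3.2,1.7,0.4",
)
print(response["Body"].read())
```

Because models are loaded on demand from S3, one small fleet replaces the mostly idle per-city EC2 instances.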
@@ -3018,7 +3018,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 265 A company builds computer-vision models that use deep learning for the autonomous vehicle industry. A machine learning (ML) specialist uses an Amazon EC2 instance that has a CPU:GPU ratio of 12:1 to train the models. The ML specialist examines the instance metric logs and notices that the GPU is idle half of the time. The ML specialist must reduce training costs without increasing the duration of the training jobs. Which solution will meet these requirements?",
+    "question": "A company builds computer-vision models that use deep learning for the autonomous vehicle industry. A machine learning (ML) specialist uses an Amazon EC2 instance that has a CPU:GPU ratio of 12:1 to train the models. The ML specialist examines the instance metric logs and notices that the GPU is idle half of the time. The ML specialist must reduce training costs without increasing the duration of the training jobs. Which solution will meet these requirements?",
    "options": [
      "Switch to an instance type that has only CPUs.",
      "Use a heterogeneous cluster that has two different instance groups.",
@@ -3030,7 +3030,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 266 A company is building a new supervised classification model in an AWS environment. The company's data science team notices that the dataset has a large quantity of variables. All the variables are numeric. The model accuracy for training and validation is low. The model's processing time is affected by high latency. The data science team needs to increase the accuracy of the model and decrease the processing time. What should the data science team do to meet these requirements?",
+    "question": "A company is building a new supervised classification model in an AWS environment. The company's data science team notices that the dataset has a large quantity of variables. All the variables are numeric. The model accuracy for training and validation is low. The model's processing time is affected by high latency. The data science team needs to increase the accuracy of the model and decrease the processing time. What should the data science team do to meet these requirements?",
    "options": [
      "Create new features and interaction variables.",
      "Use a principal component analysis (PCA) model.",
@@ -3042,7 +3042,7 @@
    "references": ""
  },
  {
-    "question": "QUESTION 267 A company wants to forecast the daily price of newly launched products based on 3 years of data for older product prices, sales, and rebates. The time-series data has irregular timestamps and is missing some values. The data scientist must build a dataset to replace the missing values. The data scientist needs a solution that resamples the data daily and exports the data for further modeling. Which solution will meet these requirements with the LEAST implementation effort?",
+    "question": "A company wants to forecast the daily price of newly launched products based on 3 years of data for older product prices, sales, and rebates. The time-series data has irregular timestamps and is missing some values. The data scientist must build a dataset to replace the missing values. The data scientist needs a solution that resamples the data daily and exports the data for further modeling. Which solution will meet these requirements with the LEAST implementation effort?",
    "options": [
      "Use Amazon EMR Serverless with PySpark.",
      "Use AWS Glue DataBrew.",
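What the DataBrew option accomplishes, daily resampling plus gap filling, looks roughly like this in pandas terms; the timestamps and values are illustrative:

```python
# Sketch: resample irregular time-series data to daily frequency and fill gaps.
import pandas as pd

raw = pd.DataFrame(
    {"price": [10.0, 10.4, 11.1]},
    index=pd.to_datetime(["2023-01-01 09:13", "2023-01-03 17:40", "2023-01-07 08:02"]),
)
daily = raw.resample("D").mean().interpolate()  # one row per day, missing days interpolated
daily.to_csv("daily_prices.csv")                # export for further modeling
```

DataBrew packages the same resample-and-fill recipe as visual transforms, which is why it scores well on implementation effort.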
  {
-    "question": "QUESTION 270 A data engineer needs to provide a team of data scientists with the appropriate dataset to run machine learning training jobs. The data will be stored in Amazon S3. The data engineer is obtaining the data from an Amazon Redshift database and is using join queries to extract a single tabular dataset. A portion of the schema is as follows: TransactionTimestamp (Timestamp), CardName (Varchar), CardNo (Varchar). The data engineer must provide the data so that any row with a CardNo value of NULL is removed. Also, the TransactionTimestamp column must be separated into a TransactionDate column and a TransactionTime column. Finally, the CardName column must be renamed to NameOnCard. The data will be extracted on a monthly basis and will be loaded into an S3 bucket. The solution must minimize the effort that is needed to set up infrastructure for the ingestion and transformation. The solution must be automated and must minimize the load on the Amazon Redshift cluster. Which solution meets these requirements?",
+    "question": "A data engineer needs to provide a team of data scientists with the appropriate dataset to run machine learning training jobs. The data will be stored in Amazon S3. The data engineer is obtaining the data from an Amazon Redshift database and is using join queries to extract a single tabular dataset. A portion of the schema is as follows: TransactionTimestamp (Timestamp), CardName (Varchar), CardNo (Varchar). The data engineer must provide the data so that any row with a CardNo value of NULL is removed. Also, the TransactionTimestamp column must be separated into a TransactionDate column and a TransactionTime column. Finally, the CardName column must be renamed to NameOnCard. The data will be extracted on a monthly basis and will be loaded into an S3 bucket. The solution must minimize the effort that is needed to set up infrastructure for the ingestion and transformation. The solution must be automated and must minimize the load on the Amazon Redshift cluster. Which solution meets these requirements?",
     "options": [
       "Set up an Amazon EMR cluster. Create an Apache Spark job to read the data from the Amazon Redshift cluster and transform the data. Load the data into the S3 bucket. Schedule the job to run monthly.",
       "Set up an Amazon EC2 instance with a SQL client tool, such as SQL Workbench/J, to query the data from the Amazon Redshift cluster directly. Export the resulting dataset into a file. Upload the file into the S3 bucket. Perform these tasks monthly.",
@@ -3090,7 +3090,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 271 A data scientist obtains a tabular dataset that contains 150 correlated features with different ranges to build a regression model. The data scientist needs to achieve more efficient model training by implementing a solution that minimizes impact on the model's performance. The data scientist decides to perform a principal component analysis (PCA) preprocessing step to reduce the number of features to a smaller set of independent features before the data scientist uses the new features in the regression model. Which preprocessing step will meet these requirements?",
+    "question": "A data scientist obtains a tabular dataset that contains 150 correlated features with different ranges to build a regression model. The data scientist needs to achieve more efficient model training by implementing a solution that minimizes impact on the model's performance. The data scientist decides to perform a principal component analysis (PCA) preprocessing step to reduce the number of features to a smaller set of independent features before the data scientist uses the new features in the regression model. Which preprocessing step will meet these requirements?",
     "options": [
       "Use the Amazon SageMaker built-in algorithm for PCA on the dataset to transform the data.",
       "Load the data into Amazon SageMaker Data Wrangler. Scale the data with a Min Max Scaler transformation step. Use the SageMaker built-in algorithm for PCA on the scaled dataset to transform the data.",
@@ -3102,7 +3102,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 272 A financial services company wants to automate its loan approval process by building a machine learning (ML) model. Each loan data point contains credit history from a third-party data source and demographic information about the customer. Each loan approval prediction must come with a report that contains an explanation for why the customer was approved for a loan or was denied for a loan. The company will use Amazon SageMaker to build the model. Which solution will meet these requirements with the LEAST development effort?",
+    "question": "A financial services company wants to automate its loan approval process by building a machine learning (ML) model. Each loan data point contains credit history from a third-party data source and demographic information about the customer. Each loan approval prediction must come with a report that contains an explanation for why the customer was approved for a loan or was denied for a loan. The company will use Amazon SageMaker to build the model. Which solution will meet these requirements with the LEAST development effort?",
     "options": [
       "Use SageMaker Model Debugger to automatically debug the predictions, generate the explanation, and attach the explanation report.",
       "Use AWS Lambda to provide feature importance and partial dependence plots. Use the plots to generate and attach the explanation report.",
@@ -3114,7 +3114,7 @@
     "references": ""
   },
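QUESTION 271 turns on scaling features with different ranges before applying PCA, so that no single wide-ranged feature dominates the components. A minimal scikit-learn sketch of that ordering follows; the synthetic data stands in for the 150 correlated features, and StandardScaler is used here for illustration although the question's options mention a Min-Max scaler, which would occupy the same pipeline position:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Synthetic stand-in: 150 correlated columns driven by 10 hidden factors.
rng = np.random.default_rng(0)
latent = rng.normal(size=(500, 10))
X = latent @ rng.normal(size=(10, 150)) + 0.1 * rng.normal(size=(500, 150))

pipeline = make_pipeline(
    StandardScaler(),        # put features with different ranges on one scale
    PCA(n_components=0.95),  # keep components explaining 95% of the variance
)
X_reduced = pipeline.fit_transform(X)
print(X_reduced.shape)       # roughly (500, 10): far fewer than 150 features
```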
  {
-    "question": "QUESTION 273 An online retailer collects the following data on customer orders: demographics, behaviors, location, shipment progress, and delivery time. A data scientist joins all the collected datasets. The result is a single dataset that includes 980 variables. The data scientist must develop a machine learning (ML) model to identify groups of customers who are likely to respond to a marketing campaign. Which combination of algorithms should the data scientist use to meet this requirement? (Select TWO.)",
+    "question": "An online retailer collects the following data on customer orders: demographics, behaviors, location, shipment progress, and delivery time. A data scientist joins all the collected datasets. The result is a single dataset that includes 980 variables. The data scientist must develop a machine learning (ML) model to identify groups of customers who are likely to respond to a marketing campaign. Which combination of algorithms should the data scientist use to meet this requirement? (Select TWO.)",
     "options": [
       "Latent Dirichlet Allocation (LDA)",
       "K-means",
@@ -3126,7 +3126,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 274 A machine learning (ML) developer for an online retailer recently uploaded a sales dataset into Amazon SageMaker Studio. The ML developer wants to obtain importance scores for each feature of the dataset. The ML developer will use the importance scores to feature engineer the dataset. Which solution will meet this requirement with the LEAST development effort?",
+    "question": "A machine learning (ML) developer for an online retailer recently uploaded a sales dataset into Amazon SageMaker Studio. The ML developer wants to obtain importance scores for each feature of the dataset. The ML developer will use the importance scores to feature engineer the dataset. Which solution will meet this requirement with the LEAST development effort?",
     "options": [
       "Use SageMaker Data Wrangler to perform a Gini importance score analysis.",
       "Use a SageMaker notebook instance to perform principal component analysis (PCA).",
@@ -3138,7 +3138,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 275 A machine learning engineer is building a bird classification model. The engineer randomly separates a dataset into a training dataset and a validation dataset. During the training phase, the model achieves very high accuracy. However, the model did not generalize well during validation of the validation dataset. The engineer realizes that the original dataset was imbalanced. What should the engineer do to improve the validation accuracy of the model?",
+    "question": "A machine learning engineer is building a bird classification model. The engineer randomly separates a dataset into a training dataset and a validation dataset. During the training phase, the model achieves very high accuracy. However, the model did not generalize well during validation of the validation dataset. The engineer realizes that the original dataset was imbalanced. What should the engineer do to improve the validation accuracy of the model?",
     "options": [
       "Perform stratified sampling on the original dataset.",
       "Acquire additional data about the majority classes in the original dataset.",
@@ -3150,7 +3150,7 @@
     "references": ""
   },
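The stratified-sampling option in QUESTION 275 can be illustrated with scikit-learn's train_test_split, which accepts a stratify argument that preserves the class ratio in both splits. The toy labels below are an assumption standing in for the imbalanced bird dataset:

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Toy imbalanced labels: 900 of class 0, 100 of class 1 (9:1 ratio).
X = np.arange(1000).reshape(-1, 1)
y = np.array([0] * 900 + [1] * 100)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Both splits keep the original 9:1 ratio, so validation metrics stay comparable.
print(y_train.mean(), y_val.mean())  # both approximately 0.1
```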
  {
-    "question": "QUESTION 276 A data scientist is trying to improve the accuracy of a neural network classification model. The data scientist wants to run a large hyperparameter tuning job in Amazon SageMaker. However, previous smaller tuning jobs on the same model often ran for several weeks. The ML specialist wants to reduce the computation time required to run the tuning job. Which actions will MOST reduce the computation time for the hyperparameter tuning job? (Select TWO.) A. Use the Hyperband tuning strategy.",
+    "question": "A data scientist is trying to improve the accuracy of a neural network classification model. The data scientist wants to run a large hyperparameter tuning job in Amazon SageMaker. However, previous smaller tuning jobs on the same model often ran for several weeks. The ML specialist wants to reduce the computation time required to run the tuning job. Which actions will MOST reduce the computation time for the hyperparameter tuning job? (Select TWO.) A. Use the Hyperband tuning strategy.",
     "options": [
       "Increase the number of hyperparameters.",
       "Set a lower value for the MaxNumberOfTrainingJobs parameter.",
@@ -3161,7 +3161,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 277 A company is setting up a mechanism for data scientists and engineers from different departments to access an Amazon SageMaker Studio domain. Each department has a unique SageMaker Studio domain. The company wants to build a central proxy application that data scientists and engineers can log in to by using their corporate credentials. The proxy application will authenticate users by using the company's existing identity provider (IdP). The application will then route users to the appropriate SageMaker Studio domain. The company plans to maintain a table in Amazon DynamoDB that contains SageMaker domains for each department. How should the company meet these requirements?",
+    "question": "A company is setting up a mechanism for data scientists and engineers from different departments to access an Amazon SageMaker Studio domain. Each department has a unique SageMaker Studio domain. The company wants to build a central proxy application that data scientists and engineers can log in to by using their corporate credentials. The proxy application will authenticate users by using the company's existing identity provider (IdP). The application will then route users to the appropriate SageMaker Studio domain. The company plans to maintain a table in Amazon DynamoDB that contains SageMaker domains for each department. How should the company meet these requirements?",
     "options": [
       "Use the SageMaker CreatePresignedDomainUrl API to generate a presigned URL for each domain according to the DynamoDB table. Pass the presigned URL to the proxy application.",
       "Use the SageMaker CreateHumanTaskUi API to generate a UI URL. Pass the URL to the proxy application.",
@@ -3173,7 +3173,7 @@
     "references": ""
   },
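The first option of QUESTION 277 names the SageMaker CreatePresignedDomainUrl API. A minimal boto3 sketch of the proxy's lookup-then-redirect flow follows; the DynamoDB table name, key schema, and expiration values are illustrative assumptions, not part of the question:

```python
import boto3

dynamodb = boto3.resource("dynamodb")
sagemaker = boto3.client("sagemaker")

def studio_url_for(department: str, user_profile: str) -> str:
    # Hypothetical table mapping each department to its Studio domain ID.
    table = dynamodb.Table("DepartmentSageMakerDomains")
    item = table.get_item(Key={"Department": department})["Item"]

    response = sagemaker.create_presigned_domain_url(
        DomainId=item["DomainId"],
        UserProfileName=user_profile,
        SessionExpirationDurationInSeconds=1800,  # Studio session lifetime
        ExpiresInSeconds=60,                      # the URL itself expires quickly
    )
    return response["AuthorizedUrl"]  # the proxy redirects the user here
```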
  {
-    "question": "QUESTION 278 A global bank requires a solution to predict whether customers will leave the bank and choose another bank. The bank is using a dataset to train a model to predict customer loss. The training dataset has 1,000 rows. The training dataset includes 100 instances of customers who left the bank. A machine learning (ML) specialist is using Amazon SageMaker Data Wrangler to train a churn prediction model by using a SageMaker training job. After training, the ML specialist notices that the model returns only false results. The ML specialist must correct the model so that it returns more accurate predictions. Which solution will meet these requirements?",
+    "question": "A global bank requires a solution to predict whether customers will leave the bank and choose another bank. The bank is using a dataset to train a model to predict customer loss. The training dataset has 1,000 rows. The training dataset includes 100 instances of customers who left the bank. A machine learning (ML) specialist is using Amazon SageMaker Data Wrangler to train a churn prediction model by using a SageMaker training job. After training, the ML specialist notices that the model returns only false results. The ML specialist must correct the model so that it returns more accurate predictions. Which solution will meet these requirements?",
     "options": [
       "Apply anomaly detection to remove outliers from the training dataset before training.",
       "Apply Synthetic Minority Oversampling Technique (SMOTE) to the training dataset before training.",
@@ -3185,7 +3185,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 279 A developer at a retail company is creating a daily demand forecasting model. The company stores the historical hourly demand data in an Amazon S3 bucket. However, the historical data does not include demand data for some hours. The developer wants to verify that an autoregressive integrated moving average (ARIMA) approach will be a suitable model for the use case. How should the developer verify the suitability of an ARIMA approach?",
+    "question": "A developer at a retail company is creating a daily demand forecasting model. The company stores the historical hourly demand data in an Amazon S3 bucket. However, the historical data does not include demand data for some hours. The developer wants to verify that an autoregressive integrated moving average (ARIMA) approach will be a suitable model for the use case. How should the developer verify the suitability of an ARIMA approach?",
     "options": [
       "Use Amazon SageMaker Data Wrangler. Import the data from Amazon S3. Impute hourly missing data. Perform a Seasonal Trend decomposition.",
       "Use Amazon SageMaker Autopilot. Create a new experiment that specifies the S3 data location. Choose ARIMA as the machine learning (ML) problem. Check the model performance.",
@@ -3197,7 +3197,7 @@
     "references": ""
   },
   {
-    "question": "QUESTION 280 A law firm handles thousands of contracts every day. Every contract must be signed. Currently, a lawyer manually checks all contracts for signatures. The law firm is developing a machine learning (ML) solution to automate signature detection for each contract. The ML solution must also provide a confidence score for each contract page. Which Amazon Textract API action can the law firm use to generate a confidence score for each page of each contract?",
+    "question": "A law firm handles thousands of contracts every day. Every contract must be signed. Currently, a lawyer manually checks all contracts for signatures. The law firm is developing a machine learning (ML) solution to automate signature detection for each contract. The ML solution must also provide a confidence score for each contract page. Which Amazon Textract API action can the law firm use to generate a confidence score for each page of each contract?",
     "options": [
       "Use the AnalyzeDocument API action. Set the FeatureTypes parameter to SIGNATURES. Return the confidence scores for each page.",
       "Use the Prediction API call on the documents. Return the signatures and confidence scores for each page.",
@@ -3209,7 +3209,7 @@
     "references": ""
   },
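The first option of QUESTION 280 names the AnalyzeDocument action with the SIGNATURES feature type. A minimal boto3 sketch is below; the bucket and object names are assumptions, and note that the synchronous call handles single-page documents, while multipage PDFs would go through the asynchronous StartDocumentAnalysis variant, which accepts the same FeatureTypes parameter:

```python
import boto3

textract = boto3.client("textract")

# Synchronous analysis of a single-page document stored in S3.
response = textract.analyze_document(
    Document={"S3Object": {"Bucket": "contracts-bucket", "Name": "contract-001.png"}},
    FeatureTypes=["SIGNATURES"],
)

for block in response["Blocks"]:
    if block["BlockType"] == "SIGNATURE":
        # Each detected signature carries its own confidence score and page.
        print(block.get("Page", 1), block["Confidence"])
```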
  {
-    "question": "QUESTION 281 An ecommerce company has developed an XGBoost model in Amazon SageMaker to predict whether a customer will return a purchased item. The dataset is imbalanced. Only 5% of customers return items. A data scientist must find the hyperparameters to capture as many instances of returned items as possible. The company has a small budget for compute. How should the data scientist meet these requirements MOST cost-effectively?",
+    "question": "An ecommerce company has developed an XGBoost model in Amazon SageMaker to predict whether a customer will return a purchased item. The dataset is imbalanced. Only 5% of customers return items. A data scientist must find the hyperparameters to capture as many instances of returned items as possible. The company has a small budget for compute. How should the data scientist meet these requirements MOST cost-effectively?",
     "options": [
       "Tune all possible hyperparameters by using automatic model tuning (AMT). Optimize on {\"HyperParameterTuningJobObjective\": {\"MetricName\": \"validation:accuracy\", \"Type\": \"Maximize\"}}",
       "Tune the csv_weight hyperparameter and the scale_pos_weight hyperparameter by using automatic model tuning (AMT). Optimize on {\"HyperParameterTuningJobObjective\": {\"MetricName\": \"validation:f1\", \"Type\": \"Maximize\"}}.",