pvbhanuteja · August 4, 2020 19:52
diff --git a/download and pre-process data b/download and pre-process data
 # Fetch the spa-eng archive; extract=True asks get_file to unpack it as well.
 path_to_zip = tf.keras.utils.get_file(
     'spa-eng.zip',
     origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
     extract=True,
 )

 # The text file is addressed relative to the directory holding the archive.
 path_to_file = os.path.dirname(path_to_zip) + "/spa-eng/spa.txt"

 # Strips accents from a string: decompose it (NFD) and drop the combining marks
 def unicode_to_ascii(s):
     """Return `s` with its combining marks removed.

     The string is decomposed with NFD normalization so that an accented
     character becomes a base character followed by combining marks; every
     character whose Unicode category is 'Mn' is then dropped.
     """
     decomposed = unicodedata.normalize('NFD', s)
     kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
     return ''.join(kept)


 def preprocess_sentence(w):
     """Normalize one sentence and wrap it in start/end tokens.

     The sentence is lower-cased, trimmed and passed through
     unicode_to_ascii. The marks ? . ! , ¿ are then padded with spaces,
     every run of characters other than a-z, A-Z and those marks is
     replaced by a single space, and the trimmed result is returned as
     '<start> ... <end>'.
     """
     cleaned = unicode_to_ascii(w.lower().strip())

     # Applied in order:
     #  1. put a space on both sides of each kept punctuation mark,
     #     eg: "he is a boy." => "he is a boy ."
     #     Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
     #  2. squeeze runs of spaces / double quotes into one space
     #  3. replace everything except (a-z, A-Z, ".", "?", "!", ",", "¿")
     #     with a space
     substitutions = (
         (r"([?.!,¿])", r" \1 "),
         (r'[" "]+', " "),
         (r"[^a-zA-Z?.!,¿]+", " "),
     )
     for pattern, replacement in substitutions:
         cleaned = re.sub(pattern, replacement, cleaned)

     # The start and end tokens tell the model where to begin and where to
     # stop predicting.
     return ''.join(('<start> ', cleaned.strip(), ' <end>'))
diff --git a/Imports b/Imports
 import tensorflow as tf

 import matplotlib.pyplot as plt
 import matplotlib.ticker as ticker
 from sklearn.model_selection import train_test_split

 import unicodedata
 import re
 import numpy as np
 import os
 import io
 import time
	# Fetch the spa-eng archive; extract=True asks get_file to unpack it as well.
	path_to_zip = tf.keras.utils.get_file(
		'spa-eng.zip',
		origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
		extract=True,
	)

	# The text file is addressed relative to the directory holding the archive.
	path_to_file = os.path.dirname(path_to_zip) + "/spa-eng/spa.txt"

	# Strips accents from a string: decompose it (NFD) and drop the combining marks
	def unicode_to_ascii(s):
		"""Return `s` with its combining marks removed.

		The string is decomposed with NFD normalization so that an accented
		character becomes a base character followed by combining marks; every
		character whose Unicode category is 'Mn' is then dropped.
		"""
		# The body must be indented under the def; the flattened paste was
		# not valid Python.
		return ''.join(
			c for c in unicodedata.normalize('NFD', s)
			if unicodedata.category(c) != 'Mn'
		)


	def preprocess_sentence(w):
		"""Normalize one sentence and wrap it in start/end tokens.

		The sentence is lower-cased, trimmed and passed through
		unicode_to_ascii. The marks ? . ! , ¿ are then padded with spaces,
		every run of characters other than a-z, A-Z and those marks is
		replaced by a single space, and the trimmed result is returned as
		'<start> ... <end>'.
		"""
		w = unicode_to_ascii(w.lower().strip())

		# creating a space between a word and the punctuation following it
		# eg: "he is a boy." => "he is a boy ."
		# Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
		w = re.sub(r"([?.!,¿])", r" \1 ", w)
		# squeeze runs of spaces (and double quotes) into a single space
		w = re.sub(r'[" "]+', " ", w)

		# replacing everything with space except (a-z, A-Z, ".", "?", "!", ",", "¿")
		w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

		w = w.strip()

		# adding a start and an end token to the sentence
		# so that the model knows when to start and stop predicting.
		w = '<start> ' + w + ' <end>'
		return w
	import tensorflow as tf

	import matplotlib.pyplot as plt
	import matplotlib.ticker as ticker
	from sklearn.model_selection import train_test_split

	import unicodedata
	import re
	import numpy as np
	import os
	import io
	import time