AkashC-goML · May 6, 2024 17:00
diff --git a/re-ranking-and-rag.ipynb b/re-ranking-and-rag.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyPkGR1di8gMBLLk4Pyuhr8I",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "2223dc261ac84ed2ad14d037859f3ca5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e334812855214917a63e6a76155a6c35",
              "IPY_MODEL_29e76a510c8c46d0808ce49388c670ea",
              "IPY_MODEL_998658520adc4b6aa48b153794ff0be2"
            ],
            "layout": "IPY_MODEL_cc7ea3053cd04d659c2b5dc7373badfa"
          }
        },
        "e334812855214917a63e6a76155a6c35": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_351dfdd986214391839d13c213fce093",
            "placeholder": "",
            "style": "IPY_MODEL_d32bbb0aa04f4639a5d94dc7cea034c2",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "29e76a510c8c46d0808ce49388c670ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0d105cb2d94743f19bbea2c3bcda2bc5",
            "max": 48,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_7607e8c994b34a18a8981ef8a7ef8783",
            "value": 48
          }
        },
        "998658520adc4b6aa48b153794ff0be2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_532aa34688e9423e94594d4967ef2022",
            "placeholder": "",
            "style": "IPY_MODEL_1857a6d5f40143e6941b27353af710a4",
            "value": " 48.0/48.0 [00:00&lt;00:00, 846B/s]"
          }
        },
        "cc7ea3053cd04d659c2b5dc7373badfa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "351dfdd986214391839d13c213fce093": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d32bbb0aa04f4639a5d94dc7cea034c2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "0d105cb2d94743f19bbea2c3bcda2bc5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7607e8c994b34a18a8981ef8a7ef8783": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "532aa34688e9423e94594d4967ef2022": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1857a6d5f40143e6941b27353af710a4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7ae13d0e2fa545aaa4d0cbc02a756f76": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_842ca0565e144b498183ca469afcaeb5",
              "IPY_MODEL_ef65b485574d420c9d29ab713064df60",
              "IPY_MODEL_81d7e39ffc8f4e238a16e1e051d6a70a"
            ],
            "layout": "IPY_MODEL_687c5ad3bace41749062628140d6cef4"
          }
        },
        "842ca0565e144b498183ca469afcaeb5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6bc98bc064bf46fabbc72425e6a04cde",
            "placeholder": "",
            "style": "IPY_MODEL_5bdb424674d5428a9718968f7ccb417f",
            "value": "config.json: 100%"
          }
        },
        "ef65b485574d420c9d29ab713064df60": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_93a9a41588b140d59f423059745f0b74",
            "max": 570,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e60dbeba37df4f0db8c81f8413f61307",
            "value": 570
          }
        },
        "81d7e39ffc8f4e238a16e1e051d6a70a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e990efa20a9e4f589d80b59fb294457e",
            "placeholder": "",
            "style": "IPY_MODEL_834352bbfa06497087ccc0fd282387c0",
            "value": " 570/570 [00:00&lt;00:00, 9.03kB/s]"
          }
        },
        "687c5ad3bace41749062628140d6cef4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6bc98bc064bf46fabbc72425e6a04cde": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5bdb424674d5428a9718968f7ccb417f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "93a9a41588b140d59f423059745f0b74": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e60dbeba37df4f0db8c81f8413f61307": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e990efa20a9e4f589d80b59fb294457e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "834352bbfa06497087ccc0fd282387c0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5d5f74c4c8144064b4608167be6bedcf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_268be2da8a5a473d9cea08e3b9084b66",
              "IPY_MODEL_0b011360bbae45a3a7c2d222b93af477",
              "IPY_MODEL_3c9e08b0edda4dfda74df38cc3a7137b"
            ],
            "layout": "IPY_MODEL_889fb571413443a4adc255bbde65f337"
          }
        },
        "268be2da8a5a473d9cea08e3b9084b66": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f422ba05478b47b9b81f107c8591451e",
            "placeholder": "",
            "style": "IPY_MODEL_1ab691653c744ee7a38a4d9988bd7e78",
            "value": "vocab.txt: 100%"
          }
        },
        "0b011360bbae45a3a7c2d222b93af477": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_085dca686edb4aa9b115833ecf3de5aa",
            "max": 231508,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_40846a21498d4c6c80c2a1d61c1e3a5a",
            "value": 231508
          }
        },
        "3c9e08b0edda4dfda74df38cc3a7137b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6b570e5bf0eb4897be55c0498644bcb5",
            "placeholder": "",
            "style": "IPY_MODEL_8cac7ad24f624936a5b7b663da0413dd",
            "value": " 232k/232k [00:00&lt;00:00, 3.28MB/s]"
          }
        },
        "889fb571413443a4adc255bbde65f337": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f422ba05478b47b9b81f107c8591451e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1ab691653c744ee7a38a4d9988bd7e78": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "085dca686edb4aa9b115833ecf3de5aa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "40846a21498d4c6c80c2a1d61c1e3a5a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "6b570e5bf0eb4897be55c0498644bcb5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8cac7ad24f624936a5b7b663da0413dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "37d1a03a632e4eb887e93535e54a02fa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_d0c3dcd59c1d4a95ba1d517e57d4bc75",
              "IPY_MODEL_7991b3f594ed49dfad4c3b099a9d0340",
              "IPY_MODEL_46715f152a8342e78e4877c99a579008"
            ],
            "layout": "IPY_MODEL_d81a1a65d90e4e1b9ef1822b58318a2f"
          }
        },
        "d0c3dcd59c1d4a95ba1d517e57d4bc75": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f5421fc2d3cb43c3bfcb986c0aed330d",
            "placeholder": "",
            "style": "IPY_MODEL_cd9fc73c223344fb89b9b2b9fd8d7e40",
            "value": "tokenizer.json: 100%"
          }
        },
        "7991b3f594ed49dfad4c3b099a9d0340": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b1895ceb637241bfb039845ffa00cd9d",
            "max": 466062,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_c9df6b93c24448b29bdb661a08ddc7a8",
            "value": 466062
          }
        },
        "46715f152a8342e78e4877c99a579008": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b675dca9a4034c21b7a15b35de53489d",
            "placeholder": "",
            "style": "IPY_MODEL_f3b229ef0b1d4081844c3b3cda230278",
            "value": " 466k/466k [00:00&lt;00:00, 5.67MB/s]"
          }
        },
        "d81a1a65d90e4e1b9ef1822b58318a2f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f5421fc2d3cb43c3bfcb986c0aed330d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cd9fc73c223344fb89b9b2b9fd8d7e40": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b1895ceb637241bfb039845ffa00cd9d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c9df6b93c24448b29bdb661a08ddc7a8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b675dca9a4034c21b7a15b35de53489d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f3b229ef0b1d4081844c3b3cda230278": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2c6dcabb67cb44dd9a404b95ca420e25": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_2a309261c8674b78b5fdf624f560ddd6",
              "IPY_MODEL_c88b636452e24cc7b2b683778c64eca5",
              "IPY_MODEL_d5b2849304fe4064a650930a77f24b63"
            ],
            "layout": "IPY_MODEL_3a908ad25df0497d9d32bb93e9ac76b3"
          }
        },
        "2a309261c8674b78b5fdf624f560ddd6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fd415a26ea2743ffa7fcedb3da165cfc",
            "placeholder": "",
            "style": "IPY_MODEL_fd9a381814f34925ba39cffb24485695",
            "value": "model.safetensors: 100%"
          }
        },
        "c88b636452e24cc7b2b683778c64eca5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0df6c55dfaad4e448e6ed3b676c7f3a2",
            "max": 440449768,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6e0ac3e6f31e49a980815c8a691d6691",
            "value": 440449768
          }
        },
        "d5b2849304fe4064a650930a77f24b63": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_588fe7a3b69048cda86fc1855b94d2fe",
            "placeholder": "",
            "style": "IPY_MODEL_23d8d7ba8f5540678b25ec8e002a7b22",
            "value": " 440M/440M [00:06&lt;00:00, 96.5MB/s]"
          }
        },
        "3a908ad25df0497d9d32bb93e9ac76b3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fd415a26ea2743ffa7fcedb3da165cfc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fd9a381814f34925ba39cffb24485695": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "0df6c55dfaad4e448e6ed3b676c7f3a2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6e0ac3e6f31e49a980815c8a691d6691": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "588fe7a3b69048cda86fc1855b94d2fe": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "23d8d7ba8f5540678b25ec8e002a7b22": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/AkashC-goML/cc059a7c201f3a465b559970d2bdcb0c/re-ranking-and-rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Install the required libraries:"
      ],
      "metadata": {
        "id": "9CfJ7EOet--K"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D-U2zRqmt0Bb",
        "outputId": "dc3b1c2f-8894-4cb6-a842-c38137545949"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.40.1)\n",
            "Collecting faiss-cpu\n",
            "  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n",
            "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.11.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n",
            "Installing collected packages: faiss-cpu\n",
            "Successfully installed faiss-cpu-1.8.0\n"
          ]
        }
      ],
      "source": [
        "pip install transformers faiss-cpu\n"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Install the required libraries:\n"
      ],
      "metadata": {
        "id": "ptNyqH3muE1c"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
        "import faiss\n",
        "import numpy as np"
      ],
      "metadata": {
        "id": "ptPZOdafuIIk"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Load a pre-trained language model and tokenizer:"
      ],
      "metadata": {
        "id": "6uixBl_VuKwm"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
        "model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 336,
          "referenced_widgets": [
            "2223dc261ac84ed2ad14d037859f3ca5",
            "e334812855214917a63e6a76155a6c35",
            "29e76a510c8c46d0808ce49388c670ea",
            "998658520adc4b6aa48b153794ff0be2",
            "cc7ea3053cd04d659c2b5dc7373badfa",
            "351dfdd986214391839d13c213fce093",
            "d32bbb0aa04f4639a5d94dc7cea034c2",
            "0d105cb2d94743f19bbea2c3bcda2bc5",
            "7607e8c994b34a18a8981ef8a7ef8783",
            "532aa34688e9423e94594d4967ef2022",
            "1857a6d5f40143e6941b27353af710a4",
            "7ae13d0e2fa545aaa4d0cbc02a756f76",
            "842ca0565e144b498183ca469afcaeb5",
            "ef65b485574d420c9d29ab713064df60",
            "81d7e39ffc8f4e238a16e1e051d6a70a",
            "687c5ad3bace41749062628140d6cef4",
            "6bc98bc064bf46fabbc72425e6a04cde",
            "5bdb424674d5428a9718968f7ccb417f",
            "93a9a41588b140d59f423059745f0b74",
            "e60dbeba37df4f0db8c81f8413f61307",
            "e990efa20a9e4f589d80b59fb294457e",
            "834352bbfa06497087ccc0fd282387c0",
            "5d5f74c4c8144064b4608167be6bedcf",
            "268be2da8a5a473d9cea08e3b9084b66",
            "0b011360bbae45a3a7c2d222b93af477",
            "3c9e08b0edda4dfda74df38cc3a7137b",
            "889fb571413443a4adc255bbde65f337",
            "f422ba05478b47b9b81f107c8591451e",
            "1ab691653c744ee7a38a4d9988bd7e78",
            "085dca686edb4aa9b115833ecf3de5aa",
            "40846a21498d4c6c80c2a1d61c1e3a5a",
            "6b570e5bf0eb4897be55c0498644bcb5",
            "8cac7ad24f624936a5b7b663da0413dd",
            "37d1a03a632e4eb887e93535e54a02fa",
            "d0c3dcd59c1d4a95ba1d517e57d4bc75",
            "7991b3f594ed49dfad4c3b099a9d0340",
            "46715f152a8342e78e4877c99a579008",
            "d81a1a65d90e4e1b9ef1822b58318a2f",
            "f5421fc2d3cb43c3bfcb986c0aed330d",
            "cd9fc73c223344fb89b9b2b9fd8d7e40",
            "b1895ceb637241bfb039845ffa00cd9d",
            "c9df6b93c24448b29bdb661a08ddc7a8",
            "b675dca9a4034c21b7a15b35de53489d",
            "f3b229ef0b1d4081844c3b3cda230278",
            "2c6dcabb67cb44dd9a404b95ca420e25",
            "2a309261c8674b78b5fdf624f560ddd6",
            "c88b636452e24cc7b2b683778c64eca5",
            "d5b2849304fe4064a650930a77f24b63",
            "3a908ad25df0497d9d32bb93e9ac76b3",
            "fd415a26ea2743ffa7fcedb3da165cfc",
            "fd9a381814f34925ba39cffb24485695",
            "0df6c55dfaad4e448e6ed3b676c7f3a2",
            "6e0ac3e6f31e49a980815c8a691d6691",
            "588fe7a3b69048cda86fc1855b94d2fe",
            "23d8d7ba8f5540678b25ec8e002a7b22"
          ]
        },
        "id": "HfBjG-0ZuPvi",
        "outputId": "fdfa57db-5cb3-40ce-a139-179fb60889f0"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "2223dc261ac84ed2ad14d037859f3ca5"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "7ae13d0e2fa545aaa4d0cbc02a756f76"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "5d5f74c4c8144064b4608167be6bedcf"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "37d1a03a632e4eb887e93535e54a02fa"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "2c6dcabb67cb44dd9a404b95ca420e25"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Create a function to encode text using the language model:"
      ],
      "metadata": {
        "id": "2nTwEIeLuVBE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def encode_text(text, tokenizer, model):\n",
        "    inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n",
        "    with torch.no_grad():\n",
        "        outputs = model(**inputs)\n",
        "    return outputs.logits.mean(dim=1).cpu().numpy()\n"
      ],
      "metadata": {
        "id": "UXLQH5zeuTB0"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Prepare a dataset of question-answer pairs and encode them:"
      ],
      "metadata": {
        "id": "ptKZt4ixuZBM"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Replace this with your actual dataset\n",
        "qas = [\n",
        "    (\"What is the capital of France?\", \"Paris\"),\n",
        "    # Add more question-answer pairs here\n",
        "]\n",
        "\n",
        "# Encode question-answer pairs\n",
        "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in qas]\n"
      ],
      "metadata": {
        "id": "Xk3CPu-auX3y"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Build a FAISS index for efficient similarity search:"
      ],
      "metadata": {
        "id": "cPW9fezDufK1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dimension = encoded_qas[0].shape[0]\n",
        "index = faiss.IndexFlatL2(dimension)\n",
        "index.add(np.array(encoded_qas))\n"
      ],
      "metadata": {
        "id": "6_vk8Bi2ugtV"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Implement a function to retrieve the top-k most similar question-answer pairs:"
      ],
      "metadata": {
        "id": "BroJVAwJupye"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def retrieve_top_k(query, index, k):\n",
        "    encoded_query = encode_text(query, tokenizer, model)\n",
        "    distances, indices = index.search(np.array([encoded_query]), k)\n",
        "    return [(qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n"
      ],
      "metadata": {
        "id": "k1bEwKjCurvd"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Implement a re-ranking function using another language model or a custom scoring function:"
      ],
      "metadata": {
        "id": "GTK7Uo02uuT9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def re_rank(candidates, query):\n",
        "    # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n",
        "    # For simplicity, we'll just sort the candidates by their original distances\n",
        "    return sorted(candidates, key=lambda x: x[1])\n"
      ],
      "metadata": {
        "id": "p14aFDYCuxdS"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Test the retrieval and re-ranking:"
      ],
      "metadata": {
        "id": "TmFnZ1lHvIam"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "query = \"Where is the capital of France?\"\n",
        "top_k = 5\n",
        "\n",
        "# Retrieve top-k candidates\n",
        "candidates = retrieve_top_k(query, index, top_k)\n",
        "\n",
        "# Re-rank the candidates\n",
        "re_ranked_candidates = re_rank(candidates, query)\n",
        "\n",
        "# Print the re-ranked results\n",
        "for qa, distance in re_ranked_candidates:\n",
        "    print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tToEEPQ9vK9T",
        "outputId": "aee62066-ff9d-4e01-d618-53a785c84697"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: What is the capital of France?\n",
            "Answer: Paris\n",
            "Distance: 0.0031302475836127996\n",
            "\n",
            "Question: What is the capital of France?\n",
            "Answer: Paris\n",
            "Distance: 3.4028234663852886e+38\n",
            "\n",
            "Question: What is the capital of France?\n",
            "Answer: Paris\n",
            "Distance: 3.4028234663852886e+38\n",
            "\n",
            "Question: What is the capital of France?\n",
            "Answer: Paris\n",
            "Distance: 3.4028234663852886e+38\n",
            "\n",
            "Question: What is the capital of France?\n",
            "Answer: Paris\n",
            "Distance: 3.4028234663852886e+38\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# IPL dataset of question-answer pairs\n",
        "ipl_qas = [\n",
        "    (\"Which team won the first IPL season?\", \"Rajasthan Royals\"),\n",
        "    (\"Who scored the highest individual score in IPL?\", \"Chris Gayle (175*)\"),\n",
        "    (\"Who is the leading run-scorer in IPL history?\", \"Virat Kohli\"),\n",
        "    (\"Who is the leading wicket-taker in IPL history?\", \"Lasith Malinga\"),\n",
        "    (\"Which team has won the most IPL titles?\", \"Mumbai Indians\")\n",
        "]\n",
        "\n",
        "# Encode question-answer pairs\n",
        "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in ipl_qas]\n"
      ],
      "metadata": {
        "id": "fkUmgvaexFQS"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Build a FAISS index for efficient similarity search\n",
        "dimension = encoded_qas[0].shape[0]\n",
        "index = faiss.IndexFlatL2(dimension)\n",
        "index.add(np.array(encoded_qas))\n",
        "\n",
        "# Implement a function to retrieve the top-k most similar question-answer pairs\n",
        "def retrieve_top_k(query, index, k):\n",
        "    encoded_query = encode_text(query, tokenizer, model)\n",
        "    distances, indices = index.search(np.array([encoded_query]), k)\n",
        "    return [(ipl_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n",
        "\n",
        "# Implement a re-ranking function using another language model or a custom scoring function\n",
        "def re_rank(candidates, query):\n",
        "    # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n",
        "    # For simplicity, we'll just sort the candidates by their original distances\n",
        "    return sorted(candidates, key=lambda x: x[1])\n",
        "\n",
        "# Test the retrieval and re-ranking with a sample query\n",
        "query = \"who is the caption of csk \"\n",
        "top_k = 3\n",
        "\n",
        "# Retrieve top-k candidates\n",
        "candidates = retrieve_top_k(query, index, top_k)\n",
        "\n",
        "# Re-rank the candidates\n",
        "re_ranked_candidates = re_rank(candidates, query)\n",
        "\n",
        "# Print the re-ranked results\n",
        "for qa, distance in re_ranked_candidates:\n",
        "    print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zOh-ThfSvP_P",
        "outputId": "6931c2cf-9e7f-4b7d-b666-6490a7867b07"
      },
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: Who is the leading wicket-taker in IPL history?\n",
            "Answer: Lasith Malinga\n",
            "Distance: 0.02710317261517048\n",
            "\n",
            "Question: Who scored the highest individual score in IPL?\n",
            "Answer: Chris Gayle (175*)\n",
            "Distance: 0.02815784327685833\n",
            "\n",
            "Question: Which team has won the most IPL titles?\n",
            "Answer: Mumbai Indians\n",
            "Distance: 0.03018650971353054\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! pip install wikipedia~=1.4.0"
      ],
      "metadata": {
        "id": "kg0IcRDXy6nv"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install langchain~=0.1.16"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eq934O_yy4w5",
        "outputId": "b1d4abc1-f876-4517-ec5c-2d973a8db9da"
      },
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting langchain~=0.1.16\n",
            "  Downloading langchain-0.1.17-py3-none-any.whl (867 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m867.6/867.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (6.0.1)\n",
            "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.0.29)\n",
            "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (3.9.5)\n",
            "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (4.0.3)\n",
            "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain~=0.1.16)\n",
            "  Downloading dataclasses_json-0.6.5-py3-none-any.whl (28 kB)\n",
            "Collecting jsonpatch<2.0,>=1.33 (from langchain~=0.1.16)\n",
            "  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
            "Collecting langchain-community<0.1,>=0.0.36 (from langchain~=0.1.16)\n",
            "  Downloading langchain_community-0.0.36-py3-none-any.whl (2.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting langchain-core<0.2.0,>=0.1.48 (from langchain~=0.1.16)\n",
            "  Downloading langchain_core-0.1.50-py3-none-any.whl (302 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.8/302.8 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain~=0.1.16)\n",
            "  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n",
            "Collecting langsmith<0.2.0,>=0.1.17 (from langchain~=0.1.16)\n",
            "  Downloading langsmith-0.1.54-py3-none-any.whl (116 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.7/116.7 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (1.25.2)\n",
            "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.7.1)\n",
            "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.31.0)\n",
            "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (8.2.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (23.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.4.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (6.0.5)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.9.4)\n",
            "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n",
            "  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n",
            "  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
            "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain~=0.1.16)\n",
            "  Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n",
            "Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.48->langchain~=0.1.16)\n",
            "  Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain~=0.1.16)\n",
            "  Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (0.6.0)\n",
            "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (2.18.2)\n",
            "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (4.11.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.7)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2024.2.2)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain~=0.1.16) (3.0.3)\n",
            "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n",
            "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
            "Installing collected packages: packaging, orjson, mypy-extensions, jsonpointer, typing-inspect, marshmallow, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain\n",
            "  Attempting uninstall: packaging\n",
            "    Found existing installation: packaging 24.0\n",
            "    Uninstalling packaging-24.0:\n",
            "      Successfully uninstalled packaging-24.0\n",
            "Successfully installed dataclasses-json-0.6.5 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.17 langchain-community-0.0.36 langchain-core-0.1.50 langchain-text-splitters-0.0.1 langsmith-0.1.54 marshmallow-3.21.2 mypy-extensions-1.0.0 orjson-3.10.3 packaging-23.2 typing-inspect-0.9.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# from langchain_community.document_loaders import WikipediaLoader\n",
        "\n",
        "pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n",
        "\n",
        "import nltk\n",
        "from nltk.corpus import stopwords\n",
        "from nltk.tokenize import sent_tokenize, word_tokenize\n",
        "\n",
        "nltk.download(\"punkt\")\n",
        "nltk.download(\"stopwords\")\n",
        "\n",
        "# Load Wikipedia pages\n",
        "pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n",
        "\n",
        "# Preprocess the data\n",
        "# Preprocess the data\n",
        "stop_words = set(stopwords.words(\"english\"))\n",
        "wiki_qas = []\n",
        "\n",
        "for page in pages:\n",
        "    sentences = sent_tokenize(page.page_content)\n",
        "    for sentence in sentences:\n",
        "        words = word_tokenize(sentence)\n",
        "        filtered_words = [word for word in words if word.lower() not in stop_words and word.isalnum()]\n",
        "        question = \" \".join(filtered_words)\n",
        "        answer = sentence\n",
        "        if answer.strip():  # Check if the answer is not empty\n",
        "            wiki_qas.append((question, answer))\n",
        "\n",
        "# Encode question-answer pairs\n",
        "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n",
        "\n",
        "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n",
        "\n",
        "# Build a FAISS index for efficient similarity search\n",
        "dimension = encoded_qas[0].shape[0]\n",
        "index = faiss.IndexFlatL2(dimension)\n",
        "index.add(np.array(encoded_qas))\n",
        "\n",
        "# Use the retrieve_top_k and re_rank functions provided in the previous code examples\n",
        "\n",
        "# Test the retrieval and re-ranking with a sample query\n",
        "query = \"Who is Harry Potter?\"\n",
        "top_k = 3\n",
        "\n",
        "def re_rank(candidates, query, num_candidates):\n",
        "    # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n",
        "    # For simplicity, we'll just sort the candidates by their original distances\n",
        "    return sorted(candidates[:num_candidates], key=lambda x: x[1])\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mbMysjGWyLpJ",
        "outputId": "e7d942ed-d891-4a12-9d79-9543ffc87cfa"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
            "[nltk_data]   Package punkt is already up-to-date!\n",
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Package stopwords is already up-to-date!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def retrieve_top_k(query, index, k):\n",
        "    encoded_query = encode_text(query, tokenizer, model)\n",
        "    distances, indices = index.search(np.array([encoded_query]), k)\n",
        "    return [(wiki_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n"
      ],
      "metadata": {
        "id": "3sm9-aFA09Ts"
      },
      "execution_count": 29,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "candidates = retrieve_top_k(query, index, len(encoded_qas))\n",
        "\n",
        "# Re-rank the candidates\n",
        "num_candidates = min(top_k, len(candidates))\n",
        "re_ranked_candidates = re_rank(candidates, query, num_candidates)\n",
        "\n",
        "# Print the re-ranked results\n",
        "for qa, distance in re_ranked_candidates:\n",
        "    print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bL4aAqNy0g6D",
        "outputId": "ff925a1d-a699-4652-91d6-1ae8ea6f42e0"
      },
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: series mainly produced David Heyman stars Daniel Radcliffe Rupert Grint Emma Watson three leading characters Harry Potter Ron Weasley Hermione Granger\n",
            "Answer: The series was mainly produced by David Heyman, and stars Daniel Radcliffe, Rupert Grint, and Emma Watson as the three leading characters: Harry Potter, Ron Weasley, and Hermione Granger.\n",
            "Distance: 7.031151199043961e-06\n",
            "\n",
            "Question: first book Harry Potter Philosopher Stone Harry lives cupboard stairs house Dursleys aunt uncle cousin treat poorly\n",
            "Answer: In the first book, Harry Potter and the Philosopher's Stone, Harry lives in a cupboard under the stairs in the house of the Dursleys, his aunt, uncle and cousin, who all treat him poorly.\n",
            "Distance: 3.793924770434387e-05\n",
            "\n",
            "Question: Harry Potter series used source object lessons educational techniques sociological analysis marketing\n",
            "Answer: The Harry Potter series has been used as a source of object lessons in educational techniques, sociological analysis and marketing.\n",
            "Distance: 0.00018532716785557568\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "VqenpHe3yK4a"
      }
    }
  ]
 }
No results found