Created
May 6, 2024 17:00
-
-
Save AkashC-goML/cc059a7c201f3a465b559970d2bdcb0c to your computer and use it in GitHub Desktop.
Re-Ranking and RAG.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyPkGR1di8gMBLLk4Pyuhr8I", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "widgets": { | |
| "application/vnd.jupyter.widget-state+json": { | |
| "2223dc261ac84ed2ad14d037859f3ca5": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HBoxModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HBoxView", | |
| "box_style": "", | |
| "children": [ | |
| "IPY_MODEL_e334812855214917a63e6a76155a6c35", | |
| "IPY_MODEL_29e76a510c8c46d0808ce49388c670ea", | |
| "IPY_MODEL_998658520adc4b6aa48b153794ff0be2" | |
| ], | |
| "layout": "IPY_MODEL_cc7ea3053cd04d659c2b5dc7373badfa" | |
| } | |
| }, | |
| "e334812855214917a63e6a76155a6c35": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_351dfdd986214391839d13c213fce093", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_d32bbb0aa04f4639a5d94dc7cea034c2", | |
| "value": "tokenizer_config.json: 100%" | |
| } | |
| }, | |
| "29e76a510c8c46d0808ce49388c670ea": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "FloatProgressModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "ProgressView", | |
| "bar_style": "success", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_0d105cb2d94743f19bbea2c3bcda2bc5", | |
| "max": 48, | |
| "min": 0, | |
| "orientation": "horizontal", | |
| "style": "IPY_MODEL_7607e8c994b34a18a8981ef8a7ef8783", | |
| "value": 48 | |
| } | |
| }, | |
| "998658520adc4b6aa48b153794ff0be2": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_532aa34688e9423e94594d4967ef2022", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_1857a6d5f40143e6941b27353af710a4", | |
| "value": " 48.0/48.0 [00:00<00:00, 846B/s]" | |
| } | |
| }, | |
| "cc7ea3053cd04d659c2b5dc7373badfa": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "351dfdd986214391839d13c213fce093": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "d32bbb0aa04f4639a5d94dc7cea034c2": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "0d105cb2d94743f19bbea2c3bcda2bc5": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "7607e8c994b34a18a8981ef8a7ef8783": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "ProgressStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "bar_color": null, | |
| "description_width": "" | |
| } | |
| }, | |
| "532aa34688e9423e94594d4967ef2022": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "1857a6d5f40143e6941b27353af710a4": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "7ae13d0e2fa545aaa4d0cbc02a756f76": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HBoxModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HBoxView", | |
| "box_style": "", | |
| "children": [ | |
| "IPY_MODEL_842ca0565e144b498183ca469afcaeb5", | |
| "IPY_MODEL_ef65b485574d420c9d29ab713064df60", | |
| "IPY_MODEL_81d7e39ffc8f4e238a16e1e051d6a70a" | |
| ], | |
| "layout": "IPY_MODEL_687c5ad3bace41749062628140d6cef4" | |
| } | |
| }, | |
| "842ca0565e144b498183ca469afcaeb5": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_6bc98bc064bf46fabbc72425e6a04cde", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_5bdb424674d5428a9718968f7ccb417f", | |
| "value": "config.json: 100%" | |
| } | |
| }, | |
| "ef65b485574d420c9d29ab713064df60": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "FloatProgressModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "ProgressView", | |
| "bar_style": "success", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_93a9a41588b140d59f423059745f0b74", | |
| "max": 570, | |
| "min": 0, | |
| "orientation": "horizontal", | |
| "style": "IPY_MODEL_e60dbeba37df4f0db8c81f8413f61307", | |
| "value": 570 | |
| } | |
| }, | |
| "81d7e39ffc8f4e238a16e1e051d6a70a": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_e990efa20a9e4f589d80b59fb294457e", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_834352bbfa06497087ccc0fd282387c0", | |
| "value": " 570/570 [00:00<00:00, 9.03kB/s]" | |
| } | |
| }, | |
| "687c5ad3bace41749062628140d6cef4": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "6bc98bc064bf46fabbc72425e6a04cde": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "5bdb424674d5428a9718968f7ccb417f": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "93a9a41588b140d59f423059745f0b74": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "e60dbeba37df4f0db8c81f8413f61307": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "ProgressStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "bar_color": null, | |
| "description_width": "" | |
| } | |
| }, | |
| "e990efa20a9e4f589d80b59fb294457e": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "834352bbfa06497087ccc0fd282387c0": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "5d5f74c4c8144064b4608167be6bedcf": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HBoxModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HBoxView", | |
| "box_style": "", | |
| "children": [ | |
| "IPY_MODEL_268be2da8a5a473d9cea08e3b9084b66", | |
| "IPY_MODEL_0b011360bbae45a3a7c2d222b93af477", | |
| "IPY_MODEL_3c9e08b0edda4dfda74df38cc3a7137b" | |
| ], | |
| "layout": "IPY_MODEL_889fb571413443a4adc255bbde65f337" | |
| } | |
| }, | |
| "268be2da8a5a473d9cea08e3b9084b66": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_f422ba05478b47b9b81f107c8591451e", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_1ab691653c744ee7a38a4d9988bd7e78", | |
| "value": "vocab.txt: 100%" | |
| } | |
| }, | |
| "0b011360bbae45a3a7c2d222b93af477": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "FloatProgressModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "ProgressView", | |
| "bar_style": "success", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_085dca686edb4aa9b115833ecf3de5aa", | |
| "max": 231508, | |
| "min": 0, | |
| "orientation": "horizontal", | |
| "style": "IPY_MODEL_40846a21498d4c6c80c2a1d61c1e3a5a", | |
| "value": 231508 | |
| } | |
| }, | |
| "3c9e08b0edda4dfda74df38cc3a7137b": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_6b570e5bf0eb4897be55c0498644bcb5", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_8cac7ad24f624936a5b7b663da0413dd", | |
| "value": " 232k/232k [00:00<00:00, 3.28MB/s]" | |
| } | |
| }, | |
| "889fb571413443a4adc255bbde65f337": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "f422ba05478b47b9b81f107c8591451e": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "1ab691653c744ee7a38a4d9988bd7e78": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "085dca686edb4aa9b115833ecf3de5aa": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "40846a21498d4c6c80c2a1d61c1e3a5a": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "ProgressStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "bar_color": null, | |
| "description_width": "" | |
| } | |
| }, | |
| "6b570e5bf0eb4897be55c0498644bcb5": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "8cac7ad24f624936a5b7b663da0413dd": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "37d1a03a632e4eb887e93535e54a02fa": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HBoxModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HBoxView", | |
| "box_style": "", | |
| "children": [ | |
| "IPY_MODEL_d0c3dcd59c1d4a95ba1d517e57d4bc75", | |
| "IPY_MODEL_7991b3f594ed49dfad4c3b099a9d0340", | |
| "IPY_MODEL_46715f152a8342e78e4877c99a579008" | |
| ], | |
| "layout": "IPY_MODEL_d81a1a65d90e4e1b9ef1822b58318a2f" | |
| } | |
| }, | |
| "d0c3dcd59c1d4a95ba1d517e57d4bc75": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_f5421fc2d3cb43c3bfcb986c0aed330d", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_cd9fc73c223344fb89b9b2b9fd8d7e40", | |
| "value": "tokenizer.json: 100%" | |
| } | |
| }, | |
| "7991b3f594ed49dfad4c3b099a9d0340": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "FloatProgressModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "ProgressView", | |
| "bar_style": "success", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_b1895ceb637241bfb039845ffa00cd9d", | |
| "max": 466062, | |
| "min": 0, | |
| "orientation": "horizontal", | |
| "style": "IPY_MODEL_c9df6b93c24448b29bdb661a08ddc7a8", | |
| "value": 466062 | |
| } | |
| }, | |
| "46715f152a8342e78e4877c99a579008": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_b675dca9a4034c21b7a15b35de53489d", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_f3b229ef0b1d4081844c3b3cda230278", | |
| "value": " 466k/466k [00:00<00:00, 5.67MB/s]" | |
| } | |
| }, | |
| "d81a1a65d90e4e1b9ef1822b58318a2f": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "f5421fc2d3cb43c3bfcb986c0aed330d": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "cd9fc73c223344fb89b9b2b9fd8d7e40": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "b1895ceb637241bfb039845ffa00cd9d": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "c9df6b93c24448b29bdb661a08ddc7a8": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "ProgressStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "bar_color": null, | |
| "description_width": "" | |
| } | |
| }, | |
| "b675dca9a4034c21b7a15b35de53489d": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "f3b229ef0b1d4081844c3b3cda230278": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "2c6dcabb67cb44dd9a404b95ca420e25": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HBoxModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HBoxView", | |
| "box_style": "", | |
| "children": [ | |
| "IPY_MODEL_2a309261c8674b78b5fdf624f560ddd6", | |
| "IPY_MODEL_c88b636452e24cc7b2b683778c64eca5", | |
| "IPY_MODEL_d5b2849304fe4064a650930a77f24b63" | |
| ], | |
| "layout": "IPY_MODEL_3a908ad25df0497d9d32bb93e9ac76b3" | |
| } | |
| }, | |
| "2a309261c8674b78b5fdf624f560ddd6": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_fd415a26ea2743ffa7fcedb3da165cfc", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_fd9a381814f34925ba39cffb24485695", | |
| "value": "model.safetensors: 100%" | |
| } | |
| }, | |
| "c88b636452e24cc7b2b683778c64eca5": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "FloatProgressModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "ProgressView", | |
| "bar_style": "success", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_0df6c55dfaad4e448e6ed3b676c7f3a2", | |
| "max": 440449768, | |
| "min": 0, | |
| "orientation": "horizontal", | |
| "style": "IPY_MODEL_6e0ac3e6f31e49a980815c8a691d6691", | |
| "value": 440449768 | |
| } | |
| }, | |
| "d5b2849304fe4064a650930a77f24b63": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_dom_classes": [], | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "HTMLModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_view_module_version": "1.5.0", | |
| "_view_name": "HTMLView", | |
| "description": "", | |
| "description_tooltip": null, | |
| "layout": "IPY_MODEL_588fe7a3b69048cda86fc1855b94d2fe", | |
| "placeholder": "", | |
| "style": "IPY_MODEL_23d8d7ba8f5540678b25ec8e002a7b22", | |
| "value": " 440M/440M [00:06<00:00, 96.5MB/s]" | |
| } | |
| }, | |
| "3a908ad25df0497d9d32bb93e9ac76b3": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "fd415a26ea2743ffa7fcedb3da165cfc": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "fd9a381814f34925ba39cffb24485695": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| }, | |
| "0df6c55dfaad4e448e6ed3b676c7f3a2": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "6e0ac3e6f31e49a980815c8a691d6691": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "ProgressStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "bar_color": null, | |
| "description_width": "" | |
| } | |
| }, | |
| "588fe7a3b69048cda86fc1855b94d2fe": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.2.0", | |
| "_model_name": "LayoutModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "LayoutView", | |
| "align_content": null, | |
| "align_items": null, | |
| "align_self": null, | |
| "border": null, | |
| "bottom": null, | |
| "display": null, | |
| "flex": null, | |
| "flex_flow": null, | |
| "grid_area": null, | |
| "grid_auto_columns": null, | |
| "grid_auto_flow": null, | |
| "grid_auto_rows": null, | |
| "grid_column": null, | |
| "grid_gap": null, | |
| "grid_row": null, | |
| "grid_template_areas": null, | |
| "grid_template_columns": null, | |
| "grid_template_rows": null, | |
| "height": null, | |
| "justify_content": null, | |
| "justify_items": null, | |
| "left": null, | |
| "margin": null, | |
| "max_height": null, | |
| "max_width": null, | |
| "min_height": null, | |
| "min_width": null, | |
| "object_fit": null, | |
| "object_position": null, | |
| "order": null, | |
| "overflow": null, | |
| "overflow_x": null, | |
| "overflow_y": null, | |
| "padding": null, | |
| "right": null, | |
| "top": null, | |
| "visibility": null, | |
| "width": null | |
| } | |
| }, | |
| "23d8d7ba8f5540678b25ec8e002a7b22": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_model_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_model_name": "DescriptionStyleModel", | |
| "_view_count": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "_view_module_version": "1.2.0", | |
| "_view_name": "StyleView", | |
| "description_width": "" | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/AkashC-goML/cc059a7c201f3a465b559970d2bdcb0c/re-ranking-and-rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## Install the required libraries:" | |
| ], | |
| "metadata": { | |
| "id": "9CfJ7EOet--K" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "D-U2zRqmt0Bb", | |
| "outputId": "dc3b1c2f-8894-4cb6-a842-c38137545949" | |
| }, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.40.1)\n", | |
| "Collecting faiss-cpu\n", | |
| " Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n", | |
| "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", | |
| "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", | |
| "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n", | |
| "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", | |
| "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n", | |
| "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", | |
| "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n", | |
| "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n", | |
| "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n", | |
| "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n", | |
| "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.11.0)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n", | |
| "Installing collected packages: faiss-cpu\n", | |
| "Successfully installed faiss-cpu-1.8.0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "pip install transformers faiss-cpu\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## Import the required libraries:\n" | |
| ], | |
| "metadata": { | |
| "id": "ptNyqH3muE1c" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import torch\n", | |
| "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", | |
| "import faiss\n", | |
| "import numpy as np" | |
| ], | |
| "metadata": { | |
| "id": "ptPZOdafuIIk" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Load a pre-trained language model and tokenizer:" | |
| ], | |
| "metadata": { | |
| "id": "6uixBl_VuKwm" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", | |
| "model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 336, | |
| "referenced_widgets": [ | |
| "2223dc261ac84ed2ad14d037859f3ca5", | |
| "e334812855214917a63e6a76155a6c35", | |
| "29e76a510c8c46d0808ce49388c670ea", | |
| "998658520adc4b6aa48b153794ff0be2", | |
| "cc7ea3053cd04d659c2b5dc7373badfa", | |
| "351dfdd986214391839d13c213fce093", | |
| "d32bbb0aa04f4639a5d94dc7cea034c2", | |
| "0d105cb2d94743f19bbea2c3bcda2bc5", | |
| "7607e8c994b34a18a8981ef8a7ef8783", | |
| "532aa34688e9423e94594d4967ef2022", | |
| "1857a6d5f40143e6941b27353af710a4", | |
| "7ae13d0e2fa545aaa4d0cbc02a756f76", | |
| "842ca0565e144b498183ca469afcaeb5", | |
| "ef65b485574d420c9d29ab713064df60", | |
| "81d7e39ffc8f4e238a16e1e051d6a70a", | |
| "687c5ad3bace41749062628140d6cef4", | |
| "6bc98bc064bf46fabbc72425e6a04cde", | |
| "5bdb424674d5428a9718968f7ccb417f", | |
| "93a9a41588b140d59f423059745f0b74", | |
| "e60dbeba37df4f0db8c81f8413f61307", | |
| "e990efa20a9e4f589d80b59fb294457e", | |
| "834352bbfa06497087ccc0fd282387c0", | |
| "5d5f74c4c8144064b4608167be6bedcf", | |
| "268be2da8a5a473d9cea08e3b9084b66", | |
| "0b011360bbae45a3a7c2d222b93af477", | |
| "3c9e08b0edda4dfda74df38cc3a7137b", | |
| "889fb571413443a4adc255bbde65f337", | |
| "f422ba05478b47b9b81f107c8591451e", | |
| "1ab691653c744ee7a38a4d9988bd7e78", | |
| "085dca686edb4aa9b115833ecf3de5aa", | |
| "40846a21498d4c6c80c2a1d61c1e3a5a", | |
| "6b570e5bf0eb4897be55c0498644bcb5", | |
| "8cac7ad24f624936a5b7b663da0413dd", | |
| "37d1a03a632e4eb887e93535e54a02fa", | |
| "d0c3dcd59c1d4a95ba1d517e57d4bc75", | |
| "7991b3f594ed49dfad4c3b099a9d0340", | |
| "46715f152a8342e78e4877c99a579008", | |
| "d81a1a65d90e4e1b9ef1822b58318a2f", | |
| "f5421fc2d3cb43c3bfcb986c0aed330d", | |
| "cd9fc73c223344fb89b9b2b9fd8d7e40", | |
| "b1895ceb637241bfb039845ffa00cd9d", | |
| "c9df6b93c24448b29bdb661a08ddc7a8", | |
| "b675dca9a4034c21b7a15b35de53489d", | |
| "f3b229ef0b1d4081844c3b3cda230278", | |
| "2c6dcabb67cb44dd9a404b95ca420e25", | |
| "2a309261c8674b78b5fdf624f560ddd6", | |
| "c88b636452e24cc7b2b683778c64eca5", | |
| "d5b2849304fe4064a650930a77f24b63", | |
| "3a908ad25df0497d9d32bb93e9ac76b3", | |
| "fd415a26ea2743ffa7fcedb3da165cfc", | |
| "fd9a381814f34925ba39cffb24485695", | |
| "0df6c55dfaad4e448e6ed3b676c7f3a2", | |
| "6e0ac3e6f31e49a980815c8a691d6691", | |
| "588fe7a3b69048cda86fc1855b94d2fe", | |
| "23d8d7ba8f5540678b25ec8e002a7b22" | |
| ] | |
| }, | |
| "id": "HfBjG-0ZuPvi", | |
| "outputId": "fdfa57db-5cb3-40ce-a139-179fb60889f0" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", | |
| "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", | |
| "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", | |
| "You will be able to reuse this secret in all of your notebooks.\n", | |
| "Please note that authentication is recommended but still optional to access public models or datasets.\n", | |
| " warnings.warn(\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]" | |
| ], | |
| "application/vnd.jupyter.widget-view+json": { | |
| "version_major": 2, | |
| "version_minor": 0, | |
| "model_id": "2223dc261ac84ed2ad14d037859f3ca5" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "config.json: 0%| | 0.00/570 [00:00<?, ?B/s]" | |
| ], | |
| "application/vnd.jupyter.widget-view+json": { | |
| "version_major": 2, | |
| "version_minor": 0, | |
| "model_id": "7ae13d0e2fa545aaa4d0cbc02a756f76" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]" | |
| ], | |
| "application/vnd.jupyter.widget-view+json": { | |
| "version_major": 2, | |
| "version_minor": 0, | |
| "model_id": "5d5f74c4c8144064b4608167be6bedcf" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]" | |
| ], | |
| "application/vnd.jupyter.widget-view+json": { | |
| "version_major": 2, | |
| "version_minor": 0, | |
| "model_id": "37d1a03a632e4eb887e93535e54a02fa" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "model.safetensors: 0%| | 0.00/440M [00:00<?, ?B/s]" | |
| ], | |
| "application/vnd.jupyter.widget-view+json": { | |
| "version_major": 2, | |
| "version_minor": 0, | |
| "model_id": "2c6dcabb67cb44dd9a404b95ca420e25" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", | |
| "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Create a function to encode text using the language model:" | |
| ], | |
| "metadata": { | |
| "id": "2nTwEIeLuVBE" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def encode_text(text, tokenizer, model):\n", | |
| " inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n", | |
| " with torch.no_grad():\n", | |
| " outputs = model(**inputs)\n", | |
| " return outputs.logits.mean(dim=1).cpu().numpy()\n" | |
| ], | |
| "metadata": { | |
| "id": "UXLQH5zeuTB0" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Prepare a dataset of question-answer pairs and encode them:" | |
| ], | |
| "metadata": { | |
| "id": "ptKZt4ixuZBM" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Replace this with your actual dataset\n", | |
| "qas = [\n", | |
| " (\"What is the capital of France?\", \"Paris\"),\n", | |
| " # Add more question-answer pairs here\n", | |
| "]\n", | |
| "\n", | |
| "# Encode only the question text of each pair; answers are looked up by index after retrieval\n", | |
| "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in qas]\n" | |
| ], | |
| "metadata": { | |
| "id": "Xk3CPu-auX3y" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Build a FAISS index for efficient similarity search:" | |
| ], | |
| "metadata": { | |
| "id": "cPW9fezDufK1" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "dimension = encoded_qas[0].shape[0]\n", | |
| "index = faiss.IndexFlatL2(dimension)\n", | |
| "index.add(np.array(encoded_qas))\n" | |
| ], | |
| "metadata": { | |
| "id": "6_vk8Bi2ugtV" | |
| }, | |
| "execution_count": 13, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Implement a function to retrieve the top-k most similar question-answer pairs:" | |
| ], | |
| "metadata": { | |
| "id": "BroJVAwJupye" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def retrieve_top_k(query, index, k):\n", | |
| " encoded_query = encode_text(query, tokenizer, model)\n", | |
| " distances, indices = index.search(np.array([encoded_query]), k)\n", | |
| " return [(qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n" | |
| ], | |
| "metadata": { | |
| "id": "k1bEwKjCurvd" | |
| }, | |
| "execution_count": 14, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Implement a re-ranking function (placeholder that sorts by retrieval distance; replace with another language model or a custom scoring function):" | |
| ], | |
| "metadata": { | |
| "id": "GTK7Uo02uuT9" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def re_rank(candidates, query):\n", | |
| " # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n", | |
| " # For simplicity, we'll just sort the candidates by their original distances\n", | |
| " return sorted(candidates, key=lambda x: x[1])\n" | |
| ], | |
| "metadata": { | |
| "id": "p14aFDYCuxdS" | |
| }, | |
| "execution_count": 15, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Test the retrieval and re-ranking:" | |
| ], | |
| "metadata": { | |
| "id": "TmFnZ1lHvIam" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "query = \"Where is the capital of France?\"\n", | |
| "top_k = 5\n", | |
| "\n", | |
| "# Retrieve top-k candidates\n", | |
| "candidates = retrieve_top_k(query, index, top_k)\n", | |
| "\n", | |
| "# Re-rank the candidates\n", | |
| "re_ranked_candidates = re_rank(candidates, query)\n", | |
| "\n", | |
| "# Print the re-ranked results\n", | |
| "for qa, distance in re_ranked_candidates:\n", | |
| " print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "tToEEPQ9vK9T", | |
| "outputId": "aee62066-ff9d-4e01-d618-53a785c84697" | |
| }, | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Question: What is the capital of France?\n", | |
| "Answer: Paris\n", | |
| "Distance: 0.0031302475836127996\n", | |
| "\n", | |
| "Question: What is the capital of France?\n", | |
| "Answer: Paris\n", | |
| "Distance: 3.4028234663852886e+38\n", | |
| "\n", | |
| "Question: What is the capital of France?\n", | |
| "Answer: Paris\n", | |
| "Distance: 3.4028234663852886e+38\n", | |
| "\n", | |
| "Question: What is the capital of France?\n", | |
| "Answer: Paris\n", | |
| "Distance: 3.4028234663852886e+38\n", | |
| "\n", | |
| "Question: What is the capital of France?\n", | |
| "Answer: Paris\n", | |
| "Distance: 3.4028234663852886e+38\n", | |
| "\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# IPL dataset of question-answer pairs\n", | |
| "ipl_qas = [\n", | |
| " (\"Which team won the first IPL season?\", \"Rajasthan Royals\"),\n", | |
| " (\"Who scored the highest individual score in IPL?\", \"Chris Gayle (175*)\"),\n", | |
| " (\"Who is the leading run-scorer in IPL history?\", \"Virat Kohli\"),\n", | |
| " (\"Who is the leading wicket-taker in IPL history?\", \"Lasith Malinga\"),\n", | |
| " (\"Which team has won the most IPL titles?\", \"Mumbai Indians\")\n", | |
| "]\n", | |
| "\n", | |
| "# Encode only the question text of each IPL pair\n", | |
| "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in ipl_qas]\n" | |
| ], | |
| "metadata": { | |
| "id": "fkUmgvaexFQS" | |
| }, | |
| "execution_count": 11, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Build a FAISS index for efficient similarity search\n", | |
| "dimension = encoded_qas[0].shape[0]\n", | |
| "index = faiss.IndexFlatL2(dimension)\n", | |
| "index.add(np.array(encoded_qas))\n", | |
| "\n", | |
| "# Implement a function to retrieve the top-k most similar question-answer pairs\n", | |
| "def retrieve_top_k(query, index, k):\n", | |
| " encoded_query = encode_text(query, tokenizer, model)\n", | |
| " distances, indices = index.search(np.array([encoded_query]), k)\n", | |
| " return [(ipl_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n", | |
| "\n", | |
| "# Implement a re-ranking function using another language model or a custom scoring function\n", | |
| "def re_rank(candidates, query):\n", | |
| " # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n", | |
| " # For simplicity, we'll just sort the candidates by their original distances\n", | |
| " return sorted(candidates, key=lambda x: x[1])\n", | |
| "\n", | |
| "# Test the retrieval and re-ranking with a sample query\n", | |
| "query = \"who is the caption of csk \"\n", | |
| "top_k = 3\n", | |
| "\n", | |
| "# Retrieve top-k candidates\n", | |
| "candidates = retrieve_top_k(query, index, top_k)\n", | |
| "\n", | |
| "# Re-rank the candidates\n", | |
| "re_ranked_candidates = re_rank(candidates, query)\n", | |
| "\n", | |
| "# Print the re-ranked results\n", | |
| "for qa, distance in re_ranked_candidates:\n", | |
| " print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "zOh-ThfSvP_P", | |
| "outputId": "6931c2cf-9e7f-4b7d-b666-6490a7867b07" | |
| }, | |
| "execution_count": 18, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Question: Who is the leading wicket-taker in IPL history?\n", | |
| "Answer: Lasith Malinga\n", | |
| "Distance: 0.02710317261517048\n", | |
| "\n", | |
| "Question: Who scored the highest individual score in IPL?\n", | |
| "Answer: Chris Gayle (175*)\n", | |
| "Distance: 0.02815784327685833\n", | |
| "\n", | |
| "Question: Which team has won the most IPL titles?\n", | |
| "Answer: Mumbai Indians\n", | |
| "Distance: 0.03018650971353054\n", | |
| "\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "! pip install wikipedia~=1.4.0" | |
| ], | |
| "metadata": { | |
| "id": "kg0IcRDXy6nv" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!pip install langchain~=0.1.16" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "eq934O_yy4w5", | |
| "outputId": "b1d4abc1-f876-4517-ec5c-2d973a8db9da" | |
| }, | |
| "execution_count": 23, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Collecting langchain~=0.1.16\n", | |
| " Downloading langchain-0.1.17-py3-none-any.whl (867 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m867.6/867.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (6.0.1)\n", | |
| "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.0.29)\n", | |
| "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (3.9.5)\n", | |
| "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (4.0.3)\n", | |
| "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain~=0.1.16)\n", | |
| " Downloading dataclasses_json-0.6.5-py3-none-any.whl (28 kB)\n", | |
| "Collecting jsonpatch<2.0,>=1.33 (from langchain~=0.1.16)\n", | |
| " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", | |
| "Collecting langchain-community<0.1,>=0.0.36 (from langchain~=0.1.16)\n", | |
| " Downloading langchain_community-0.0.36-py3-none-any.whl (2.0 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hCollecting langchain-core<0.2.0,>=0.1.48 (from langchain~=0.1.16)\n", | |
| " Downloading langchain_core-0.1.50-py3-none-any.whl (302 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.8/302.8 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain~=0.1.16)\n", | |
| " Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n", | |
| "Collecting langsmith<0.2.0,>=0.1.17 (from langchain~=0.1.16)\n", | |
| " Downloading langsmith-0.1.54-py3-none-any.whl (116 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.7/116.7 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (1.25.2)\n", | |
| "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.7.1)\n", | |
| "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (2.31.0)\n", | |
| "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain~=0.1.16) (8.2.3)\n", | |
| "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.3.1)\n", | |
| "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (23.2.0)\n", | |
| "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.4.1)\n", | |
| "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (6.0.5)\n", | |
| "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain~=0.1.16) (1.9.4)\n", | |
| "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
| " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
| " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", | |
| "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain~=0.1.16)\n", | |
| " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", | |
| "Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.48->langchain~=0.1.16)\n", | |
| " Downloading packaging-23.2-py3-none-any.whl (53 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain~=0.1.16)\n", | |
| " Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (0.6.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (2.18.2)\n", | |
| "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain~=0.1.16) (4.11.0)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.3.2)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (3.7)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2.0.7)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain~=0.1.16) (2024.2.2)\n", | |
| "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain~=0.1.16) (3.0.3)\n", | |
| "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain~=0.1.16)\n", | |
| " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", | |
| "Installing collected packages: packaging, orjson, mypy-extensions, jsonpointer, typing-inspect, marshmallow, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain\n", | |
| " Attempting uninstall: packaging\n", | |
| " Found existing installation: packaging 24.0\n", | |
| " Uninstalling packaging-24.0:\n", | |
| " Successfully uninstalled packaging-24.0\n", | |
| "Successfully installed dataclasses-json-0.6.5 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.17 langchain-community-0.0.36 langchain-core-0.1.50 langchain-text-splitters-0.0.1 langsmith-0.1.54 marshmallow-3.21.2 mypy-extensions-1.0.0 orjson-3.10.3 packaging-23.2 typing-inspect-0.9.0\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "\n", | |
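| "# NOTE: WikipediaLoader is used below but its import is commented out; re-enable it unless an earlier cell already imports it.\n", | |
| "# from langchain_community.document_loaders import WikipediaLoader\n", | |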
| "\n", | |
| "pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n", | |
| "\n", | |
| "import nltk\n", | |
| "from nltk.corpus import stopwords\n", | |
| "from nltk.tokenize import sent_tokenize, word_tokenize\n", | |
| "\n", | |
| "nltk.download(\"punkt\")\n", | |
| "nltk.download(\"stopwords\")\n", | |
| "\n", | |
| "# Load Wikipedia pages\n", | |
| "pages = WikipediaLoader(query=\"Harry Potter\", load_max_docs=3, lang=\"en\").load()\n", | |
| "\n", | |
| "# Preprocess the data\n", | |
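| "# Each sentence becomes one pair: the question is the sentence's alphanumeric non-stopword tokens, the answer is the full sentence\n", | |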
| "stop_words = set(stopwords.words(\"english\"))\n", | |
| "wiki_qas = []\n", | |
| "\n", | |
| "for page in pages:\n", | |
| " sentences = sent_tokenize(page.page_content)\n", | |
| " for sentence in sentences:\n", | |
| " words = word_tokenize(sentence)\n", | |
| " filtered_words = [word for word in words if word.lower() not in stop_words and word.isalnum()]\n", | |
| " question = \" \".join(filtered_words)\n", | |
| " answer = sentence\n", | |
| " if answer.strip(): # Check if the answer is not empty\n", | |
| " wiki_qas.append((question, answer))\n", | |
| "\n", | |
| "# Encode only the keyword question of each pair\n", | |
| "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n", | |
| "\n", | |
| "encoded_qas = [encode_text(qa[0], tokenizer, model) for qa in wiki_qas]\n", | |
| "\n", | |
| "# Build a FAISS index for efficient similarity search\n", | |
| "dimension = encoded_qas[0].shape[0]\n", | |
| "index = faiss.IndexFlatL2(dimension)\n", | |
| "index.add(np.array(encoded_qas))\n", | |
| "\n", | |
| "# re_rank is redefined below with an extra num_candidates argument; retrieve_top_k is redefined in the next cell to look up wiki_qas\n", | |
| "\n", | |
| "# Test the retrieval and re-ranking with a sample query\n", | |
| "query = \"Who is Harry Potter?\"\n", | |
| "top_k = 3\n", | |
| "\n", | |
| "def re_rank(candidates, query, num_candidates):\n", | |
| " # You can implement your re-ranking logic here, e.g., using an ensemble model or custom scoring\n", | |
| " # For simplicity, we'll just sort the candidates by their original distances\n", | |
| " return sorted(candidates[:num_candidates], key=lambda x: x[1])\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "mbMysjGWyLpJ", | |
| "outputId": "e7d942ed-d891-4a12-9d79-9543ffc87cfa" | |
| }, | |
| "execution_count": 27, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "[nltk_data] Downloading package punkt to /root/nltk_data...\n", | |
| "[nltk_data] Package punkt is already up-to-date!\n", | |
| "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", | |
| "[nltk_data] Package stopwords is already up-to-date!\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def retrieve_top_k(query, index, k):\n", | |
| " encoded_query = encode_text(query, tokenizer, model)\n", | |
| " distances, indices = index.search(np.array([encoded_query]), k)\n", | |
| " return [(wiki_qas[i], distances[0][j]) for j, i in enumerate(indices[0])]\n" | |
| ], | |
| "metadata": { | |
| "id": "3sm9-aFA09Ts" | |
| }, | |
| "execution_count": 29, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "candidates = retrieve_top_k(query, index, len(encoded_qas))\n", | |
| "\n", | |
| "# Re-rank the candidates\n", | |
| "num_candidates = min(top_k, len(candidates))\n", | |
| "re_ranked_candidates = re_rank(candidates, query, num_candidates)\n", | |
| "\n", | |
| "# Print the re-ranked results\n", | |
| "for qa, distance in re_ranked_candidates:\n", | |
| " print(f\"Question: {qa[0]}\\nAnswer: {qa[1]}\\nDistance: {distance}\\n\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "bL4aAqNy0g6D", | |
| "outputId": "ff925a1d-a699-4652-91d6-1ae8ea6f42e0" | |
| }, | |
| "execution_count": 30, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Question: series mainly produced David Heyman stars Daniel Radcliffe Rupert Grint Emma Watson three leading characters Harry Potter Ron Weasley Hermione Granger\n", | |
| "Answer: The series was mainly produced by David Heyman, and stars Daniel Radcliffe, Rupert Grint, and Emma Watson as the three leading characters: Harry Potter, Ron Weasley, and Hermione Granger.\n", | |
| "Distance: 7.031151199043961e-06\n", | |
| "\n", | |
| "Question: first book Harry Potter Philosopher Stone Harry lives cupboard stairs house Dursleys aunt uncle cousin treat poorly\n", | |
| "Answer: In the first book, Harry Potter and the Philosopher's Stone, Harry lives in a cupboard under the stairs in the house of the Dursleys, his aunt, uncle and cousin, who all treat him poorly.\n", | |
| "Distance: 3.793924770434387e-05\n", | |
| "\n", | |
| "Question: Harry Potter series used source object lessons educational techniques sociological analysis marketing\n", | |
| "Answer: The Harry Potter series has been used as a source of object lessons in educational techniques, sociological analysis and marketing.\n", | |
| "Distance: 0.00018532716785557568\n", | |
| "\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [], | |
| "metadata": { | |
| "id": "VqenpHe3yK4a" | |
| } | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment