danishi · July 23, 2024 01:53
diff --git a/Get_answers_from_Gemini_with_context_caching_across_your_Git_repositories.ipynb b/Get_answers_from_Gemini_with_context_caching_across_your_Git_repositories.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Get answers from Gemini with context caching across your Git repositories\n",
        "## Reference\n",
        "* [Practical Guide: Using Gemini Context Caching with Large Codebases](https://medium.com/google-cloud/practical-guide-using-gemini-context-caching-with-large-codebases-08d46d946c3d)\n"
      ],
      "metadata": {
        "id": "RumJursAlRAA"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "4l1LpSkalQPW"
      },
      "outputs": [],
      "source": [
        "%pip install GitPython==3.1.43"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import git\n",
        "import os\n",
        "\n",
        "def list_and_read_repo_files(repo_url, branch=\"main\"):\n",
        "    \"\"\"\n",
        "    Clones a Git repository and reads every UTF-8 text file in its working tree.\n",
        "\n",
        "    The clone lives in a scratch directory named \"temp_repo\" (relative to the\n",
        "    current working directory) that is deleted again before returning.\n",
        "\n",
        "    Args:\n",
        "        repo_url (str): URL of the Git repository.\n",
        "        branch (str, optional): Branch to checkout. Defaults to \"main\".\n",
        "\n",
        "    Returns:\n",
        "        dict | None: Maps each file path (including the \"temp_repo\" prefix) to\n",
        "        the file's text. None is returned when cloning fails or another\n",
        "        unexpected error occurs; the error is printed rather than raised.\n",
        "    \"\"\"\n",
        "    # Temporary directory for the clone\n",
        "    repo_dir = \"temp_repo\"\n",
        "\n",
        "    try:\n",
        "        # A clone left behind by an interrupted run would make clone_from fail.\n",
        "        if os.path.exists(repo_dir):\n",
        "            git.rmtree(repo_dir)\n",
        "\n",
        "        # Clone the repository\n",
        "        print(f\"Cloning repository from {repo_url}...\")\n",
        "        git.Repo.clone_from(repo_url, repo_dir, branch=branch)\n",
        "        print(\"Cloning complete!\")\n",
        "\n",
        "        file_contents = {}\n",
        "        for root, dirs, files in os.walk(repo_dir):\n",
        "            # Skip only the \".git\" metadata directory. Pruning it in place stops\n",
        "            # os.walk from descending into it, and the exact-name match keeps\n",
        "            # directories such as \".github\" whose names merely contain \".git\".\n",
        "            dirs[:] = [d for d in dirs if d != \".git\"]\n",
        "            for file in files:\n",
        "                file_path = os.path.join(root, file)\n",
        "                try:\n",
        "                    with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
        "                        file_contents[file_path] = f.read()\n",
        "                except (UnicodeDecodeError, OSError) as e:\n",
        "                    # Binary or unreadable files are skipped, not fatal.\n",
        "                    print(f\"Skipping {file_path}: {e}\")\n",
        "\n",
        "        return file_contents\n",
        "\n",
        "    except git.exc.GitCommandError as e:\n",
        "        print(f\"Error cloning repository: {e}\")\n",
        "    except Exception as e:  # Catch any unexpected errors\n",
        "        print(f\"An unexpected error occurred: {e}\")\n",
        "    finally:\n",
        "        # Clean up the temporary repository directory\n",
        "        if os.path.exists(repo_dir):\n",
        "            git.rmtree(repo_dir)"
      ],
      "metadata": {
        "id": "qzNXG5QZlkXZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "repo_url = \"https://github.com/danishi/dynamodb-csv.git\"  # Replace with actual URL\n",
        "branch = \"master\"  # Replace if different\n",
        "\n",
        "file_data = list_and_read_repo_files(repo_url, branch)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "UOayP8zklmRk",
        "outputId": "544d72e2-9aab-4e34-9ae6-f15db2d1498f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning repository from https://github.com/danishi/dynamodb-csv.git...\n",
            "Cloning complete!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "if file_data:\n",
        "    output_file = \"fullcode.text\"\n",
        "    # Each file's text is wrapped in <file path=...>...</file> and appended to\n",
        "    # one output file. The with-statement closes the file on exit, so no\n",
        "    # explicit close() call is needed.\n",
        "    with open(output_file, \"w\", encoding=\"utf-8\") as outfile:\n",
        "        for file_path, content in file_data.items():\n",
        "            outfile.write(f\"<file path={file_path}>\")\n",
        "            outfile.write(content)\n",
        "            outfile.write(\"</file>\")"
      ],
      "metadata": {
        "id": "cgdTUsNEl-RI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "size_in_bytes = os.path.getsize(output_file)\n",
        "\n",
        "print(f\"File Size: {size_in_bytes/1024/1024} MB\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FPnWPKRfm84O",
        "outputId": "58ef420d-066b-45db-f6dc-3f63a76059ca"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "File Size: 0.09016227722167969 MB\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%pip install --upgrade google-cloud-aiplatform==1.59.0"
      ],
      "metadata": {
        "id": "5l1zpPk_nG3v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import auth\n",
        "auth.authenticate_user()"
      ],
      "metadata": {
        "id": "0IAcilUFn8DM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import vertexai\n",
        "from vertexai.preview import caching\n",
        "\n",
        "project_id = \"<USE YOUR PROJECT ID HERE>\"\n",
        "\n",
        "vertexai.init(project=project_id, location=\"asia-northeast1\")\n",
        "\n",
        "system_instruction = \"\"\"\n",
        "あなたは優秀なソフトウェアエンジニアです。提供されたソース内の事実に常に従い、新しい事実を作り上げることは決してありません。\n",
        "では、このプロジェクトのコードベースを見て、次の質問に答えてください。\n",
        "\"\"\""
      ],
      "metadata": {
        "id": "Gg5eyVisnXph"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Read the concatenated dump back in; it becomes the single cached content part.\n",
        "with open(\"fullcode.text\", \"r\", encoding=\"utf-8\") as f:\n",
        "    fullcode_as_string = f.read()\n",
        "\n",
        "contents = [fullcode_as_string]"
      ],
      "metadata": {
        "id": "BttyzCDonzKY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import datetime\n",
        "\n",
        "cached_content = caching.CachedContent.create(\n",
        "    model_name=\"gemini-1.5-pro-001\",\n",
        "    system_instruction=system_instruction,\n",
        "    contents=contents,\n",
        "    ttl=datetime.timedelta(minutes=10),\n",
        ")"
      ],
      "metadata": {
        "id": "JOpqJgVSn0Iz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from vertexai.preview.generative_models import GenerativeModel\n",
        "\n",
        "cache_id=cached_content.name\n",
        "cached_content = caching.CachedContent(cached_content_name=cache_id)\n",
        "\n",
        "model = GenerativeModel.from_cached_content(cached_content=cached_content)"
      ],
      "metadata": {
        "id": "B_337eiuoIAD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from IPython.display import Markdown\n",
        "\n",
        "response = model.generate_content(\"このプロジェクトは何をしていますか？\")\n",
        "Markdown(response.text)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 713
        },
        "id": "H00HA2JOoQ7c",
        "outputId": "d8c2a74c-be4c-41cc-dfdb-397aae398d62"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ],
            "text/markdown": "このPythonプロジェクトは、CSVファイルとAmazon DynamoDBテーブル間でデータをインポート/エクスポートするためのコマンドラインユーティリティ`dynamodb-csv`を提供します。\n\n**機能**\n\n- **CSVインポート**: CSVファイルをDynamoDBテーブルにインポートします。バッチ書き込みにより高速処理を実現し、キー スキーマの不一致などのエラーが発生した場合にCSVレコードを無視するオプションを提供します。空の値をNullに変換したり、属性自体を設定しないなどのインポートオプションもサポートしています。\n- **CSVエクスポート**: DynamoDBテーブルからCSVファイルにデータをエクスポートします。インデックスやクエリを使用してエクスポートするデータを選択できます。\n- **テーブルの切り捨て**: DynamoDBテーブルからすべてのアイテムを削除します。この操作は元に戻せないので注意が必要です。\n- **テーブルの移動**: あるテーブルから別のテーブルにすべてのアイテムを移動します。事前に同じスキーマを持つテーブルを作成しておく必要があります。テーブルのアイテムは削除されず、コピーのように動作します。\n\n**使い方**\n\n1. **インストール**: `pip install dynamodb-csv`\n2. **設定**: 現在のディレクトリに`config.ini`ファイルを作成し、AWSの認証情報とDynamoDBテーブルのリージョンを指定します。\n3. **CSVとCSV仕様ファイルの作成**: インポートまたはエクスポートするデータの形式を定義するCSVファイルとCSV仕様ファイル（`.spec`拡張子）を作成します。\n4. **DynamoDBテーブルの作成**: CSV仕様に準拠したDynamoDBテーブルを作成します。\n5. **コマンドの実行**: `dynamodb-csv`コマンドを使用して、インポート、エクスポート、切り捨て、または移動の操作を実行します。\n\n**例**\n\n```\n# CSVファイルをDynamoDBテーブルにインポートする\ndynamodb-csv -i -t my_table -f sample.csv\n\n# DynamoDBテーブルをCSVファイルにエクスポートする\ndynamodb-csv -e -t my_table -o sample_exp.csv\n\n# DynamoDBテーブルを切り捨てる\ndynamodb-csv --truncate -t my_table\n\n# あるテーブルから別のテーブルにアイテムを移動する\ndynamodb-csv --move -t my_table_from my_table_to\n```\n\n**追加機能**\n\n- Dockerイメージを使用してユーティリティを実行できます。\n- 開発者向けに、仮想環境の設定とインストール手順が提供されています。\n\n**ライセンス**\n\nMITライセンス"
          },
          "metadata": {},
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "response.usage_metadata"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "o_VJLl9uoXli",
        "outputId": "1b5dfae2-cc41-4721-8745-33fad4868d6f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "prompt_token_count: 36290\n",
              "candidates_token_count: 526\n",
              "total_token_count: 36816"
            ]
          },
          "metadata": {},
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "response = model.generate_content(\"インポート処理のロジックを詳細に解説して\")\n",
        "Markdown(response.text)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 790
        },
        "id": "brYXmWoapArV",
        "outputId": "2fdbfa2d-d833-41dc-a94a-92766669a8bb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ],
            "text/markdown": "`app/dynamodb/csv_import.py` ファイルの `csv_import` 関数は、CSV ファイルを DynamoDB テーブルにインポートするロジックを実装しています。\n\n**ステップ 1: CSV 仕様の読み込み**\n1. 最初に、CSV ファイルに対応する `.spec` ファイルを読み込みます。このファイルには、CSV の各カラムのデータ型や区切り文字などの情報が定義されています。\n2. `configparser` を使用して `.spec` ファイルを読み込み、`csv_spec` オブジェクトに格納します。\n3. `DELIMITER_OPTION` セクションが存在する場合は、`DelimiterCharacter` オプションの値を区切り文字として取得します。存在しない場合は、デフォルトの区切り文字としてスペースが使用されます。\n\n**ステップ 2: CSV ファイルの読み込み**\n1. CSV ファイルを UTF-8 エンコーディングで開き、`csv.DictReader` を使用して各行を辞書として読み込みます。\n2. `tqdm` を使用して、インポートの進捗状況を表示します。\n\n**ステップ 3: データの変換とバッチ処理**\n1. 各行に対して、`IMPORT_OPTION` セクションで指定された変換オプションを適用します。\n    - `ConvertBlankToNullAttrs` オプション: 指定されたカラムの値が空欄の場合、`None` に変換します。\n    - `ConvertBlankToDropAttrs` オプション: 指定されたカラムの値が空欄の場合、カラム自体を削除します。\n2. 各カラムの値を、CSV 仕様で定義されたデータ型に変換します。\n    - `convert_column` 関数を使用して、各カラムの値を適切なデータ型 (文字列、整数、10進数、ブール値、JSON、文字列リスト、文字列セット、10進数リスト、10進数セット) に変換します。\n3. 変換された行データを `batch` リストに追加します。\n4. `batch` リストのサイズが `batch_size` に達するか、すべての行が処理されたら、`write_to_dynamo` 関数を使用して DynamoDB に書き込みます。\n\n**ステップ 4: DynamoDB への書き込み**\n1. `write_to_dynamo` 関数は、`batch` リスト内の各行データを DynamoDB テーブルに書き込みます。\n2. `ignore` フラグが True の場合、各行を個別に `put_item` で書き込みます。エラーが発生した場合は無視され、インポート処理は続行されます。\n3. `ignore` フラグが False の場合、`batch_writer` を使用してバッチ書き込みを行います。\n    - `overwrite_by_pkeys` オプションを使用して、重複する主キーを持つアイテムを上書きします。\n    - エラーが発生した場合は、エラーメッセージを出力します。\n\n**ステップ 5: 結果の返却**\n1. すべての行が処理されたら、インポートされたアイテム数とエラーが発生したアイテム数をメッセージとして返却します。\n\n**追加情報**\n\n- `csv_import` 関数は、最大 25 個のアイテムを一度にバッチ書き込みできます。これは、DynamoDB の `batch_write_item` 操作の制限によるものです。\n- `ignore` フラグを使用すると、エラーが発生したアイテムを無視してインポート処理を続行できます。ただし、この場合、バッチ書き込みは使用されません。\n- CSV 仕様ファイルは、CSV ファイルと同じディレクトリに配置し、ファイル名に `.spec` を追加する必要があります。\n\nこのインポート処理のロジックにより、CSV ファイルから DynamoDB テーブルにデータを効率的かつ安全にインポートできます。\n"
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "response = model.generate_content(\"このリポジトリの改善点を教えて\")\n",
        "Markdown(response.text)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 588
        },
        "id": "mUqD7XAopmWR",
        "outputId": "6e0933a2-8a53-4dca-b766-09c62febb804"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ],
            "text/markdown": "このリポジトリのコードベースに基づいて、いくつかの改善点を提案します。\n\n**エラー処理**\n\n* いくつかの関数のエラー処理は改善の余地があります。例えば、`app/dynamodb/csv_import.py` の `write_to_dynamo` 関数では、 `Exception` をキャッチしていますが、エラーの種類を特定していません。より具体的なエラータイプをキャッチすることで、エラー処理がより正確になります。\n* `app/dynamodb/csv_export.py` の `csv_export` 関数では、`IOError` と `Exception` をキャッチしていますが、具体的なエラーの内容を表示していません。エラーメッセージに詳細情報を追加することで、デバッグが容易になります。\n\n**型ヒント**\n\n* コードベース全体で型ヒントが使用されていますが、一部の関数では型ヒントが不足しています。例えば、`app/main.py` の `execute` 関数の戻り値は `Tuple` となっていますが、具体的な型を指定する方がより良いでしょう。\n* 型ヒントを追加することで、コードの可読性と保守性を向上させることができます。\n\n**テスト**\n\n* ユニットテストが提供されていますが、テストカバレッジを向上させることができます。例えば、 `app/main.py` の `config_read_and_get_table` 関数はテストされていません。\n* 統合テストを追加することで、モジュール間の連携を確認することができます。\n\n**ドキュメント**\n\n* README.mdは詳細な情報を含んでいますが、いくつかの点が改善できます。\n    * 各関数の詳細な説明を追加することで、ユーザーがコードを理解しやすくなります。\n    * 例外処理の詳細を説明することで、ユーザーがエラーを適切に処理できるようになります。\n* コードベース全体にdocstringを追加することで、コードの理解を深めることができます。\n\n**コードスタイル**\n\n* コードベース全体で一貫したコードスタイルを採用することで、コードの可読性を向上させることができます。Pythonのコーディング規約に従うことをお勧めします (PEP 8)。\n\n**機能**\n\n* バイナリタイプとバイナリセットタイプのサポートを追加すると、ツールの汎用性が向上します。\n* CSVファイルのヘッダー行を無視するオプションを追加すると、柔軟性が高まります。\n* DynamoDBテーブルを更新する機能を追加すると、より多くのユースケースに対応できます。\n\nこれらの改善により、リポジトリの品質と使いやすさが向上すると考えられます。"
          },
          "metadata": {},
          "execution_count": 22
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": []
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"source": [
	"# Get answers from Gemini with context caching across your Git repositories\n",
	"## Reference\n",
	"* [Practical Guide: Using Gemini Context Caching with Large Codebases](https://medium.com/google-cloud/practical-guide-using-gemini-context-caching-with-large-codebases-08d46d946c3d)\n"
	],
	"metadata": {
	"id": "RumJursAlRAA"
	}
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "4l1LpSkalQPW"
	},
	"outputs": [],
	"source": [
	"%pip install GitPython==3.1.43"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"import git\n",
	"import os\n",
	"\n",
	"def list_and_read_repo_files(repo_url, branch=\"main\"):\n",
	"    \"\"\"\n",
	"    Clones a Git repository and reads every UTF-8 text file in its working tree.\n",
	"\n",
	"    The clone lives in a scratch directory named \"temp_repo\" (relative to the\n",
	"    current working directory) that is deleted again before returning.\n",
	"\n",
	"    Args:\n",
	"        repo_url (str): URL of the Git repository.\n",
	"        branch (str, optional): Branch to checkout. Defaults to \"main\".\n",
	"\n",
	"    Returns:\n",
	"        dict | None: Maps each file path (including the \"temp_repo\" prefix) to\n",
	"        the file's text. None is returned when cloning fails or another\n",
	"        unexpected error occurs; the error is printed rather than raised.\n",
	"    \"\"\"\n",
	"    # Temporary directory for the clone\n",
	"    repo_dir = \"temp_repo\"\n",
	"\n",
	"    try:\n",
	"        # A clone left behind by an interrupted run would make clone_from fail.\n",
	"        if os.path.exists(repo_dir):\n",
	"            git.rmtree(repo_dir)\n",
	"\n",
	"        # Clone the repository\n",
	"        print(f\"Cloning repository from {repo_url}...\")\n",
	"        git.Repo.clone_from(repo_url, repo_dir, branch=branch)\n",
	"        print(\"Cloning complete!\")\n",
	"\n",
	"        file_contents = {}\n",
	"        for root, dirs, files in os.walk(repo_dir):\n",
	"            # Skip only the \".git\" metadata directory. Pruning it in place stops\n",
	"            # os.walk from descending into it, and the exact-name match keeps\n",
	"            # directories such as \".github\" whose names merely contain \".git\".\n",
	"            dirs[:] = [d for d in dirs if d != \".git\"]\n",
	"            for file in files:\n",
	"                file_path = os.path.join(root, file)\n",
	"                try:\n",
	"                    with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
	"                        file_contents[file_path] = f.read()\n",
	"                except (UnicodeDecodeError, OSError) as e:\n",
	"                    # Binary or unreadable files are skipped, not fatal.\n",
	"                    print(f\"Skipping {file_path}: {e}\")\n",
	"\n",
	"        return file_contents\n",
	"\n",
	"    except git.exc.GitCommandError as e:\n",
	"        print(f\"Error cloning repository: {e}\")\n",
	"    except Exception as e:  # Catch any unexpected errors\n",
	"        print(f\"An unexpected error occurred: {e}\")\n",
	"    finally:\n",
	"        # Clean up the temporary repository directory\n",
	"        if os.path.exists(repo_dir):\n",
	"            git.rmtree(repo_dir)"
	],
	"metadata": {
	"id": "qzNXG5QZlkXZ"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"repo_url = \"https://github.com/danishi/dynamodb-csv.git\" # Replace with actual URL\n",
	"branch = \"master\" # Replace if different\n",
	"\n",
	"file_data = list_and_read_repo_files(repo_url, branch)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "UOayP8zklmRk",
	"outputId": "544d72e2-9aab-4e34-9ae6-f15db2d1498f"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Cloning repository from https://github.com/danishi/dynamodb-csv.git...\n",
	"Cloning complete!\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"if file_data:\n",
	"    output_file = \"fullcode.text\"\n",
	"    # Each file's text is wrapped in <file path=...>...</file> and appended to\n",
	"    # one output file. The with-statement closes the file on exit, so no\n",
	"    # explicit close() call is needed.\n",
	"    with open(output_file, \"w\", encoding=\"utf-8\") as outfile:\n",
	"        for file_path, content in file_data.items():\n",
	"            outfile.write(f\"<file path={file_path}>\")\n",
	"            outfile.write(content)\n",
	"            outfile.write(\"</file>\")"
	],
	"metadata": {
	"id": "cgdTUsNEl-RI"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"size_in_bytes = os.path.getsize(output_file)\n",
	"\n",
	"print(f\"File Size: {size_in_bytes/1024/1024} MB\")"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "FPnWPKRfm84O",
	"outputId": "58ef420d-066b-45db-f6dc-3f63a76059ca"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"File Size: 0.09016227722167969 MB\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"%pip install --upgrade google-cloud-aiplatform==1.59.0"
	],
	"metadata": {
	"id": "5l1zpPk_nG3v"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from google.colab import auth\n",
	"auth.authenticate_user()"
	],
	"metadata": {
	"id": "0IAcilUFn8DM"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"import vertexai\n",
	"from vertexai.preview import caching\n",
	"\n",
	"project_id = \"<USE YOUR PROJECT ID HERE>\"\n",
	"\n",
	"vertexai.init(project=project_id, location=\"asia-northeast1\")\n",
	"\n",
	"system_instruction = \"\"\"\n",
	"あなたは優秀なソフトウェアエンジニアです。提供されたソース内の事実に常に従い、新しい事実を作り上げることは決してありません。\n",
	"では、このプロジェクトのコードベースを見て、次の質問に答えてください。\n",
	"\"\"\""
	],
	"metadata": {
	"id": "Gg5eyVisnXph"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"with open(\"fullcode.text\", \"r\", encoding=\"utf-8\") as f:\n",
	" fullcode_as_string = f.read()\n",
	"\n",
	"contents = [\n",
	" fullcode_as_string\n",
	"]"
	],
	"metadata": {
	"id": "BttyzCDonzKY"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"import datetime\n",
	"\n",
	"cached_content = caching.CachedContent.create(\n",
	" model_name=\"gemini-1.5-pro-001\",\n",
	" system_instruction=system_instruction,\n",
	" contents=contents,\n",
	" ttl=datetime.timedelta(minutes=10),\n",
	")"
	],
	"metadata": {
	"id": "JOpqJgVSn0Iz"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from vertexai.preview.generative_models import GenerativeModel\n",
	"\n",
	"cache_id=cached_content.name\n",
	"cached_content = caching.CachedContent(cached_content_name=cache_id)\n",
	"\n",
	"model = GenerativeModel.from_cached_content(cached_content=cached_content)"
	],
	"metadata": {
	"id": "B_337eiuoIAD"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from IPython.display import Markdown\n",
	"\n",
	"response = model.generate_content(\"このプロジェクトは何をしていますか？\")\n",
	"Markdown(response.text)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 713
	},
	"id": "H00HA2JOoQ7c",
	"outputId": "d8c2a74c-be4c-41cc-dfdb-397aae398d62"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<IPython.core.display.Markdown object>"
	],
	"text/markdown": "このPythonプロジェクトは、CSVファイルとAmazon DynamoDBテーブル間でデータをインポート/エクスポートするためのコマンドラインユーティリティ`dynamodb-csv`を提供します。\n\n機能\n\n- CSVインポート: CSVファイルをDynamoDBテーブルにインポートします。バッチ書き込みにより高速処理を実現し、キースキーマの不一致などのエラーが発生した場合にCSVレコードを無視するオプションを提供します。空の値をNullに変換したり、属性自体を設定しないなどのインポートオプションもサポートしています。\n- CSVエクスポート: DynamoDBテーブルからCSVファイルにデータをエクスポートします。インデックスやクエリを使用してエクスポートするデータを選択できます。\n- テーブルの切り捨て: DynamoDBテーブルからすべてのアイテムを削除します。この操作は元に戻せないので注意が必要です。\n- テーブルの移動: あるテーブルから別のテーブルにすべてのアイテムを移動します。事前に同じスキーマを持つテーブルを作成しておく必要があります。テーブルのアイテムは削除されず、コピーのように動作します。\n\n使い方\n\n1. インストール: `pip install dynamodb-csv`\n2. 設定: 現在のディレクトリに`config.ini`ファイルを作成し、AWSの認証情報とDynamoDBテーブルのリージョンを指定します。\n3. CSVとCSV仕様ファイルの作成: インポートまたはエクスポートするデータの形式を定義するCSVファイルとCSV仕様ファイル（`.spec`拡張子）を作成します。\n4. DynamoDBテーブルの作成: CSV仕様に準拠したDynamoDBテーブルを作成します。\n5. コマンドの実行: `dynamodb-csv`コマンドを使用して、インポート、エクスポート、切り捨て、または移動の操作を実行します。\n\n例\n\n```\n# CSVファイルをDynamoDBテーブルにインポートする\ndynamodb-csv -i -t my_table -f sample.csv\n\n# DynamoDBテーブルをCSVファイルにエクスポートする\ndynamodb-csv -e -t my_table -o sample_exp.csv\n\n# DynamoDBテーブルを切り捨てる\ndynamodb-csv --truncate -t my_table\n\n# あるテーブルから別のテーブルにアイテムを移動する\ndynamodb-csv --move -t my_table_from my_table_to\n```\n\n追加機能\n\n- Dockerイメージを使用してユーティリティを実行できます。\n- 開発者向けに、仮想環境の設定とインストール手順が提供されています。\n\nライセンス\n\nMITライセンス"
	},
	"metadata": {},
	"execution_count": 18
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"response.usage_metadata"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "o_VJLl9uoXli",
	"outputId": "1b5dfae2-cc41-4721-8745-33fad4868d6f"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"prompt_token_count: 36290\n",
	"candidates_token_count: 526\n",
	"total_token_count: 36816"
	]
	},
	"metadata": {},
	"execution_count": 19
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"response = model.generate_content(\"インポート処理のロジックを詳細に解説して\")\n",
	"Markdown(response.text)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 790
	},
	"id": "brYXmWoapArV",
	"outputId": "2fdbfa2d-d833-41dc-a94a-92766669a8bb"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<IPython.core.display.Markdown object>"
	],
	"text/markdown": "`app/dynamodb/csv_import.py` ファイルの `csv_import` 関数は、CSV ファイルを DynamoDB テーブルにインポートするロジックを実装しています。\n\nステップ 1: CSV 仕様の読み込み\n1. 最初に、CSV ファイルに対応する `.spec` ファイルを読み込みます。このファイルには、CSV の各カラムのデータ型や区切り文字などの情報が定義されています。\n2. `configparser` を使用して `.spec` ファイルを読み込み、`csv_spec` オブジェクトに格納します。\n3. `DELIMITER_OPTION` セクションが存在する場合は、`DelimiterCharacter` オプションの値を区切り文字として取得します。存在しない場合は、デフォルトの区切り文字としてスペースが使用されます。\n\nステップ 2: CSV ファイルの読み込み\n1. CSV ファイルを UTF-8 エンコーディングで開き、`csv.DictReader` を使用して各行を辞書として読み込みます。\n2. `tqdm` を使用して、インポートの進捗状況を表示します。\n\nステップ 3: データの変換とバッチ処理\n1. 各行に対して、`IMPORT_OPTION` セクションで指定された変換オプションを適用します。\n - `ConvertBlankToNullAttrs` オプション: 指定されたカラムの値が空欄の場合、`None` に変換します。\n - `ConvertBlankToDropAttrs` オプション: 指定されたカラムの値が空欄の場合、カラム自体を削除します。\n2. 各カラムの値を、CSV 仕様で定義されたデータ型に変換します。\n - `convert_column` 関数を使用して、各カラムの値を適切なデータ型 (文字列、整数、10進数、ブール値、JSON、文字列リスト、文字列セット、10進数リスト、10進数セット) に変換します。\n3. 変換された行データを `batch` リストに追加します。\n4. `batch` リストのサイズが `batch_size` に達するか、すべての行が処理されたら、`write_to_dynamo` 関数を使用して DynamoDB に書き込みます。\n\nステップ 4: DynamoDB への書き込み\n1. `write_to_dynamo` 関数は、`batch` リスト内の各行データを DynamoDB テーブルに書き込みます。\n2. `ignore` フラグが True の場合、各行を個別に `put_item` で書き込みます。エラーが発生した場合は無視され、インポート処理は続行されます。\n3. `ignore` フラグが False の場合、`batch_writer` を使用してバッチ書き込みを行います。\n - `overwrite_by_pkeys` オプションを使用して、重複する主キーを持つアイテムを上書きします。\n - エラーが発生した場合は、エラーメッセージを出力します。\n\nステップ 5: 結果の返却\n1. すべての行が処理されたら、インポートされたアイテム数とエラーが発生したアイテム数をメッセージとして返却します。\n\n追加情報\n\n- `csv_import` 関数は、最大 25 個のアイテムを一度にバッチ書き込みできます。これは、DynamoDB の `batch_write_item` 操作の制限によるものです。\n- `ignore` フラグを使用すると、エラーが発生したアイテムを無視してインポート処理を続行できます。ただし、この場合、バッチ書き込みは使用されません。\n- CSV 仕様ファイルは、CSV ファイルと同じディレクトリに配置し、ファイル名に `.spec` を追加する必要があります。\n\nこのインポート処理のロジックにより、CSV ファイルから DynamoDB テーブルにデータを効率的かつ安全にインポートできます。\n"
	},
	"metadata": {},
	"execution_count": 20
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"response = model.generate_content(\"このリポジトリの改善点を教えて\")\n",
	"Markdown(response.text)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 588
	},
	"id": "mUqD7XAopmWR",
	"outputId": "6e0933a2-8a53-4dca-b766-09c62febb804"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<IPython.core.display.Markdown object>"
	],
	"text/markdown": "このリポジトリのコードベースに基づいて、いくつかの改善点を提案します。\n\nエラー処理\n\n* いくつかの関数のエラー処理は改善の余地があります。例えば、`app/dynamodb/csv_import.py` の `write_to_dynamo` 関数では、 `Exception` をキャッチしていますが、エラーの種類を特定していません。より具体的なエラータイプをキャッチすることで、エラー処理がより正確になります。\n* `app/dynamodb/csv_export.py` の `csv_export` 関数では、`IOError` と `Exception` をキャッチしていますが、具体的なエラーの内容を表示していません。エラーメッセージに詳細情報を追加することで、デバッグが容易になります。\n\n型ヒント\n\n* コードベース全体で型ヒントが使用されていますが、一部の関数では型ヒントが不足しています。例えば、`app/main.py` の `execute` 関数の戻り値は `Tuple` となっていますが、具体的な型を指定する方がより良いでしょう。\n* 型ヒントを追加することで、コードの可読性と保守性を向上させることができます。\n\nテスト\n\n* ユニットテストが提供されていますが、テストカバレッジを向上させることができます。例えば、 `app/main.py` の `config_read_and_get_table` 関数はテストされていません。\n* 統合テストを追加することで、モジュール間の連携を確認することができます。\n\nドキュメント\n\n* README.mdは詳細な情報を含んでいますが、いくつかの点が改善できます。\n * 各関数の詳細な説明を追加することで、ユーザーがコードを理解しやすくなります。\n * 例外処理の詳細を説明することで、ユーザーがエラーを適切に処理できるようになります。\n* コードベース全体にdocstringを追加することで、コードの理解を深めることができます。\n\nコードスタイル\n\n* コードベース全体で一貫したコードスタイルを採用することで、コードの可読性を向上させることができます。Pythonのコーディング規約に従うことをお勧めします (PEP 8)。\n\n機能\n\n* バイナリタイプとバイナリセットタイプのサポートを追加すると、ツールの汎用性が向上します。\n* CSVファイルのヘッダー行を無視するオプションを追加すると、柔軟性が高まります。\n* DynamoDBテーブルを更新する機能を追加すると、より多くのユースケースに対応できます。\n\nこれらの改善により、リポジトリの品質と使いやすさが向上すると考えられます。"
	},
	"metadata": {},
	"execution_count": 22
	}
	]
	}
	]
	}
No results found