• <xmp id="om0om">
  • <table id="om0om"><noscript id="om0om"></noscript></table>
  • Generative AI

    NVIDIA NeMo ???????? ???? ??? ????? AI ???? ????

    Reading Time: 8 minutes

    ?? ???? ???? ???? ????, ?? ??? ??? ?? AI ???? ???? ???? ? ??? ?? ??? ?????. ???? ????? ????? ?? ???? ???? AI ???? ?? ? ?????, ??? ???? ???? ???? ??? ??? ???? ?? ????? ???? ?? ?? ???????.

    ?? ???? NVIDIA NeMo ???????? ??? ??? ????? ?? ???? ???? ??? ?????. ?? ?????? ???? ?? ?? ??? ??? ?????.

    NeMo ???????? ??? ???? ?? ? ???? ?? ??? ??? ??? ?????? ????, Maximize AI Agent Performance Using NVIDIA NeMo Microservices ??? ??????.

    ???? AI ?? ??? ?? ??, ??? ????

    ??? ????? ?? ???? ?? ?????. ????? ????? ?? ??? ???? AI ??? ??? ?????, ??? ??? ? ???? ??? ??? ???? ? ?? ???? ??????. ?? ?? ???? ?? ???? ?? ???? ???? ??? ???, ??? ???? ?????. ?? ??? ??? ????? ??? ?? ???? ??? ??? ??? ??? ??? ?????.

    ?? 1. ?????? AI ??? ????? ???? ??????.

    ??? ??? ??????? ????? ???? ?? ??? ?????, ???? ?? ???? ??? ????? ???? ?? ??????.

    ???? ??? ???

    ?? ?? ???? AI ??????? ???? ?? ?? ? ??? ?? ???????. ?? ??, ??? ??? ?? ????? ????? AI ????? ?????, ? ????? ???? ?? ???, ???? ??, ???? ?? ?? ?? ??? ???? ????? ?????. ??? ??? ???? ??? ? ?? ????? ???, ???? ???? ?? ??? ??? ????. ? ?? ??? ??? ????:

    • ?? ?? ?? ? ??? ???? ????
    • ??? ??? ?? ??? ??
    • ??? API? ?? ?? ??

    ?? ??, ?? ??? ???? ?? ?? ?? SQL(PostgreSQL) ??????? ???? ?? ??? ?? ?? ??(LLM) ????? ?????. ?? ??? ??? MongoDB ????? ????, ? ???? ?? ???? ?????, ?? ?????? ? ?? ??? ???. ???? ???? ?? ????, ????? ??? ?? ??? ?? ??? ???? ??, ?? ?? ??? ??? ?? ???? ??? ? ????. ????? ?? ??? ?????, ? ??? ?????? ???? ??? ? ????.

    ???? ?? ??

    AI ????? ?? ??? ??? ???? ???, ???? ???? ???? ?? ?? ????? ????. ??? ??? ???? ?????, ??? ???? ???? ? ??? ?? ?? ?? ??? ???? ????. ?? ???? ?? AI ???? ?? ?? ? ??? ??? ???? ?? ??? ??, ??–??–??? ???? ??? ???? ?? ? ?? ?? ??? ???? ???

    ?? ??, ????? ??? ?? ??? ???? ?? ?? ??? ???? ??? ???? ?? ??, ?? ?? ?? ??? ?? ?? 5~10? ??? ???? ??? ? ????. ??? ???? ???? ?? ??? ??? ??????? ???? ???.

    ??? ??? ???? ??, ??? ??? ??? ??? ??? ?? ??? ?? ?? ??? ???? ?????, ?? ??? ?????(TCO)? ????? ?? ? ????. ??, ??? ?? ???? ????? ???? ??, ??? ???? ???? ???? ?? ??? fine-tuning? ??? ????? ???? ??????, ??? ???? ??? ???? ?? ?????.

    ??? ????? ???? ?? NVIDIA NeMo ??????? ??

    NVIDIA NeMo ???????? ??? ????? ??? ? ?? ?? ? ?? ???? ???, ??? ?? ??? ???? AI ????? ????? ???? ? ??? ?????.

    ?? 2?? ? ? ??, NVIDIA NeMo? ??? AI ???? ??? ???? ??? ????, ???? fine-tuning ??? ??? LLM? ????? ??????? ? ??? ?????. ?? ?? ??? ?? ??? ?? ??? ????? ????, ??? ? ??? ??? ??? ???? ?? ???? ???? ??? ???.

    ?? 2. ????? ?? ?????? ? ?? ?????? ???? NVIDIA NeMo ???????

    NeMo ???????? ??? ????? ? ?? ??? ???? ?? ??

    NeMo ??????? ??? ?? ? ?? ?????? ???? ??, ??????? ? ?? ??? ?? ???????. ? ??? LLM? ?? ???? ??????, ????? ????, ?? ????? ???? ?? ??? ??? ??? ? ??? ???.

    ?? 3. ?? ? ?? ??? ?

    ???? ? ??? ????, LLM? ?? ??? ?? ?? ??? ??? ??? ????, ??? ???? ??? ????? ??? ????, ??? ?? ?? ??? ????? ????? ??? ?? ??? ??? ? ??? ???. ?? ?? ???? ?????, ???? ???? ???? ?? ??????? ???? ????.

    ?? ??, Llama 3.2 1B Instruct ??? ? 60,000?? ? ?? ??? ??? xLAM ?????? fine-tuning??, Llama 3.1 70B Instruct ??? ??? ? ?? ???? ??? ? ????. ??? ?? ??? ? 70? ????? ??? ??? ? ????.

    ??? ?? ?? ??? ??? ??? ?????. ??? ????? Jupyter ????? ??? ? ????.

    1??: NVIDIA NeMo ??????? ??

    NeMo ??????? ???? Helm ?? ??? ????, ???? ??? ????? ?? ???? ??? ? ????. ?????, ?? ?? NVIDIA GPU ????(?: NVIDIA A100 80GB ?? H100 80GB GPU ?? 2? ??)?? minikube? ??? ? ????.

    2??: ??? ??

    xLAM ????? NeMo Customizer(???)? NeMo Evaluator(????)?? ??? ? ??? ?????. ? ??? ??? JSON ???? ????, ??? ??, ?? ??? ? ??(??? ???? ??), ?? ??(??? ? ? ????)?? ???? ????. ??, ??, ??, ???? ???? ?????.

    NeMo Customizer?? ???? ??? ??? ??? ????. messages?? ??? ??? ?????? ?? ??? ????, tools?? ?? ??? ? ??? ?????.

    {
        "messages": [
            {
                "role": "user",
                "content": "Where can I find live giveaways for beta access?"
            },
            {
                "role": "assistant",
                "tool_calls": [
                    {
                        "id": "call_beta",
                        "type": "function",
                        "function": {
                            "name": "live_giveaways_by_type",
                            "arguments": {"type": "beta"}
                        }
                    }
                ]
            }
        ],
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "live_giveaways_by_type",
                    "description": "Retrieve live giveaways from the GamerPower API based on the specified type.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "type": {
                                "type": "string",
                                "description": "The type of giveaways to retrieve (e.g., game, loot, beta).",
                                "default": "game"
                            }
                        },
                        "required": []
                    }
                }
            }
        ]
    }

    NeMo Evaluator? ??? ??? ??? ?? ??? ??? ???, ??? ??? ????. ??? ??? Jupyter ????? ??? ? ????.

    3??: ??? ??

    NVIDIA NeMo Entity Store ???????? ??????, ????, ????, ??? ?? ?? ? ???? ????? ??? ? ??? ?????. ?? ?? ???? ??? ??? ??? ? ??? ?? ??, ??? ??? ??????. ??, NVIDIA NeMo Datastore ???????? ?? ???? ??? ?? ???? ????, ???, ????, ?? ?? ?? ??? ?????.

    ? ????? ?? ??? ????? Hugging Face Hub ?????(HfApi)? ??? NeMo Datastore? ?????, REST API? ?? Entity Store? Datastore ??? ?????. ?? NeMo Customizer? Evaluator? ? ??? ??? ?? ???? ?????.

    4??: LoRA fine-tuning

    Llama 3.2 1B Instruct ??? ?? LoRA ???? fine-tuning? ??? ?? NeMo Customizer? ?????. ?????? ?? ?? ? ?? ?? ???? ?? NeMo Customizer ??????? REST API ??? ?? ?????. ?? ????? ???? ??? ?? ???? ???? ??? ? ???, NeMo Customizer? Weights & Biases?? ????? ???? ?? ??? ????? ????? ? ????.

    headers = {"wandb-api-key": WANDB_API_KEY} if WANDB_API_KEY else None
      
    training_params = {
        "name": "llama-3.2-1b-xlam-ft",
        "output_model": f"{NAMESPACE}/llama-3.2-1b-xlam-run1",
        "config": BASE_MODEL,
        "dataset": {"name": DATASET_NAME, "namespace" : NAMESPACE},
        "hyperparameters": {
            "training_type": "sft",
            "finetuning_type": "lora",
            "epochs": 2,
            "batch_size": 16,
            "learning_rate": 0.0001,
            "lora": {
                "adapter_dim": 32,
                "adapter_dropout": 0.1
            }
        }
    }
      
    # Trigger the job.
    resp = requests.post(f"{NEMO_URL}/v1/customization/jobs", json=training_params, headers=headers)
    customization = resp.json()
      
    # Used to track status
    JOB_ID = customization["id"]
      
    # This will be the name of the model that will be used to send inference queries to
    CUSTOMIZED_MODEL = customization["output_model"]

    5??: ??(Inference)

    ?? ??? ????, ?? ??? LoRA ???? NeMo Entity Store? ????, NVIDIA NIM?? ?? ???? ?????. ?? NIM ?????? ????? ????, ??? ??? ??? ?? ?? ???? ??? ? ????.

    inference_client = OpenAI(
      base_url = f"{NIM_URL}/v1",
      api_key = "None"
    )
      
    completion = inference_client.chat.completions.create(
      model = CUSTOMIZED_MODEL,
      messages = test_sample["messages"],
      tools = test_sample["tools"],
      tool_choice = 'auto',
      temperature = 0.1,
      top_p = 0.7,
      max_tokens = 512,
      stream = False
    )
      
    print(completion.choices[0].message.tool_calls)

    ??? ?? ??? ??? ???? ???? ??? ??? ?????:

    [ChatCompletionMessageToolCall(id='chatcmpl-tool-bd3e4ee65e0641b7ae2285a9f82c7aae',
    function=Function(arguments='{"type": "beta"}', name='live_giveaways_by_type'), type='function')]
    At

    ? ???? ??? ? ?? ???? ????? ??? ??? ? ?????.

    6??: ??(Evaluation)

    fine-tuning? ??? NeMo Evaluator? ??? ????, ??(base) ???? ???? ??? ? ?? ??? ??? ?????.
    function_name_accuracy? function_name_and_args_accuracy ?? ??? ? ?? ??? ?? ??? ????, ?? ?? ?? ? ???? ??? ??? ?? ???? ?????.

    ??? ????? ??? ?? ??? ?????:

    1. ?? ?? ??: NeMo Evaluator? ?? ????? ????, ??? ?? ?, ??? ?? ?? ? ??? ?? ??? ?? ?? ??? ?????. ? ?? ??? ???? ??? ???? ?????.

      simple_tool_calling_eval_config = {
          "type": "custom",
          "tasks": {
              "custom-tool-calling": {
                  "type": "chat-completion",
                  "dataset": {
                      "files_url": f"hf://datasets/{NAMESPACE}/{DATASET_NAME}/testing/xlam-test.jsonl",
                      "limit": 50
                  },
                  "params": {
                      "template": {
                          "messages": "{{ item.messages | tojson}}",
                          "tools": "{{ item.tools | tojson }}",
                          "tool_choice": "auto"
                      }
                  },
                  "metrics": {
                      "tool-calling-accuracy": {
                          "type": "tool-calling",
                          "params": {"tool_calls_ground_truth": "{{ item.tool_calls | tojson }}"}
                      }
                  }
              }
          }
      }

      2. ?? ?? ?????: ???? ??? ??? ?? ??(NIM)? ?? ?? ??? ???? ?? ?????.

      res = requests.post(
          f"{NEMO_URL}/v1/evaluation/jobs",
          json={
              "config": simple_tool_calling_eval_config,
              "target": {"type": "model", "model": CUSTOMIZED_MODEL}
          }
      )
        
      base_eval_job_id = res.json()["id"]

      3. ?? ?? ??: ?? ??? ???? ???? ???? ?? REST ?????.

      res = requests.get(f"{NEMO_URL}/v1/evaluation/jobs/{base_eval_job_id}/results")
        
      ft_function_name_accuracy_score = res.json()["tasks"]["custom-tool-calling"]["metrics"]["tool-calling-accuracy"]["scores"]["function_name_accuracy"]["value"]
        
      ft_function_name_and_args_accuracy = res.json()["tasks"]["custom-tool-calling"]["metrics"]["tool-calling-accuracy"]["scores"]["function_name_and_args_accuracy"]["value"]

      ??? ?? ???? ?? ????, ?? ??? meta/llama-3.2-1B-instruct? ?? ? ?? ??? ?? ???? ?? ??? ? ????. ???? ?? ??? ?? ??? ????:

      • function_name_accuracy: 12% → 92%
      • function_name_and_args_accuracy: 8% → 72%

      ?? ?? 70? ? ? ??? meta/llama-3.1-70B-instruct? ?? ???? ???? ???? ??? ????, ?? ???? ???? ? ? ??? ?????.

      7??: ???? ?? (Guardrailing)

      ?????? ??? ???, ??? ?? ??? NIM? ????? ???? ????. ? ????? nvidia/llama-3_1-nemoguard-8b-content-safety ??? ??? ??? ???? ?????. ? ??? ??? LLM ?? ???? ??? ??????, ??? ????? LLM ??? ???? ??? ?????. ???? ???? ?? ???? ??? ???? ??? ??? ??? ?? ?????.

      ? ??? ?? ??? ???? ?? ??? ?????:

      1. ???? ?? ?? ? ??: ???? ? ?? ???? ???, ??? ?? ? ??? ? ??? ??? ?? ??? ???, ?? ???? ?????.

        data = {
            "name": "toolcalling",
            "namespace": "default",
            "data": {
              "models": [
                {
                  "type": "content_safety",
                  "engine": "nim",
                  "model": "nvidia/llama-3.1-nemoguard-8b-content-safety"
                }
              ],
              "rails": {
                "input": {
                  "flows": [
                    "content safety check input $model=content_safety"
                  ]
                },
                "dialog": {
                  "single_call": {
                    "enabled": False
                  }
                }
              },
              "prompts": [
                {
                  "task": "content_safety_check_input $model=content_safety",
                  "messages": [
                    {
                      "type": "system",
                      "content": "Task: Check if there is unsafe content in the user or agent messages in the below conversation in accordance with our
        safety policy with the below categories. Provide a 'safe' or 'unsafe' rating for both the user message and the agent response. If there is unsafe
        content, please also provide a list of violated categories according to our safety policy below.\n\n<BEGIN UNSAFE CONTENT
        CATEGORIES>\nS1: Violence.\nS2: Sexual.\nS3: Criminal Planning/Confessions.\nS4: Guns and Illegal Weapons.\nS5: Controlled/Regulated
        Substances.\nS6: Suicide and Self Harm.\nS7: Sexual (minor).\nS8: Hate/Identity Hate.\nS9: PII/Privacy.\nS10: Harassment.\nS11: Threat.\nS12:
        Profanity.\nS13: Needs Caution.\nS14: Other.\nS15: Manipulation.\nS16: Fraud/Deception.\nS17: Malware.\nS18: High Risk Gov Decision
        Making.\nS19: Political/Misinformation/Conspiracy.\nS20: Copyright/Trademark/Plagiarism.\nS21: Unauthorized Advice.\nS22: Illegal
        Activity.\nS23: Immoral/Unethical.\nS24: Deleting Logs.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\nuser:
        {{user_query}}\n<END CONVERSATION>\n\nProvide your safety assessment for the above conversation in the below output JSON
        format:\n{\"User Safety\": safe or unsafe rating for the user message, \"Response Safety\": safe or unsafe rating for the agent response.
        Omit if no agent response present. \"Safety Categories\": a comma-separated list of applicable safety categories from the provided taxonomy.
        Omit if all safe.}\n\nDo not include anything other than the output JSON in your response.\nOutput JSON:"
                    },
                    {
                      "type": "user",
                      "content": "{{ user_input }}"
                    }
                  ],
                  "output_parser": "nemoguard_parse_prompt_safety",
                  "max_tokens": 50
                }
              ]
            },
        }
        # Register the guardrails configuration
        response = requests.post(GUARDRAILS_URL, json=data)

        2. LLM NIM ?? ?? ?? ??? ??? ???? ????

        payload = {
                    "model": BASE_MODEL,
                    "messages": [
                        {
                            "role": "user",
                            "content": user_message
                        }
                    ],
                    "guardrails": {
                        "config_id": "toolcalling"
                    },
                    "temperature": 0.2,
                    "top_p": 1
         }
          
        # Check for unsafe user message in guardrails
        response = requests.post(f"{NEMO_URL}/v1/guardrail/checks", json=payload)
        status = response.json()
          
        if status == "success":
            # SAFE
            ...  # Proceed with your LLM inference call as in step 5
        else:
            # UNSAFE
            print(f"Not a safe input, the guardrails have resulted in status as {status}. Tool-calling shall not happen")

        ????

        ? ??? ??? ???? ?? ??, NeMo ???????? ??? ?? ??????, ??, ??, ??? ???? ???? ??? ?? ? ?? ?????? ??? ? ????. ? ?????? ????? ????? ?? ?? ????? ??? ??? ??? ???? ????? ????, ??? ????? ?????. ? ?? ?? ??? ???? ????? ???? ???? ??? ? ??? ??, ???? ?? ??? ??????.

        ?? NVIDIA NeMo ???????? ?? ????? ????, ??? ??? ???? ???? ?? ??? ?? ?????. ?? ??? ?? ??? ??????.

        NeMo ???????? ?? ? ??? ?? ??? ?? ??? ?????, ?? ?? ???? ????? NVIDIA AI Enterprise 90? ?? ????? ??????.

        ?? ???

        Discuss (0)
        0

        Tags

        人人超碰97caoporen国产