• <xmp id="om0om">
  • <table id="om0om"><noscript id="om0om"></noscript></table>
  • Generative AI

    ?? ?? ??? ???? RAG: Llama 3.2 NeMo Retriever ??? ??? ????? ???? ??? ??

    Reading Time: 5 minutes

    ???? ??? ???? ??? ???, ???, ??? ? ??? ??? ???? ???? ??? ??? ????. ??? ???? ???? ????? ??? ??? ??? ?? ????? ???? ?????. ????? PDF, ?? ???, ???? ? ??? ???? ???? ??? ?????, ?? 1?? ?? ?? ??? ???? ??? ????? ?????.

    ??? ?? ???? ?? ??? ???? ??? ???? ?? ???? ?? ??, ? ?? ?? ??(VLM)? ???????. ? ??? ???? ?? ???? ?? ????, ??? ?? ??? ??? ?????.

    ?? 1. ??? ? ???? ??? ??? ?? ?? (??? ??: An Easy Introduction to Multimodal RAG)

    VLM? ??? ??? ??? ??? ??? ??? ?? ?? ??? ?? ?????. ?? ?? ??? ?? ??? ???? ??? ???? ????, ?? ????, ???? ??, ??? ?? ?? ? ?? ????? ???? ??????? ??? ? ????. ??? ???? ????? ???? ???? ??? ???? ??? ????? ???? ?????. ?? 1?? Gemma 3, PaliGemma, SmolVLM, QwenVL, LLaVA-1.5 ? ??? ?? ??? ????? ????(??: ChartQA) VLM ???? ?? ???? ??? ???????.

    ?? 2. OCR? ??? ??? ??? ???? ???? ???? ??? ?? RAG ?????(??? ??? ??? LLM ??)? ???? ??? ??, ??, VLM? ???? ???? RAG ?????? ??

    ??? RAG(?? ?? ??) ?????? ?? ??? ???? ???? ????, ?? 2?? ?? ??? ???? ???? ???? ??? ????????. ??? ?? VLM ??? ?? ?????, ??? ? ???? ???? ??? ?? ???????.

    ?? ?? ???? RAG ?????? ???? ???, ??? ??? ?? ??? ??? ??? ??? ?? ? ?????. ??, VLM? ??? ??? ?? LLM? ?? ?? ??(hallucination) ???? ? ????. ??? ??? ???? ????, ????(??) ??? ??? ?? ??? ?? ??? ?? ??? ??? ?????.

    ????(??) ??? ??? ???? ???? ??? ?? ??? ???? ?? ???? ?? ?? ??? ???? ???. ? ??? ??? ??? ?? ??? ???? ???, ??? ??? ???? ?? ???? ???? ???? ?? ?????. ?? ?? ??, ??? ?? ???, ??? ????? ???? ?? ?? ? ??? ???? ??-?? ??? ??? ??? ??? ?? ?? ??? ?? ??? ?????.

    ???? ?? ?? ??? ??? ???? ???? ?? ?? ? ??? ?? ????? ??? ???? ?? ???? ?? ??? ?? ??? ???? ??? ?????. ??? ??? ????, ?? ???? ?? ?? ????? ????, VLM? ??? ??? ???? ??? ??? ??? ?????.

    ?? 1?? ??? ??? ??? ?? ???? ??? ???? ?? ??????(?: ViDoRe retrieval leaderboard), ?? ??? ??? ??? ???, ???? ?? ????? ??? ??? ?? ??? ???? ??? ????. ??? ???? RAG ????? ??? ???? ???? ??, NVIDIA? ??? ?? ??? ??? ?? NVIDIA NeMo Retriever ???????? NVIDIA NIM ??? ??? ??????.

    Llama 3.2 NeMo Retriever ???? ??? 1B ??

    Llama 3.2 NeMo Retriever ???? ??? ??? 16? ? ????? ?????? ??? ?? ??? ?????. NVIDIA NIM?? ??? ? ??? ??????? ???? ???? ?? ?? ???? ??? ? ??? ?????.

    ???? ?? ??

    ???? ???? ??? ??? ?? ?? ??? ?? ??? ??? ?? ??, ? PDF ?? ?????(?? 2)? ?????. PDF ??? ??? ????? ?? ??? ??????. ?? ? ????? ??? ??, ???, ?, ?? ? ??? ??? ??? ???? ??? ?? ??? ?????. ?? ?? ??? ?? ??? ?? ??? ?????, ??? ???? ?? ?? ??(OCR)??, ??? ???? ?? ???? ?? ?????.

    ????, VLM? ??? ??? ???? ??? ??? ??? ? ????. ??? ???? ??? ??? ??? ??(?: llama-3.2-nv-embedqa-1b-v2)? ???? ?? ?? ?? ???? ?????.

    ???? ??? ??? ??? ?? ?? ??? ?????? ??? ???? ?? ???????. ??? ?? ??? ??, ?? ??? ???? ?? ???? ? ?? ?? ??? ????? ??? ??? ?? ??? ? ????(?? 3). ?? ??? ???? ?????? ????? ??? ?? ??? ??? ? ?? ???? ?????.

    ?? ???, ‘?? ????? ??(retrieval in vision space)’ ??? ???? ??? ??-?? ??? Llama 3.2 NeMo Retriever ???? ??? 1B ??? ???????. ? ??? ?? ???(SigLIP2-So400m-patch16-512), ?? ?? ??(Llama-3.2-1B), ??? ???? ???? LLM ??? ??? ???? ?? ???? ???? ???? ????.

    Eagle 2 ??? ???? ??, ??? ??? ?? ?? ???(tiled mixture of vision encoders) ??? ??? Llama 3.2 NeMo Retriever ???? ??? 1B ??? ? ??? ??? ?? ??? ?? 2,048??? ???? ?????. ? ??? ??? ??? ??? ???? ?? ??? ???? ???? ???? ?? ?? ??(contrastive loss)? ??? ????(fine-tuning)?????. ?? ??? ??? ?? ??(negative examples)? ?? ?? ?? ???? ???(positive-aware hard-negative mining) ??? ?? ???????(??? ??? NV-Retriever ??? ?????).

    ?? 3. ??? ?? ??? ??? ?? ?? ??? ??? ???? ?? ??? ?? ??

    ?? ?? ????

    ??? 10?? ViDoRe V1 ????? ???? ??? ? ?? ???? ????? DigitalCorpora? Earnings?? 1? ???? ??? ???? ??? ???????.

    • Earnings ????: 512?? PDF(?? ???, ??? ?? ?? ?)? ??? ?? ??????, ??, ?, ?????? ?? 3,000? ?? ???? ???, 600? ??? ?? ??? ?? ??? ?? ??? ?? ?????.
    • DigitalCorpora-767 ????: Digital Corpora?? ??? 767?? PDF? ??? ??? ?? ?????, ???, ?, ??, ?????? ?? ? 991?? ?? ?? ?? ??? ???? ????.

    ? ??? ??? ?? ??? ??? ???? ?????? ???????. ?? 4?? ??? ? ??, NVIDIA llama-3.2-nemoretriever-1b-vlm-embed-v1 ??? Recall@5 ???? ?? ??? 20? ???? ??? ?? ?? ??? ?? ?? ??? ?? ???? ?????.

    ?? 4. 10?? ViDoRe V1 ????, Earnings, DigitalCorpora-767 ?????? NVIDIA llama-3.2-nemoretriever-1b-vlm-embed-v1? ?? ?? ??? ???? ??? ?? ?? (Recall@5 ??)
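    | ??(????)? | Digital Corpora ?? ? | Digital Corpora?? Nemo-retriever-vlm-embed-v1 ?? | Earnings ?? ? | Earnings?? Nemo-retriever-vlm-embed-v1 ?? |
    | --- | --- | --- | --- | --- |
    | ???? | 991 | 84.5% | 628 | 66.1% |
    | ??? | 235 | 75.3% | 157 | 44.6% |
    | ?? | 268 | 88.1% | 129 | 89.9% |
    | ??? | 488 | 86.9% | 242 | 96.0% |
    | ????? | - | - | 100 | 55.0% |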

    ? 1. DigitalCorpora-767 ? Earnings ????? ? ????? Recall@5 ??

    ??? OpenAI API? ???? ???? llama-3.2-nemoretriever-1b-vlm-embed-v1 NIM? ??? ??? ???(???) ???? ???? ?? ?? ???????.

    ??:

    from openai import OpenAI

    # The OpenAI SDK is pointed at NVIDIA's hosted endpoint via base_url.
    # The api_key value below is a placeholder string, not a real credential.
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
    )

    # Request fields that have no named parameter on embeddings.create();
    # they are handed to the SDK through extra_body.
    query_options = {"modality": ["text"], "input_type": "query", "truncate": "NONE"}
    queries = ["What is the civil caseload in South Dakota courts?"]

    response = client.embeddings.create(
        model="nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
        input=queries,
        encoding_format="float",
        extra_body=query_options,
    )

    # Only one query was sent, so the first result is the one of interest.
    first_embedding = response.data[0].embedding
    print(first_embedding)

    ???(???):

    import base64
    import mimetypes

    import requests
    from openai import OpenAI

    # Image to embed: either an http(s) URL or a path to a local file.
    image_source = "https://assets.ngc.nvidia.com/products/api-catalog/nemo-retriever/embedding/court-sizing-metrics.png"

    # Read the raw image bytes from wherever image_source points.
    if image_source.startswith(('http://', 'https://')):
        # Bound the wait so a stalled server cannot hang the script, and stop on
        # HTTP errors so an error page is never encoded and sent as an "image".
        response = requests.get(image_source, timeout=30)
        response.raise_for_status()
        image_bytes = response.content
    else:
        with open(image_source, "rb") as image_file:
            image_bytes = image_file.read()
    image_b64 = base64.b64encode(image_bytes).decode()

    # Label the data URL with the type implied by the file extension (the sample
    # asset is a PNG). Fall back to the previously hard-coded image/jpeg when the
    # extension is missing or does not map to an image type.
    mime_type = mimetypes.guess_type(image_source)[0] or "image/jpeg"
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # The OpenAI SDK is pointed at NVIDIA's hosted endpoint via base_url.
    # The api_key value below is a placeholder string, not a real credential.
    client = OpenAI(
      api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
      base_url="https://integrate.api.nvidia.com/v1"
    )

    # The image travels as a base64 data URL in `input`; the modality and
    # input_type fields are passed through extra_body.
    response = client.embeddings.create(
        input=[f"data:{mime_type};base64,{image_b64}"],
        model="nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
        encoding_format="float",
        extra_body={"modality": ["image"], "input_type": "passage", "truncate": "NONE"}
    )

    # Only one image was sent, so the first result is the one of interest.
    print(response.data[0].embedding)

    ??? ??? ?? ?? ????? ??? ?????

    NVIDIA NeMo Retriever? ?? ???? ??? ?????? ???? ??????? ????, ??? ??? ???? ????? ??? ? ??? ?????. ?? ?? API ????? ??? NVIDIA NeMo Retriever NIM ???????? ??? ???? ??????.

    ?? ???

    Discuss (0)
    +1

    Tags

    人人超碰97caoporen国产