python-3.x, flask, pydantic, py-langchain

TypeError: issubclass() arg 1 must be a class when importing langchain in Flask


I am building a microservice with a document loader, and the app fails to start: it crashes at import time, when importing langchain's UnstructuredMarkdownLoader.

$ flask --app main run --debug
Traceback (most recent call last):
  File "venv/bin/flask", line 8, in <module>
    sys.exit(main())
  File "venv/lib/python3.9/site-packages/flask/cli.py", line 1063, in main
    cli.main()
  File "venv/lib/python3.9/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "venv/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "venv/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "venv/lib/python3.9/site-packages/click/decorators.py", line 84, in new_func
    return ctx.invoke(f, obj, *args, **kwargs)
  File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "venv/lib/python3.9/site-packages/flask/cli.py", line 911, in run_command
    raise e from None
  File "venv/lib/python3.9/site-packages/flask/cli.py", line 897, in run_command
    app = info.load_app()
  File "venv/lib/python3.9/site-packages/flask/cli.py", line 308, in load_app
    app = locate_app(import_name, name)
  File "venv/lib/python3.9/site-packages/flask/cli.py", line 218, in locate_app
    __import__(module_name)
  File "main.py", line 5, in <module>
    from lc_indexer import index_documents
  File "lc_indexer.py", line 5, in <module>
    from langchain.document_loaders import UnstructuredMarkdownLoader
  File "venv/lib/python3.9/site-packages/langchain/__init__.py", line 6, in <module>
    from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain
  File "venv/lib/python3.9/site-packages/langchain/agents/__init__.py", line 2, in <module>
    from langchain.agents.agent import (
  File "venv/lib/python3.9/site-packages/langchain/agents/agent.py", line 16, in <module>
    from langchain.agents.tools import InvalidTool
  File "venv/lib/python3.9/site-packages/langchain/agents/tools.py", line 8, in <module>
    from langchain.tools.base import BaseTool, Tool, tool
  File "venv/lib/python3.9/site-packages/langchain/tools/__init__.py", line 42, in <module>
    from langchain.tools.vectorstore.tool import (
  File "venv/lib/python3.9/site-packages/langchain/tools/vectorstore/tool.py", line 13, in <module>
    from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
  File "venv/lib/python3.9/site-packages/langchain/chains/__init__.py", line 2, in <module>
    from langchain.chains.api.base import APIChain
  File "venv/lib/python3.9/site-packages/langchain/chains/api/base.py", line 13, in <module>
    from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
  File "venv/lib/python3.9/site-packages/langchain/chains/api/prompt.py", line 2, in <module>
    from langchain.prompts.prompt import PromptTemplate
  File "venv/lib/python3.9/site-packages/langchain/prompts/__init__.py", line 3, in <module>
    from langchain.prompts.chat import (
  File "venv/lib/python3.9/site-packages/langchain/prompts/chat.py", line 10, in <module>
    from langchain.memory.buffer import get_buffer_string
  File "venv/lib/python3.9/site-packages/langchain/memory/__init__.py", line 28, in <module>
    from langchain.memory.vectorstore import VectorStoreRetrieverMemory
  File "venv/lib/python3.9/site-packages/langchain/memory/vectorstore.py", line 10, in <module>
    from langchain.vectorstores.base import VectorStoreRetriever
  File "venv/lib/python3.9/site-packages/langchain/vectorstores/__init__.py", line 2, in <module>
    from langchain.vectorstores.analyticdb import AnalyticDB
  File "venv/lib/python3.9/site-packages/langchain/vectorstores/analyticdb.py", line 16, in <module>
    from langchain.embeddings.base import Embeddings
  File "venv/lib/python3.9/site-packages/langchain/embeddings/__init__.py", line 19, in <module>
    from langchain.embeddings.openai import OpenAIEmbeddings
  File "venv/lib/python3.9/site-packages/langchain/embeddings/openai.py", line 67, in <module>
    class OpenAIEmbeddings(BaseModel, Embeddings):
  File "pydantic/main.py", line 197, in pydantic.main.ModelMetaclass.__new__
  File "pydantic/fields.py", line 506, in pydantic.fields.ModelField.infer
  File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
  File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
  File "pydantic/fields.py", line 663, in pydantic.fields.ModelField._type_analysis
  File "pydantic/fields.py", line 808, in pydantic.fields.ModelField._create_sub_type
  File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
  File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
  File "pydantic/fields.py", line 668, in pydantic.fields.ModelField._type_analysis
  File "/home/my_username/.pyenv/versions/3.9.16/lib/python3.9/typing.py", line 852, in __subclasscheck__
    return issubclass(cls, self.__origin__)
TypeError: issubclass() arg 1 must be a class

Here is the content of lc_indexer.py where the langchain imports occur

# INDEX DOCUMENTS
import os
from os.path import join, isfile

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter
from langchain.vectorstores import Chroma


def index_documents(source_directories: list[str], persist_directory: str, chunk_size: int = 1000,
                    chunk_overlap: int = 15):
    """
    Index the documents found in the given directories.

    Only regular files located directly inside each directory are picked up;
    sub-directories are not traversed.

    :param source_directories: directories whose files should be indexed
    :param persist_directory: directory handed to index_file_to_chroma for persistence
    :param chunk_size: chunk size forwarded to index_file_to_chroma (default 1000)
    :param chunk_overlap: chunk overlap forwarded to index_file_to_chroma (default 15)
    :return: None
    """

    # Gather every regular file first, so the whole listing is done before
    # the embeddings object is created.
    file_paths = []
    for folder in source_directories:
        root = f'{folder}'
        file_paths.extend(
            f'{root}/{entry}' for entry in os.listdir(root) if isfile(join(root, entry))
        )

    # A single embeddings instance is shared by all files.
    embedder = OpenAIEmbeddings()
    for path in file_paths:
        index_file_to_chroma(path, persist_directory, embedder, chunk_size, chunk_overlap)


def index_file_to_chroma(file: str, persist_directory: str, embeddings: OpenAIEmbeddings, chunk_size: int, chunk_overlap: int):
    """
    Index a single document into Chroma.

    The file is loaded with UnstructuredMarkdownLoader, split once with a
    CharacterTextSplitter (no overlap) and then again with a
    TokenTextSplitter, and the resulting chunks are stored and persisted.

    :param file: path of the file to load
    :param persist_directory: directory passed to Chroma as persist_directory
    :param embeddings: embeddings object passed to Chroma.from_documents
    :param chunk_size: chunk size used by both splitters
    :param chunk_overlap: chunk overlap used by the token splitter only
    :return: None
    """

    documents = UnstructuredMarkdownLoader(file_path=file, encoding='utf8').load()

    # First pass: character-based split, deliberately without overlap.
    char_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    pages = char_splitter.split_documents(documents)

    # Second pass: token-based split using the caller's overlap.
    token_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = token_splitter.split_documents(pages)

    store = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    store.persist()
    print(f'Indexed file {file} for module {persist_directory}')
    # Drop the reference to the store once it has been persisted.
    store = None
# /INDEX DOCUMENTS

This file was copied from a test project where this error never occurs. However, that project was only run directly from the command line rather than through Flask, so that may make a difference here.

I have already tried copying those functions and the imports into the main.py file, but I get the same error.

I have also tried commenting out the import of lc_indexer.py and the call to the index_documents function in main.py, and then the app launches without any problem.

What is the root of the problem here? The LangChain requirements have been installed.


Solution

  • Try downgrading the typing-extensions module back to version 4.5.0 (for example, `pip install typing-extensions==4.5.0`).

    I've just had an issue with version 4.6.0, which broke my project's execution after Poetry updated the dependencies. I downgraded, and now it is working again.