muryshev committed
Commit: d28a9db
Parent: 6ccc002

MORE PARAMETERS

Files changed (2)
  1. app.py +15 -12
  2. llm_backend.py +1 -4
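In short, app.py swaps its `or`-fallback and `== "true" or False` idioms for explicit `default=` arguments, adds APP_HOST, APP_PORT, and FLASK_THREADED so the server binding is configurable, and turns Flask debug mode off; llm_backend.py removes stray blank lines after the `yield b''` sentinel in generate_tokens.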
app.py CHANGED
@@ -15,18 +15,21 @@ import sys
 llm = LlmBackend()
 _lock = threading.Lock()
 
-SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT') or "Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык."
-CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', '500'))
-HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR') or '/home/user/app/.cache'
-USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', '').lower() == "true" or False
-ENABLE_GPU = os.environ.get('ENABLE_GPU', '').lower() == "true" or False
-GPU_LAYERS = int(os.environ.get('GPU_LAYERS', '0'))
-CHAT_FORMAT = os.environ.get('CHAT_FORMAT') or 'llama-2'
-REPO_NAME = os.environ.get('REPO_NAME') or 'IlyaGusev/saiga2_7b_gguf'
-MODEL_NAME = os.environ.get('MODEL_NAME') or 'model-q4_K.gguf'
-DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL') or "https://huggingface.co/datasets/muryshev/saiga-chat"
-DATA_FILENAME = os.environ.get('DATA_FILENAME') or "data-saiga-cuda-release.xml"
+SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', default="Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык.")
+CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', default='500'))
+HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR', default='/home/user/app/.cache')
+USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', default='False').lower() == 'true'
+ENABLE_GPU = os.environ.get('ENABLE_GPU', default='False').lower() == 'true'
+GPU_LAYERS = int(os.environ.get('GPU_LAYERS', default='0'))
+CHAT_FORMAT = os.environ.get('CHAT_FORMAT', default='llama-2')
+REPO_NAME = os.environ.get('REPO_NAME', default='IlyaGusev/saiga2_7b_gguf')
+MODEL_NAME = os.environ.get('MODEL_NAME', default='model-q4_K.gguf')
+DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL', default="https://huggingface.co/datasets/muryshev/saiga-chat")
+DATA_FILENAME = os.environ.get('DATA_FILENAME', default="data-saiga-cuda-release.xml")
 HF_TOKEN = os.environ.get("HF_TOKEN")
+APP_HOST = os.environ.get('APP_HOST', default='0.0.0.0')
+APP_PORT = int(os.environ.get('APP_PORT', default='7860'))
+FLASK_THREADED = os.environ.get('FLASK_THREADED', default='False').lower() == "true"
 
 # Create a lock object
 lock = threading.Lock()
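
A side note on the new idiom, as general Python behavior rather than anything specific to this repo: `os.environ.get()` accepts `default` as a keyword argument because `os.environ` is a `MutableMapping`, whose pure-Python `Mapping.get(key, default=None)` exposes the parameter name; built-in `dict.get` is implemented in C and rejects it. There is also one behavioral change from the old `or`-fallback form: a variable that is set but empty now stays empty instead of falling back to the default. A minimal sketch:

import os

# Keyword default works on os.environ (a MutableMapping)...
cache_dir = os.environ.get('HF_CACHE_DIR', default='/home/user/app/.cache')

# ...but raises TypeError on a plain dict, whose C-level get() is positional-only:
# {}.get('HF_CACHE_DIR', default='/tmp')  # TypeError

# Old form: an empty CHAT_FORMAT falls through to 'llama-2'.
chat_format_old = os.environ.get('CHAT_FORMAT') or 'llama-2'
# New form: an empty CHAT_FORMAT is kept; only an unset variable gets the default.
chat_format_new = os.environ.get('CHAT_FORMAT', default='llama-2')

# Boolean flags: anything other than "true" (case-insensitive) parses as False,
# which is why the old trailing "or False" could be dropped.
enable_gpu = os.environ.get('ENABLE_GPU', default='False').lower() == 'true'
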
@@ -174,5 +177,5 @@ if __name__ == "__main__":
 # scheduler.add_job(check_last_request_time, trigger='interval', minutes=1)
 # scheduler.start()
 
-app.run(host="0.0.0.0", port=7860, debug=True, threaded=True)
+app.run(host=APP_HOST, port=APP_PORT, debug=False, threaded=FLASK_THREADED)
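
Flask's `app.run()` forwards extra keyword arguments such as `threaded` to Werkzeug's development server, and the old hard-coded `debug=True` enabled the interactive debugger and auto-reloader, which are unsafe on a publicly reachable endpoint. A minimal sketch of the same env-driven pattern on a bare Flask app; the `/health` route is hypothetical and not part of app.py:

import os
from flask import Flask

app = Flask(__name__)

@app.route('/health')
def health():
    return 'ok'

if __name__ == '__main__':
    host = os.environ.get('APP_HOST', default='0.0.0.0')
    port = int(os.environ.get('APP_PORT', default='7860'))
    threaded = os.environ.get('FLASK_THREADED', default='False').lower() == 'true'
    # debug stays hard-coded to False: the Werkzeug debugger allows arbitrary
    # code execution if exposed, so it is not made configurable.
    app.run(host=host, port=port, debug=False, threaded=threaded)
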
 
llm_backend.py CHANGED
@@ -176,7 +176,4 @@ class LlmBackend:
         except Exception as e:
             log.error('generate_tokens - error')
             log.error(e)
-            yield b'' # End of chunk
-
-
-
+            yield b'' # End of chunk
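
The restored `yield b''` works as an end-of-stream sentinel: if generation fails partway through, the consumer still receives a final falsy chunk instead of the stream ending silently mid-response. A minimal sketch of the pattern with hypothetical names (only `generate_tokens` and the two `log.error` calls appear in llm_backend.py):

import logging

log = logging.getLogger(__name__)

def generate_tokens(token_source):
    """Yield byte chunks; on error, emit b'' so the consumer can stop cleanly."""
    try:
        for token in token_source:
            yield token
    except Exception as e:
        log.error('generate_tokens - error')
        log.error(e)
        yield b''  # End of chunk

# Consumer side: treat the empty chunk as end-of-stream.
for chunk in generate_tokens(iter([b'token1', b'token2'])):
    if not chunk:
        break
    print(chunk.decode())
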